Re: [Mesa-dev] [PATCH] mesa/st: Reduce the number of frontbuffer flush calls

2017-06-26 Thread Thomas Hellstrom

On 06/26/2017 09:31 PM, Marek Olšák wrote:

Reviewed-by: Marek Olšák 

Marek


Thanks for reviewing, Marek.

/Thomas





On Thu, Jun 22, 2017 at 1:00 PM, Thomas Hellstrom  wrote:

The mesa state tracker was needlessly flushing the front buffer even if it
hadn't been drawn to since the last flush. This was happening during
glXSwapBuffers if we at some point previously had set that frontbuffer as
a read- or draw renderbuffer, or at glFlush() or glFinish() if we at some
point previously had rendered to the front buffer. Since the frontbuffer
flush typically means a full drawable copy, it's a pretty big waste.

Signed-off-by: Thomas Hellstrom 
---
  src/mesa/state_tracker/st_cb_flush.c | 37 ++--
  src/mesa/state_tracker/st_manager.c  | 12 
  2 files changed, 10 insertions(+), 39 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_flush.c 
b/src/mesa/state_tracker/st_cb_flush.c
index e8c6672..5a26018 100644
--- a/src/mesa/state_tracker/st_cb_flush.c
+++ b/src/mesa/state_tracker/st_cb_flush.c
@@ -46,35 +46,6 @@
  #include "util/u_gen_mipmap.h"


-/** Check if we have a front color buffer and if it's been drawn to. */
-static inline GLboolean
-is_front_buffer_dirty(struct st_context *st)
-{
-   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-   struct st_renderbuffer *strb
-  = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
-   return strb && strb->defined;
-}
-
-
-/**
- * Tell the screen to display the front color buffer on-screen.
- */
-static void
-display_front_buffer(struct st_context *st)
-{
-   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
-   struct st_renderbuffer *strb
-  = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
-
-   if (strb) {
-  /* Hook for copying "fake" frontbuffer if necessary:
-   */
-  st_manager_flush_frontbuffer(st);
-   }
-}
-
-
  void st_flush(struct st_context *st,
struct pipe_fence_handle **fence,
unsigned flags)
@@ -122,9 +93,7 @@ static void st_glFlush(struct gl_context *ctx)
  */
 st_flush(st, NULL, 0);

-   if (is_front_buffer_dirty(st)) {
-  display_front_buffer(st);
-   }
+   st_manager_flush_frontbuffer(st);
  }


@@ -137,9 +106,7 @@ static void st_glFinish(struct gl_context *ctx)

 st_finish(st);

-   if (is_front_buffer_dirty(st)) {
-  display_front_buffer(st);
-   }
+   st_manager_flush_frontbuffer(st);
  }


diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index 9978e3f..ea67097 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -839,11 +839,15 @@ st_manager_flush_frontbuffer(struct st_context *st)

 if (stfb)
strb = 
st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
-   if (!strb)
-  return;

-   /* never a dummy fb */
-   stfb->iface->flush_front(>iface, stfb->iface, ST_ATTACHMENT_FRONT_LEFT);
+   /* Do we have a front color buffer and has it been drawn to since last
+* frontbuffer flush?
+*/
+   if (strb && strb->defined) {
+  stfb->iface->flush_front(>iface, stfb->iface,
+   ST_ATTACHMENT_FRONT_LEFT);
+  strb->defined = GL_FALSE;
+   }
  }

  /**
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev=DwIFaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=zWPqN0aIKcvp7mhTQrh2FZuo9wfU8CuODAoVnPhPwp4=YUzjEKkZ5Kpaxxih7C5MbrIs33JnL6Tc74IkM-I_4cQ=



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/92] gallium: add PIPE_CAP_NIR_LOWER_IO

2017-06-26 Thread Connor Abbott
Ok, so I just looked into it a little further, and I guess that since
nir_to_llvm needs to know about the ABI to know where all the
"special" inputs/outputs like tess factors, position etc. are,
switching radv to using the normal input/output intrinsics would be
better done as part of your series, so I'll leave that to you. In the
meantime, though, I'm going to send out some patches which improve
radv's local variable handling which also remove one of the users of
radv_get_deref_offset() that ultimately needs to go. It'll cause some
rebase conflicts for you, but they shouldn't be so hard to solve.

On Mon, Jun 26, 2017 at 12:25 PM, Connor Abbott  wrote:
> Forgot to mention... I think I'll take a pass at doing this today.
> It'll distract me from some weird bug I've been trying to fight with
> the AMD_shader_ballot stuff :)
>
> On Mon, Jun 26, 2017 at 12:24 PM, Connor Abbott  wrote:
>> So, I think that rather than doing this, we should make radv call
>> nir_lower_io instead. There's currently a bunch of code in the
>> NIR-to-LLVM translation to calculate dereference offsets and split up
>> loads, which is just silly - use the lowering pass instead!
>>
>> On Mon, Jun 26, 2017 at 7:09 AM, Nicolai Hähnle  wrote:
>>> From: Nicolai Hähnle 
>>>
>>> Existing NIR drivers return 1 here, since that reflects the current
>>> behavior.
>>> ---
>>>  src/gallium/docs/source/screen.rst   | 5 +
>>>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>>>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>>>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>>>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>>>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>>>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>>>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>>>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>>>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>>>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>>>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>>>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>>>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>>>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>>>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>>>  src/gallium/include/pipe/p_defines.h | 1 +
>>>  17 files changed, 21 insertions(+)
>>>
>>> diff --git a/src/gallium/docs/source/screen.rst 
>>> b/src/gallium/docs/source/screen.rst
>>> index b375e53..9dab38b 100644
>>> --- a/src/gallium/docs/source/screen.rst
>>> +++ b/src/gallium/docs/source/screen.rst
>>> @@ -391,20 +391,25 @@ The integer capabilities:
>>>shader outputs.
>>>  * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
>>>PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
>>>  * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
>>>  * ``PIPE_CAP_POST_DEPTH_COVERAGE``: whether
>>>``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
>>>  * ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are
>>>supported.
>>>  * ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should
>>>reference texture and sampler as NIR derefs instead of by indices.
>>> +* ``PIPE_CAP_NIR_LOWER_IO``: Whether the nir_lower_io pass should be run to
>>> +  replace variable accesses by the corresponding intrinsics. Note that when
>>> +  this is false, the state tracker can no longer leave accesses to
>>> +  default-block uniforms in the shader, meaning that 
>>> nir_lower_uniforms_to_ubo
>>> +  is used.
>>>
>>>
>>>  .. _pipe_capf:
>>>
>>>  PIPE_CAPF_*
>>>  
>>>
>>>  The floating-point capabilities are:
>>>
>>>  * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
>>> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
>>> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>>> index 85b7e91..7f2a231 100644
>>> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
>>> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>>> @@ -252,20 +252,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, 
>>> enum pipe_cap param)
>>> case PIPE_CAP_TGSI_CLOCK:
>>> case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
>>> case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
>>> case PIPE_CAP_TGSI_BALLOT:
>>> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
>>> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
>>> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
>>> case PIPE_CAP_POST_DEPTH_COVERAGE:
>>> case PIPE_CAP_BINDLESS_TEXTURE:
>>> case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
>>> +   case PIPE_CAP_NIR_LOWER_IO:
>>>return 0;
>>>
>>> /* Stream output. */
>>> case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
>>> case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
>>> case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:

[Mesa-dev] [PATCH 1/2] radeonsi: check nr_cbufs in other places before flushing CB

2017-06-26 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index b236bed..a674a60 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3968,21 +3968,22 @@ static void si_set_tess_state(struct pipe_context *ctx,
 
si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, );
pipe_resource_reference(, NULL);
 }
 
 static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
struct si_context *sctx = (struct si_context *)ctx;
 
/* Multisample surfaces are flushed in si_decompress_textures. */
-   if (sctx->framebuffer.nr_samples <= 1) {
+   if (sctx->framebuffer.nr_samples <= 1 &&
+   sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
 SI_CONTEXT_INV_GLOBAL_L2 |
 SI_CONTEXT_FLUSH_AND_INV_CB;
}
sctx->framebuffer.do_update_surf_dirtiness = true;
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
 static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
@@ -4014,21 +4015,22 @@ static void si_memory_barrier(struct pipe_context *ctx, 
unsigned flags)
 * L1 isn't used.
 */
if (sctx->screen->b.chip_class <= CIK)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
 
/* MSAA color, any depth and any stencil are flushed in
 * si_decompress_textures when needed.
 */
if (flags & PIPE_BARRIER_FRAMEBUFFER &&
-   sctx->framebuffer.nr_samples <= 1) {
+   sctx->framebuffer.nr_samples <= 1 &&
+   sctx->framebuffer.state.nr_cbufs) {
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
 
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
if (sctx->screen->b.chip_class <= VI &&
flags & PIPE_BARRIER_INDIRECT_BUFFER)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi: move instance divisors into a constant buffer

2017-06-26 Thread Marek Olšák
From: Marek Olšák 

Shader key size: 107 -> 47

Divisors of 0 and 1 are encoded in the shader key. Greater instance divisors
are loaded from a constant buffer.

The shader code doing the division is huge. Is it something we need to
worry about? Does any app use instance divisors >= 2?

VS prolog disassembly:
s_load_dwordx4 s[12:15], s[0:1], 0x80  ; C00A0300 0080
s_nop 0; BF80
s_waitcnt lgkmcnt(0)   ; BF8C007F
s_buffer_load_dword s14, s[12:15], 0x4 ; C0220386 0004
s_waitcnt lgkmcnt(0)   ; BF8C007F
v_cvt_f32_u32_e32 v4, s14  ; 7E080C0E
v_rcp_iflag_f32_e32 v4, v4 ; 7E084704
v_mul_f32_e32 v4, 0x4f80, v4   ; 0A0808FF 4F80
v_cvt_u32_f32_e32 v4, v4   ; 7E080F04
v_mul_hi_u32 v5, v4, s14   ; D2860005 1D04
v_mul_lo_i32 v6, v4, s14   ; D2850006 1D04
v_cmp_eq_u32_e64 s[12:13], 0, v5   ; D0CA000C 00020A80
v_sub_i32_e32 v5, vcc, 0, v6   ; 340A0C80
v_cndmask_b32_e64 v5, v6, v5, s[12:13] ; D105 00320B06
v_mul_hi_u32 v5, v5, v4; D2860005 00020905
v_add_i32_e32 v6, vcc, v5, v4  ; 320C0905
v_subrev_i32_e32 v4, vcc, v5, v4   ; 36080905
v_cndmask_b32_e64 v4, v4, v6, s[12:13] ; D104 00320D04
v_mul_hi_u32 v5, v4, v1; D2860005 00020304
v_add_i32_e32 v4, vcc, s8, v0  ; 32080008
v_mul_lo_i32 v6, v5, s14   ; D2850006 1D05
v_add_i32_e32 v7, vcc, 1, v5   ; 320E0A81
v_cmp_ge_u32_e64 s[12:13], v1, v6  ; D0CE000C 00020D01
v_sub_i32_e32 v6, vcc, v1, v6  ; 340C0D01
v_cmp_le_u32_e32 vcc, s14, v6  ; 7D960C0E
v_cndmask_b32_e64 v8, 0, -1, s[12:13]  ; D108 00318280
v_cndmask_b32_e64 v6, 0, -1, vcc   ; D106 01A98280
v_and_b32_e32 v6, v8, v6   ; 260C0D08
v_cmp_eq_u32_e32 vcc, 0, v6; 7D940C80
v_cndmask_b32_e32 v6, v7, v5, vcc  ; 000C0B07
v_add_i32_e32 v5, vcc, -1, v5  ; 320A0AC1
v_cmp_eq_u32_e32 vcc, 0, v8; 7D941080
v_cndmask_b32_e32 v5, v6, v5, vcc  ; 000A0B06
v_add_i32_e32 v5, vcc, s9, v5  ; 320A0A09
---
 src/gallium/drivers/radeonsi/si_descriptors.c   |  2 +
 src/gallium/drivers/radeonsi/si_pipe.c  |  2 +
 src/gallium/drivers/radeonsi/si_shader.c| 78 +
 src/gallium/drivers/radeonsi/si_shader.h|  9 ++-
 src/gallium/drivers/radeonsi/si_state.c | 15 +
 src/gallium/drivers/radeonsi/si_state.h |  3 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  7 ++-
 7 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 75d2a1d..88f7dce 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2185,20 +2185,22 @@ void si_emit_graphics_shader_userdata(struct si_context 
*sctx,
   
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
si_emit_shader_pointer(sctx, descs,
   
R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
} else {
si_emit_shader_pointer(sctx, descs,
   
R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs,
   
R_00B330_SPI_SHADER_USER_DATA_ES_0);
si_emit_shader_pointer(sctx, descs,
   
R_00B430_SPI_SHADER_USER_DATA_HS_0);
+   si_emit_shader_pointer(sctx, descs,
+  
R_00B530_SPI_SHADER_USER_DATA_LS_0);
}
}
 
mask = sctx->shader_pointers_dirty &
   u_bit_consecutive(SI_DESCS_FIRST_SHADER,
 SI_DESCS_FIRST_COMPUTE - 
SI_DESCS_FIRST_SHADER);
 
while (mask) {
unsigned i = u_bit_scan();
unsigned shader = (i - SI_DESCS_FIRST_SHADER) / 
SI_NUM_SHADER_DESCS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 4088849..a940bb8 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -301,20 +301,22 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
 
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
sctx->b.b.set_constant_buffer(>b.b, 
shader, i,
  
>null_const_buf);
}
}
 

Re: [Mesa-dev] [PATCH] pipe_loader_sw: fix compilation warning

2017-06-26 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Jun 26, 2017 at 10:48 PM, Brian Paul  wrote:
> Add the new 'flags' parameter to pipe_loader_sw_create_screen().
> ---
>  src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 
> b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
> index 0fbc78e..46c6604 100644
> --- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
> +++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
> @@ -294,7 +294,8 @@ pipe_loader_sw_configuration(struct pipe_loader_device 
> *dev,
>  }
>
>  static struct pipe_screen *
> -pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
> +pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
> + unsigned flags)
>  {
> struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
> struct pipe_screen *screen;
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: skip varyings without slot

2017-06-26 Thread Timothy Arceri

On 16/06/17 18:12, Juan A. Suarez Romero wrote:


Commit 00620782c9 (i965: use nir_shader_gather_info() over
do_set_program_inouts()) changed how we compute the outputs written.

In the previous version it was using the IR declared outputs, while in
the new one it uses NIR to parse the instructions that write outputs.

Thus, if the shader has declared some output that is not written later
in the code, like this:

~~~
struct S {
 vec4 a;
 vec4 b;
 vec4 c;
};

layout (xfb_offset = sizeof_type) out S s;

void main()
{

 s.a = vec4(1.0, 0.0, 0.0, 1.0);
 s.c = vec4(0.0, 1.0, 0.0, 1.0);
}
~~~

The former version computed 3 outputs written (s.a, s.b and s.c), while
the new version only counts 2 (s.a and s.c).

This means that with the new version, there could be varyings in the VUE
map that do not have a slot assigned (s.b), which must be skipped.

This fixes KHR-GL45.enhanced_layouts.xfb_capture_struct.
---
  src/mesa/drivers/dri/i965/genX_state_upload.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index a5ad2ca..573f0e3 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -3102,9 +3102,10 @@ genX(upload_3dstate_so_decl_list)(struct brw_context 
*brw,
const unsigned stream_id = output->StreamId;
assert(stream_id < MAX_VERTEX_STREAMS);
  
-  buffer_mask[stream_id] |= 1 << buffer;

+  if (vue_map->varying_to_slot[varying] == -1)
+ continue;
  
-  assert(vue_map->varying_to_slot[varying] >= 0);

+  buffer_mask[stream_id] |= 1 << buffer;
  
My feeling is we should try to avoid adding it to the VUE map in the 
first place rather than trying to work around it.


Is it not possible to do that instead?



/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
 * array.  Instead, it simply increments DstOffset for the following


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/12] i965: Assert that blorp always handles color blits

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

---
 src/mesa/drivers/dri/i965/brw_blorp.c | 2 ++
 src/mesa/drivers/dri/i965/intel_fbo.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 92d1d2a..9c9b859 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -707,6 +707,8 @@ brw_blorp_framebuffer(struct brw_context *brw,
   }
}
 
+   /* try_blorp_blit should always be successful for color blits. */
+   assert(!(mask & GL_COLOR_BUFFER_BIT));
return mask;
 }
 
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index caf182c..f0f87bb 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -932,6 +932,9 @@ intel_blit_framebuffer(struct gl_context *ctx,
if (mask == 0x0)
   return;
 
+   /* brw_blorp_framebuffer should always be successful for color blits. */
+   assert(!(mask & GL_COLOR_BUFFER_BIT));
+
mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb,
  srcX0, srcY0, srcX1, srcY1,
  dstX0, dstY0, dstX1, dstY1,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] i965: Silence unused parameter warning

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

brw_blorp.c: In function ‘brw_blorp_clear_depth_stencil’:
brw_blorp.c:913:53: warning: unused parameter ‘partial_clear’ 
[-Wunused-parameter]
   GLbitfield mask, bool partial_clear)
 ^

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 2 +-
 src/mesa/drivers/dri/i965/brw_blorp.h | 2 +-
 src/mesa/drivers/dri/i965/brw_clear.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 87c9dd4..5e2fb01 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -917,7 +917,7 @@ brw_blorp_clear_color(struct brw_context *brw, struct 
gl_framebuffer *fb,
 void
 brw_blorp_clear_depth_stencil(struct brw_context *brw,
   struct gl_framebuffer *fb,
-  GLbitfield mask, bool partial_clear)
+  GLbitfield mask)
 {
const struct gl_context *ctx = >ctx;
struct gl_renderbuffer *depth_rb =
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index 29d5788..f03413d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -65,7 +65,7 @@ brw_blorp_clear_color(struct brw_context *brw, struct 
gl_framebuffer *fb,
 void
 brw_blorp_clear_depth_stencil(struct brw_context *brw,
   struct gl_framebuffer *fb,
-  GLbitfield mask, bool partial_clear);
+  GLbitfield mask);
 
 void
 brw_blorp_resolve_color(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 1e434c9..ce67e3f 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -235,7 +235,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
}
 
if (brw->gen >= 6 && (mask & BUFFER_BITS_DEPTH_STENCIL)) {
-  brw_blorp_clear_depth_stencil(brw, fb, mask, partial_clear);
+  brw_blorp_clear_depth_stencil(brw, fb, mask);
   debug_mask("blorp depth/stencil", mask & BUFFER_BITS_DEPTH_STENCIL);
   mask &= ~BUFFER_BITS_DEPTH_STENCIL;
}
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/12] meta: Stop frobbing MatrixMode

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

   text    data     bss     dec     hex filename
7155246  256860   37332 7449438  71ab5e 32-bit i965_dri.so before
7155058  256860   37332 7449250  71aaa2 32-bit i965_dri.so after
6788683  328056   50704 7167443  6d5dd3 64-bit i965_dri.so before
6788611  328056   50704 7167371  6d5d8b 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/common/meta.c | 47 --
 src/mesa/drivers/common/meta.h |  1 -
 2 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 58e28bd..9095caf 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -655,31 +655,31 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
}
 
if (state & MESA_META_TRANSFORM) {
-  GLuint activeTexture = ctx->Texture.CurrentUnit;
   memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
  16 * sizeof(GLfloat));
   memcpy(save->ProjectionMatrix, ctx->ProjectionMatrixStack.Top->m,
  16 * sizeof(GLfloat));
   memcpy(save->TextureMatrix, ctx->TextureMatrixStack[0].Top->m,
  16 * sizeof(GLfloat));
-  save->MatrixMode = ctx->Transform.MatrixMode;
+
   /* set 1:1 vertex:pixel coordinate transform */
-  _mesa_ActiveTexture(GL_TEXTURE0);
-  _mesa_MatrixMode(GL_TEXTURE);
-  _mesa_LoadIdentity();
-  _mesa_ActiveTexture(GL_TEXTURE0 + activeTexture);
-  _mesa_MatrixMode(GL_MODELVIEW);
-  _mesa_LoadIdentity();
-  _mesa_MatrixMode(GL_PROJECTION);
-  _mesa_LoadIdentity();
+  _mesa_load_identity_matrix(ctx, >ModelviewMatrixStack);
+  _mesa_load_identity_matrix(ctx, >ProjectionMatrixStack);
+  _mesa_load_identity_matrix(ctx, >TextureMatrixStack[0]);
 
   /* glOrtho with width = 0 or height = 0 generates GL_INVALID_VALUE.
* This can occur when there is no draw buffer.
*/
-  if (ctx->DrawBuffer->Width != 0 && ctx->DrawBuffer->Height != 0)
- _mesa_Ortho(0.0, ctx->DrawBuffer->Width,
- 0.0, ctx->DrawBuffer->Height,
- -1.0, 1.0);
+  if (ctx->DrawBuffer->Width != 0 && ctx->DrawBuffer->Height != 0) {
+ /* Don't FLUSH_VERTICES here because _mesa_load_identity_matrix will
+  * have already done it.
+  */
+ _math_matrix_ortho(ctx->ProjectionMatrixStack.Top,
+0.0f, (GLfloat) ctx->DrawBuffer->Width,
+0.0f, (GLfloat) ctx->DrawBuffer->Height,
+-1.0f, 1.0f);
+ ctx->NewState |= ctx->ProjectionMatrixStack.DirtyFlag;
+  }
 
   if (ctx->Extensions.ARB_clip_control) {
  save->ClipOrigin = ctx->Transform.ClipOrigin;
@@ -1066,19 +1066,9 @@ _mesa_meta_end(struct gl_context *ctx)
}
 
if (state & MESA_META_TRANSFORM) {
-  GLuint activeTexture = ctx->Texture.CurrentUnit;
-  _mesa_ActiveTexture(GL_TEXTURE0);
-  _mesa_MatrixMode(GL_TEXTURE);
-  _mesa_LoadMatrixf(save->TextureMatrix);
-  _mesa_ActiveTexture(GL_TEXTURE0 + activeTexture);
-
-  _mesa_MatrixMode(GL_MODELVIEW);
-  _mesa_LoadMatrixf(save->ModelviewMatrix);
-
-  _mesa_MatrixMode(GL_PROJECTION);
-  _mesa_LoadMatrixf(save->ProjectionMatrix);
-
-  _mesa_MatrixMode(save->MatrixMode);
+  _mesa_load_matrix(ctx, >ModelviewMatrixStack, 
save->ModelviewMatrix);
+  _mesa_load_matrix(ctx, >ProjectionMatrixStack, 
save->ProjectionMatrix);
+  _mesa_load_matrix(ctx, >TextureMatrixStack[0], save->TextureMatrix);
 
   if (ctx->Extensions.ARB_clip_control)
  _mesa_ClipControl(save->ClipOrigin, save->ClipDepthMode);
@@ -1455,8 +1445,7 @@ _mesa_meta_setup_ff_tnl_for_blit(struct gl_context *ctx,
0);
 
/* setup projection matrix */
-   _mesa_MatrixMode(GL_PROJECTION);
-   _mesa_LoadIdentity();
+   _mesa_load_identity_matrix(ctx, >ProjectionMatrixStack);
 }
 
 /**
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index c09cb3e..fa84995 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -133,7 +133,6 @@ struct save_state
struct gl_stencil_attrib Stencil;
 
/** MESA_META_TRANSFORM */
-   GLenum MatrixMode;
GLfloat ModelviewMatrix[16];
GLfloat ProjectionMatrix[16];
GLfloat TextureMatrix[16];
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/12] intel: Silence many unused parameter warnings in release builds

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

In file included from brw_context.h:40:0,
 from brw_blorp.c:32:
../../../../../src/intel/compiler/brw_compiler.h: In function 
‘brw_stage_has_packed_dispatch’:
../../../../../src/intel/compiler/brw_compiler.h:1150:61: warning: unused 
parameter ‘devinfo’ [-Wunused-parameter]
 brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
 ^~~

Signed-off-by: Ian Romanick 
---
 src/intel/compiler/brw_compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 7887374..19dd43c 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1147,7 +1147,7 @@ encode_slm_size(unsigned gen, uint32_t bytes)
  * '2^n - 1' for some n.
  */
 static inline bool
-brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+brw_stage_has_packed_dispatch(MAYBE_UNUSED const struct gen_device_info 
*devinfo,
   gl_shader_stage stage,
   const struct brw_stage_prog_data *prog_data)
 {
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] i965: Don't recalculate partial_clear inside brw_fast_clear_depth

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

   text    data     bss     dec     hex filename
7155954  256860   37332 7450146  71ae22 32-bit i965_dri.so before
7155858  256860   37332 7450050  71adc2 32-bit i965_dri.so after
6789395  328056   50704 7168155  6d609b 64-bit i965_dri.so before
6789299  328056   50704 7168059  6d603b 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_clear.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
b/src/mesa/drivers/dri/i965/brw_clear.c
index 7fbaa3a..1e434c9 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -99,7 +99,7 @@ noop_scissor(struct gl_framebuffer *fb)
  * at least until a resolve to the real depth buffer happens.
  */
 static bool
-brw_fast_clear_depth(struct gl_context *ctx)
+brw_fast_clear_depth(struct gl_context *ctx, bool partial_clear)
 {
struct brw_context *brw = brw_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
@@ -118,7 +118,7 @@ brw_fast_clear_depth(struct gl_context *ctx)
 * a previous clear had happened at a different clear value and resolve it
 * first.
 */
-   if ((ctx->Scissor.EnableFlags & 1) && !noop_scissor(fb)) {
+   if (partial_clear) {
   perf_debug("Failed to fast clear %dx%d depth because of scissors.  "
  "Possible 5%% performance win if avoided.\n",
  mt->logical_width0, mt->logical_height0);
@@ -213,7 +213,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
 
if (mask & BUFFER_BIT_DEPTH) {
-  if (brw_fast_clear_depth(ctx)) {
+  if (brw_fast_clear_depth(ctx, partial_clear)) {
 DBG("fast clear: depth\n");
 mask &= ~BUFFER_BIT_DEPTH;
   }
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] i965: Silence uninitialized variable warnings

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

brw_blorp.c:1007:4: warning: ‘num_layers’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
blorp_clear_depth_stencil(, _surf, _surf,
^
  level, start_layer, num_layers,
  ~~~
  x0, y0, x1, y1,
  ~~~
  (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
  
  stencil_mask, ctx->Stencil.Clear);
  ~
brw_blorp.c:1007:4: warning: ‘start_layer’ may be used uninitialized in this 
function [-Wmaybe-uninitialized]
brw_blorp.c:1007:4: warning: ‘level’ may be used uninitialized in this function 
[-Wmaybe-uninitialized]

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 5e2fb01..92d1d2a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -962,7 +962,7 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw,
if (x0 == x1 || y0 == y1)
   return;
 
-   uint32_t level, start_layer, num_layers;
+   uint32_t level = 0, start_layer = 0, num_layers = 0;
struct isl_surf isl_tmp[4];
struct blorp_surf depth_surf, stencil_surf;
 
@@ -1008,6 +1008,7 @@ brw_blorp_clear_depth_stencil(struct brw_context *brw,
}
 
assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);
+   assert(num_layers != 0);
 
struct blorp_batch batch;
blorp_batch_init(>blorp, , brw, 0);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/12] meta: Remove support for clearing integer buffers

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

Since i965 no longer uses this function for clearing color buffers,
there is no driver left that will ever support integer textures and use
_mesa_meta_glsl_Clear.

As a side note, the has_integer_textures check was rubbish anyway
because meta always smashes the API to API_OPENGL_COMPAT.

   text    data     bss     dec     hex filename
7155858  256860   37332 7450050  71adc2 32-bit i965_dri.so before
7155026  256860   37332 7449218  71aa82 32-bit i965_dri.so after
6789299  328056   50704 7168059  6d603b 64-bit i965_dri.so before
6788499  328056   50704 7167259  6d5d1b 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/common/meta.c | 55 ++
 src/mesa/drivers/common/meta.h |  1 -
 2 files changed, 2 insertions(+), 54 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 7314384..58e28bd 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1497,7 +1497,6 @@ meta_glsl_clear_init(struct gl_context *ctx, struct 
clear_state *clear)
   "{\n"
   "   gl_FragColor = color;\n"
   "}\n";
-   bool has_integer_textures;
 
_mesa_meta_setup_vertex_objects(ctx, >VAO, >buf_obj, true,
3, 0, 0);
@@ -1507,49 +1506,6 @@ meta_glsl_clear_init(struct gl_context *ctx, struct 
clear_state *clear)
 
_mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, "meta clear",
>ShaderProg);
-
-   has_integer_textures = _mesa_is_gles3(ctx) ||
-  (_mesa_is_desktop_gl(ctx) && ctx->Const.GLSLVersion >= 130);
-
-   if (has_integer_textures) {
-  void *shader_source_mem_ctx = ralloc_context(NULL);
-  const char *vs_int_source =
- ralloc_asprintf(shader_source_mem_ctx,
- "#version 130\n"
- "#extension GL_AMD_vertex_shader_layer : enable\n"
- "#extension GL_ARB_draw_instanced : enable\n"
- "#extension GL_ARB_explicit_attrib_location :enable\n"
- "layout(location = 0) in vec4 position;\n"
- "void main()\n"
- "{\n"
- "#ifdef GL_AMD_vertex_shader_layer\n"
- "   gl_Layer = gl_InstanceID;\n"
- "#endif\n"
- "   gl_Position = position;\n"
- "}\n");
-  const char *fs_int_source =
- ralloc_asprintf(shader_source_mem_ctx,
- "#version 130\n"
- "#extension GL_ARB_explicit_attrib_location :enable\n"
- "#extension GL_ARB_explicit_uniform_location 
:enable\n"
- "layout(location = 0) uniform ivec4 color;\n"
- "out ivec4 out_color;\n"
- "\n"
- "void main()\n"
- "{\n"
- "   out_color = color;\n"
- "}\n");
-
-  _mesa_meta_compile_and_link_program(ctx, vs_int_source, fs_int_source,
-  "integer clear",
-  >IntegerShaderProg);
-  ralloc_free(shader_source_mem_ctx);
-
-  /* Note that user-defined out attributes get automatically assigned
-   * locations starting from 0, so we don't need to explicitly
-   * BindFragDataLocation to 0.
-   */
-   }
 }
 
 static void
@@ -1561,10 +1517,6 @@ meta_glsl_clear_cleanup(struct gl_context *ctx, struct 
clear_state *clear)
clear->VAO = 0;
_mesa_reference_buffer_object(ctx, >buf_obj, NULL);
_mesa_reference_shader_program(ctx, >ShaderProg, NULL);
-
-   if (clear->IntegerShaderProg) {
-  _mesa_reference_shader_program(ctx, >IntegerShaderProg, NULL);
-   }
 }
 
 /**
@@ -1725,6 +1677,7 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
 
_mesa_meta_begin(ctx, metaSave);
 
+   assert(!fb->_IntegerBuffers);
if (glsl) {
   meta_glsl_clear_init(ctx, clear);
 
@@ -1744,11 +1697,7 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
   z = invert_z(ctx->Depth.Clear);
}
 
-   if (fb->_IntegerBuffers) {
-  assert(glsl);
-  _mesa_meta_use_program(ctx, clear->IntegerShaderProg);
-  _mesa_Uniform4iv(0, 1, ctx->Color.ClearColor.i);
-   } else if (glsl) {
+   if (glsl) {
   _mesa_meta_use_program(ctx, clear->ShaderProg);
   _mesa_Uniform4fv(0, 1, ctx->Color.ClearColor.f);
}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index 1b5cf42..c09cb3e 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -325,7 +325,6 @@ struct clear_state
GLuint VAO;
struct gl_buffer_object *buf_obj;
struct gl_shader_program *ShaderProg;
-   struct 

[Mesa-dev] [PATCH 02/12] i965/urb: Trigger upload_urb on NEW_BLORP

2017-06-26 Thread Ian Romanick
From: Jason Ekstrand 

It's a bit rare, but blorp can trigger a urb reconfiguration.  When that
happens, we need to re-upload the URB config.  Fortunately, this isn't as
bad as it looks because gen7_upload_urb will not re-emit the packet if it
would end up being a no-op so this doesn't mean that running blorp always
triggers a URB reconfig.

v2 (idr): Sort BRW_NEW_ tokens to match brw_recalculate_urb_fence and
gen6_urb.

v3 (idr): Don't whack BRW_NEW_URB_SIZE in blorp.  Suggested by Jason.
---
 src/mesa/drivers/dri/i965/gen7_urb.c| 3 ++-
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 525c9c4..c4b479c 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -236,7 +236,8 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
 const struct brw_tracked_state gen7_urb = {
.dirty = {
   .mesa = 0,
-  .brw = BRW_NEW_CONTEXT |
+  .brw = BRW_NEW_BLORP |
+ BRW_NEW_CONTEXT |
  BRW_NEW_URB_SIZE |
  BRW_NEW_GS_PROG_DATA |
  BRW_NEW_TCS_PROG_DATA |
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 8fd17fb..af3d609 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -183,8 +183,6 @@ blorp_emit_urb_config(struct blorp_batch *batch,
brw->urb.vsize >= vs_entry_size)
   return;
 
-   brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
-
gen7_upload_urb(brw, vs_entry_size, false, false);
 #elif GEN_GEN == 6
gen6_upload_urb(brw, vs_entry_size, false, 0);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/12] genxml: Silence about a billion unused parameter warnings

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

v2: Use textwrap.dedent to make the source line a lot shorter.
Shortening (?) the line was requested by Jason.

Signed-off-by: Ian Romanick 
---
 src/intel/genxml/gen_pack_header.py | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/intel/genxml/gen_pack_header.py 
b/src/intel/genxml/gen_pack_header.py
index fefbc9a..a96a232 100644
--- a/src/intel/genxml/gen_pack_header.py
+++ b/src/intel/genxml/gen_pack_header.py
@@ -8,6 +8,7 @@ import xml.parsers.expat
 import re
 import sys
 import copy
+import textwrap
 
 license =  """/*
  * Copyright (C) 2016 Intel Corporation
@@ -578,8 +579,12 @@ class Parser(object):
 
 def emit_pack_function(self, name, group):
 name = self.gen_prefix(name)
-print("static inline void\n%s_pack(__gen_user_data *data, void * 
restrict dst,\n%sconst struct %s * restrict values)\n{" %
-  (name, ' ' * (len(name) + 6), name))
+print(textwrap.dedent("""\
+static inline void
+%s_pack(__attribute__((unused)) __gen_user_data *data,
+%s__attribute__((unused)) void * restrict dst,
+%s__attribute__((unused)) const struct %s * restrict values)
+{""") % (name, ' ' * (len(name) + 6), ' ' * (len(name) + 6), name))
 
 (dwords, length) = group.collect_dwords_and_length()
 if length:
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] meta: Coalesce the GLSL and FF paths in meta_clear

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

   text    data     bss     dec     hex filename
7154994  256860   37332 7449186  71aa62 32-bit i965_dri.so before
7154994  256860   37332 7449186  71aa62 32-bit i965_dri.so after
6788475  328056   50704 7167235  6d5d03 64-bit i965_dri.so before
6788451  328056   50704 7167211  6d5ceb 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/common/meta.c | 23 +--
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 4ace12b..5dc81bc 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1634,7 +1634,6 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
struct gl_framebuffer *fb = ctx->DrawBuffer;
struct vertex verts[4];
-   int i;
 
metaSave = (MESA_META_ALPHA_TEST |
MESA_META_BLEND |
@@ -1668,17 +1667,22 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
assert(!fb->_IntegerBuffers);
if (glsl) {
   meta_glsl_clear_init(ctx, clear);
+
+  _mesa_meta_use_program(ctx, clear->ShaderProg);
+  _mesa_Uniform4fv(0, 1, ctx->Color.ClearColor.f);
} else {
   _mesa_meta_setup_vertex_objects(ctx, >VAO, >buf_obj, false,
   3, 0, 4);
 
   /* setup projection matrix */
   _mesa_load_identity_matrix(ctx, >ProjectionMatrixStack);
-   }
 
-   if (glsl) {
-  _mesa_meta_use_program(ctx, clear->ShaderProg);
-  _mesa_Uniform4fv(0, 1, ctx->Color.ClearColor.f);
+  for (int i = 0; i < 4; i++) {
+ verts[i].r = ctx->Color.ClearColor.f[0];
+ verts[i].g = ctx->Color.ClearColor.f[1];
+ verts[i].b = ctx->Color.ClearColor.f[2];
+ verts[i].a = ctx->Color.ClearColor.f[3];
+  }
}
 
/* GL_COLOR_BUFFER_BIT */
@@ -1739,15 +1743,6 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
verts[3].y = y1;
verts[3].z = z;
 
-   if (!glsl) {
-  for (i = 0; i < 4; i++) {
- verts[i].r = ctx->Color.ClearColor.f[0];
- verts[i].g = ctx->Color.ClearColor.f[1];
- verts[i].b = ctx->Color.ClearColor.f[2];
- verts[i].a = ctx->Color.ClearColor.f[3];
-  }
-   }
-
/* upload new vertex data */
_mesa_buffer_data(ctx, clear->buf_obj, GL_NONE, sizeof(verts), verts,
  GL_DYNAMIC_DRAW, __func__);
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/12] mesa: Add matrix utility functions to load matrices

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

These are basically DSA versions of glLoadIdentity() and glLoadMatrix().

   text    data     bss     dec     hex filename
7155026  256860   37332 7449218  71aa82 32-bit i965_dri.so before
7155246  256860   37332 7449438  71ab5e 32-bit i965_dri.so after
6788499  328056   50704 7167259  6d5d1b 64-bit i965_dri.so before
6788683  328056   50704 7167443  6d5dd3 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/main/matrix.c | 32 +++-
 src/mesa/main/matrix.h |  8 
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/matrix.c b/src/mesa/main/matrix.c
index 83f081e..29a047d 100644
--- a/src/mesa/main/matrix.c
+++ b/src/mesa/main/matrix.c
@@ -308,6 +308,16 @@ _mesa_PopMatrix( void )
 }
 
 
+void
+_mesa_load_identity_matrix(struct gl_context *ctx, struct gl_matrix_stack *s)
+{
+   FLUSH_VERTICES(ctx, 0);
+
+   _math_matrix_set_identity(s->Top);
+   ctx->NewState |= s->DirtyFlag;
+}
+
+
 /**
  * Replace the current matrix with the identity matrix.
  *
@@ -322,16 +332,24 @@ _mesa_LoadIdentity( void )
 {
GET_CURRENT_CONTEXT(ctx);
 
-   FLUSH_VERTICES(ctx, 0);
-
if (MESA_VERBOSE & VERBOSE_API)
   _mesa_debug(ctx, "glLoadIdentity()\n");
 
-   _math_matrix_set_identity( ctx->CurrentStack->Top );
-   ctx->NewState |= ctx->CurrentStack->DirtyFlag;
+   _mesa_load_identity_matrix(ctx, ctx->CurrentStack);
 }
 
 
+void
+_mesa_load_matrix(struct gl_context *ctx, struct gl_matrix_stack *s,
+  const GLfloat *m)
+{
+   if (memcmp(m, s->Top->m, 16 * sizeof(GLfloat)) != 0) {
+  FLUSH_VERTICES(ctx, 0);
+  _math_matrix_loadf(s->Top, m);
+  ctx->NewState |= s->DirtyFlag;
+   }
+}
+
 /**
  * Replace the current matrix with a given matrix.
  *
@@ -356,11 +374,7 @@ _mesa_LoadMatrixf( const GLfloat *m )
   m[2], m[6], m[10], m[14],
   m[3], m[7], m[11], m[15]);
 
-   if (memcmp(m, ctx->CurrentStack->Top->m, 16 * sizeof(GLfloat)) != 0) {
-  FLUSH_VERTICES(ctx, 0);
-  _math_matrix_loadf( ctx->CurrentStack->Top, m );
-  ctx->NewState |= ctx->CurrentStack->DirtyFlag;
-   }
+   _mesa_load_matrix(ctx, ctx->CurrentStack, m);
 }
 
 
diff --git a/src/mesa/main/matrix.h b/src/mesa/main/matrix.h
index 8eee67c..33d7767 100644
--- a/src/mesa/main/matrix.h
+++ b/src/mesa/main/matrix.h
@@ -31,6 +31,14 @@
 #include "glheader.h"
 
 struct gl_context;
+struct gl_matrix_stack;
+
+extern void
+_mesa_load_identity_matrix(struct gl_context *ctx, struct gl_matrix_stack *s);
+
+extern void
+_mesa_load_matrix(struct gl_context *ctx, struct gl_matrix_stack *s,
+  const GLfloat *m);
 
 extern void GLAPIENTRY
 _mesa_Frustum( GLdouble left, GLdouble right,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/12] meta: Use same vertex coordinates for GLSL and FF clears

2017-06-26 Thread Ian Romanick
From: Ian Romanick 

   text    data     bss     dec     hex filename
7155058  256860   37332 7449250  71aaa2 32-bit i965_dri.so before
7154994  256860   37332 7449186  71aa62 32-bit i965_dri.so after
6788611  328056   50704 7167371  6d5d8b 64-bit i965_dri.so before
6788475  328056   50704 7167235  6d5d03 64-bit i965_dri.so after

Signed-off-by: Ian Romanick 
---
 src/mesa/drivers/common/meta.c | 20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 9095caf..4ace12b 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -1633,7 +1633,6 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
GLbitfield metaSave;
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
struct gl_framebuffer *fb = ctx->DrawBuffer;
-   float x0, y0, x1, y1, z;
struct vertex verts[4];
int i;
 
@@ -1669,21 +1668,12 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
assert(!fb->_IntegerBuffers);
if (glsl) {
   meta_glsl_clear_init(ctx, clear);
-
-  x0 = ((float) fb->_Xmin / fb->Width)  * 2.0f - 1.0f;
-  y0 = ((float) fb->_Ymin / fb->Height) * 2.0f - 1.0f;
-  x1 = ((float) fb->_Xmax / fb->Width)  * 2.0f - 1.0f;
-  y1 = ((float) fb->_Ymax / fb->Height) * 2.0f - 1.0f;
-  z = -invert_z(ctx->Depth.Clear);
} else {
   _mesa_meta_setup_vertex_objects(ctx, >VAO, >buf_obj, false,
   3, 0, 4);
 
-  x0 = (float) fb->_Xmin;
-  y0 = (float) fb->_Ymin;
-  x1 = (float) fb->_Xmax;
-  y1 = (float) fb->_Ymax;
-  z = invert_z(ctx->Depth.Clear);
+  /* setup projection matrix */
+  _mesa_load_identity_matrix(ctx, >ProjectionMatrixStack);
}
 
if (glsl) {
@@ -1730,6 +1720,12 @@ meta_clear(struct gl_context *ctx, GLbitfield buffers, 
bool glsl)
}
 
/* vertex positions */
+   const float x0 = ((float) fb->_Xmin / fb->Width)  * 2.0f - 1.0f;
+   const float y0 = ((float) fb->_Ymin / fb->Height) * 2.0f - 1.0f;
+   const float x1 = ((float) fb->_Xmax / fb->Width)  * 2.0f - 1.0f;
+   const float y1 = ((float) fb->_Ymax / fb->Height) * 2.0f - 1.0f;
+   const float z = -invert_z(ctx->Depth.Clear);
+
verts[0].x = x0;
verts[0].y = y0;
verts[0].z = z;
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] pipe_loader_sw: fix compilation warning

2017-06-26 Thread Brian Paul
Add the new 'flags' parameter to pipe_loader_sw_create_screen().
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 0fbc78e..46c6604 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -294,7 +294,8 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev,
 }
 
 static struct pipe_screen *
-pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
+pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
+ unsigned flags)
 {
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
struct pipe_screen *screen;
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Android: add renderonly files to libmesa_gallium

2017-06-26 Thread Eric Anholt
Rob Herring  writes:

> vc4 now depends on renderonly functions, but these weren't added to the
> Android build resulting in the following errors:
>
> src/gallium/drivers/vc4/vc4_resource.c:380: error: undefined reference to 
> 'renderonly_scanout_destroy'
> src/gallium/drivers/vc4/vc4_resource.c:681: error: undefined reference to 
> 'renderonly_create_gpu_import_for_resource'
> src/gallium/drivers/vc4/vc4_screen.c:625: error: undefined reference to 
> 'renderonly_dup'
> src/gallium/winsys/pl111/drm/pl111_drm_winsys.c:37: error: undefined 
> reference to 'renderonly_create_gpu_import_for_resource'
> src/gallium/winsys/pl111/drm/pl111_drm_winsys.c:37: error: undefined 
> reference to 'renderonly_create_gpu_import_for_resource'
>
> Fixes: 7029ec05e2c7 ("gallium: Add renderonly-based support for pl111+vc4.")
> Cc: Eric Anholt 
> Signed-off-by: Rob Herring 
> ---
> Eric, not sure how this got missed. I'm guessing the build didn't link 
> due to some other errors.

Reviewed and pushed.  Thanks!


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: use #pragma pack to pack si_shader_key

2017-06-26 Thread Marek Olšák
On Thu, Jun 22, 2017 at 9:19 AM, Nicolai Hähnle  wrote:
> On 20.06.2017 20:00, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> sizeof(struct si_shader_key):
>>Before reverting the 2 commits: 120 bytes
>>After reverting the 2 commits: 128 bytes
>>With #pragma pack: 107 bytes
>>
>> I'm not sure if memcmp with a byte-aligned size is a good idea.
>
>
> Does this have a measurable impact? The code is nicer to read with the
> structure after the patches, but I'm not sure it's worth the risk of getting
> misaligned data somewhere.

It decreases the time spent in si_update_shaders by 6% when going from
128 bytes to 106 for the shader key.

What do you mean by the risk?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/anv: Add missing break in anv_CreateDevice()

2017-06-26 Thread Nanley Chery
On Mon, Jun 26, 2017 at 09:07:12PM +0300, Topi Pohjolainen wrote:
> CID: 1413018
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/intel/vulkan/anv_device.c | 1 +
>  1 file changed, 1 insertion(+)
> 

This patch is
Reviewed-by: Nanley Chery 

> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 5505befcfa..b09caa38a4 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1231,6 +1231,7 @@ VkResult anv_CreateDevice(
>break;
> case 9:
>result = gen9_init_device_state(device);
> +  break;
> case 10:
>result = gen10_init_device_state(device);
>break;
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 00/10] Add precise/invariant semantics to TGSI

2017-06-26 Thread Samuel Pitoiset

Looks good to me.

For the series:

Reviewed-by: Samuel Pitoiset 

On 06/23/2017 08:30 PM, Karol Herbst wrote:

Running Tomb Raider on Nouveau I found some flicker caused by ignoring precise
modifiers on variables inside Nouveau.

This series adds precise/invariant handling to TGSI, which can be then used by
drivers to disable certain unsafe optimisations which may otherwise alter
calculations, which depend on having the same result across shaders.

This series fixes this bug in Tomb Raider and one CTS test for 4.4 and 4.5

No piglit regression on my nve6

Changes since v3:
* more documentation
* don't disable SAD opt for nv50
* bug fixes

Karol Herbst (10):
   tgsi: add precise flag to tgsi_instruction
   tgsi/dump: print _PRECISE modifier on Instructions
   st/glsl_to_tgsi: handle precise modifier
   tgsi: populate precise
   tgsi/text: parse _PRECISE modifier
   gallium/docs: add precise instruction modifier
   st/glsl_to_tgsi: don't optimize mul+add to mad if expression is
 precise
   nv50/ir: add precise field to Instruction
   nv50/ir/tgsi: handle precise for most ALU instructions
   nv50/ir: disable mul+add to mad for precise instructions

  src/gallium/auxiliary/tgsi/tgsi_build.c|  4 +++
  src/gallium/auxiliary/tgsi/tgsi_dump.c |  4 +++
  src/gallium/auxiliary/tgsi/tgsi_text.c | 17 --
  src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 -
  src/gallium/auxiliary/tgsi/tgsi_ureg.h | 14 -
  src/gallium/auxiliary/util/u_simple_shaders.c  |  2 +-
  src/gallium/docs/source/tgsi.rst   | 11 ++-
  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp|  1 +
  src/gallium/drivers/nouveau/codegen/nv50_ir.h  |  1 +
  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  2 ++
  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |  5 +--
  src/gallium/include/pipe/p_shader_tokens.h |  3 +-
  src/gallium/state_trackers/nine/nine_shader.c  |  6 ++--
  src/mesa/state_tracker/st_atifs_to_tgsi.c  | 36 +++---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 21 ++---
  src/mesa/state_tracker/st_mesa_to_tgsi.c   |  6 ++--
  16 files changed, 103 insertions(+), 38 deletions(-)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Android: add renderonly files to libmesa_gallium

2017-06-26 Thread Rob Herring
vc4 now depends on renderonly functions, but these weren't added to the
Android build resulting in the following errors:

src/gallium/drivers/vc4/vc4_resource.c:380: error: undefined reference to 
'renderonly_scanout_destroy'
src/gallium/drivers/vc4/vc4_resource.c:681: error: undefined reference to 
'renderonly_create_gpu_import_for_resource'
src/gallium/drivers/vc4/vc4_screen.c:625: error: undefined reference to 
'renderonly_dup'
src/gallium/winsys/pl111/drm/pl111_drm_winsys.c:37: error: undefined reference 
to 'renderonly_create_gpu_import_for_resource'
src/gallium/winsys/pl111/drm/pl111_drm_winsys.c:37: error: undefined reference 
to 'renderonly_create_gpu_import_for_resource'

Fixes: 7029ec05e2c7 ("gallium: Add renderonly-based support for pl111+vc4.")
Cc: Eric Anholt 
Signed-off-by: Rob Herring 
---
Eric, not sure how this got missed. I'm guessing the build didn't link 
due to some other errors.

Rob

 src/gallium/auxiliary/Android.mk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk
index e2a1fc214eb7..356390dfde6f 100644
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -31,6 +31,7 @@ include $(CLEAR_VARS)
 LOCAL_SRC_FILES := \
$(C_SOURCES) \
$(NIR_SOURCES) \
+   $(RENDERONLY_SOURCES) \
$(VL_STUB_SOURCES)
 
 LOCAL_C_INCLUDES := \
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: perf: minimize the chances to spread queries across batchbuffers

2017-06-26 Thread Kenneth Graunke
On Wednesday, June 21, 2017 6:25:28 PM PDT Lionel Landwerlin wrote:
> Counters related to timings will be sensitive to any delay introduced
> by the software. In particular if our begin & end of performance
> queries end up in different batches, time related counters will
> exhibit bigger values caused by the time it takes for the kernel
> driver to load new requests into the hardware.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 06576a54d03..6b874d0bbee 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -1063,6 +1063,14 @@ brw_end_perf_query(struct gl_context *ctx,
>   obj->oa.begin_report_id + 1);
>}
>  
> +  /* We flush the batchbuffer here to minimize the chances that MI_RPC
> +   * delimiting commands end up in different batchbuffers. If that's the
> +   * case, the measurement will include the time it takes for the kernel
> +   * scheduler to load a new request into the hardware. This is 
> manifested
> +   * in tools like frameretrace by spikes in the "GPU Core Clocks"
> +   * counter.
> +   */
> +  intel_batchbuffer_flush(brw);
>--brw->perfquery.n_active_oa_queries;
>  
>/* NB: even though the query has now ended, it can't be accumulated
> 

Seems reasonable to me.

Acked-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/11] mesa/glthread: remove HAVE_PTHREAD guards

2017-06-26 Thread Kyriazis, George
I can verify that the fix works.

Thank you!

George

> On Jun 26, 2017, at 3:24 PM, Marek Olšák  wrote:
> 
> I just pushed the fix.
> 
> Marek
> 
> On Mon, Jun 26, 2017 at 10:01 PM, Kyriazis, George
>  wrote:
>> Marek
>> 
>> Our windows mesa build broke with your checkin: "mesa/glthread: remove 
>> HAVE_PTHREAD guards".
>> 
>> Namely:
>> 
>>  Compiling src\mesa\main\context.c ...
>> context.c
>> c:\users\gkyriazi\src\mesa\src\mesa\main\glthread.h(34): fatal error C1083: 
>> Cannot open include file: 'pthread.h': No such file or directory
>> scons: *** [build\windows-x86_64-debug\mesa\main\context.obj] Error 2
>> scons: building terminated because of errors.
>> 
>> Ideas?
>> 
>> George
>> 
>>> On Jun 21, 2017, at 8:02 PM, Marek Olšák  wrote:
>>> 
>>> From: Marek Olšák 
>>> 
>>> we are switching to util_queue.
>>> ---
>>> src/mapi/glapi/gen/gl_marshal.py |  5 +
>>> src/mesa/main/glthread.c |  4 
>>> src/mesa/main/glthread.h | 30 --
>>> src/mesa/main/marshal.c  |  4 
>>> src/mesa/main/marshal.h  | 27 ---
>>> 5 files changed, 1 insertion(+), 69 deletions(-)
>>> 
>>> diff --git a/src/mapi/glapi/gen/gl_marshal.py 
>>> b/src/mapi/glapi/gen/gl_marshal.py
>>> index f52b9b7..062afe5 100644
>>> --- a/src/mapi/glapi/gen/gl_marshal.py
>>> +++ b/src/mapi/glapi/gen/gl_marshal.py
>>> @@ -59,34 +59,31 @@ def indent(delta = 3):
>>> class PrintCode(gl_XML.gl_print_base):
>>>def __init__(self):
>>>super(PrintCode, self).__init__()
>>> 
>>>self.name = 'gl_marshal.py'
>>>self.license = license.bsd_license_template % (
>>>'Copyright (C) 2012 Intel Corporation', 'INTEL CORPORATION')
>>> 
>>>def printRealHeader(self):
>>>print header
>>> -print '#ifdef HAVE_PTHREAD'
>>> -print
>>>print 'static inline int safe_mul(int a, int b)'
>>>print '{'
>>>print 'if (a < 0 || b < 0) return -1;'
>>>print 'if (a == 0 || b == 0) return 0;'
>>>print 'if (a > INT_MAX / b) return -1;'
>>>print 'return a * b;'
>>>print '}'
>>>print
>>> 
>>>def printRealFooter(self):
>>> -print
>>> -print '#endif'
>>> +pass
>>> 
>>>def print_sync_call(self, func):
>>>call = 'CALL_{0}(ctx->CurrentServerDispatch, ({1}))'.format(
>>>func.name, func.get_called_parameter_string())
>>>if func.return_type == 'void':
>>>out('{0};'.format(call))
>>>else:
>>>out('return {0};'.format(call))
>>> 
>>>def print_sync_dispatch(self, func):
>>> diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
>>> index 455b829..e90709c 100644
>>> --- a/src/mesa/main/glthread.c
>>> +++ b/src/mesa/main/glthread.c
>>> @@ -31,22 +31,20 @@
>>> * quickly logs the GL commands to a buffer to be processed by a worker
>>> * thread.
>>> */
>>> 
>>> #include "main/mtypes.h"
>>> #include "main/glthread.h"
>>> #include "main/marshal.h"
>>> #include "main/marshal_generated.h"
>>> #include "util/u_thread.h"
>>> 
>>> -#ifdef HAVE_PTHREAD
>>> -
>>> static void
>>> glthread_allocate_batch(struct gl_context *ctx)
>>> {
>>>   struct glthread_state *glthread = ctx->GLThread;
>>> 
>>>   /* TODO: handle memory allocation failure. */
>>>   glthread->batch = malloc(sizeof(*glthread->batch));
>>>   if (!glthread->batch)
>>>  return;
>>>   memset(glthread->batch, 0, offsetof(struct glthread_batch, buffer));
>>> @@ -277,12 +275,10 @@ _mesa_glthread_finish(struct gl_context *ctx)
>>> _glapi_set_dispatch(dispatch);
>>>  }
>>>   } else {
>>>  _mesa_glthread_flush_batch_locked(ctx);
>>>  while (glthread->batch_queue || glthread->busy)
>>> pthread_cond_wait(&glthread->work_done, &glthread->mutex);
>>>   }
>>> 
>>>   pthread_mutex_unlock(&glthread->mutex);
>>> }
>>> -
>>> -#endif
>>> diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
>>> index 50c1db2..07bed38 100644
>>> --- a/src/mesa/main/glthread.h
>>> +++ b/src/mesa/main/glthread.h
>>> @@ -22,22 +22,20 @@
>>> */
>>> 
>>> #ifndef _GLTHREAD_H
>>> #define _GLTHREAD_H
>>> 
>>> #include "main/mtypes.h"
>>> 
>>> /* Command size is a number of bytes stored in a short. */
>>> #define MARSHAL_MAX_CMD_SIZE 65535
>>> 
>>> -#ifdef HAVE_PTHREAD
>>> -
>>> #include 
>>> #include 
>>> #include 
>>> 
>>> enum marshal_dispatch_cmd_id;
>>> 
>>> struct glthread_state
>>> {
>>>   /** The worker thread that asynchronously processes our GL commands. */
>>>   pthread_t thread;
>>> @@ -117,39 +115,11 @@ struct glthread_batch
>>>   uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
>>> };
>>> 
>>> void _mesa_glthread_init(struct gl_context *ctx);
>>> void _mesa_glthread_destroy(struct gl_context *ctx);
>>> 
>>> void _mesa_glthread_restore_dispatch(struct gl_context *ctx);
>>> void _mesa_glthread_flush_batch(struct gl_context *ctx);
>>> void _mesa_glthread_finish(struct 

Re: [Mesa-dev] [PATCH] i965: Separate gen < 8 and gen >= 8 paths explicitly in wrap_mode()

2017-06-26 Thread Kenneth Graunke
On Monday, June 26, 2017 11:07:11 AM PDT Topi Pohjolainen wrote:
> Makes coverity happier.
> 
> Fix indentation in gen >= 8 block while at it.
> 
> CID: 1413020
> CC: Rafael Antognolli 
> Signed-off-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 3f8a7265db..d65b468863 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -4455,9 +4455,8 @@ translate_wrap_mode(struct brw_context *brw, GLenum 
> wrap, bool using_nearest)
> *
> * Gen8+ supports this natively.
> */
> - return TCM_HALF_BORDER;
> -#endif
> -
> +  return TCM_HALF_BORDER;
> +#else
>/* On Gen4-7.5, we clamp the coordinates in the fragment shader
> * and set clamp_border here, which gets the result desired.
> * We just use clamp(_to_edge) for nearest, because for nearest
> @@ -4468,6 +4467,7 @@ translate_wrap_mode(struct brw_context *brw, GLenum 
> wrap, bool using_nearest)
>   return TCM_CLAMP;
>else
>   return TCM_CLAMP_BORDER;
> +#endif
> case GL_CLAMP_TO_EDGE:
>return TCM_CLAMP;
> case GL_CLAMP_TO_BORDER:
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] mesa: only flush vertices when the viewport is different

2017-06-26 Thread Marek Olšák
For the series:

Reviewed-by: Marek Olšák 

Marek

On Thu, Jun 22, 2017 at 4:35 PM, Samuel Pitoiset
 wrote:
> This prevents glViewport() and friends from always flushing and
> triggering _NEW_VIEWPORT.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/mesa/main/viewport.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/main/viewport.c b/src/mesa/main/viewport.c
> index 33aa1269520..ae03d6a5dd2 100644
> --- a/src/mesa/main/viewport.c
> +++ b/src/mesa/main/viewport.c
> @@ -40,9 +40,6 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx,
> GLfloat x, GLfloat y,
> GLfloat width, GLfloat height)
>  {
> -   FLUSH_VERTICES(ctx, ctx->DriverFlags.NewViewport ? 0 : _NEW_VIEWPORT);
> -   ctx->NewDriverState |= ctx->DriverFlags.NewViewport;
> -
> /* clamp width and height to the implementation dependent range */
> width  = MIN2(width, (GLfloat) ctx->Const.MaxViewportWidth);
> height = MIN2(height, (GLfloat) ctx->Const.MaxViewportHeight);
> @@ -70,6 +67,9 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx,
> ctx->ViewportArray[idx].Height == height)
>return;
>
> +   FLUSH_VERTICES(ctx, ctx->DriverFlags.NewViewport ? 0 : _NEW_VIEWPORT);
> +   ctx->NewDriverState |= ctx->DriverFlags.NewViewport;
> +
> ctx->ViewportArray[idx].X = x;
> ctx->ViewportArray[idx].Width = width;
> ctx->ViewportArray[idx].Y = y;
> --
> 2.13.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/blorp: Use the renderbuffer format for clears

2017-06-26 Thread Ian Romanick
On 06/26/2017 09:01 AM, Jason Ekstrand wrote:
> This fixes the Piglit ARB_texture_views rendering-formats test.
> 
> Cc: "17.1" 
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index 87c9dd4..96dc657 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -746,9 +746,9 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
>  {
> struct gl_context *ctx = &brw->ctx;
> struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> -   mesa_format format = irb->mt->format;
> uint32_t x0, x1, y0, y1;
>  
> +   mesa_format format = irb->Base.Base.Format;
> if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
>format = _mesa_get_srgb_format_linear(format);
>  
> @@ -772,6 +772,14 @@ do_single_blorp_clear(struct brw_context *brw, struct 
> gl_framebuffer *fb,
> if (set_write_disables(irb, ctx->Color.ColorMask[buf], 
> color_write_disable))
>can_fast_clear = false;
>  
> +   /* We store clear colors as floats or uints as needed.  If there are
> +* texture views in play, the formats will not properly be respected
> +* during resolves because the resolve operations only know about the
> +* miptree and not the renderbuffer.
> +*/
> +   if (irb->Base.Base.Format != irb->mt->format)
  ^
Should this be "format" since that is the linearized format?

> +  can_fast_clear = false;
> +
> if (!irb->mt->supports_fast_clear ||
> !brw_is_color_fast_clear_compatible(brw, irb->mt, 
> &ctx->Color.ClearColor))
>can_fast_clear = false;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/3] amd/common: fix off-by-one in sid_tables.py

2017-06-26 Thread Andres Gomez
On Mon, 2017-06-26 at 14:23 +0100, Emil Velikov wrote:
> On 23 June 2017 at 00:07, Andres Gomez  wrote:
> > On Mon, 2017-06-19 at 12:36 +0100, Emil Velikov wrote:
> > > Hi Nicolai,
> > > 
> > > On 12 June 2017 at 20:33, Nicolai Hähnle  wrote:
> > > > From: Nicolai Hähnle 
> > > > 
> > > > The very last entry in the sid_strings_offsets table ended up missing,
> > > > leading to out-of-bounds reads and potential crashes.
> > > 
> > > Should we have this and " [PATCH 2/3] r600: fix off-by-one in
> > > egd_tables.py" for -stable?
> > > Seems like worthy material IMHO.
> > 
> > Thanks for spotting this, Emil, but egd_tables.py didn't make it for
> > -stable so I suppose we will still leave those 2 out too.
> > 
> 
> Right, so let's have only the sid_tables.py fix in?

Right! ☺

-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #4751 completed

2017-06-26 Thread AppVeyor


Build mesa 4751 completed



Commit 25ea7aa5cd by Marek Olšák on 6/26/2017 8:23 PM:

mesa/glthread: don't include pthread.h

Not needed. This fixes the Windows build.


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/11] mesa/glthread: remove HAVE_PTHREAD guards

2017-06-26 Thread Marek Olšák
I just pushed the fix.

Marek

On Mon, Jun 26, 2017 at 10:01 PM, Kyriazis, George
 wrote:
> Marek
>
> Our windows mesa build broke with your checkin: "mesa/glthread: remove 
> HAVE_PTHREAD guards".
>
> Namely:
>
>   Compiling src\mesa\main\context.c ...
> context.c
> c:\users\gkyriazi\src\mesa\src\mesa\main\glthread.h(34): fatal error C1083: 
> Cannot open include file: 'pthread.h': No such file or directory
> scons: *** [build\windows-x86_64-debug\mesa\main\context.obj] Error 2
> scons: building terminated because of errors.
>
> Ideas?
>
> George
>
>> On Jun 21, 2017, at 8:02 PM, Marek Olšák  wrote:
>>
>> From: Marek Olšák 
>>
>> we are switching to util_queue.
>> ---
>> src/mapi/glapi/gen/gl_marshal.py |  5 +
>> src/mesa/main/glthread.c |  4 
>> src/mesa/main/glthread.h | 30 --
>> src/mesa/main/marshal.c  |  4 
>> src/mesa/main/marshal.h  | 27 ---
>> 5 files changed, 1 insertion(+), 69 deletions(-)
>>
>> diff --git a/src/mapi/glapi/gen/gl_marshal.py 
>> b/src/mapi/glapi/gen/gl_marshal.py
>> index f52b9b7..062afe5 100644
>> --- a/src/mapi/glapi/gen/gl_marshal.py
>> +++ b/src/mapi/glapi/gen/gl_marshal.py
>> @@ -59,34 +59,31 @@ def indent(delta = 3):
>> class PrintCode(gl_XML.gl_print_base):
>> def __init__(self):
>> super(PrintCode, self).__init__()
>>
>> self.name = 'gl_marshal.py'
>> self.license = license.bsd_license_template % (
>> 'Copyright (C) 2012 Intel Corporation', 'INTEL CORPORATION')
>>
>> def printRealHeader(self):
>> print header
>> -print '#ifdef HAVE_PTHREAD'
>> -print
>> print 'static inline int safe_mul(int a, int b)'
>> print '{'
>> print 'if (a < 0 || b < 0) return -1;'
>> print 'if (a == 0 || b == 0) return 0;'
>> print 'if (a > INT_MAX / b) return -1;'
>> print 'return a * b;'
>> print '}'
>> print
>>
>> def printRealFooter(self):
>> -print
>> -print '#endif'
>> +pass
>>
>> def print_sync_call(self, func):
>> call = 'CALL_{0}(ctx->CurrentServerDispatch, ({1}))'.format(
>> func.name, func.get_called_parameter_string())
>> if func.return_type == 'void':
>> out('{0};'.format(call))
>> else:
>> out('return {0};'.format(call))
>>
>> def print_sync_dispatch(self, func):
>> diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
>> index 455b829..e90709c 100644
>> --- a/src/mesa/main/glthread.c
>> +++ b/src/mesa/main/glthread.c
>> @@ -31,22 +31,20 @@
>>  * quickly logs the GL commands to a buffer to be processed by a worker
>>  * thread.
>>  */
>>
>> #include "main/mtypes.h"
>> #include "main/glthread.h"
>> #include "main/marshal.h"
>> #include "main/marshal_generated.h"
>> #include "util/u_thread.h"
>>
>> -#ifdef HAVE_PTHREAD
>> -
>> static void
>> glthread_allocate_batch(struct gl_context *ctx)
>> {
>>struct glthread_state *glthread = ctx->GLThread;
>>
>>/* TODO: handle memory allocation failure. */
>>glthread->batch = malloc(sizeof(*glthread->batch));
>>if (!glthread->batch)
>>   return;
>>memset(glthread->batch, 0, offsetof(struct glthread_batch, buffer));
>> @@ -277,12 +275,10 @@ _mesa_glthread_finish(struct gl_context *ctx)
>>  _glapi_set_dispatch(dispatch);
>>   }
>>} else {
>>   _mesa_glthread_flush_batch_locked(ctx);
>>   while (glthread->batch_queue || glthread->busy)
>>  pthread_cond_wait(&glthread->work_done, &glthread->mutex);
>>}
>>
>>pthread_mutex_unlock(&glthread->mutex);
>> }
>> -
>> -#endif
>> diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
>> index 50c1db2..07bed38 100644
>> --- a/src/mesa/main/glthread.h
>> +++ b/src/mesa/main/glthread.h
>> @@ -22,22 +22,20 @@
>>  */
>>
>> #ifndef _GLTHREAD_H
>> #define _GLTHREAD_H
>>
>> #include "main/mtypes.h"
>>
>> /* Command size is a number of bytes stored in a short. */
>> #define MARSHAL_MAX_CMD_SIZE 65535
>>
>> -#ifdef HAVE_PTHREAD
>> -
>> #include <inttypes.h>
>> #include <stdbool.h>
>> #include <pthread.h>
>>
>> enum marshal_dispatch_cmd_id;
>>
>> struct glthread_state
>> {
>>/** The worker thread that asynchronously processes our GL commands. */
>>pthread_t thread;
>> @@ -117,39 +115,11 @@ struct glthread_batch
>>uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
>> };
>>
>> void _mesa_glthread_init(struct gl_context *ctx);
>> void _mesa_glthread_destroy(struct gl_context *ctx);
>>
>> void _mesa_glthread_restore_dispatch(struct gl_context *ctx);
>> void _mesa_glthread_flush_batch(struct gl_context *ctx);
>> void _mesa_glthread_finish(struct gl_context *ctx);
>>
>> -#else /* HAVE_PTHREAD */
>> -
>> -static inline void
>> -_mesa_glthread_init(struct gl_context *ctx)
>> -{
>> -}
>> -
>> -static inline void
>> -_mesa_glthread_destroy(struct gl_context *ctx)
>> -{
>> -}
>> -
>> -static inline void
>> 

Re: [Mesa-dev] [PATCH 3/3] nv50/ir: fix combineLd/St to update existing records as necessary

2017-06-26 Thread Samuel Pitoiset

These two are good finds!

Patch 2&3 are:

Reviewed-by: Samuel Pitoiset 

On 06/25/2017 12:39 AM, Ilia Mirkin wrote:

Previously the logic would decide that the record is kept, which
translates into keep = false in the caller, which meant that these
passes did not run.

While it's right that keep = false which means that a new record does
not need to be added, we do still have to perform the usual list
maintenance. It's easiest to do this pre-merge rather than post.

The lowering that clip/cull distance passes produce triggers this bug in
TCS (since reading outputs is done differently in other stages), but it
should be possible to achieve it with the right sequence of regular
reads/writes.

Fixes: KHR-GL45.cull_distance.functional
Fixes: 
generated_tests/spec/arb_tessellation_shader/execution/tes-input/tes-input-gl_ClipDistance.shader_test
Signed-off-by: Ilia Mirkin 
Cc: mesa-stable@lists.freedesktop.org
---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 
  1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 169436a4e39..57cb7ce214d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2485,6 +2485,10 @@ MemoryOpt::combineLd(Record *rec, Instruction *ld)
  
 assert(sizeRc + sizeLd <= 16 && offRc != offLd);
  
+   // lock any stores that overlap with the load being merged into the

+   // existing record.
+   lockStores(ld);
+
 for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);
  
 if (offLd < offRc) {

@@ -2541,6 +2545,10 @@ MemoryOpt::combineSt(Record *rec, Instruction *st)
 if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
return false;
  
+   // remove any existing load/store records for the store being merged into

+   // the existing record.
+   purgeRecords(st, DATA_FILE_COUNT);
+
 st->takeExtraSources(0, extra); // save predicate and indirect address
  
 if (offRc < offSt) {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50, nvc0: remove IDX from bufctx immediately, to avoid conflicts with clear

2017-06-26 Thread Ilia Mirkin
Yes, but I've since rebased it out on my 'cts' branch.

On Mon, Jun 26, 2017 at 4:02 PM, Samuel Pitoiset
 wrote:
> Looks like the patch is based on your bindless branch. :)
>
> Reviewed-by: Samuel Pitoiset 
>
>
> On 06/24/2017 07:24 PM, Ilia Mirkin wrote:
>>
>> The idxbuf could linger, and when a clear happened, which also uses the
>> 3d bufctx, we could get an error trying to access it.
>>
>> This fixes spurious crashes/errors in CTS tests.
>>
>> Fixes: 61d8f3387d ("nv50,nvc0: clear index buffer bufctx bin
>> unconditionally")
>> Signed-off-by: Ilia Mirkin 
>> Cc: mesa-stable@lists.freedesktop.org
>> ---
>>   src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 9 +
>>   src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 9 +
>>   2 files changed, 10 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
>> b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
>> index ac7d8267861..ed041121a26 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
>> @@ -770,7 +770,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct
>> pipe_draw_info *info)
>>  bool tex_dirty = false;
>>  int s;
>>   -   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
>>  if (info->index_size && !info->has_user_indices)
>> BCTX_REFN(nv50->bufctx_3d, 3D_INDEX,
>> nv04_resource(info->index.resource), RD);
>>   @@ -838,9 +837,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const
>> struct pipe_draw_info *info)
>>if (nv50->vbo_fifo) {
>> nv50_push_vbo(nv50, info);
>> -  push->kick_notify = nv50_default_kick_notify;
>> -  nouveau_pushbuf_bufctx(push, NULL);
>> -  return;
>> +  goto cleanup;
>>  }
>>if (nv50->state.instance_base != info->start_instance) {
>> @@ -894,9 +891,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct
>> pipe_draw_info *info)
>>  info->mode, info->start, info->count,
>>  info->instance_count);
>>  }
>> +
>> +cleanup:
>>  push->kick_notify = nv50_default_kick_notify;
>>nv50_release_user_vbufs(nv50);
>>nouveau_pushbuf_bufctx(push, NULL);
>> +
>> +   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
>>   }
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>> index 2856b4c6096..a5671ca09ac 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
>> @@ -921,7 +921,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct
>> pipe_draw_info *info)
>>  struct nvc0_screen *screen = nvc0->screen;
>>  int s;
>>   -   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
>>  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS);
>>/* NOTE: caller must ensure that (min_index + index_bias) is >= 0
>> */
>> @@ -1040,9 +1039,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const
>> struct pipe_draw_info *info)
>>if (nvc0->state.vbo_mode) {
>> nvc0_push_vbo(nvc0, info);
>> -  push->kick_notify = nvc0_default_kick_notify;
>> -  nouveau_pushbuf_bufctx(push, NULL);
>> -  return;
>> +  goto cleanup;
>>  }
>>/* space for base instance, flush, and prim restart */
>> @@ -1089,9 +1086,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const
>> struct pipe_draw_info *info)
>>  info->mode, info->start, info->count,
>>  info->instance_count);
>>  }
>> +
>> +cleanup:
>>  push->kick_notify = nvc0_default_kick_notify;
>>nvc0_release_user_vbufs(nvc0);
>>nouveau_pushbuf_bufctx(push, NULL);
>> +
>> +   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
>>   }
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 00/16] anv: Fix our 48-bit problems

2017-06-26 Thread Andres Gomez
Jason, you CCed this whole series for stable. However:
 * Patch 1/16 already landed, but without the -stable tag. Should it be
   cherry-picked? It looks like yes.
 * Patch 2/16 did not land. Is it in need of review or has it been
   superseded?
 * Patches 3-15/16 were already cherry-picked in the 17.1.2 release.
 * Patch 16/16 has not landed yet. Is it in need of review or has it
   been superseded? Not sure if we want to cherry-pick it in any case
   ...

Please, let us know when you can.

Thanks!

On Thu, 2017-05-18 at 14:00 -0700, Jason Ekstrand wrote:
> This patch series aims to fix the remaining 48-bit problems in the Vulkan
> driver.  As such, the entire thing will be CC'd to stable before landing.
> 
> The first 5 patches fix the driver to handle memory aliasing correctly.
> Vulkan allows you to bind multiple buffers or images to overlapping memory
> regions so long as you get your layout transitions correct.  Up until now,
> we've been doing a memset at vkBindImageMemory time to initialize auxiliary
> surfaces which isn't valid in light of aliasing.  Instead, these patches
> provide actual support for layout transitions from UNDEFINED to other
> layouts.  This isn't actually a 48-bit issue but the other patches cause a
> change in the behavior of some CTS tests which makes them start failing due
> to memory aliasing problems.
> 
> The next 10 patches refactor memory type setup and make us advertise 2
> heaps on platforms with a lot of memory.  For justification, see the
> comment in patch 15.
> 
> The last patch just extends the new pass added in patch 2 for gen7-8.  It's
> fairly straightforward but completely untested.  Hopefully it will help
> Nanley or someone else if they ever need it.
> 
> Cc: "17.1" 
> Cc: Nanley Chery 
> 
> Jason Ekstrand (16):
>   isl: Make get_intratile_offset_el take the element size in bits
>   intel/blorp: Add a CCS ambiguation pass
>   anv: Handle color layout transitions from the UNINITIALIZED layout
>   anv: Handle transitioning depth from UNDEFINED to other layouts
>   anv/image: Get rid of the memset(aux, 0, sizeof(aux)) hack
>   anv: Predicate 48bit support on gen >= 8
>   anv: Set up memory types and heaps during physical device init
>   anv: Determine the type of mapping based on type metadata
>   anv: Add valid_buffer_usage to the memory type metadata
>   anv: Set image memory types based on the type count
>   anv: Stop setting BO flags in bo_init_new
>   anv: Make supports_48bit_addresses a heap property
>   anv: Refactor memory type setup
>   anv: Advertise both 32-bit and 48-bit heaps when we have enough memory
>   anv: Require vertex buffers to come from a 32-bit heap
>   intel/blorp: Add gen7-8 support to ccs_ambiguate
> 
>  src/intel/blorp/blorp.h|   5 +
>  src/intel/blorp/blorp_clear.c  | 159 +-
>  src/intel/isl/isl.c|   7 +-
>  src/intel/isl/isl.h|   6 +-
>  src/intel/vulkan/anv_allocator.c   |  17 +--
>  src/intel/vulkan/anv_blorp.c   |  40 +++
>  src/intel/vulkan/anv_device.c  | 201 
> -
>  src/intel/vulkan/anv_image.c   |  40 ++-
>  src/intel/vulkan/anv_private.h |  33 +-
>  src/intel/vulkan/anv_queue.c   |   4 +-
>  src/intel/vulkan/genX_cmd_buffer.c |  33 --
>  src/mesa/drivers/dri/i965/intel_blit.c |   2 +-
>  12 files changed, 428 insertions(+), 119 deletions(-)
> 
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/anv: Add missing break in anv_CreateDevice()

2017-06-26 Thread Eric Engestrom
On Monday, 2017-06-26 21:07:12 +0300, Topi Pohjolainen wrote:
> CID: 1413018
> Signed-off-by: Topi Pohjolainen 

Both patches are
Reviewed-by: Eric Engestrom 

> ---
>  src/intel/vulkan/anv_device.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 5505befcfa..b09caa38a4 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1231,6 +1231,7 @@ VkResult anv_CreateDevice(
>break;
> case 9:
>result = gen9_init_device_state(device);
> +  break;
> case 10:
>result = gen10_init_device_state(device);
>break;
> -- 
> 2.11.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] nv50/ir: VFETCH is also considered a load for MemoryOpt

2017-06-26 Thread Samuel Pitoiset
Would be better to introduce a helper into MemoryOpt for that (same for 
stores).


Either way, looks good.

Reviewed-by: Samuel Pitoiset 

On 06/25/2017 12:39 AM, Ilia Mirkin wrote:

This has no effect since in practice this will only come into play for
memory-backed files, for which VFETCH will never happen.

Signed-off-by: Ilia Mirkin 
---
  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 4c92a1efb51..3eab0426716 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2640,7 +2640,7 @@ MemoryOpt::findRecord(const Instruction *insn, bool load, 
bool& isAdj) const
 Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];
  
 for (; it; it = it->next) {

-  if (it->locked && insn->op != OP_LOAD)
+  if (it->locked && insn->op != OP_LOAD && insn->op != OP_VFETCH)
   continue;
if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
it->rel[0] != insn->getIndirect(0, 0) ||


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50, nvc0: remove IDX from bufctx immediately, to avoid conflicts with clear

2017-06-26 Thread Samuel Pitoiset

Looks like the patch is based on your bindless branch. :)

Reviewed-by: Samuel Pitoiset 

On 06/24/2017 07:24 PM, Ilia Mirkin wrote:

The idxbuf could linger, and when a clear happened, which also uses the
3d bufctx, we could get an error trying to access it.

This fixes spurious crashes/errors in CTS tests.

Fixes: 61d8f3387d ("nv50,nvc0: clear index buffer bufctx bin unconditionally")
Signed-off-by: Ilia Mirkin 
Cc: mesa-stable@lists.freedesktop.org
---
  src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 9 +
  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 9 +
  2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index ac7d8267861..ed041121a26 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -770,7 +770,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 bool tex_dirty = false;
 int s;
  
-   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);

 if (info->index_size && !info->has_user_indices)
BCTX_REFN(nv50->bufctx_3d, 3D_INDEX, 
nv04_resource(info->index.resource), RD);
  
@@ -838,9 +837,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
  
 if (nv50->vbo_fifo) {

nv50_push_vbo(nv50, info);
-  push->kick_notify = nv50_default_kick_notify;
-  nouveau_pushbuf_bufctx(push, NULL);
-  return;
+  goto cleanup;
 }
  
 if (nv50->state.instance_base != info->start_instance) {

@@ -894,9 +891,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 info->mode, info->start, info->count,
 info->instance_count);
 }
+
+cleanup:
 push->kick_notify = nv50_default_kick_notify;
  
 nv50_release_user_vbufs(nv50);
  
 nouveau_pushbuf_bufctx(push, NULL);

+
+   nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_3D_INDEX);
  }
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 2856b4c6096..a5671ca09ac 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -921,7 +921,6 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 struct nvc0_screen *screen = nvc0->screen;
 int s;
  
-   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);

 nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS);
  
 /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */

@@ -1040,9 +1039,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
  
 if (nvc0->state.vbo_mode) {

nvc0_push_vbo(nvc0, info);
-  push->kick_notify = nvc0_default_kick_notify;
-  nouveau_pushbuf_bufctx(push, NULL);
-  return;
+  goto cleanup;
 }
  
 /* space for base instance, flush, and prim restart */

@@ -1089,9 +1086,13 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 info->mode, info->start, info->count,
 info->instance_count);
 }
+
+cleanup:
 push->kick_notify = nvc0_default_kick_notify;
  
 nvc0_release_user_vbufs(nvc0);
  
 nouveau_pushbuf_bufctx(push, NULL);

+
+   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
  }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/11] mesa/glthread: remove HAVE_PTHREAD guards

2017-06-26 Thread Kyriazis, George
Marek

Our windows mesa build broke with your checkin: "mesa/glthread: remove 
HAVE_PTHREAD guards".

Namely:

  Compiling src\mesa\main\context.c ...
context.c
c:\users\gkyriazi\src\mesa\src\mesa\main\glthread.h(34): fatal error C1083: 
Cannot open include file: 'pthread.h': No such file or directory
scons: *** [build\windows-x86_64-debug\mesa\main\context.obj] Error 2
scons: building terminated because of errors.

Ideas?

George

> On Jun 21, 2017, at 8:02 PM, Marek Olšák  wrote:
> 
> From: Marek Olšák 
> 
> we are switching to util_queue.
> ---
> src/mapi/glapi/gen/gl_marshal.py |  5 +
> src/mesa/main/glthread.c |  4 
> src/mesa/main/glthread.h | 30 --
> src/mesa/main/marshal.c  |  4 
> src/mesa/main/marshal.h  | 27 ---
> 5 files changed, 1 insertion(+), 69 deletions(-)
> 
> diff --git a/src/mapi/glapi/gen/gl_marshal.py 
> b/src/mapi/glapi/gen/gl_marshal.py
> index f52b9b7..062afe5 100644
> --- a/src/mapi/glapi/gen/gl_marshal.py
> +++ b/src/mapi/glapi/gen/gl_marshal.py
> @@ -59,34 +59,31 @@ def indent(delta = 3):
> class PrintCode(gl_XML.gl_print_base):
> def __init__(self):
> super(PrintCode, self).__init__()
> 
> self.name = 'gl_marshal.py'
> self.license = license.bsd_license_template % (
> 'Copyright (C) 2012 Intel Corporation', 'INTEL CORPORATION')
> 
> def printRealHeader(self):
> print header
> -print '#ifdef HAVE_PTHREAD'
> -print
> print 'static inline int safe_mul(int a, int b)'
> print '{'
> print 'if (a < 0 || b < 0) return -1;'
> print 'if (a == 0 || b == 0) return 0;'
> print 'if (a > INT_MAX / b) return -1;'
> print 'return a * b;'
> print '}'
> print
> 
> def printRealFooter(self):
> -print
> -print '#endif'
> +pass
> 
> def print_sync_call(self, func):
> call = 'CALL_{0}(ctx->CurrentServerDispatch, ({1}))'.format(
> func.name, func.get_called_parameter_string())
> if func.return_type == 'void':
> out('{0};'.format(call))
> else:
> out('return {0};'.format(call))
> 
> def print_sync_dispatch(self, func):
> diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
> index 455b829..e90709c 100644
> --- a/src/mesa/main/glthread.c
> +++ b/src/mesa/main/glthread.c
> @@ -31,22 +31,20 @@
>  * quickly logs the GL commands to a buffer to be processed by a worker
>  * thread.
>  */
> 
> #include "main/mtypes.h"
> #include "main/glthread.h"
> #include "main/marshal.h"
> #include "main/marshal_generated.h"
> #include "util/u_thread.h"
> 
> -#ifdef HAVE_PTHREAD
> -
> static void
> glthread_allocate_batch(struct gl_context *ctx)
> {
>struct glthread_state *glthread = ctx->GLThread;
> 
>/* TODO: handle memory allocation failure. */
>glthread->batch = malloc(sizeof(*glthread->batch));
>if (!glthread->batch)
>   return;
>memset(glthread->batch, 0, offsetof(struct glthread_batch, buffer));
> @@ -277,12 +275,10 @@ _mesa_glthread_finish(struct gl_context *ctx)
>  _glapi_set_dispatch(dispatch);
>   }
>} else {
>   _mesa_glthread_flush_batch_locked(ctx);
>   while (glthread->batch_queue || glthread->busy)
>  pthread_cond_wait(&glthread->work_done, &glthread->mutex);
>}
> 
>pthread_mutex_unlock(&glthread->mutex);
> }
> -
> -#endif
> diff --git a/src/mesa/main/glthread.h b/src/mesa/main/glthread.h
> index 50c1db2..07bed38 100644
> --- a/src/mesa/main/glthread.h
> +++ b/src/mesa/main/glthread.h
> @@ -22,22 +22,20 @@
>  */
> 
> #ifndef _GLTHREAD_H
> #define _GLTHREAD_H
> 
> #include "main/mtypes.h"
> 
> /* Command size is a number of bytes stored in a short. */
> #define MARSHAL_MAX_CMD_SIZE 65535
> 
> -#ifdef HAVE_PTHREAD
> -
> #include <inttypes.h>
> #include <stdbool.h>
> #include <pthread.h>
> 
> enum marshal_dispatch_cmd_id;
> 
> struct glthread_state
> {
>/** The worker thread that asynchronously processes our GL commands. */
>pthread_t thread;
> @@ -117,39 +115,11 @@ struct glthread_batch
>uint8_t buffer[MARSHAL_MAX_CMD_SIZE];
> };
> 
> void _mesa_glthread_init(struct gl_context *ctx);
> void _mesa_glthread_destroy(struct gl_context *ctx);
> 
> void _mesa_glthread_restore_dispatch(struct gl_context *ctx);
> void _mesa_glthread_flush_batch(struct gl_context *ctx);
> void _mesa_glthread_finish(struct gl_context *ctx);
> 
> -#else /* HAVE_PTHREAD */
> -
> -static inline void
> -_mesa_glthread_init(struct gl_context *ctx)
> -{
> -}
> -
> -static inline void
> -_mesa_glthread_destroy(struct gl_context *ctx)
> -{
> -}
> -
> -static inline void
> -_mesa_glthread_finish(struct gl_context *ctx)
> -{
> -}
> -
> -static inline void
> -_mesa_glthread_restore_dispatch(struct gl_context *ctx)
> -{
> -}
> -
> -static inline void
> -_mesa_glthread_flush_batch(struct gl_context *ctx)
> -{
> -}
> -
> -#endif /* !HAVE_PTHREAD */
> 

Re: [Mesa-dev] [PATCH] nv50/ir: fetch indirect sources BEFORE the op that uses them

2017-06-26 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

On 06/24/2017 06:50 PM, Ilia Mirkin wrote:

All the BuildUtil helpers just insert the operation into the current BB.
So we have to take care that any fetchSrc() operations happen before the
operation whose setIndirect() it goes into.

Signed-off-by: Ilia Mirkin 
Cc: mesa-stable@lists.freedesktop.org
---

I don't know how any of this really worked before. I suppose that pre-SSA, we
could get lucky a lot and have it work out. But order of instructions matters.

  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 51 ++
  1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 0da069b2084..eadfca979ad 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2632,6 +2632,10 @@ Converter::handleLOAD(Value *dst0[4])
 const int r = tgsi.getSrc(0).getIndex(0);
 int c;
 std::vector<Value *> off, src, ldv, def;
+   Value *ind = NULL;
+
+   if (tgsi.getSrc(0).isIndirect(0))
+  ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
  
 switch (tgsi.getSrc(0).getFile()) {

 case TGSI_FILE_BUFFER:
@@ -2658,8 +2662,8 @@ Converter::handleLOAD(Value *dst0[4])
  
   Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);

   ld->cache = tgsi.getCacheMode();
- if (tgsi.getSrc(0).isIndirect(0))
-ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 
0));
+ if (ind)
+ld->setIndirect(0, 1, ind);
}
break;
 case TGSI_FILE_IMAGE: {
@@ -2681,8 +2685,8 @@ Converter::handleLOAD(Value *dst0[4])
ld->tex.mask = tgsi.getDst(0).getMask();
ld->tex.format = getImageFormat(code, r);
ld->cache = tgsi.getCacheMode();
-  if (tgsi.getSrc(0).isIndirect(0))
- ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
+  if (ind)
+ ld->setIndirectR(ind);
  
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)

   if (dst0[c] != def[c])
@@ -2770,6 +2774,10 @@ Converter::handleSTORE()
 const int r = tgsi.getDst(0).getIndex(0);
 int c;
 std::vector<Value *> off, src, dummy;
+   Value *ind = NULL;
+
+   if (tgsi.getDst(0).isIndirect(0))
+  ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
  
 switch (tgsi.getDst(0).getFile()) {

 case TGSI_FILE_BUFFER:
@@ -2792,8 +2800,8 @@ Converter::handleSTORE()
  
   Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));

   st->cache = tgsi.getCacheMode();
- if (tgsi.getDst(0).isIndirect(0))
-st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 
0));
+ if (ind)
+st->setIndirect(0, 1, ind);
}
break;
 case TGSI_FILE_IMAGE: {
@@ -2811,8 +2819,8 @@ Converter::handleSTORE()
st->tex.mask = tgsi.getDst(0).getMask();
st->tex.format = getImageFormat(code, r);
st->cache = tgsi.getCacheMode();
-  if (tgsi.getDst(0).isIndirect(0))
- st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL));
+  if (ind)
+ st->setIndirectR(ind);
}
break;
 default:
@@ -2881,6 +2889,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, 
uint16_t subOp)
 std::vector<Value *> srcv;
 std::vector<Value *> defv;
 LValue *dst = getScratch();
+   Value *ind = NULL;
+
+   if (tgsi.getSrc(0).isIndirect(0))
+  ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
  
 switch (tgsi.getSrc(0).getFile()) {

 case TGSI_FILE_BUFFER:
@@ -2890,23 +2902,21 @@ Converter::handleATOM(Value *dst0[4], DataType ty, 
uint16_t subOp)
  continue;
  
   Instruction *insn;

- Value *off = fetchSrc(1, c), *off2 = NULL;
+ Value *off = fetchSrc(1, c);
   Value *sym;
   if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
  sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
tgsi.getSrc(1).getValueU32(c, info));
   else
  sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
- if (tgsi.getSrc(0).isIndirect(0))
-off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
   if (subOp == NV50_IR_SUBOP_ATOM_CAS)
  insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, 
c));
   else
  insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
   if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
  insn->setIndirect(0, 0, off);
- if (off2)
-insn->setIndirect(0, 1, off2);
+ if (ind)
+insn->setIndirect(0, 1, ind);
   insn->subOp = subOp;
}
for (int c = 0; c < 4; ++c)
@@ -2929,8 +2939,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, 
uint16_t subOp)

Re: [Mesa-dev] [PATCH 28/30] i965: Pretend that CCS modified images are two planes

2017-06-26 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:50PM -0700, Jason Ekstrand wrote:
> From: Ben Widawsky 
> 
> v2: move is_aux into if block. (Jason)
> Use else block instead of goto (Jason)
> 
> v3: Fix up logic for is_aux (Ben)
> Fix up size calculations and add FIXME (Ben)
> 
> v4 (Jason Ekstrand):
> Use the aux_pitch in the image instead of calculating it
> 
> Cc: Jason Ekstrand 
> Signed-off-by: Ben Widawsky 
> Acked-by: Daniel Stone 
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 54 
> +++-
>  1 file changed, 33 insertions(+), 21 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 7d6adb7..6237931f 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -757,7 +757,7 @@ intel_query_image(__DRIimage *image, int attrib, int 
> *value)
> case __DRI_IMAGE_ATTRIB_FOURCC:
>return intel_lookup_fourcc(image->dri_format, value);
> case __DRI_IMAGE_ATTRIB_NUM_PLANES:
> -  *value = 1;
> +  *value = image->aux_offset ? 2: 1;

Missing space after '2'.
  
>return true;
> case __DRI_IMAGE_ATTRIB_OFFSET:
>*value = image->offset;
> @@ -1149,31 +1149,43 @@ intel_from_planar(__DRIimage *parent, int plane, void 
> *loaderPrivate)
>  struct intel_image_format *f;
>  __DRIimage *image;
>  
> -if (parent == NULL || parent->planar_format == NULL)
> -return NULL;
> -
> -f = parent->planar_format;
> -
> -if (plane >= f->nplanes)
> -return NULL;
> -
> -width = parent->width >> f->planes[plane].width_shift;
> -height = parent->height >> f->planes[plane].height_shift;
> -dri_format = f->planes[plane].dri_format;
> -index = f->planes[plane].buffer_index;
> -offset = parent->offsets[index];
> -stride = parent->strides[index];
> +if (parent == NULL) {
> +   return NULL;
> +} else if (parent->planar_format == NULL) {
> +   const bool is_aux = parent->aux_offset && plane == 1;
> +   if (!is_aux)
> +  return NULL;
> +
> +   width = parent->width;
> +   height = parent->height;
> +   dri_format = parent->dri_format;
> +   offset = parent->aux_offset;
> +   stride = parent->aux_pitch;
> +} else {
> +   /* Planar formats don't support aux buffers/images */
> +   assert(!parent->aux_offset);
> +   f = parent->planar_format;
> +
> +   if (plane >= f->nplanes)
> +  return NULL;
> +
> +   width = parent->width >> f->planes[plane].width_shift;
> +   height = parent->height >> f->planes[plane].height_shift;
> +   dri_format = f->planes[plane].dri_format;
> +   index = f->planes[plane].buffer_index;
> +   offset = parent->offsets[index];
> +   stride = parent->strides[index];
> +
> +   if (offset + height * stride > parent->bo->size) {
> +  _mesa_warning(NULL, "intel_create_sub_image: subimage out of 
> bounds");
> +  return NULL;
> +   }
> +}
>  
>  image = intel_allocate_image(parent->screen, dri_format, loaderPrivate);
>  if (image == NULL)
> return NULL;
>  
> -if (offset + height * stride > parent->bo->size) {
> -   _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
> -   free(image);
> -   return NULL;
> -}
> -
>  image->bo = parent->bo;
>  brw_bo_reference(parent->bo);
>  image->modifier = parent->modifier;
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 27/30] i965/screen: Support import and export of surfaces with CCS

2017-06-26 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:49PM -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 55 
> +---
>  1 file changed, 50 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 94787ff..7d6adb7 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -671,7 +671,21 @@ intel_create_image_common(__DRIscreen *dri_screen,
>return NULL;
> }
>  
> -   image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image", surf.size,
> +   struct isl_surf aux_surf;
> +   if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) {
> +  ok = isl_surf_get_ccs_surf(>isl_dev, , _surf, 0);
> +  assert(ok);
> +  if (!ok) {
> + free(image);
> + return NULL;
> +  }
> +   } else {
> +  assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE);
> +  aux_surf.size = 0;
> +   }
> +
> +   image->bo = brw_bo_alloc_tiled(screen->bufmgr, "image",
> +  surf.size + aux_surf.size,
>
> isl_tiling_to_i915_tiling(mod_info->tiling),
>surf.row_pitch, 0);
> if (image->bo == NULL) {
> @@ -683,6 +697,11 @@ intel_create_image_common(__DRIscreen *dri_screen,
> image->pitch = surf.row_pitch;
> image->modifier = modifier;
>  
> +   if (aux_surf.size) {
> +  image->aux_offset = surf.size;
> +  image->aux_pitch = aux_surf.row_pitch;
> +   }
> +
> return image;
>  }
>  
> @@ -896,18 +915,18 @@ intel_create_image_from_fds_common(__DRIscreen 
> *dri_screen,
> else
>image->modifier = tiling_to_modifier(image->bo->tiling_mode);
>  
> +   const struct isl_drm_modifier_info *mod_info =
> +  isl_drm_modifier_get_info(image->modifier);
> +
> int size = 0;
> +   struct isl_surf surf;
> for (i = 0; i < f->nplanes; i++) {
>index = f->planes[i].buffer_index;
>image->offsets[index] = offsets[index];
>image->strides[index] = strides[index];
>  
> -  const struct isl_drm_modifier_info *mod_info =
> - isl_drm_modifier_get_info(image->modifier);
> -
>mesa_format format = driImageFormatToGLFormat(f->planes[i].dri_format);
>  
> -  struct isl_surf surf;
>ok = isl_surf_init(>isl_dev, ,
>   .dim = ISL_SURF_DIM_2D,
>   .format = brw_isl_format_for_mesa_format(format),
> @@ -933,6 +952,32 @@ intel_create_image_from_fds_common(__DRIscreen 
> *dri_screen,
>   size = end;
> }
>  
> +   if (mod_info->aux_usage == ISL_AUX_USAGE_CCS_E) {
> +  /* Even though we initialize surf in the loop above, we know that
> +   * anything with CCS_E will have exactly one plane so surf is properly
> +   * initialized when we get here.
> +   */
> +  assert(f->nplanes == 1);
> +
> +  image->aux_offset = offsets[1];
> +  image->aux_pitch = strides[1];
> +
> +  struct isl_surf aux_surf;
> +  ok = isl_surf_get_ccs_surf(>isl_dev, , _surf,
> + image->aux_pitch);
> +  if (!ok) {
> + brw_bo_unreference(image->bo);
> + free(image);
> + return NULL;
> +  }
> +
> +  const int end = image->aux_offset + surf.size;

Shouldn't we use 'aux_surf.size' instead of 'surf.size'?

> +  if (size < end)
> + size = end;
> +   } else {
> +  assert(mod_info->aux_usage == ISL_AUX_USAGE_NONE);
> +   }
> +
> /* Check that the requested image actually fits within the BO. 'size'
>  * is already relative to the offsets, so we don't need to add that. */
> if (image->bo->size == 0) {
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] gallium/st: Add a method to flush outstanding swapbuffers

2017-06-26 Thread Marek Olšák
If there are no objections to the other patches, patches 3-4 are:

Reviewed-by: Marek Olšák 

Marek

On Thu, Jun 22, 2017 at 12:42 PM, Thomas Hellstrom
 wrote:
> Add a state tracker interface method to flush outstanding swapbuffers, and
> add a call to it from the mesa state tracker during glFinish().
> This doesn't strictly mean the outstanding swapbuffers have actually finished
> executing, but it is sufficient for glFinish() to be usable as a replacement
> for glXWaitGL().
>
> Signed-off-by: Thomas Hellstrom 
> ---
>  src/gallium/include/state_tracker/st_api.h |  2 ++
>  src/mesa/state_tracker/st_cb_flush.c   |  2 ++
>  src/mesa/state_tracker/st_manager.c| 22 ++
>  src/mesa/state_tracker/st_manager.h|  3 +++
>  4 files changed, 29 insertions(+)
>
> diff --git a/src/gallium/include/state_tracker/st_api.h 
> b/src/gallium/include/state_tracker/st_api.h
> index e566b19..a7b2bba 100644
> --- a/src/gallium/include/state_tracker/st_api.h
> +++ b/src/gallium/include/state_tracker/st_api.h
> @@ -365,6 +365,8 @@ struct st_framebuffer_iface
> const enum st_attachment_type *statts,
> unsigned count,
> struct pipe_resource **out);
> +   boolean (*flush_swapbuffers) (struct st_context_iface *stctx,
> + struct st_framebuffer_iface *stfbi);
>  };
>
>  /**
> diff --git a/src/mesa/state_tracker/st_cb_flush.c 
> b/src/mesa/state_tracker/st_cb_flush.c
> index 6442fc9..e8c6672 100644
> --- a/src/mesa/state_tracker/st_cb_flush.c
> +++ b/src/mesa/state_tracker/st_cb_flush.c
> @@ -102,6 +102,8 @@ void st_finish( struct st_context *st )
>   PIPE_TIMEOUT_INFINITE);
>st->pipe->screen->fence_reference(st->pipe->screen, , NULL);
> }
> +
> +   st_manager_flush_swapbuffers();
>  }
>
>
> diff --git a/src/mesa/state_tracker/st_manager.c 
> b/src/mesa/state_tracker/st_manager.c
> index cc781f4..9978e3f 100644
> --- a/src/mesa/state_tracker/st_manager.c
> +++ b/src/mesa/state_tracker/st_manager.c
> @@ -863,6 +863,28 @@ st_manager_validate_framebuffers(struct st_context *st)
> st_context_validate(st, stdraw, stread);
>  }
>
> +
> +/**
> + * Flush any outstanding swapbuffers on the current draw framebuffer.
> + */
> +void
> +st_manager_flush_swapbuffers(void)
> +{
> +   GET_CURRENT_CONTEXT(ctx);
> +   struct st_context *st = (ctx) ? ctx->st : NULL;
> +   struct st_framebuffer *stfb;
> +
> +   if (!st)
> +  return;
> +
> +   stfb = st_ws_framebuffer(ctx->DrawBuffer);
> +   if (!stfb || !stfb->iface->flush_swapbuffers)
> +  return;
> +
> +   stfb->iface->flush_swapbuffers(&st->iface, stfb->iface);
> +}
> +
> +
>  /**
>   * Add a color renderbuffer on demand.  The FBO must correspond to a window,
>   * not a user-created FBO.
> diff --git a/src/mesa/state_tracker/st_manager.h 
> b/src/mesa/state_tracker/st_manager.h
> index 65874b0..b363f74 100644
> --- a/src/mesa/state_tracker/st_manager.h
> +++ b/src/mesa/state_tracker/st_manager.h
> @@ -44,4 +44,7 @@ boolean
>  st_manager_add_color_renderbuffer(struct st_context *st, struct 
> gl_framebuffer *fb,
>gl_buffer_index idx);
>
> +void
> +st_manager_flush_swapbuffers(void);
> +
>  #endif /* ST_MANAGER_H */
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/3] anv: Stop racing relocation offsets

2017-06-26 Thread Andres Gomez
Jason, it doesn't seem like this patch has landed in master. Does it still
need review, or has it been superseded?

Thanks!

On Wed, 2017-05-10 at 16:08 -0700, Jason Ekstrand wrote:
> One of the key invariants of the relocation system is the
> presumed_offset field.  The assumption is made that the value currently
> in the address to be relocated agrees with the presumed_offset field.
> If presumed_offset is equal to the offset of the BO, the kernel will
> skip the relocation assuming that the value is already correct.
> 
> Our initial implementation of relocation handling had a race where we
> would read bo->offset once when we wrote the relocation entry and again
> when we filled out the actual address.
> 
> Found with helgrind
> 
> Cc: "17.0 17.1" 
> ---
>  src/intel/vulkan/anv_batch_chain.c | 21 +
>  src/intel/vulkan/anv_private.h |  2 +-
>  src/intel/vulkan/genX_blorp_exec.c |  5 -
>  src/intel/vulkan/genX_cmd_buffer.c |  7 +--
>  4 files changed, 27 insertions(+), 8 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_batch_chain.c 
> b/src/intel/vulkan/anv_batch_chain.c
> index 9def174..13303b1 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -143,7 +143,8 @@ anv_reloc_list_grow(struct anv_reloc_list *list,
>  VkResult
>  anv_reloc_list_add(struct anv_reloc_list *list,
> const VkAllocationCallbacks *alloc,
> -   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
> +   uint32_t offset, struct anv_bo *target_bo, uint32_t delta,
> +   uint64_t *bo_offset_out)
>  {
> struct drm_i915_gem_relocation_entry *entry;
> int index;
> @@ -155,6 +156,14 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> if (result != VK_SUCCESS)
>return result;
>  
> +   /* Read the BO offset once.  This same value will be used in the 
> relocation
> +* entry and passed back to the caller for it to use when it writes the
> +* actual value.  This guarantees that the two values match even if there
> +* is a data race between now and when the caller gets around to writing
> +* the address into the BO.
> +*/
> +   uint64_t presumed_offset = target_bo->offset;
> +
> /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
> index = list->num_relocs++;
> list->reloc_bos[index] = target_bo;
> @@ -162,11 +171,13 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> entry->target_handle = target_bo->gem_handle;
> entry->delta = delta;
> entry->offset = offset;
> -   entry->presumed_offset = target_bo->offset;
> +   entry->presumed_offset = presumed_offset;
> entry->read_domains = domain;
> entry->write_domain = domain;
> VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
>  
> +   *bo_offset_out = presumed_offset;
> +
> return VK_SUCCESS;
>  }
>  
> @@ -218,14 +229,16 @@ uint64_t
>  anv_batch_emit_reloc(struct anv_batch *batch,
>   void *location, struct anv_bo *bo, uint32_t delta)
>  {
> +   uint64_t bo_offset;
> VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
> -location - batch->start, bo, delta);
> +location - batch->start, bo, delta,
> +&bo_offset);
> if (result != VK_SUCCESS) {
>anv_batch_set_error(batch, result);
>return 0;
> }
>  
> -   return bo->offset + delta;
> +   return bo_offset + delta;
>  }
>  
>  void
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 9b0dd67..1686da8 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -825,7 +825,7 @@ void anv_reloc_list_finish(struct anv_reloc_list *list,
>  VkResult anv_reloc_list_add(struct anv_reloc_list *list,
>  const VkAllocationCallbacks *alloc,
>  uint32_t offset, struct anv_bo *target_bo,
> -uint32_t delta);
> +uint32_t delta, uint64_t *bo_offset_out);
>  
>  struct anv_batch_bo {
> /* Link in the anv_cmd_buffer.owned_batch_bos list */
> diff --git a/src/intel/vulkan/genX_blorp_exec.c 
> b/src/intel/vulkan/genX_blorp_exec.c
> index 71ed707..513c269 100644
> --- a/src/intel/vulkan/genX_blorp_exec.c
> +++ b/src/intel/vulkan/genX_blorp_exec.c
> @@ -57,9 +57,12 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t 
> ss_offset,
>  struct blorp_address address, uint32_t delta)
>  {
> struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
> +   MAYBE_UNUSED uint64_t bo_offset;
> +
> VkResult result =
>anv_reloc_list_add(_buffer->surface_relocs, 
> _buffer->pool->alloc,
> - ss_offset, address.buffer, address.offset + delta);
> + ss_offset, address.buffer, 

Re: [Mesa-dev] [PATCH 25/30] i965/miptree: Allocate mcs_buf for an image's CCS

2017-06-26 Thread Pohjolainen, Topi
On Mon, Jun 26, 2017 at 10:30:40PM +0300, Pohjolainen, Topi wrote:
> On Fri, Jun 16, 2017 at 03:41:47PM -0700, Jason Ekstrand wrote:
> > From: Ben Widawsky 
> > 
> > This code will disable actually creating these buffers for the scanout,
> > but it puts the allocation in place.
> > 
> > Primarily this patch is split out for review, it can be squashed in
> > later if preferred.
> > 
> > v2:
> > assert(mt->offset == 0) in ccs creation (as requested by Topi)
> > Remove bogus is_scanout check in miptree_release
> > 
> > v3:
> > Remove is_scanout assert in intel_miptree_create. It doesn't work with
> > latest codebase - not sure it ever should have worked.
> > 
> > v4:
> > assert(mt->last_level == 0) and assert(mt->first_level == 0) in ccs setup
> > (Topi)
> > 
> > v5 (Jason Ekstrand):
> >  - Base the decision to allocate a CCS on the image modifier
> > 
> > Signed-off-by: Ben Widawsky 
> > Acked-by: Daniel Stone 
> > Reviewed-by: Topi Pohjolainen 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 123 
> > +++---
> >  1 file changed, 113 insertions(+), 10 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index e3de386..608317a 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -59,6 +59,11 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> >  struct intel_mipmap_tree *mt,
> >  GLuint num_samples);
> >  
> > +static void
> > +intel_miptree_init_mcs(struct brw_context *brw,
> > +   struct intel_mipmap_tree *mt,
> > +   int init_value);
> > +
> >  /**
> >   * Determine which MSAA layout should be used by the MSAA surface being
> >   * created, based on the chip generation and the surface type.
> > @@ -886,27 +891,99 @@ miptree_create_for_planar_image(struct brw_context 
> > *brw,
> > return planar_mt;
> >  }
> >  
> > +static bool
> > +create_ccs_buf_for_image(struct brw_context *brw,
> > + __DRIimage *image,
> > + struct intel_mipmap_tree *mt,
> > + enum isl_aux_state initial_state)
> > +{
> > +   struct isl_surf temp_main_surf, temp_ccs_surf;
> > +
> > +   /* There isn't anything specifically wrong with there being an offset, 
> > in
> > +* which case, the CCS miptree's offset should be mt->offset +
> > +* image->aux_offset. However, the code today only will have an offset 
> > when
> > +* this miptree is pointing to a slice from another miptree, and in 
> > that case
> > +* we'd need to offset within the AUX CCS buffer properly. It's 
> > questionable
> > +* whether our code handles that case properly, and since it can never 
> > happen
> > +* for scanout, just use the assertion to prevent it.
> > +*/
> > +   assert(mt->offset == 0);
> > +
> > +   /* CCS is only supported for very simple miptrees */
> > +   assert(image->aux_offset && image->aux_pitch);
> > +   assert(image->tile_x == 0 && image->tile_y == 0);
> > +   assert(mt->num_samples <= 1);
> > +   assert(mt->first_level == 0);
> > +   assert(mt->last_level == 0);
> > +   assert(mt->logical_depth0 == 1);
> > +
> > +   /* We shouldn't already have a CCS */
> > +   assert(!mt->mcs_buf);
> > +
> > +   intel_miptree_get_isl_surf(brw, mt, _main_surf);
> > +   if (!isl_surf_get_ccs_surf(>isl_dev, _main_surf, 
> > _ccs_surf))
> > +  return false;
> > +
> > +   assert(temp_ccs_surf.size <= image->bo->size - image->aux_offset);
> > +   assert(temp_ccs_surf.row_pitch <= image->aux_pitch);
> > +
> > +   mt->mcs_buf = calloc(sizeof(*mt->mcs_buf), 1);
> > +   if (mt->mcs_buf == NULL)
> > +  return false;
> > +
> > +   mt->aux_state = create_aux_state_map(mt, initial_state);
> > +   if (!mt->aux_state) {
> > +  free(mt->mcs_buf);
> > +  mt->mcs_buf = NULL;
> > +  return false;
> > +   }
> > +
> > +   mt->mcs_buf->bo = image->bo;
> > +   brw_bo_reference(image->bo);
> > +
> > +   mt->mcs_buf->offset = image->aux_offset;
> > +   mt->mcs_buf->size = image->bo->size - image->aux_offset;
> > +   mt->mcs_buf->pitch = image->aux_pitch;
> 
> I wonder if it would be more correct to use temp_ccs_surf.size and
> temp_ccs_surf.row_pitch instead?

And again, I started reading the next patch and it made me reconsider this.
Of course they need to be the given values, so please ignore my comment.

> 
> > +   mt->mcs_buf->qpitch = 0;
> > +
> > +   intel_miptree_init_mcs(brw, mt, 0);
> > +   mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
> > +
> > +   return true;
> > +}
> > +
> >  struct intel_mipmap_tree *
> >  intel_miptree_create_for_dri_image(struct brw_context *brw,
> > __DRIimage *image, GLenum target,
> > mesa_format format,
> > 

Re: [Mesa-dev] [PATCH] mesa/st: Reduce the number of frontbuffer flush calls

2017-06-26 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Thu, Jun 22, 2017 at 1:00 PM, Thomas Hellstrom  wrote:
> The mesa state tracker was needlessly flushing the front buffer even if it
> hadn't been drawn to since the last flush. This was happening during
> glXSwapBuffers if we at some point previously had set that frontbuffer as
> a read- or draw renderbuffer, or at glFlush() or glFinish() if we at some
> point previously had rendered to the front buffer. Since the frontbuffer
> flush typically means a full drawable copy, it's a pretty big waste.
>
> Signed-off-by: Thomas Hellstrom 
> ---
>  src/mesa/state_tracker/st_cb_flush.c | 37 
> ++--
>  src/mesa/state_tracker/st_manager.c  | 12 
>  2 files changed, 10 insertions(+), 39 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_cb_flush.c 
> b/src/mesa/state_tracker/st_cb_flush.c
> index e8c6672..5a26018 100644
> --- a/src/mesa/state_tracker/st_cb_flush.c
> +++ b/src/mesa/state_tracker/st_cb_flush.c
> @@ -46,35 +46,6 @@
>  #include "util/u_gen_mipmap.h"
>
>
> -/** Check if we have a front color buffer and if it's been drawn to. */
> -static inline GLboolean
> -is_front_buffer_dirty(struct st_context *st)
> -{
> -   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
> -   struct st_renderbuffer *strb
> -  = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
> -   return strb && strb->defined;
> -}
> -
> -
> -/**
> - * Tell the screen to display the front color buffer on-screen.
> - */
> -static void
> -display_front_buffer(struct st_context *st)
> -{
> -   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
> -   struct st_renderbuffer *strb
> -  = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
> -
> -   if (strb) {
> -  /* Hook for copying "fake" frontbuffer if necessary:
> -   */
> -  st_manager_flush_frontbuffer(st);
> -   }
> -}
> -
> -
>  void st_flush(struct st_context *st,
>struct pipe_fence_handle **fence,
>unsigned flags)
> @@ -122,9 +93,7 @@ static void st_glFlush(struct gl_context *ctx)
>  */
> st_flush(st, NULL, 0);
>
> -   if (is_front_buffer_dirty(st)) {
> -  display_front_buffer(st);
> -   }
> +   st_manager_flush_frontbuffer(st);
>  }
>
>
> @@ -137,9 +106,7 @@ static void st_glFinish(struct gl_context *ctx)
>
> st_finish(st);
>
> -   if (is_front_buffer_dirty(st)) {
> -  display_front_buffer(st);
> -   }
> +   st_manager_flush_frontbuffer(st);
>  }
>
>
> diff --git a/src/mesa/state_tracker/st_manager.c 
> b/src/mesa/state_tracker/st_manager.c
> index 9978e3f..ea67097 100644
> --- a/src/mesa/state_tracker/st_manager.c
> +++ b/src/mesa/state_tracker/st_manager.c
> @@ -839,11 +839,15 @@ st_manager_flush_frontbuffer(struct st_context *st)
>
> if (stfb)
>strb = 
> st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
> -   if (!strb)
> -  return;
>
> -   /* never a dummy fb */
> -   stfb->iface->flush_front(>iface, stfb->iface, 
> ST_ATTACHMENT_FRONT_LEFT);
> +   /* Do we have a front color buffer and has it been drawn to since last
> +* frontbuffer flush?
> +*/
> +   if (strb && strb->defined) {
> +  stfb->iface->flush_front(>iface, stfb->iface,
> +   ST_ATTACHMENT_FRONT_LEFT);
> +  strb->defined = GL_FALSE;
> +   }
>  }
>
>  /**
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 25/30] i965/miptree: Allocate mcs_buf for an image's CCS

2017-06-26 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:47PM -0700, Jason Ekstrand wrote:
> From: Ben Widawsky 
> 
> This code will disable actually creating these buffers for the scanout,
> but it puts the allocation in place.
> 
> Primarily this patch is split out for review, it can be squashed in
> later if preferred.
> 
> v2:
> assert(mt->offset == 0) in ccs creation (as requested by Topi)
> Remove bogus is_scanout check in miptree_release
> 
> v3:
> Remove is_scanout assert in intel_miptree_create. It doesn't work with
> latest codebase - not sure it ever should have worked.
> 
> v4:
> assert(mt->last_level == 0) and assert(mt->first_level == 0) in ccs setup
> (Topi)
> 
> v5 (Jason Ekstrand):
>  - Base the decision to allocate a CCS on the image modifier
> 
> Signed-off-by: Ben Widawsky 
> Acked-by: Daniel Stone 
> Reviewed-by: Topi Pohjolainen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 123 
> +++---
>  1 file changed, 113 insertions(+), 10 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index e3de386..608317a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -59,6 +59,11 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>  struct intel_mipmap_tree *mt,
>  GLuint num_samples);
>  
> +static void
> +intel_miptree_init_mcs(struct brw_context *brw,
> +   struct intel_mipmap_tree *mt,
> +   int init_value);
> +
>  /**
>   * Determine which MSAA layout should be used by the MSAA surface being
>   * created, based on the chip generation and the surface type.
> @@ -886,27 +891,99 @@ miptree_create_for_planar_image(struct brw_context *brw,
> return planar_mt;
>  }
>  
> +static bool
> +create_ccs_buf_for_image(struct brw_context *brw,
> + __DRIimage *image,
> + struct intel_mipmap_tree *mt,
> + enum isl_aux_state initial_state)
> +{
> +   struct isl_surf temp_main_surf, temp_ccs_surf;
> +
> +   /* There isn't anything specifically wrong with there being an offset, in
> +* which case, the CCS miptree's offset should be mt->offset +
> +* image->aux_offset. However, the code today only will have an offset 
> when
> +* this miptree is pointing to a slice from another miptree, and in that 
> case
> +* we'd need to offset within the AUX CCS buffer properly. It's 
> questionable
> +* whether our code handles that case properly, and since it can never 
> happen
> +* for scanout, just use the assertion to prevent it.
> +*/
> +   assert(mt->offset == 0);
> +
> +   /* CCS is only supported for very simple miptrees */
> +   assert(image->aux_offset && image->aux_pitch);
> +   assert(image->tile_x == 0 && image->tile_y == 0);
> +   assert(mt->num_samples <= 1);
> +   assert(mt->first_level == 0);
> +   assert(mt->last_level == 0);
> +   assert(mt->logical_depth0 == 1);
> +
> +   /* We shouldn't already have a CCS */
> +   assert(!mt->mcs_buf);
> +
> +   intel_miptree_get_isl_surf(brw, mt, _main_surf);
> +   if (!isl_surf_get_ccs_surf(>isl_dev, _main_surf, 
> _ccs_surf))
> +  return false;
> +
> +   assert(temp_ccs_surf.size <= image->bo->size - image->aux_offset);
> +   assert(temp_ccs_surf.row_pitch <= image->aux_pitch);
> +
> +   mt->mcs_buf = calloc(sizeof(*mt->mcs_buf), 1);
> +   if (mt->mcs_buf == NULL)
> +  return false;
> +
> +   mt->aux_state = create_aux_state_map(mt, initial_state);
> +   if (!mt->aux_state) {
> +  free(mt->mcs_buf);
> +  mt->mcs_buf = NULL;
> +  return false;
> +   }
> +
> +   mt->mcs_buf->bo = image->bo;
> +   brw_bo_reference(image->bo);
> +
> +   mt->mcs_buf->offset = image->aux_offset;
> +   mt->mcs_buf->size = image->bo->size - image->aux_offset;
> +   mt->mcs_buf->pitch = image->aux_pitch;

I wonder if it would be more correct to use temp_ccs_surf.size and
temp_ccs_surf.row_pitch instead?

> +   mt->mcs_buf->qpitch = 0;
> +
> +   intel_miptree_init_mcs(brw, mt, 0);
> +   mt->msaa_layout = INTEL_MSAA_LAYOUT_CMS;
> +
> +   return true;
> +}
> +
>  struct intel_mipmap_tree *
>  intel_miptree_create_for_dri_image(struct brw_context *brw,
> __DRIimage *image, GLenum target,
> mesa_format format,
> bool is_winsys_image)
>  {
> +   uint32_t mt_layout_flags = 0;
> +
> if (image->planar_format && image->planar_format->nplanes > 0)
>return miptree_create_for_planar_image(brw, image, target);
>  
> if (!brw->ctx.TextureFormatSupported[format])
>return NULL;
>  
> +   const struct isl_drm_modifier_info *mod_info =
> +  isl_drm_modifier_get_info(image->modifier);
> +
> +   /* If this image comes in from a window 

Re: [Mesa-dev] [PATCH 18/92] gallium: add PIPE_CAP_NIR_LOWER_IO

2017-06-26 Thread Connor Abbott
Forgot to mention... I think I'll take a pass at doing this today.
It'll distract me from some weird bug I've been trying to fight with
the AMD_shader_ballot stuff :)

On Mon, Jun 26, 2017 at 12:24 PM, Connor Abbott  wrote:
> So, I think that rather than doing this, we should make radv call
> nir_lower_io instead. There's currently a bunch of code in the
> NIR-to-LLVM translation to calculate dereference offsets and split up
> loads, which is just silly - use the lowering pass instead!
>
> On Mon, Jun 26, 2017 at 7:09 AM, Nicolai Hähnle  wrote:
>> From: Nicolai Hähnle 
>>
>> Existing NIR drivers return 1 here, since that reflects the current
>> behavior.
>> ---
>>  src/gallium/docs/source/screen.rst   | 5 +
>>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>>  src/gallium/include/pipe/p_defines.h | 1 +
>>  17 files changed, 21 insertions(+)
>>
>> diff --git a/src/gallium/docs/source/screen.rst 
>> b/src/gallium/docs/source/screen.rst
>> index b375e53..9dab38b 100644
>> --- a/src/gallium/docs/source/screen.rst
>> +++ b/src/gallium/docs/source/screen.rst
>> @@ -391,20 +391,25 @@ The integer capabilities:
>>shader outputs.
>>  * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
>>PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
>>  * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
>>  * ``PIPE_CAP_POST_DEPTH_COVERAGE``: whether
>>``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
>>  * ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are
>>supported.
>>  * ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should
>>reference texture and sampler as NIR derefs instead of by indices.
>> +* ``PIPE_CAP_NIR_LOWER_IO``: Whether the nir_lower_io pass should be run to
>> +  replace variable accesses by the corresponding intrinsics. Note that when
>> +  this is false, the state tracker can no longer leave accesses to
>> +  default-block uniforms in the shader, meaning that 
>> nir_lower_uniforms_to_ubo
>> +  is used.
>>
>>
>>  .. _pipe_capf:
>>
>>  PIPE_CAPF_*
>>  
>>
>>  The floating-point capabilities are:
>>
>>  * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
>> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
>> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> index 85b7e91..7f2a231 100644
>> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
>> @@ -252,20 +252,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, 
>> enum pipe_cap param)
>> case PIPE_CAP_TGSI_CLOCK:
>> case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
>> case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
>> case PIPE_CAP_TGSI_BALLOT:
>> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
>> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
>> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
>> case PIPE_CAP_POST_DEPTH_COVERAGE:
>> case PIPE_CAP_BINDLESS_TEXTURE:
>> case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
>> +   case PIPE_CAP_NIR_LOWER_IO:
>>return 0;
>>
>> /* Stream output. */
>> case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
>> case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
>> case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
>> case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
>>return 0;
>>
>> /* Geometry shader output, unsupported. */
>> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
>> b/src/gallium/drivers/freedreno/freedreno_screen.c
>> index 6380996..8fab5fd 100644
>> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
>> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
>> @@ -174,20 +174,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
>> pipe_cap param)
>> case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
>> case PIPE_CAP_SEAMLESS_CUBE_MAP:
>> case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
>> case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
>> case 

Re: [Mesa-dev] [PATCH 18/92] gallium: add PIPE_CAP_NIR_LOWER_IO

2017-06-26 Thread Connor Abbott
So, I think that rather than doing this, we should make radv call
nir_lower_io instead. There's currently a bunch of code in the
NIR-to-LLVM translation to calculate dereference offsets and split up
loads, which is just silly - use the lowering pass instead!

On Mon, Jun 26, 2017 at 7:09 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Existing NIR drivers return 1 here, since that reflects the current
> behavior.
> ---
>  src/gallium/docs/source/screen.rst   | 5 +
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>  src/gallium/include/pipe/p_defines.h | 1 +
>  17 files changed, 21 insertions(+)
>
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index b375e53..9dab38b 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -391,20 +391,25 @@ The integer capabilities:
>shader outputs.
>  * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
>PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
>  * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
>  * ``PIPE_CAP_POST_DEPTH_COVERAGE``: whether
>``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
>  * ``PIPE_CAP_BINDLESS_TEXTURE``: Whether bindless texture operations are
>supported.
>  * ``PIPE_CAP_NIR_SAMPLERS_AS_DEREF``: Whether NIR tex instructions should
>reference texture and sampler as NIR derefs instead of by indices.
> +* ``PIPE_CAP_NIR_LOWER_IO``: Whether the nir_lower_io pass should be run to
> +  replace variable accesses by the corresponding intrinsics. Note that when
> +  this is false, the state tracker can no longer leave accesses to
> +  default-block uniforms in the shader, meaning that 
> nir_lower_uniforms_to_ubo
> +  is used.
>
>
>  .. _pipe_capf:
>
>  PIPE_CAPF_*
>  
>
>  The floating-point capabilities are:
>
>  * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index 85b7e91..7f2a231 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -252,20 +252,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_CLOCK:
> case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
> case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
> case PIPE_CAP_TGSI_BALLOT:
> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> case PIPE_CAP_POST_DEPTH_COVERAGE:
> case PIPE_CAP_BINDLESS_TEXTURE:
> case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
> +   case PIPE_CAP_NIR_LOWER_IO:
>return 0;
>
> /* Stream output. */
> case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
> case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
> case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
> case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
>return 0;
>
> /* Geometry shader output, unsupported. */
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
> b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 6380996..8fab5fd 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -174,20 +174,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
> case PIPE_CAP_SEAMLESS_CUBE_MAP:
> case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
> case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
> case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
> case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
> case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
> case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
> case PIPE_CAP_STRING_MARKER:
> case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
> +   case PIPE_CAP_NIR_LOWER_IO:
> 

Re: [Mesa-dev] [PATCH 14/30] i965/miptree: Add support for window system images to create_for_dri_image

2017-06-26 Thread Pohjolainen, Topi
On Mon, Jun 26, 2017 at 09:27:53PM +0300, Pohjolainen, Topi wrote:
> On Fri, Jun 16, 2017 at 03:41:36PM -0700, Jason Ekstrand wrote:
> > We want to start using create_for_dri_image for all miptrees created
> > from __DRIimage, including those which come from a window system.  In
> > order to allow for fast clears to still work on window system buffers,
> > we need to allow for creating aux surfaces.
> > ---
> >  src/mesa/drivers/dri/i965/intel_fbo.c |  2 +-
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 16 +---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  3 ++-
> >  src/mesa/drivers/dri/i965/intel_tex_image.c   |  2 +-
> >  4 files changed, 17 insertions(+), 6 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> > b/src/mesa/drivers/dri/i965/intel_fbo.c
> > index 130eab1..db4cfee 100644
> > --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> > +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> > @@ -363,7 +363,7 @@ intel_image_target_renderbuffer_storage(struct 
> > gl_context *ctx,
> >  * content.
> >  */
> > irb->mt = intel_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D,
> > -image->format);
> > +image->format, false);
> > if (!irb->mt)
> >return;
> >  
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 7b4d431..83c99ed 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -901,7 +901,8 @@ miptree_create_for_planar_image(struct brw_context *brw,
> >  struct intel_mipmap_tree *
> >  intel_miptree_create_for_dri_image(struct brw_context *brw,
> > __DRIimage *image, GLenum target,
> > -   mesa_format format)
> > +   mesa_format format,
> > +   bool is_winsys_image)
> >  {
> > if (image->planar_format && image->planar_format->nplanes > 0)
> >return miptree_create_for_planar_image(brw, image, target);
> > @@ -909,6 +910,16 @@ intel_miptree_create_for_dri_image(struct brw_context 
> > *brw,
> > if (!brw->ctx.TextureFormatSupported[format])
> >return NULL;
> >  
> > +   /* If this image comes in from a window system, we have different
> > +* requirements than if it comes in via an EGL import operation.  Window
> > +* system images can use any form of auxiliary compression we wish 
> > because
> > +* they get "flushed" before being handed off to the window system and 
> > we
> > +* have the opportunity to do resolves.  Window system buffers also may 
> > be
> > +* used for scanout so we need to flag that appropriately.
> > +*/
> > +   const uint32_t mt_layout_flags =
> > +  is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : 
> > MIPTREE_LAYOUT_DISABLE_AUX;
> 
> Is there any particular reason why we couldn't pass 'layout_flags' directly instead
> of 'is_winsys_image'? That would work at least for the next patch in the
> series.

Okay, I just read patch 25. Ignore this comment.

> 
> > +
> > /* Disable creation of the texture's aux buffers because the driver 
> > exposes
> >  * no EGL API to manage them. That is, there is no API for resolving 
> > the aux
> >  * buffer's content to the main buffer nor for invalidating the aux 
> > buffer's
> > @@ -917,8 +928,7 @@ intel_miptree_create_for_dri_image(struct brw_context 
> > *brw,
> > struct intel_mipmap_tree *mt =
> >intel_miptree_create_for_bo(brw, image->bo, format,
> >image->offset, image->width, 
> > image->height, 1,
> > -  image->pitch,
> > -  MIPTREE_LAYOUT_DISABLE_AUX);
> > +  image->pitch, mt_layout_flags);
> > if (mt == NULL)
> >return NULL;
> >  
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > index 8044a1b..2a4cda2 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> > @@ -700,7 +700,8 @@ struct intel_mipmap_tree *
> >  intel_miptree_create_for_dri_image(struct brw_context *brw,
> > __DRIimage *image,
> > GLenum target,
> > -   mesa_format format);
> > +   mesa_format format,
> > +   bool is_winsys_image);
> >  
> >  bool
> >  intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
> > diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
> > b/src/mesa/drivers/dri/i965/intel_tex_image.c
> > index 76a6e13..53e1087 100644
> > --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> > +++ 

Re: [Mesa-dev] gallium: Reduce trace_dump_box_bytes size by box->x.

2017-06-26 Thread Marek Olšák
In my opinion, dumping resources isn't very useful. I think it would
be better to remove that completely.

Marek

On Mon, Jun 26, 2017 at 6:28 PM, Cherniak, Bruce
 wrote:
> Back in February, I submitted a patch for review to address a crash in 
> GALLIUM_TRACE.
>
> It never got a review, and I forgot to follow up on it.  Is this a correct 
> fix and useful to anyone
> else?
>
> Thanks,
> Bruce
>
> 
>
> From patchwork Wed Feb  1 20:20:38 2017
> Content-Type: text/plain; charset="utf-8"
> MIME-Version: 1.0
> Content-Transfer-Encoding: 7bit
> Subject: [Mesa-dev] gallium: Reduce trace_dump_box_bytes size by box->x.
> From: Bruce Cherniak 
> X-Patchwork-Id: 136378
> Message-Id: <1485980438-102650-1-git-send-email-bruce.chern...@intel.com>
> To: mesa-dev@lists.freedesktop.org
> Date: Wed,  1 Feb 2017 14:20:38 -0600
>
> If stride is supplied (as either stride or slice_stride),
> trace_dump_box_bytes will try to read stride bytes, regardless of whether
> the start address is offset by box->x.  This causes access outside the
> mapped region, and a possible segv. (transfer_map stride and layer_stride
> are not adjusted for box dimensions)
>
> Note:  trace_dump_box_bytes only dumps PIPE_BUFFER resources, so there
> shouldn't be any complicated boxes.  trace_dump_bytes doesn't handle them
> anyway.
> ---
>  src/gallium/drivers/trace/tr_dump.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/trace/tr_dump.c 
> b/src/gallium/drivers/trace/tr_dump.c
> index b173b8a..591e273 100644
> --- a/src/gallium/drivers/trace/tr_dump.c
> +++ b/src/gallium/drivers/trace/tr_dump.c
> @@ -510,11 +510,13 @@ void trace_dump_box_bytes(const void *data,
>size = 0;
> } else {
>enum pipe_format format = resource->format;
> -  if (slice_stride)
> +  if (slice_stride) {
> + slice_stride -= util_format_get_blockwidth(format) * box->x;
>   size = box->depth * slice_stride;
> -  else if (stride)
> +  } else if (stride) {
> + stride -= util_format_get_blockwidth(format) * box->x;
>   size = util_format_get_nblocksy(format, box->height) * stride;
> -  else {
> +  } else {
>   size = util_format_get_nblocksx(format, box->width) * 
> util_format_get_blocksize(format);
>}
> }
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/92] st/nir: make nir_lower_tex configurable

2017-06-26 Thread Rob Clark
So the solution to this so far has been to call this both from the driver
and from mesa/st.  (The mesa/st call to nir_lower_tex is only in the
rare case of gles + yuv EGLImageExternal, i.e. basically a hack because
android expects YUV texture support.  So I guess not worth optimizing
for.)

BR,
-R

On Mon, Jun 26, 2017 at 10:09 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> ---
>  src/mesa/state_tracker/st_program.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_program.c 
> b/src/mesa/state_tracker/st_program.c
> index eb44fc5..a3b8bbf 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -1072,22 +1072,31 @@ st_create_fp_variant(struct st_context *st,
> sizeof(options.bias_state_tokens));
>   }
>
>   _mesa_add_state_reference(params, texcoord_state);
>   memcpy(options.texcoord_state_tokens, texcoord_state,
>  sizeof(options.texcoord_state_tokens));
>
>   NIR_PASS_V(tgsi.ir.nir, nir_lower_drawpixels, );
>}
>
> -  if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
> - nir_lower_tex_options options = {0};
> +  const nir_lower_tex_options *screen_lower_tex_options =
> + st->pipe->screen->get_struct_param(st->pipe->screen,
> +
> PIPE_STRUCT_CAP_NIR_LOWER_TEX_OPTIONS);
> +
> +  if (screen_lower_tex_options ||
> +  unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
> + nir_lower_tex_options options;
> + if (screen_lower_tex_options)
> +memcpy(, screen_lower_tex_options, sizeof(options));
> + else
> +memset(, 0, sizeof(options));
>   options.lower_y_uv_external = key->external.lower_nv12;
>   options.lower_y_u_v_external = key->external.lower_iyuv;
>   NIR_PASS_V(tgsi.ir.nir, nir_lower_tex, );
>}
>
>st_finalize_nir(st, >Base, tgsi.ir.nir);
>
>if (unlikely(key->external.lower_nv12 || key->external.lower_iyuv)) {
>   /* This pass needs to happen *after* nir_lower_sampler */
>   NIR_PASS_V(tgsi.ir.nir, st_nir_lower_tex_src_plane,
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/92] nir: add nir_lower_uniforms_to_ubo pass

2017-06-26 Thread Connor Abbott
I think this would probably be a lot simpler if it ran after
nir_lower_io, since you wouldn't need to do all the dereference
munging to compute the offset. That is, instead of translating
load_var intrinsics, it would translate load_uniform intrinsics into
load_ubo intrinsics. Also, it would mean that this would become
entirely a radeonsi-specific thing, instead of having to add a special
hook to the driver-independent state tracker code.

On Mon, Jun 26, 2017 at 7:09 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> This is an alternative lowering of default-block uniforms that doesn't
> use uniform load intrinsics. Instead, it translates the loads to UBO
> loads, allowing a simpler backend.
> ---
>  src/compiler/Makefile.sources|   1 +
>  src/compiler/nir/nir.h   |   1 +
>  src/compiler/nir/nir_lower_uniforms_to_ubo.c | 140 
> +++
>  3 files changed, 142 insertions(+)
>  create mode 100644 src/compiler/nir/nir_lower_uniforms_to_ubo.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 5f4440b..a0a0d62 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -230,20 +230,21 @@ NIR_FILES = \
> nir/nir_lower_patch_vertices.c \
> nir/nir_lower_phis_to_scalar.c \
> nir/nir_lower_regs_to_ssa.c \
> nir/nir_lower_returns.c \
> nir/nir_lower_samplers.c \
> nir/nir_lower_samplers_as_deref.c \
> nir/nir_lower_system_values.c \
> nir/nir_lower_tex.c \
> nir/nir_lower_to_source_mods.c \
> nir/nir_lower_two_sided_color.c \
> +   nir/nir_lower_uniforms_to_ubo.c \
> nir/nir_lower_vars_to_ssa.c \
> nir/nir_lower_var_copies.c \
> nir/nir_lower_vec_to_movs.c \
> nir/nir_lower_wpos_center.c \
> nir/nir_lower_wpos_ytransform.c \
> nir/nir_metadata.c \
> nir/nir_move_vec_src_uses_to_dest.c \
> nir/nir_normalize_cubemap_coords.c \
> nir/nir_opt_conditional_discard.c \
> nir/nir_opt_constant_folding.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 779bf58..c21ecc5 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2568,20 +2568,21 @@ void nir_lower_drawpixels(nir_shader *shader,
>  typedef struct nir_lower_bitmap_options {
> unsigned sampler;
> bool swizzle_;
>  } nir_lower_bitmap_options;
>
>  void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options 
> *options);
>
>  bool nir_lower_atomics(nir_shader *shader,
> const struct gl_shader_program *shader_program);
>  bool nir_lower_atomics_to_ssbo(nir_shader *shader, unsigned ssbo_offset);
> +bool nir_lower_uniforms_to_ubo(nir_shader *shader);
>  bool nir_lower_to_source_mods(nir_shader *shader);
>
>  bool nir_lower_gs_intrinsics(nir_shader *shader);
>
>  typedef enum {
> nir_lower_imul64 = (1 << 0),
> nir_lower_isign64 = (1 << 1),
> /** Lower all int64 modulus and division opcodes */
> nir_lower_divmod64 = (1 << 2),
>  } nir_lower_int64_options;
> diff --git a/src/compiler/nir/nir_lower_uniforms_to_ubo.c 
> b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
> new file mode 100644
> index 000..63f3bc8
> --- /dev/null
> +++ b/src/compiler/nir/nir_lower_uniforms_to_ubo.c
> @@ -0,0 +1,140 @@
> +/*
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * on the rights to use, copy, modify, merge, publish, distribute, sub
> + * license, and/or sell copies of the Software, and to permit persons to whom
> + * the Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/*
> + * Remap uniform variable accesses to UBO accesses of UBO binding point 0, 
> with
> + * the offset determined by std140 layout, with the base address given by
> + * variable->data.driver_location (in 16-byte units).
> + *
> + * Simultaneously, remap existing UBO accesses by increasing their binding
> + * point by 1.
> 

[Mesa-dev] [PATCH 2/4] i965/miptree: Tighten up finish_mcs_write

2017-06-26 Thread Jason Ekstrand
Multisample surfaces only have a single miplevel so there's no reason to
be passing the extra parameters around.  It only leads to confusion.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index f5391a4..75e192a 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2149,18 +2149,18 @@ intel_miptree_finish_ccs_write(struct brw_context *brw,
 static void
 intel_miptree_finish_mcs_write(struct brw_context *brw,
struct intel_mipmap_tree *mt,
-   uint32_t level, uint32_t layer,
-   bool written_with_aux)
+   uint32_t layer,
+   bool written_with_mcs)
 {
-   switch (intel_miptree_get_aux_state(mt, level, layer)) {
+   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
case ISL_AUX_STATE_CLEAR:
-  assert(written_with_aux);
-  intel_miptree_set_aux_state(brw, mt, level, layer, 1,
+  assert(written_with_mcs);
+  intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
   ISL_AUX_STATE_COMPRESSED_CLEAR);
   break;
 
case ISL_AUX_STATE_COMPRESSED_CLEAR:
-  assert(written_with_aux);
+  assert(written_with_mcs);
   break; /* Nothing to do */
 
case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
@@ -2365,8 +2365,9 @@ intel_miptree_finish_write(struct brw_context *brw,
  return;
 
   if (mt->num_samples > 1) {
+ assert(level == 0);
  for (uint32_t a = 0; a < num_layers; a++) {
-intel_miptree_finish_mcs_write(brw, mt, level, start_layer + a,
+intel_miptree_finish_mcs_write(brw, mt, start_layer + a,
written_with_aux);
  }
   } else {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] intel/blorp: Add a partial resolve pass for MCS

2017-06-26 Thread Jason Ekstrand
---
 src/intel/blorp/blorp.h |   6 +++
 src/intel/blorp/blorp_clear.c   | 105 +++-
 src/intel/blorp/blorp_nir_builder.h | 102 +++
 src/intel/blorp/blorp_priv.h|   1 +
 4 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 src/intel/blorp/blorp_nir_builder.h

diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
index 744c1b1..1a97f24 100644
--- a/src/intel/blorp/blorp.h
+++ b/src/intel/blorp/blorp.h
@@ -191,6 +191,12 @@ blorp_ccs_resolve(struct blorp_batch *batch,
   enum isl_format format,
   enum blorp_fast_clear_op resolve_op);
 
+void
+blorp_mcs_partial_resolve(struct blorp_batch *batch,
+  struct blorp_surf *surf,
+  enum isl_format format,
+  uint32_t start_layer, uint32_t num_layers);
+
 /**
  * For an overview of the HiZ operations, see the following sections of the
  * Sandy Bridge PRM, Volume 1, Part2:
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index efacadf..bdeaa06 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -29,7 +29,7 @@
 #include "blorp_priv.h"
 #include "compiler/brw_eu_defines.h"
 
-#include "compiler/nir/nir_builder.h"
+#include "blorp_nir_builder.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
 
@@ -764,3 +764,106 @@ blorp_ccs_resolve(struct blorp_batch *batch,
 
batch->blorp->exec(batch, );
 }
+
+struct blorp_mcs_partial_resolve_key
+{
+   enum blorp_shader_type shader_type;
+   uint32_t num_samples;
+};
+
+static bool
+blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp,
+struct blorp_params *params)
+{
+   const struct blorp_mcs_partial_resolve_key blorp_key = {
+  .shader_type = BLORP_SHADER_TYPE_MCS_PARTIAL_RESOLVE,
+  .num_samples = params->num_samples,
+   };
+
+   if (blorp->lookup_shader(blorp, _key, sizeof(blorp_key),
+>wm_prog_kernel, >wm_prog_data))
+  return true;
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   nir_builder b;
+   nir_builder_init_simple_shader(, mem_ctx, MESA_SHADER_FRAGMENT, NULL);
+   b.shader->info.name = ralloc_strdup(b.shader, "BLORP-mcs-partial-resolve");
+
+   nir_variable *v_color =
+  BLORP_CREATE_NIR_INPUT(b.shader, clear_color, glsl_vec4_type());
+
+   nir_variable *frag_color =
+  nir_variable_create(b.shader, nir_var_shader_out,
+  glsl_vec4_type(), "gl_FragColor");
+   frag_color->data.location = FRAG_RESULT_COLOR;
+
+   /* Do an MCS fetch and check if it is equal to the magic clear value */
+   nir_ssa_def *mcs =
+  blorp_nir_txf_ms_mcs(, nir_f2i32(, blorp_nir_frag_coord()),
+   nir_load_layer_id());
+   nir_ssa_def *is_clear =
+  blorp_nir_mcs_is_clear_color(, mcs, blorp_key.num_samples);
+
+   /* If we aren't the clear value, discard. */
+   nir_intrinsic_instr *discard =
+  nir_intrinsic_instr_create(b.shader, nir_intrinsic_discard_if);
+   discard->src[0] = nir_src_for_ssa(nir_inot(, is_clear));
+   nir_builder_instr_insert(, >instr);
+
+   nir_copy_var(, frag_color, v_color);
+
+   struct brw_wm_prog_key wm_key;
+   brw_blorp_init_wm_prog_key(_key);
+   wm_key.tex.compressed_multisample_layout_mask = 1;
+   wm_key.tex.msaa_16 = blorp_key.num_samples == 16;
+   wm_key.multisample_fbo = true;
+
+   struct brw_wm_prog_data prog_data;
+   unsigned program_size;
+   const unsigned *program =
+  blorp_compile_fs(blorp, mem_ctx, b.shader, _key, false,
+   _data, _size);
+
+   bool result =
+  blorp->upload_shader(blorp, _key, sizeof(blorp_key),
+   program, program_size,
+   _data.base, sizeof(prog_data),
+   >wm_prog_kernel, >wm_prog_data);
+
+   ralloc_free(mem_ctx);
+   return result;
+}
+
+void
+blorp_mcs_partial_resolve(struct blorp_batch *batch,
+  struct blorp_surf *surf,
+  enum isl_format format,
+  uint32_t start_layer, uint32_t num_layers)
+{
+   struct blorp_params params;
+   blorp_params_init();
+
+   assert(batch->blorp->isl_dev->info->gen >= 7);
+
+   params.x0 = 0;
+   params.y0 = 0;
+   params.x1 = surf->surf->logical_level0_px.width;
+   params.y1 = surf->surf->logical_level0_px.height;
+
+   brw_blorp_surface_info_init(batch->blorp, , surf, 0,
+   start_layer, format, false);
+   brw_blorp_surface_info_init(batch->blorp, , surf, 0,
+   start_layer, format, true);
+
+   params.num_samples = params.dst.surf.samples;
+   params.num_layers = num_layers;
+
+   memcpy(_inputs.clear_color,
+  surf->clear_color.f32, sizeof(float) * 4);
+
+   if (!blorp_params_get_mcs_partial_resolve_kernel(batch->blorp, ))
+  return;
+
+   

[Mesa-dev] [PATCH 4/4] i965/miptree: Partially resolve MCS for texture views

2017-06-26 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c94fb4f..829a4c5 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2511,14 +2511,14 @@ intel_miptree_prepare_texture_slices(struct brw_context 
*brw,
  aux_supported = clear_supported = true;
   } else {
  aux_supported = can_texture_with_ccs(brw, mt, view_format);
-
- /* Clear color is specified as ints or floats and the conversion is
-  * done by the sampler.  If we have a texture view, we would have to
-  * perform the clear color conversion manually.  Just disable clear
-  * color.
-  */
- clear_supported = aux_supported && (mt->format == view_format);
   }
+
+  /* Clear color is specified as ints or floats and the conversion is
+   * done by the sampler.  If we have a texture view, we would have to
+   * perform the clear color conversion manually.  Just disable clear
+   * color.
+   */
+  clear_supported = aux_supported && (mt->format == view_format);
} else if (mt->format == MESA_FORMAT_S_UINT8) {
   aux_supported = clear_supported = false;
} else {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965/miptree: Add support for partially resolving MCS

2017-06-26 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 24 
 src/mesa/drivers/dri/i965/brw_blorp.h |  5 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 40 +--
 3 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 87c9dd4..5744a9c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1060,6 +1060,30 @@ brw_blorp_resolve_color(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
 }
 
+void
+brw_blorp_mcs_partial_resolve(struct brw_context *brw,
+  struct intel_mipmap_tree *mt,
+  uint32_t start_layer, uint32_t num_layers)
+{
+   DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
+   start_layer, start_layer + num_layers - 1);
+
+   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
+   enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
+
+   struct isl_surf isl_tmp[1];
+   struct blorp_surf surf;
+   uint32_t level = 0;
+   blorp_surf_for_miptree(brw, , mt, true, false, 0,
+  , start_layer, num_layers, isl_tmp);
+
+   struct blorp_batch batch;
+   blorp_batch_init(>blorp, , brw, 0);
+   blorp_mcs_partial_resolve(, , isl_format,
+ start_layer, num_layers);
+   blorp_batch_finish();
+}
+
 /**
  * Perform a HiZ or depth resolve operation.
  *
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index 29d5788..c65a68a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -74,6 +74,11 @@ brw_blorp_resolve_color(struct brw_context *brw,
 enum blorp_fast_clear_op resolve_op);
 
 void
+brw_blorp_mcs_partial_resolve(struct brw_context *brw,
+  struct intel_mipmap_tree *mt,
+  uint32_t start_layer, uint32_t num_layers);
+
+void
 intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum blorp_hiz_op op);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 75e192a..c94fb4f 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2147,6 +2147,35 @@ intel_miptree_finish_ccs_write(struct brw_context *brw,
 }
 
 static void
+intel_miptree_prepare_mcs_access(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ uint32_t layer,
+ bool mcs_supported,
+ bool fast_clear_supported)
+{
+   switch (intel_miptree_get_aux_state(mt, 0, layer)) {
+   case ISL_AUX_STATE_CLEAR:
+   case ISL_AUX_STATE_COMPRESSED_CLEAR:
+  assert(mcs_supported);
+  if (!fast_clear_supported) {
+ brw_blorp_mcs_partial_resolve(brw, mt, layer, 1);
+ intel_miptree_set_aux_state(brw, mt, 0, layer, 1,
+ ISL_AUX_STATE_COMPRESSED_NO_CLEAR);
+  }
+  break;
+
+   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
+  assert(mcs_supported);
+  break; /* Nothing to do */
+
+   case ISL_AUX_STATE_RESOLVED:
+   case ISL_AUX_STATE_PASS_THROUGH:
+   case ISL_AUX_STATE_AUX_INVALID:
+  unreachable("Invalid aux state for MCS");
+   }
+}
+
+static void
 intel_miptree_finish_mcs_write(struct brw_context *brw,
struct intel_mipmap_tree *mt,
uint32_t layer,
@@ -2160,10 +2189,10 @@ intel_miptree_finish_mcs_write(struct brw_context *brw,
   break;
 
case ISL_AUX_STATE_COMPRESSED_CLEAR:
+   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
   assert(written_with_mcs);
   break; /* Nothing to do */
 
-   case ISL_AUX_STATE_COMPRESSED_NO_CLEAR:
case ISL_AUX_STATE_RESOLVED:
case ISL_AUX_STATE_PASS_THROUGH:
case ISL_AUX_STATE_AUX_INVALID:
@@ -2317,7 +2346,14 @@ intel_miptree_prepare_access(struct brw_context *brw,
 
   if (mt->num_samples > 1) {
  /* Nothing to do for MSAA */
- assert(aux_supported && fast_clear_supported);
+ assert(start_level == 0 && num_levels == 1);
+ const uint32_t level_layers =
+miptree_layer_range_length(mt, 0, start_layer, num_layers);
+ for (uint32_t a = 0; a < level_layers; a++) {
+intel_miptree_prepare_mcs_access(brw, mt, start_layer + a,
+ aux_supported,
+ fast_clear_supported);
+ }
   } else {
  for (uint32_t l = 0; l < num_levels; l++) {
 const uint32_t level = start_level + l;
-- 

Re: [Mesa-dev] [PATCH] i965: Move surface resolves back to draw/dispatch time

2017-06-26 Thread Kenneth Graunke
On Saturday, June 24, 2017 3:54:59 PM PDT Jason Ekstrand wrote:
> This is effectively a revert of 388f02729bbf88ba104f4f8ee1fdf005a240969c
> though much code has been added since.  Kristian initially moved it to
> try and avoid locking problems with meta-based resolves.  Now that meta
> is gone from the resolve path (for good this time, we hope), we can move
> it back.  The problem with having it in intel_update_state was that the
> UpdateState hook gets called by core mesa directly and all sorts of
> things will cause a UpdateState to get called which may trigger resolves
> at inopportune times.  In particular, it gets called by _mesa_Clear and,
> if we have a HiZ buffer in the INVALID_AUX state, causes a HiZ resolve
> right before the clear which is pointless.  By moving it back to
> try_draw_prims time, we know it will only get called right before a draw
> which is where we want it.
> ---
>  src/mesa/drivers/dri/i965/brw_compute.c |   2 +
>  src/mesa/drivers/dri/i965/brw_context.c | 121 ---
>  src/mesa/drivers/dri/i965/brw_context.h |   2 +
>  src/mesa/drivers/dri/i965/brw_draw.c| 139 
> 
>  4 files changed, 143 insertions(+), 121 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
> b/src/mesa/drivers/dri/i965/brw_compute.c
> index 8046153..2867a14 100644
> --- a/src/mesa/drivers/dri/i965/brw_compute.c
> +++ b/src/mesa/drivers/dri/i965/brw_compute.c
> @@ -188,6 +188,8 @@ brw_dispatch_compute_common(struct gl_context *ctx)
>  
> brw_validate_textures(brw);
>  
> +   brw_predraw_resolve_inputs(brw);
> +
> const int sampler_state_size = 16; /* 16 bytes */
> estimated_buffer_space_needed = 512; /* batchbuffer commands */
> estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index e921a41..0b3fdc6 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -170,39 +170,17 @@ intel_update_framebuffer(struct gl_context *ctx,
>   fb->DefaultGeometry.NumSamples);
>  }
>  
> -static bool
> -intel_disable_rb_aux_buffer(struct brw_context *brw, const struct brw_bo *bo)
> -{
> -   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
> -   bool found = false;
> -
> -   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
> -  const struct intel_renderbuffer *irb =
> - intel_renderbuffer(fb->_ColorDrawBuffers[i]);
> -
> -  if (irb && irb->mt->bo == bo) {
> - found = brw->draw_aux_buffer_disabled[i] = true;
> -  }
> -   }
> -
> -   return found;
> -}
> -
>  static void
>  intel_update_state(struct gl_context * ctx)
>  {
> GLuint new_state = ctx->NewState;
> struct brw_context *brw = brw_context(ctx);
> -   struct intel_texture_object *tex_obj;
> -   struct intel_renderbuffer *depth_irb;
>  
> if (ctx->swrast_context)
>_swrast_InvalidateState(ctx, new_state);
>  
> brw->NewGLState |= new_state;
>  
> -   _mesa_unlock_context_textures(ctx);
> -
> if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
>_mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
>  
> @@ -218,105 +196,6 @@ intel_update_state(struct gl_context * ctx)
>  
> intel_prepare_render(brw);
>  
> -   /* Resolve the depth buffer's HiZ buffer. */
> -   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
> -   if (depth_irb && depth_irb->mt) {
> -  intel_miptree_prepare_depth(brw, depth_irb->mt,
> -  depth_irb->mt_level,
> -  depth_irb->mt_layer,
> -  depth_irb->layer_count);
> -   }
> -
> -   memset(brw->draw_aux_buffer_disabled, 0,
> -  sizeof(brw->draw_aux_buffer_disabled));
> -
> -   /* Resolve depth buffer and render cache of each enabled texture. */
> -   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
> -   for (int i = 0; i <= maxEnabledUnit; i++) {
> -  if (!ctx->Texture.Unit[i]._Current)
> -  continue;
> -  tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
> -  if (!tex_obj || !tex_obj->mt)
> -  continue;
> -
> -  /* We need inte_texture_object::_Format to be valid */
> -  intel_finalize_mipmap_tree(brw, i);

This one line doesn't get moved over...and that's okay, because we already
call brw_validate_textures(brw) which finalizes miptrees.  Both
brw_try_draw_prims and brw_dispatch_compute_common handle this.

Reviewed-by: Kenneth Graunke 

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/30] i965/miptree: Allocate mt earlier in update winsys

2017-06-26 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:34PM -0700, Jason Ekstrand wrote:
> From: Ben Widawsky 
> 
> Allows us to continue utilizing common miptree creation using __DRIimage
> without creating a new DRIimage (for the intel_process_dri2_buffer()
> case).

Just looking at this patch locally, I don't really understand this commit
message. I'll keep on reading in case the answer is later in the series.

> 
> This is a bit ugly, but I think it's the best one can do.
> 
> v2: This patch lets us remove the temporary no_aux variable since mt
> allocation should work correctly now.
> Unref the BO if miptree creation fails (Jason)
> v3: Rebase (Daniel)
> 
> Cc: Jason Ekstrand 
> Signed-off-by: Ben Widawsky 
> Acked-by: Daniel Stone 
> ---
>  src/mesa/drivers/dri/i965/brw_context.c   | 37 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 16 ++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +-
>  3 files changed, 37 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index e963e13..f57045f 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -1611,10 +1611,26 @@ intel_process_dri2_buffer(struct brw_context *brw,
>return;
> }
>  
> -   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
> +   struct intel_mipmap_tree *mt =
> +  intel_miptree_create_for_bo(brw,
> +  bo,
> +  intel_rb_format(rb),
> +  0,
> +  drawable->w,
> +  drawable->h,
> +  1,
> +  buffer->pitch,
> +  MIPTREE_LAYOUT_FOR_SCANOUT);
> +   if (!mt) {
> +  brw_bo_unreference(bo);
> +  return;
> +   }
> +
> +   if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
>   drawable->w, drawable->h,
>   buffer->pitch)) {
>brw_bo_unreference(bo);
> +  intel_miptree_release(&mt);
>return;
> }
>  
> @@ -1672,10 +1688,25 @@ intel_update_image_buffer(struct brw_context *intel,
> if (last_mt && last_mt->bo == buffer->bo)
>return;
>  
> -   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
> +   struct intel_mipmap_tree *mt =
> +  intel_miptree_create_for_bo(intel,
> +  buffer->bo,
> +  intel_rb_format(rb),
> +  0,
> +  buffer->width,
> +  buffer->height,
> +  1,
> +  buffer->pitch,
> +  MIPTREE_LAYOUT_FOR_SCANOUT);
> +   if (!mt)
> +  return;
> +
> +   if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
>   buffer->width, 
> buffer->height,
> - buffer->pitch))
> + buffer->pitch)) {
> +  intel_miptree_release(&mt);
>return;
> +   }
>  
> if (_mesa_is_front_buffer_drawing(fb) &&
> buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 893f13e..08c13fc 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -960,11 +960,10 @@ intel_miptree_create_for_dri_image(struct brw_context 
> *brw,
>  bool
>  intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
>   struct intel_renderbuffer *irb,
> - struct brw_bo *bo,
> + struct intel_mipmap_tree 
> *singlesample_mt,
>   uint32_t width, uint32_t height,
>   uint32_t pitch)
>  {
> -   struct intel_mipmap_tree *singlesample_mt = NULL;
> struct intel_mipmap_tree *multisample_mt = NULL;
> struct gl_renderbuffer *rb = &irb->Base.Base;
> mesa_format format = rb->Format;
> @@ -976,17 +975,7 @@ intel_update_winsys_renderbuffer_miptree(struct 
> brw_context *intel,
> assert(_mesa_get_format_base_format(format) == GL_RGB ||
>_mesa_get_format_base_format(format) == GL_RGBA);
>  
> -   singlesample_mt = intel_miptree_create_for_bo(intel,
> - bo,
> - format,
> - 0,
> -  

Re: [Mesa-dev] [PATCH 14/30] i965/miptree: Add support for window system images to create_for_dri_image

2017-06-26 Thread Pohjolainen, Topi
On Fri, Jun 16, 2017 at 03:41:36PM -0700, Jason Ekstrand wrote:
> We want to start using create_for_dri_image for all miptrees created
> from __DRIimage, including those which come from a window system.  In
> order to allow for fast clears to still work on window system buffers,
> we need to allow for creating aux surfaces.
> ---
>  src/mesa/drivers/dri/i965/intel_fbo.c |  2 +-
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 16 +---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  3 ++-
>  src/mesa/drivers/dri/i965/intel_tex_image.c   |  2 +-
>  4 files changed, 17 insertions(+), 6 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> index 130eab1..db4cfee 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -363,7 +363,7 @@ intel_image_target_renderbuffer_storage(struct gl_context 
> *ctx,
>  * content.
>  */
> irb->mt = intel_miptree_create_for_dri_image(brw, image, GL_TEXTURE_2D,
> -image->format);
> +image->format, false);
> if (!irb->mt)
>return;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 7b4d431..83c99ed 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -901,7 +901,8 @@ miptree_create_for_planar_image(struct brw_context *brw,
>  struct intel_mipmap_tree *
>  intel_miptree_create_for_dri_image(struct brw_context *brw,
> __DRIimage *image, GLenum target,
> -   mesa_format format)
> +   mesa_format format,
> +   bool is_winsys_image)
>  {
> if (image->planar_format && image->planar_format->nplanes > 0)
>return miptree_create_for_planar_image(brw, image, target);
> @@ -909,6 +910,16 @@ intel_miptree_create_for_dri_image(struct brw_context 
> *brw,
> if (!brw->ctx.TextureFormatSupported[format])
>return NULL;
>  
> +   /* If this image comes in from a window system, we have different
> +* requirements than if it comes in via an EGL import operation.  Window
> +* system images can use any form of auxiliary compression we wish because
> +* they get "flushed" before being handed off to the window system and we
> +* have the opportunity to do resolves.  Window system buffers also may be
> +* used for scanout so we need to flag that appropriately.
> +*/
> +   const uint32_t mt_layout_flags =
> +  is_winsys_image ? MIPTREE_LAYOUT_FOR_SCANOUT : 
> MIPTREE_LAYOUT_DISABLE_AUX;

Is there any particular reason why we couldn't pass 'layout_flags' directly instead
of 'is_winsys_image'? That would work at least for the next patch in the
series.

> +
> /* Disable creation of the texture's aux buffers because the driver 
> exposes
>  * no EGL API to manage them. That is, there is no API for resolving the 
> aux
>  * buffer's content to the main buffer nor for invalidating the aux 
> buffer's
> @@ -917,8 +928,7 @@ intel_miptree_create_for_dri_image(struct brw_context 
> *brw,
> struct intel_mipmap_tree *mt =
>intel_miptree_create_for_bo(brw, image->bo, format,
>image->offset, image->width, 
> image->height, 1,
> -  image->pitch,
> -  MIPTREE_LAYOUT_DISABLE_AUX);
> +  image->pitch, mt_layout_flags);
> if (mt == NULL)
>return NULL;
>  
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 8044a1b..2a4cda2 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -700,7 +700,8 @@ struct intel_mipmap_tree *
>  intel_miptree_create_for_dri_image(struct brw_context *brw,
> __DRIimage *image,
> GLenum target,
> -   mesa_format format);
> +   mesa_format format,
> +   bool is_winsys_image);
>  
>  bool
>  intel_update_winsys_renderbuffer_miptree(struct brw_context *intel,
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
> b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index 76a6e13..53e1087 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -344,7 +344,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, 
> GLenum target,
> }
>  
> mt = intel_miptree_create_for_dri_image(brw, image, target,
> -   image->format);
> +   

Re: [Mesa-dev] [PATCH 03/92] nir: add NIR_PRINT environment variable

2017-06-26 Thread Rob Clark
On Mon, Jun 26, 2017 at 10:09 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>

this is nicer than my old approach of just hacking in
nir_print_shader() calls at various points and recompiling ;-)

I suppose one thing that would be clever is if you could give a list
of comma-separated lowering stage names to control *which* stages you
get prints for.

either way,

Reviewed-by: Rob Clark 


> ---
>  src/compiler/nir/nir.h | 19 +++
>  1 file changed, 19 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index ab7ba14..d1b90cb 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2301,47 +2301,66 @@ void nir_metadata_check_validation_flag(nir_shader 
> *shader);
>
>  static inline bool
>  should_clone_nir(void)
>  {
> static int should_clone = -1;
> if (should_clone < 0)
>should_clone = env_var_as_boolean("NIR_TEST_CLONE", false);
>
> return should_clone;
>  }
> +
> +static inline bool
> +should_print_nir(void)
> +{
> +   static int should_print = -1;
> +   if (should_print < 0)
> +  should_print = env_var_as_boolean("NIR_PRINT", false);
> +
> +   return should_print;
> +}
>  #else
>  static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
>  static inline void nir_metadata_set_validation_flag(nir_shader *shader) { 
> (void) shader; }
>  static inline void nir_metadata_check_validation_flag(nir_shader *shader) { 
> (void) shader; }
>  static inline bool should_clone_nir(void) { return false; }
> +static inline bool should_print_nir(void) { return false; }
>  #endif /* DEBUG */
>
>  #define _PASS(nir, do_pass) do { \
> do_pass   \
> nir_validate_shader(nir); \
> if (should_clone_nir()) { \
>nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \
>ralloc_free(nir);  \
>nir = clone;   \
> } \
>  } while (0)
>
>  #define NIR_PASS(progress, nir, pass, ...) _PASS(nir,\
> nir_metadata_set_validation_flag(nir);\
> +   if (should_print_nir())   \
> +  printf("%s\n", #pass); \
> if (pass(nir, ##__VA_ARGS__)) {   \
>progress = true;   \
> +  if (should_print_nir())\
> + nir_print_shader(nir, stdout);  \
>nir_metadata_check_validation_flag(nir);   \
> } \
>  )
>
>  #define NIR_PASS_V(nir, pass, ...) _PASS(nir,\
> +   if (should_print_nir())   \
> +  printf("%s\n", #pass); \
> pass(nir, ##__VA_ARGS__); \
> +   if (should_print_nir())   \
> +  nir_print_shader(nir, stdout); \
>  )
>
>  void nir_calc_dominance_impl(nir_function_impl *impl);
>  void nir_calc_dominance(nir_shader *shader);
>
>  nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
>  bool nir_block_dominates(nir_block *parent, nir_block *child);
>
>  void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
>  void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] intel/anv: Add missing break in anv_CreateDevice()

2017-06-26 Thread Topi Pohjolainen
CID: 1413018
Signed-off-by: Topi Pohjolainen 
---
 src/intel/vulkan/anv_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 5505befcfa..b09caa38a4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1231,6 +1231,7 @@ VkResult anv_CreateDevice(
   break;
case 9:
   result = gen9_init_device_state(device);
+  break;
case 10:
   result = gen10_init_device_state(device);
   break;
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Separate gen < 8 and gen >= 8 paths explicitly in wrap_mode()

2017-06-26 Thread Topi Pohjolainen
Makes coverity happier.

Fix indentation in gen >= 8 block while at it.

CID: 1413020
CC: Rafael Antognolli 
Signed-off-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/genX_state_upload.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 3f8a7265db..d65b468863 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4455,9 +4455,8 @@ translate_wrap_mode(struct brw_context *brw, GLenum wrap, 
bool using_nearest)
*
* Gen8+ supports this natively.
*/
- return TCM_HALF_BORDER;
-#endif
-
+  return TCM_HALF_BORDER;
+#else
   /* On Gen4-7.5, we clamp the coordinates in the fragment shader
* and set clamp_border here, which gets the result desired.
* We just use clamp(_to_edge) for nearest, because for nearest
@@ -4468,6 +4467,7 @@ translate_wrap_mode(struct brw_context *brw, GLenum wrap, 
bool using_nearest)
  return TCM_CLAMP;
   else
  return TCM_CLAMP_BORDER;
+#endif
case GL_CLAMP_TO_EDGE:
   return TCM_CLAMP;
case GL_CLAMP_TO_BORDER:
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] etnaviv: fix shader miscompilation with more than 16 labels

2017-06-26 Thread Trevor Woerner
Hi Lucas,

Thanks, I'll be trying these out shortly.

On Mon, Jun 26, 2017 at 12:24 PM, Lucas Stach  wrote:
> Fixes miscompilation of shaders in glmark2 ideas, leading to GPU hangs.

One of the quirks about the glmark2-es2[ideas] gpu hangcheck is that
after a fresh boot it usually runs okay if run non-full-screen and
just run by itself. The gpu hangcheck occurs when run subsequently, or
run as part of the full glmark2-es2 suite.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] i965/vec4/generator: use 1-Oword Block Read/Write messages for DF scratch writes/reads

2017-06-26 Thread Francisco Jerez
Samuel Iglesias Gonsálvez  writes:

> On Fri, 2017-06-23 at 11:06 -0700, Francisco Jerez wrote:
>> Samuel Iglesias Gonsálvez  writes:
>> 
>> > On Thu, 2017-06-22 at 16:25 -0700, Francisco Jerez wrote:
>> > > Samuel Iglesias Gonsálvez  writes:
>> > > 
>> > > > Signed-off-by: Samuel Iglesias Gonsálvez 
>> > > > ---
>> > > >  src/intel/compiler/brw_eu_defines.h  |   2 +
>> > > >  src/intel/compiler/brw_shader.cpp|   5 +
>> > > >  src/intel/compiler/brw_vec4.cpp  |   7 ++
>> > > >  src/intel/compiler/brw_vec4.h|   8 ++
>> > > >  src/intel/compiler/brw_vec4_generator.cpp| 136
>> > > > +++
>> > > >  src/intel/compiler/brw_vec4_reg_allocate.cpp |   6 +-
>> > > >  src/intel/compiler/brw_vec4_visitor.cpp  |  49 ++
>> > > >  7 files changed, 212 insertions(+), 1 deletion(-)
>> > > > 
>> > > > diff --git a/src/intel/compiler/brw_eu_defines.h
>> > > > b/src/intel/compiler/brw_eu_defines.h
>> > > > index 1af835d47e..3c148de0fa 100644
>> > > > --- a/src/intel/compiler/brw_eu_defines.h
>> > > > +++ b/src/intel/compiler/brw_eu_defines.h
>> > > > @@ -436,6 +436,8 @@ enum opcode {
>> > > > VEC4_OPCODE_PICK_HIGH_32BIT,
>> > > > VEC4_OPCODE_SET_LOW_32BIT,
>> > > > VEC4_OPCODE_SET_HIGH_32BIT,
>> > > > +   VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW,
>> > > > +   VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH,
>> > > >  
>> > > 
>> > > What's the point of introducing two different opcodes with
>> > > essentially
>> > > the same semantics (read 32B worth of data) as the current
>> > > SHADER_OPCODE_GEN4_SCRATCH_READ?
>> > 
>> > Originally I had only SHADER_OPCODE_GEN4_SCRATCH_READ, but I changed
>> > it
>> > to avoid allocating more registers than needed when doing a scratch
>> > write
>> > of a partial DF write. Let me explain:
>> > 
>> > When doing spilling, as DF instructions are both split and
>> > scalarized,
>> > we read the existing contents in scratch memory, overwrite them
>> > with
>> > the destination of the instruction, then emit scratch write.
>> > Together
>> > with the fact that I am not shuffling DF data, we only need to
>> > allocate
>> > 1 GRF to do so, instead of 2 (if I had emitted
>> > SHADER_OPCODE_GEN4_SCRATCH_READ), when doing spilling on partial DF
>> > writes.
>> > 
>> 
>> Why would you need to allocate more GRFs for
>> SHADER_OPCODE_GEN4_SCRATCH_READ?  It also only reads one register,
>> which
>> should be sufficient for a single scalarized instruction as long as
>> you
>> don't shuffle data around -- Have a look at how the FS back-end
>> addresses this problem.
>> 
>
> OK
>
>> > >   Is there any downside from using the
>> > > current opcode with force_writemask_all?  If anything it would
>> > > give
>> > > you
>> > > better performance because you'd only have to set up one header
>> > > (which
>> > > stalls the EU pipeline twice), send down one message to the
>> > > dataport,
>> > > and avoid stalling to shuffle the data around in the return
>> > > payload
>> > > (which prevents your two 1OWORD messages from being pipelined at
>> > > all).
>> > > 
>> > 
>> > Sorry, I am confused here. Do you mean using
>> > SHADER_OPCODE_GEN4_SCRATCH_READ as-is, which emits a "OWord Dual
>> > Block
>> > Read" message (so only one message)?
>> > 
>> > If that's the case, then I should shuffle the destination data of
>> > the
>> > partial DF write, change the 1-Oword block write offsets and so
>> > on...
>> 
>> Why would you need to shuffle any spilled data?  I don't think
>> there's
>> much of a benefit from shuffling since scratch overwrites need to read
>> the
>> original data for the most part anyway because of writemasking.  In
>> fact
>> shuffling DF data is probably the reason things blow up right now
>> whenever you have mixed DF and single-precision reads or writes to
>> the
>> same spilled variable, which I guess is the reason you need to look
>> for
>> those cases and mark them as no_spill...
>> 
>
> Right, I don't need to shuffle data for the scratch write.
>
>> > in order to save it inside scratch memory in the proper place to
>> > make
>> > OWord Dual Block Read work. That would require some extra
>> > instructions, but I don't know if this would give better
>> > performance
>> > against current implementation or not.
>> > 
>> 
>> I expect the most serious performance issue with the approach of this
>> patch will be the sequence of non-pipelined single-oword reads, which
>> means you get to pay for the EU-dataport roundtrip latency twice
>> instead
>> of once.
>> 
>> > Then, why do I need force_writemask=true when emitting
>> > SHADER_OPCODE_GEN4_SCRATCH_READ?
>> > 
>> 
>> Because you probably don't want to shuffle data in your scratch
>> buffer,
>> and you don't want the dataport to apply bogus 16B channel enables to
>> your reads and writes.
>> 
>
> If we save the dvec4 data of a vertex altogether in consecutive 32
> bytes in 

Re: [Mesa-dev] [PATCH 22/31] mesa: don't flag _NEW_TRANSFORM for st/mesa if possible

2017-06-26 Thread Marek Olšák
On Mon, Jun 26, 2017 at 4:56 PM, Cherniak, Bruce
 wrote:
>
>> On Jun 23, 2017, at 7:50 PM, Marek Olšák  wrote:
>>
>> Thanks. It can only be reproduced with process isolation enabled in
>> piglit, which is not something I test.
>
> I don't do anything special with process isolation.  For me this is enough to 
> reproduce:
>
> bin/shader_runner 
> tests/spec/glsl-1.20/execution/clipping/fixed-clip-enables.shader_test -auto 
> -fbo

Process isolation is enabled by default. If a shader test is isolated,
it means that one shader_runner instance runs only one test. What you
pasted is an isolated test.

If process isolation is disabled, one shader_runner instance can run
an unlimited number of tests. If shader_runner crashes, the piglit
runtime will resume testing by restarting shader_runner with remaining
tests. shader_runner switches between GL core and compat profiles
between tests as needed.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] i965: Fix broxton 2x6 l3 config

2017-06-26 Thread Anuj Phogat
Hi Andres,

You should cherry-pick the commits below to mesa stable:
1. Commit eb23be1 - i965: Add and initialize l3_banks field for gen7+
2. Commit 8521559 - i965: Fix broxton 2x6 l3 config

Please let me know if you have any problems cherry picking them.

Thanks
Anuj


On Mon, Jun 26, 2017 at 8:21 AM, Andres Gomez  wrote:
> Anuj, this depends on:
> https://cgit.freedesktop.org/mesa/mesa/commit/src/mesa?id=eb23be1d97da290073d76c2510b8999b250f0139
>
> Which didn't make it for -stable. Should we cherry-pick that too?
>
> On Mon, 2017-06-12 at 10:01 -0700, Anuj Phogat wrote:
>> The new table added in this patch matches with the table
>> in gfxspecs. We were programming the wrong values earlier.
>>
>> Signed-off-by: Anuj Phogat 
>> Cc: Francisco Jerez 
>> Cc: "17.1" 
>> ---
>>  src/intel/common/gen_device_info.c |  1 +
>>  src/intel/common/gen_device_info.h |  1 +
>>  src/intel/common/gen_l3_config.c   | 19 +++
>>  3 files changed, 21 insertions(+)
>>
>> diff --git a/src/intel/common/gen_device_info.c 
>> b/src/intel/common/gen_device_info.c
>> index 75284a6..eccb464 100644
>> --- a/src/intel/common/gen_device_info.c
>> +++ b/src/intel/common/gen_device_info.c
>> @@ -502,6 +502,7 @@ static const struct gen_device_info gen_device_info_bxt 
>> = {
>>
>>  static const struct gen_device_info gen_device_info_bxt_2x6 = {
>> GEN9_LP_FEATURES_2X6,
>> +   .is_broxton_2x6 = 1,
>> .l3_banks = 1,
>>  };
>>  /*
>> diff --git a/src/intel/common/gen_device_info.h 
>> b/src/intel/common/gen_device_info.h
>> index 6207630..4fe1b21 100644
>> --- a/src/intel/common/gen_device_info.h
>> +++ b/src/intel/common/gen_device_info.h
>> @@ -41,6 +41,7 @@ struct gen_device_info
>> bool is_haswell;
>> bool is_cherryview;
>> bool is_broxton;
>> +   bool is_broxton_2x6;
>> bool is_kabylake;
>>
>> bool has_hiz_and_separate_stencil;
>> diff --git a/src/intel/common/gen_l3_config.c 
>> b/src/intel/common/gen_l3_config.c
>> index ae31d08..e17994b 100644
>> --- a/src/intel/common/gen_l3_config.c
>> +++ b/src/intel/common/gen_l3_config.c
>> @@ -102,6 +102,23 @@ static const struct gen_l3_config chv_l3_configs[] = {
>>  };
>>
>>  /**
>> + * BXT 2x6 validated L3 configurations.  \sa ivb_l3_configs.
>> + * Number of ways =
>> + *Allocation in KB for SKU / (Way size per bank * Number of banks).
>> + * For BXT 2x6: Banks = 1, Way size per bank = 4.
>> + */
>> +static const struct gen_l3_config bxt_2x6_l3_configs[] = {
>> +   /*SLM URB  All DC  RO  IS   C   T */
>> +   {{  0, 32, 48,  0,  0,  0,  0,  0 }},
>> +   {{  0, 32,  0,  8, 40,  0,  0,  0 }},
>> +   {{  0, 32,  0, 32, 16,  0,  0,  0 }},
>> +   {{ 16, 16, 48,  0,  0,  0,  0,  0 }},
>> +   {{ 16, 16,  0, 40,  8,  0,  0,  0 }},
>> +   {{ 16, 16,  0, 16, 32,  0,  0,  0 }},
>> +   {{ 0 }}
>> +};
>> +
>> +/**
>>   * Return a zero-terminated array of validated L3 configurations for the
>>   * specified device.
>>   */
>> @@ -117,6 +134,8 @@ get_l3_configs(const struct gen_device_info *devinfo)
>>
>> case 9:
>> case 10:
>> +  if (devinfo->is_broxton_2x6)
>> + return bxt_2x6_l3_configs;
>>return chv_l3_configs;
>>
>> default:
> --
> Br,
>
> Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH mesa 2/2] anv: use Mesa's u_atomic.h header

2017-06-26 Thread Kenneth Graunke
On Monday, June 26, 2017 5:59:40 AM PDT Eric Engestrom wrote:
> Signed-off-by: Eric Engestrom 
> ---
>  src/intel/vulkan/anv_private.h | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index e5d88f2815..8079378ae6 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -46,6 +46,7 @@
>  #include "compiler/brw_compiler.h"
>  #include "util/macros.h"
>  #include "util/list.h"
> +#include "util/u_atomic.h"
>  #include "util/u_vector.h"
>  #include "vk_alloc.h"
>  
> @@ -1862,14 +1863,14 @@ static inline void
>  anv_shader_bin_ref(struct anv_shader_bin *shader)
>  {
> assert(shader && shader->ref_cnt >= 1);
> -   __sync_fetch_and_add(&shader->ref_cnt, 1);
> +   p_atomic_inc(&shader->ref_cnt);

p_atomic_inc is __sync_add_and_fetch, but since we don't use the return
value, either order works just fine...

>  }
>  
>  static inline void
>  anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin 
> *shader)
>  {
> assert(shader && shader->ref_cnt >= 1);
> -   if (__sync_fetch_and_add(&shader->ref_cnt, -1) == 1)
> +   if (p_atomic_dec_zero(&shader->ref_cnt))

and this is __sync_sub_and_fetch, which does the subtract before returning,
but you adjusted the comparison value from 1 to 0...so this works too.

Both patches are:
Reviewed-by: Kenneth Graunke 

>anv_shader_bin_destroy(device, shader);
>  }

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] glsl: relax loop unrolling restrictions

2017-06-26 Thread Marek Olšák
On Thu, Jun 22, 2017 at 12:25 PM, Timothy Arceri  wrote:
> On 22/06/17 18:46, Samuel Pitoiset wrote:
>>
>> Here's a shader-db report:
>>
>> https://pastebin.com/raw/QBMnF2pv
>>
>> This doesn't sound like a total win actually...
>
>
> I'm surprised to see this. I thought we pretty much unrolled everything
> already, although maybe that was what happened after NIR unrolling.
>
> I've done a full shader-db run and I'm getting very different results from
> you:
>
> https://pastebin.com/XRH7Vbvv
>
> They are still a little mixed, but they look much more positive.

The problem with our shader-db stats is that they have very little to do
with performance changes. The code size increase is expected. Ignore
register usage, instead, look at max waves. SGPR spilling is cheap -
measurements can be done to see whether it's measurable.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gallium: Reduce trace_dump_box_bytes size by box->x.

2017-06-26 Thread Cherniak, Bruce
Back in February, I submitted a patch for review to address a crash in
GALLIUM_TRACE.

It never got a review, and I forgot to follow up on it.  Is this a correct fix 
and useful to anyone
else?

Thanks,
Bruce



From patchwork Wed Feb  1 20:20:38 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: [Mesa-dev] gallium: Reduce trace_dump_box_bytes size by box->x.
From: Bruce Cherniak 
X-Patchwork-Id: 136378
Message-Id: <1485980438-102650-1-git-send-email-bruce.chern...@intel.com>
To: mesa-dev@lists.freedesktop.org
Date: Wed,  1 Feb 2017 14:20:38 -0600

If stride is supplied (as either stride or slice_stride),
trace_dump_box_bytes will try to read stride bytes, regardless of whether
start address is offset by box->x.  This causes access outside mapped
region, and possible segv. (transfer_map stride and layer_stride are not
adjusted for box dimensions)

Note:  trace_dump_box_bytes only dumps PIPE_BUFFER resources, so there
shouldn't be any complicated boxes.  trace_dump_bytes doesn't handle them
anyway.
---
 src/gallium/drivers/trace/tr_dump.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/trace/tr_dump.c 
b/src/gallium/drivers/trace/tr_dump.c
index b173b8a..591e273 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -510,11 +510,13 @@ void trace_dump_box_bytes(const void *data,
   size = 0;
} else {
   enum pipe_format format = resource->format;
-  if (slice_stride)
+  if (slice_stride) {
+ slice_stride -= util_format_get_blockwidth(format) * box->x;
  size = box->depth * slice_stride;
-  else if (stride)
+  } else if (stride) {
+ stride -= util_format_get_blockwidth(format) * box->x;
  size = util_format_get_nblocksy(format, box->height) * stride;
-  else {
+  } else {
  size = util_format_get_nblocksx(format, box->width) * 
util_format_get_blocksize(format);
   }
}

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/8] swr/rast: Switch intrinsic usage to SIMDLib

2017-06-26 Thread Rowley, Timothy O

> On Jun 26, 2017, at 8:11 AM, Emil Velikov  wrote:
> 
> Hi Tim,
> 
> On 22 June 2017 at 22:13, Tim Rowley  wrote:
>> Switch from a macro-based simd intrinsics layer to a more C++
>> implementation, which also adds AVX512 optimizations to 128-bit
>> and 256-bit SIMD.
> 
>> +   rasterizer/common/simdlib_128_avx.inl \
>> +   rasterizer/common/simdlib_128_avx2.inl \
>> +   rasterizer/common/simdlib_128_avx512.inl \
>> +   rasterizer/common/simdlib_256_avx.inl \
>> +   rasterizer/common/simdlib_256_avx2.inl \
>> +   rasterizer/common/simdlib_256_avx512.inl \
>> +   rasterizer/common/simdlib_512_avx512.inl \
>> +   rasterizer/common/simdlib_512_avx512_masks.inl \
>> +   rasterizer/common/simdlib_512_emu.inl \
>> +   rasterizer/common/simdlib_512_emu_masks.inl \
> Your commit message said "make dist/check" but I'd imagine you used
> SCons for the whole series as well, correct?
> 
> Merely double-checking as some versions of SCons had issues with non
> {c,h}{,pp} files listed as sources, IIRC. Sadly I don't recall the
> specifics.

I thought this had been tested internally on scons; turns out I was mistaken - 
we will get it working for the next version of the commit.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] etnaviv: flush source TS before resolve

2017-06-26 Thread Lucas Stach
If we blit from a rendertarget or a depthstencil buffer there might still
be dirty data in the TS buffer which needs to be flushed out.

Fixes missing shadow tiles in glmark2 shadow.

Signed-off-by: Lucas Stach 
---
This is on top of "etnaviv: flush color cache and depth cache together
before resolves". Without this commit flushing the TS is causing
rendering corruption.
---
 src/gallium/drivers/etnaviv/etnaviv_clear_blit.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c 
b/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
index e967595f424c..40a6832f8785 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
@@ -470,6 +470,10 @@ etna_try_rs_blit(struct pipe_context *pctx,
   etna_set_state(ctx->stream, VIVS_GL_FLUSH_CACHE,
 VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH);
   etna_stall(ctx->stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
+
+  if (src->levels[blit_info->src.level].ts_size &&
+  src->levels[blit_info->src.level].ts_valid)
+ etna_set_state(ctx->stream, VIVS_TS_FLUSH_CACHE, 
VIVS_TS_FLUSH_CACHE_FLUSH);
}
 
/* Set up color TS to source surface before blit, if needed */
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] etnaviv: fix shader miscompilation with more than 16 labels

2017-06-26 Thread Lucas Stach
The labels array may change its virtual address on a reallocation, so
it is invalid to cache pointers into the array. Rather than using the
pointer directly, remember the array index.

Fixes miscompilation of shaders in glmark2 ideas, leading to GPU hangs.

Fixes: c9e8b49b (etnaviv: gallium driver for Vivante GPUs)
Cc: mesa-sta...@lists.freedesktop.org
Signed-off-by: Lucas Stach 
Reviewed-by: Christian Gmeiner 
---
v2: Only fill in labels after checking instruction limit. Fixes out of
bounds array access.
---
 src/gallium/drivers/etnaviv/etnaviv_compiler.c | 60 ++
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c 
b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index eafb511bb813..af0f76b58649 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -119,10 +119,10 @@ enum etna_compile_frame_type {
  */
 struct etna_compile_frame {
enum etna_compile_frame_type type;
-   struct etna_compile_label *lbl_else;
-   struct etna_compile_label *lbl_endif;
-   struct etna_compile_label *lbl_loop_bgn;
-   struct etna_compile_label *lbl_loop_end;
+   int lbl_else_idx;
+   int lbl_endif_idx;
+   int lbl_loop_bgn_idx;
+   int lbl_loop_end_idx;
 };
 
 struct etna_compile_file {
@@ -178,7 +178,7 @@ struct etna_compile {
/* Fields for handling nested conditionals */
struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
int frame_sp;
-   struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS];
+   int lbl_usage[ETNA_MAX_INSTRUCTIONS];
 
unsigned labels_count, labels_sz;
struct etna_compile_label *labels;
@@ -990,7 +990,7 @@ etna_src_uniforms_conflict(struct etna_inst_src a, struct 
etna_inst_src b)
 }
 
 /* create a new label */
-static struct etna_compile_label *
+static unsigned int
 alloc_new_label(struct etna_compile *c)
 {
struct etna_compile_label label = {
@@ -999,7 +999,7 @@ alloc_new_label(struct etna_compile *c)
 
array_insert(c->labels, label);
 
-   return &c->labels[c->labels_count - 1];
+   return c->labels_count - 1;
 }
 
 /* place label at current instruction pointer */
@@ -1015,10 +1015,10 @@ label_place(struct etna_compile *c, struct 
etna_compile_label *label)
  * as the value becomes known.
  */
 static void
-label_mark_use(struct etna_compile *c, struct etna_compile_label *label)
+label_mark_use(struct etna_compile *c, int lbl_idx)
 {
assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
-   c->lbl_usage[c->inst_ptr] = label;
+   c->lbl_usage[c->inst_ptr] = lbl_idx;
 }
 
 /* walk the frame stack and return first frame with matching type */
@@ -1099,8 +1099,8 @@ trans_if(const struct instr_translater *t, struct 
etna_compile *c,
/* push IF to stack */
f->type = ETNA_COMPILE_FRAME_IF;
/* create "else" label */
-   f->lbl_else = alloc_new_label(c);
-   f->lbl_endif = NULL;
+   f->lbl_else_idx = alloc_new_label(c);
+   f->lbl_endif_idx = -1;
 
/* We need to avoid the emit_inst() below becoming two instructions */
if (etna_src_uniforms_conflict(src[0], imm_0))
@@ -1108,7 +1108,7 @@ trans_if(const struct instr_translater *t, struct 
etna_compile *c,
 
/* mark position in instruction stream of label reference so that it can be
 * filled in in next pass */
-   label_mark_use(c, f->lbl_else);
+   label_mark_use(c, f->lbl_else_idx);
 
/* create conditional branch to label if src0 EQ 0 */
emit_inst(c, &(struct etna_inst){
@@ -1129,8 +1129,8 @@ trans_else(const struct instr_translater *t, struct 
etna_compile *c,
assert(f->type == ETNA_COMPILE_FRAME_IF);
 
/* create "endif" label, and branch to endif label */
-   f->lbl_endif = alloc_new_label(c);
-   label_mark_use(c, f->lbl_endif);
+   f->lbl_endif_idx = alloc_new_label(c);
+   label_mark_use(c, f->lbl_endif_idx);
emit_inst(c, &(struct etna_inst) {
   .opcode = INST_OPCODE_BRANCH,
   .cond = INST_CONDITION_TRUE,
@@ -1138,7 +1138,7 @@ trans_else(const struct instr_translater *t, struct 
etna_compile *c,
});
 
/* mark "else" label at this position in instruction stream */
-   label_place(c, f->lbl_else);
+   label_place(c, &c->labels[f->lbl_else_idx]);
 }
 
 static void
@@ -1151,10 +1151,10 @@ trans_endif(const struct instr_translater *t, struct 
etna_compile *c,
 
/* assign "endif" or "else" (if no ELSE) label to current position in
 * instruction stream, pop IF */
-   if (f->lbl_endif != NULL)
-  label_place(c, f->lbl_endif);
+   if (f->lbl_endif_idx != -1)
+  label_place(c, &c->labels[f->lbl_endif_idx]);
else
-  label_place(c, f->lbl_else);
+  label_place(c, &c->labels[f->lbl_else_idx]);
 }
 
 static void
@@ -1166,10 +1166,10 @@ trans_loop_bgn(const struct instr_translater *t, struct 
etna_compile *c,
 
/* push LOOP to stack */
f->type = ETNA_COMPILE_FRAME_LOOP;
-   f->lbl_loop_bgn = alloc_new_label(c);
-   f->lbl_loop_end = 

Re: [Mesa-dev] [PATCH 3/8] swr/rast: Split rasterizer.cpp to improve compile times

2017-06-26 Thread Rowley, Timothy O

On Jun 26, 2017, at 8:02 AM, Emil Velikov 
> wrote:

On 22 June 2017 at 22:13, Tim Rowley 
> wrote:
Hardcode split to four files currently.  Decreases swr build
time on KNL by over 50%.
Out of curiosity what is KNL?

KNL is the Intel Xeon Phi x200 Processor family, codenamed Knights Landing, 
which has between 64 and 72 cores with AVX512.

Also, over 50% decrease - time to pop the champagne ;-)

---
src/gallium/drivers/swr/Makefile.am|   36 +-
src/gallium/drivers/swr/Makefile.sources   |2 +-
src/gallium/drivers/swr/SConscript |   24 +-
.../drivers/swr/rasterizer/codegen/gen_backends.py |   15 +-
.../codegen/templates/gen_rasterizer.cpp   |   42 +
src/gallium/drivers/swr/rasterizer/core/api.cpp|1 +
.../drivers/swr/rasterizer/core/multisample.cpp|   48 -
.../drivers/swr/rasterizer/core/rasterizer.cpp | 1788 +++-
.../drivers/swr/rasterizer/core/rasterizer.h   |   31 +-
.../drivers/swr/rasterizer/core/rasterizer_impl.h  | 1376 +++
10 files changed, 1738 insertions(+), 1625 deletions(-)
create mode 100644 
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_rasterizer.cpp
delete mode 100644 src/gallium/drivers/swr/rasterizer/core/multisample.cpp
create mode 100644 src/gallium/drivers/swr/rasterizer/core/rasterizer_impl.h

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 0daec90..1a69cfc 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -67,7 +67,12 @@ BUILT_SOURCES = \
   rasterizer/core/backends/gen_BackendPixelRate1.cpp \
   rasterizer/core/backends/gen_BackendPixelRate2.cpp \
   rasterizer/core/backends/gen_BackendPixelRate3.cpp \
-   rasterizer/core/backends/gen_BackendPixelRate.hpp
+   rasterizer/core/backends/gen_BackendPixelRate.hpp \
+   rasterizer/core/backends/gen_rasterizer0.cpp \
+   rasterizer/core/backends/gen_rasterizer1.cpp \
+   rasterizer/core/backends/gen_rasterizer2.cpp \
+   rasterizer/core/backends/gen_rasterizer3.cpp \
+   rasterizer/core/backends/gen_rasterizer.hpp

MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
@@ -170,6 +175,32 @@ backend.intermediate: rasterizer/codegen/gen_backends.py 
rasterizer/codegen/temp
   --cpp \
   --hpp

+rasterizer/core/backends/gen_rasterizer0.cpp \
+rasterizer/core/backends/gen_rasterizer1.cpp \
+rasterizer/core/backends/gen_rasterizer2.cpp \
+rasterizer/core/backends/gen_rasterizer3.cpp \
+rasterizer/core/backends/gen_rasterizer.hpp: \
+rasterizer.intermediate
+
+# 5 SWR_MULTISAMPLE_TYPE_COUNT
+# 2 CenterPattern
+# 2 Conservative
+# 3 SWR_INPUT_COVERAGE_COUNT
+# 5 STATE_VALID_TRI_EDGE_COUNT
+# 2 RasterScissorEdges
+
+.INTERMEDIATE: rasterizer.intermediate
Same question/suggestion as in PATCH 1 - please add a note (helps XXX) or drop

With that from build POV
Reviewed-by: Emil Velikov 
>

Mentioned in my other mail that I’m not wed to the .INTERMEDIATE approach; I’ll 
address this the same way we decide upon for the backend split-up.


--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_backends.py

+
+args = parser.parse_args(args)

-args = parser.parse_args(args);

Unrelated cleanup?

I’ll try to pull the cleanups into a separate commit.


-Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: skip FLUSH_VERTICES() if no samplers were changed

2017-06-26 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 23, 2017 at 12:56 AM, Timothy Arceri  wrote:
> ---
>  src/mesa/main/uniform_query.cpp | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
> index 1570770..9683fa8 100644
> --- a/src/mesa/main/uniform_query.cpp
> +++ b/src/mesa/main/uniform_query.cpp
> @@ -1071,21 +1071,26 @@ _mesa_uniform(GLint location, GLsizei count, const 
> GLvoid *values,
>  * element that exceeds the highest array element index used, as
>  * reported by GetActiveUniform, will be ignored by the GL."
>  *
>  * Clamp 'count' to a valid value.  Note that for non-arrays a count > 1
>  * will have already generated an error.
>  */
> if (uni->array_elements != 0) {
>count = MIN2(count, (int) (uni->array_elements - offset));
> }
>
> -   _mesa_flush_vertices_for_uniforms(ctx, uni);
> +   /* We check samplers for changes and flush if needed in the sampler
> +* handling code further down, so just skip them here.
> +*/
> +   if (!uni->type->is_sampler()) {
> +   _mesa_flush_vertices_for_uniforms(ctx, uni);
> +   }
>
> /* Store the data in the "actual type" backing storage for the uniform.
>  */
> if (!uni->type->is_boolean() && !uni->is_bindless) {
> >memcpy(&uni->storage[size_mul * components * offset], values,
>   sizeof(uni->storage[0]) * components * count * size_mul);
> } else if (uni->is_bindless) {
>const union gl_constant_value *src =
>   (const union gl_constant_value *) values;
> >GLuint64 *dst = (GLuint64 *)&uni->storage[components * offset].i;
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] ac/nir: Use correct LLVM intrinsics for atomic ops on imageBuffers

2017-06-26 Thread Alex Smith
The buffer intrinsics should be used instead of the image ones.

Signed-off-by: Alex Smith 
Cc: 
---
This applies on top of James Legg's recent series [1], since they both
touch the same function.

[1] https://lists.freedesktop.org/archives/mesa-dev/2017-June/160245.html
---
 src/amd/common/ac_nir_to_llvm.c | 63 ++---
 1 file changed, 34 insertions(+), 29 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 5e9f147..468ce4d 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3369,37 +3369,14 @@ static LLVMValueRef visit_image_atomic(struct 
nir_to_llvm_context *ctx,
int param_count = 0;
const nir_variable *var = instr->variables[0]->var;
 
-   const char *base_name = "llvm.amdgcn.image.atomic";
const char *atomic_name;
-   LLVMValueRef coords;
-   char intrinsic_name[41], coords_type[8];
+   char intrinsic_name[41];
const struct glsl_type *type = glsl_without_array(var->type);
+   MAYBE_UNUSED int length;
 
if (ctx->stage == MESA_SHADER_FRAGMENT)
ctx->shader_info->fs.writes_memory = true;
 
-   params[param_count++] = get_src(ctx, instr->src[2]);
-   if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
-   params[param_count++] = get_src(ctx, instr->src[3]);
-
-   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
-   params[param_count++] = get_sampler_desc(ctx, 
instr->variables[0], DESC_BUFFER);
-   coords = params[param_count++] = 
LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
-   
LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
-   params[param_count++] = ctx->i32zero; /* voffset */
-   params[param_count++] = ctx->i1false;  /* glc */
-   params[param_count++] = ctx->i1false;  /* slc */
-   } else {
-   bool da = glsl_sampler_type_is_array(type) ||
- glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
-
-   coords = params[param_count++] = get_image_coords(ctx, instr);
-   params[param_count++] = get_sampler_desc(ctx, 
instr->variables[0], DESC_IMAGE);
-   params[param_count++] = ctx->i1false; /* r128 */
-   params[param_count++] = da ? ctx->i1true : ctx->i1false;  
/* da */
-   params[param_count++] = ctx->i1false;  /* slc */
-   }
-
switch (instr->intrinsic) {
case nir_intrinsic_image_atomic_add:
atomic_name = "add";
@@ -3428,11 +3405,39 @@ static LLVMValueRef visit_image_atomic(struct 
nir_to_llvm_context *ctx,
default:
abort();
}
-   build_int_type_name(LLVMTypeOf(coords),
-   coords_type, sizeof(coords_type));
 
-   MAYBE_UNUSED const int length = snprintf(intrinsic_name, 
sizeof(intrinsic_name),
-"%s.%s.%s", base_name, 
atomic_name, coords_type);
+   params[param_count++] = get_src(ctx, instr->src[2]);
+   if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
+   params[param_count++] = get_src(ctx, instr->src[3]);
+
+   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
+   params[param_count++] = get_sampler_desc(ctx, 
instr->variables[0], DESC_BUFFER);
+   params[param_count++] = LLVMBuildExtractElement(ctx->builder, 
get_src(ctx, instr->src[0]),
+   
LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
+   params[param_count++] = ctx->i32zero; /* voffset */
+   params[param_count++] = ctx->i1false;  /* slc */
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.buffer.atomic.%s", atomic_name);
+   } else {
+   char coords_type[8];
+
+   bool da = glsl_sampler_type_is_array(type) ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
+
+   LLVMValueRef coords = params[param_count++] = 
get_image_coords(ctx, instr);
+   params[param_count++] = get_sampler_desc(ctx, 
instr->variables[0], DESC_IMAGE);
+   params[param_count++] = ctx->i1false; /* r128 */
+   params[param_count++] = da ? ctx->i1true : ctx->i1false;  
/* da */
+   params[param_count++] = ctx->i1false;  /* slc */
+
+   build_int_type_name(LLVMTypeOf(coords),
+   coords_type, sizeof(coords_type));
+
+   length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+ "llvm.amdgcn.image.atomic.%s.%s", 
atomic_name, coords_type);
+ 

Re: [Mesa-dev] [PATCH 1/8] swr/rast: Split backend.cpp to improve compile time

2017-06-26 Thread Rowley, Timothy O

On Jun 26, 2017, at 7:57 AM, Emil Velikov 
> wrote:

Hi Tim,

On 22 June 2017 at 22:13, Tim Rowley 
> wrote:
Hardcode split to four files currently.  Decreases swr build
time on a quad-core by ~10%.
---
src/gallium/drivers/swr/Makefile.am|   26 +-
src/gallium/drivers/swr/Makefile.sources   |4 +
src/gallium/drivers/swr/SConscript |   19 +-
.../drivers/swr/rasterizer/codegen/gen_backends.py |   38 +-
.../drivers/swr/rasterizer/codegen/gen_common.py   |7 +
.../rasterizer/codegen/templates/gen_backend.cpp   |1 +
.../codegen/templates/gen_header_init.hpp  |   43 +
src/gallium/drivers/swr/rasterizer/core/api.cpp|7 +-
.../drivers/swr/rasterizer/core/backend.cpp|  809 +--
src/gallium/drivers/swr/rasterizer/core/backend.h  | 1033 +--
.../drivers/swr/rasterizer/core/backend_clear.cpp  |  281 ++
.../drivers/swr/rasterizer/core/backend_impl.h | 1067 
.../drivers/swr/rasterizer/core/backend_sample.cpp |  345 +++
.../swr/rasterizer/core/backend_singlesample.cpp   |  321 ++
14 files changed, 2160 insertions(+), 1841 deletions(-)
create mode 100644 
src/gallium/drivers/swr/rasterizer/codegen/templates/gen_header_init.hpp
create mode 100644 src/gallium/drivers/swr/rasterizer/core/backend_clear.cpp
create mode 100644 src/gallium/drivers/swr/rasterizer/core/backend_impl.h
create mode 100644 src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
create mode 100644 
src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 6650abd..0daec90 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -34,6 +34,7 @@ COMMON_CXXFLAGS = \
   $(LLVM_CXXFLAGS) \
   $(SWR_CXX11_CXXFLAGS) \
   -I$(builddir)/rasterizer/codegen \
+   -I$(builddir)/rasterizer/core \
   -I$(builddir)/rasterizer/jitter \
   -I$(builddir)/rasterizer/archrast \
   -I$(srcdir)/rasterizer \
@@ -62,7 +63,11 @@ BUILT_SOURCES = \
   rasterizer/archrast/gen_ar_event.cpp \
   rasterizer/archrast/gen_ar_eventhandler.hpp \
   rasterizer/archrast/gen_ar_eventhandlerfile.hpp \
-   rasterizer/core/gen_BackendPixelRate0.cpp
+   rasterizer/core/backends/gen_BackendPixelRate0.cpp \
+   rasterizer/core/backends/gen_BackendPixelRate1.cpp \
+   rasterizer/core/backends/gen_BackendPixelRate2.cpp \
+   rasterizer/core/backends/gen_BackendPixelRate3.cpp \
+   rasterizer/core/backends/gen_BackendPixelRate.hpp

MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
@@ -140,20 +145,30 @@ rasterizer/archrast/gen_ar_eventhandlerfile.hpp: 
rasterizer/codegen/gen_archrast
   --output rasterizer/archrast/gen_ar_eventhandlerfile.hpp \
   --gen_eventhandlerfile_h

+rasterizer/core/backends/gen_BackendPixelRate0.cpp \
+rasterizer/core/backends/gen_BackendPixelRate1.cpp \
+rasterizer/core/backends/gen_BackendPixelRate2.cpp \
+rasterizer/core/backends/gen_BackendPixelRate3.cpp \
+rasterizer/core/backends/gen_BackendPixelRate.hpp: \
+backend.intermediate
+
# 5 SWR_MULTISAMPLE_TYPE_COUNT
# 2 SWR_MSAA_SAMPLE_PATTERN_COUNT
# 3 SWR_INPUT_COVERAGE_COUNT
# 2 centroid
# 2 forcedSampleCount
# 2 canEarlyZ
-rasterizer/core/gen_BackendPixelRate0.cpp: rasterizer/codegen/gen_backends.py 
rasterizer/codegen/templates/gen_backend.cpp
+
+.INTERMEDIATE: backend.intermediate
I have limited experience with .INTERMEDIATE and it didn't seem to
bring single/incremental build times improvements.
Have you seen any on your end? If not I'll just drop it.

I’m not really familiar with .INTERMEDIATE myself; found it when googling 
around looking for a way to specify a code generator rule that produced 
multiple files.  If there’s a better/cleaner way of doing this I’d like to hear 
about it.


+backend.intermediate: rasterizer/codegen/gen_backends.py 
rasterizer/codegen/templates/gen_backend.cpp 
rasterizer/codegen/templates/gen_header_init.hpp
   $(MKDIR_GEN)
   $(PYTHON_GEN) \
   $(srcdir)/rasterizer/codegen/gen_backends.py \
-   --outdir rasterizer/core \
+   --outdir rasterizer/core/backends \
   --dim 5 2 3 2 2 2 \
-   --split 0 \
-   --cpp
+   --numfiles 4 \
+   --cpp \
+   --hpp

Hardcoding file names in generator scripts tends to be a bad idea. One
example is the extra code needed to generate the cmake bits :-)
One could prune that, but it's not a priority AFAICT.

I would like to be able to wildcard on the generated name, but it seems that 
automake wants to have a static list of filenames at invocation.  Our cmake 
approach internally generates a cmake fragment that is included by 

Re: [Mesa-dev] [PATCH 4/4] egl/dri2: add image extension to swrast_core_extensions

2017-06-26 Thread Gurchetan Singh
Ping...

On Wed, Jun 21, 2017 at 4:40 PM, Gurchetan Singh <
gurchetansi...@chromium.org> wrote:

> Emil,
>
> If I understand you correctly, you're proposing to add the ability to use
> the kms_swrast driver in platform_x11.c (the host is a standard Ubuntu box
> for the emulator use case, not CrOS) alongside swrast.
>
> In that case, we would need to:
>
> 1) Have a dri2_initialize_x11_kms_swrast function that's called when some
> environment variable is set instead of dri2_initialize_x11_swrast.
> 2) dri2_initialize_x11_kms_swrast would need access to the host card fd
> (dri_kms_init_screen requires this) and call dri2_load_driver instead of
> dri2_load_driver_swrast .
> 3) Use dri2_loader_extensions instead of swrast_loader_extensions,
> dri2_x11_display_vtbl instead of dri2_x11_swrast_display_vtbl, etc.
>
> I'm having trouble getting this to work, and I was wondering if what I'm
> trying to do is what you want.  Attached is the patch I'm trying (it
> compiles, but will crash your display).
>
> Regarding the issues with the emulator, I filed a bug based on your comments
> and the emulator team has started looking at it (see
> https://android-review.googlesource.com/#/c/418541/).
>
>
> On Tue, Jun 20, 2017 at 1:19 AM, Emil Velikov 
> wrote:
>
>> On 19 June 2017 at 20:46, Chad Versace  wrote:
>> > On Thu 15 Jun 2017, Gurchetan Singh wrote:
>> >> Emil, would you be fine with leaving the image extension in dri2.c but
>> still
>> >> adding it as a drisw extension?  That solution would look like:
>> >>
>> >> [1]https://patchwork.freedesktop.org/patch/154807/
>> >
>> > Observations:
>> > - src/gallium/state_trackers/dri/dri2.c:dri2ImageExtension
>> advertises v15 of __DRI_IMAGE.
>> > - egl_dri2.c requires only v1 of __DRI_IMAGE. Maybe a higher version
>> >   is required in practice, but the egl_dri2.c code checks only for
>> v1.
>> >
>> > Questions:
>> > 1. All functions implemented in dri2.c:dri2ImageExtensions, do they
>> >work under swrast? Honest question, because I'm no expert on
>> >gallium.
>> >
>> > If question #1 is true, then I see no problem with your latest plan. But
>> > maybe Emil does.
>> >
>> > If question #1 is false, it should be straightforward to implement in
>> > drisw.c the small subset of __DRI_IMAGE functions required for v1.
>>
>> While I haven't checked how much [or well] DRI_IMAGE works with
>> swrast, there's no need to actually add it there.
>> An alternative is to add kms_swrast support for EGL like we already do
>> for GBM, as mentioned earlier [1].
>>
>> Gents, keep in mind that:
>>  - one cannot pull DRM specifics (dri2.c) code within drisw.c, and
>>  - DRI_IMAGE pulls DRM specifics, hence adding it into drisw.c is
>> again a no-go :-\
>>
>> FWIW the above architectural split applies for classic drivers as
>> well. swrast_dri.so simply cannot depend on anything DRM related.
>>
>> -Emil
>>
>> [1] https://lists.freedesktop.org/archives/mesa-dev/2017-June/159519.html
>>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/8] swr: update rasterizer

2017-06-26 Thread Rowley, Timothy O

> On Jun 26, 2017, at 7:41 AM, Emil Velikov  wrote:
> On 22 June 2017 at 22:12, Tim Rowley  wrote:
>> Highlights include splitting the heavily templated files into multiple
>> chunks to speed compile (2x for a large machine), and switching the
>> simd intrinsic usage from a macro-based header to a more c++ feeling
>> library.
>> 
> Yay \o/. Out of curiosity - does the simd library bring much more
> apart from a C++ feel?

A couple major intentions, mainly to produce better code for avx512:

* hide the differences in masking operations - avx/avx2 uses a normal ymm 
register for masking, while avx512 has separate mask registers

* allow reduced vector width operations to be implemented in terms of avx512 
code, so that a larger register set and mask registers can be used

> Did you notice the errors in the Travis build [1]? For some reason
> they don't flag up when building locally, although a few C++17
> warnings did pop up. Speaking of which, since we're back to C++11 for
> SWR can we toggle back to GCC 4.8(.1) for Travis?
> 
> Can you guys look at those, please... in case you haven't already.

Sorry, had a patch for this ready to go Friday, but we were working through 
some other issues and I forgot to send it to the list.  I’ve done so now.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/blorp: Use the renderbuffer format for clears

2017-06-26 Thread Jason Ekstrand
This fixes the Piglit ARB_texture_views rendering-formats test.

Cc: "17.1" 
---
 src/mesa/drivers/dri/i965/brw_blorp.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 87c9dd4..96dc657 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -746,9 +746,9 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
 {
struct gl_context *ctx = &brw->ctx;
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-   mesa_format format = irb->mt->format;
uint32_t x0, x1, y0, y1;
 
+   mesa_format format = irb->Base.Base.Format;
if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
   format = _mesa_get_srgb_format_linear(format);
 
@@ -772,6 +772,14 @@ do_single_blorp_clear(struct brw_context *brw, struct 
gl_framebuffer *fb,
if (set_write_disables(irb, ctx->Color.ColorMask[buf], color_write_disable))
   can_fast_clear = false;
 
+   /* We store clear colors as floats or uints as needed.  If there are
+* texture views in play, the formats will not properly be respected
+* during resolves because the resolve operations only know about the
+* miptree and not the renderbuffer.
+*/
+   if (irb->Base.Base.Format != irb->mt->format)
+  can_fast_clear = false;
+
if (!irb->mt->supports_fast_clear ||
!brw_is_color_fast_clear_compatible(brw, irb->mt, 
&ctx->Color.ClearColor))
   can_fast_clear = false;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr/rast: adjust std::string usage to fix build

2017-06-26 Thread Cherniak, Bruce
Reviewed-by: Bruce Cherniak 

> On Jun 26, 2017, at 9:34 AM, Eric Engestrom  wrote:
> 
> On Monday, 2017-06-26 09:03:13 -0500, Tim Rowley wrote:
>> Some combinations of c++ compilers and standard libraries had problems
>> with the string::replace code we were using previously.
>> 
>> This should fix the travis-ci system.
> 
> Yup, confirmed: https://travis-ci.org/1ace/mesa/builds/247116248
> 
> Tested-by: Eric Engestrom 
> 
> Thanks!
> 
>> ---
>> .../drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp   | 12 
>> +---
>> 1 file changed, 9 insertions(+), 3 deletions(-)
>> 
>> diff --git 
>> a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp 
>> b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
>> index 0527bf3..e109fd2 100644
>> --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
>> +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
>> @@ -141,21 +141,27 @@ extern GlobalKnobs g_GlobalKnobs;
>> void KnobBase::autoExpandEnvironmentVariables(std::string &text)
>> {
>> {
>> +// unix style variable replacement
>> static std::regex env("\\$\\{([^}]+)\\}");
>> std::smatch match;
>> while (std::regex_search(text, match, env))
>> {
>> const std::string var = GetEnv(match[1].str());
>> -text.replace(match[0].first, match[0].second, var);
>> +// certain combinations of gcc/libstd++ have problems with this
>> +// text.replace(match[0].first, match[0].second, var);
>> +text.replace(match.prefix().length(), match[0].length(), var);
>> }
>> }
>> {
>> +// win32 style variable replacement
>> static std::regex env("\\%([^}]+)\\%");
>> std::smatch match;
>> while (std::regex_search(text, match, env))
>> {
>> const std::string var = GetEnv(match[1].str());
>> -text.replace(match[0].first, match[0].second, var);
>> +// certain combinations of gcc/libstd++ have problems with this
>> +// text.replace(match[0].first, match[0].second, var);
>> +text.replace(match.prefix().length(), match[0].length(), var);
>> }
>> }
>> }
>> @@ -232,4 +238,4 @@ std::string GlobalKnobs::ToString(const char* 
>> optPerLinePrefix)
>> return ' '*(max_len - name_len)
>> 
>> 
>> -%>
>> \ No newline at end of file
>> +%>
>> -- 
>> 2.7.4
>> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH shader-db] Drop Orbital Explorer shader.

2017-06-26 Thread Eero Tamminen

Hi,

On 22.06.2017 23:14, Chad Versace wrote:

On Thu 22 Jun 2017, Chad Versace wrote:

On Thu 22 Jun 2017, Kenneth Graunke wrote:

The author eventually emailed me and said that he considers it a
"finished experiment" and said the rendering method (geometry shader
based approach) is inefficient, and he intends to fully rewrite it
someday.


A total tangent... The author and I had lunch last week, where he
introduced me to a great mathy Android puzzle game. The game's puzzles
require you to make geometric constructions with a straight-edge and
compass in the minimum number of moves, à la Euclid.

Euclidea 



Oh yeah, I almost forgot.
Reviewed-by: Chad Versace 

This geometry shader of doom doesn't belong in shader-db.


Should there be a separate repository for "doom" shaders, which can
be used to occasionally test valid, but corner-case code?

With e.g. WebGL, drivers need to be robust against all kinds of weird 
shaders, so collecting odd ones somewhere might not be a bad idea.



- Eero


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] swr: set an explicit clear_rect if scissor is not enabled.

2017-06-26 Thread Rowley, Timothy O
Reviewed-by: Tim Rowley 
>

On Jun 26, 2017, at 10:26 AM, Bruce Cherniak 
> wrote:

Fix regression of "no rendering" on simple apps like glxgears by
setting an explicit full surface clear_rect when scissor is not
enabled.

This regressed with commit 00173d91 "st/mesa: don't set 16
scissors and 16 viewports if they're unused" due to an assumption
that a default scissor rect is always set, which was the case prior
to this optimization.
---
src/gallium/drivers/swr/swr_clear.cpp | 10 +-
1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_clear.cpp 
b/src/gallium/drivers/swr/swr_clear.cpp
index 53f4e02d45..3a35805a7a 100644
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -68,11 +68,19 @@ swr_clear(struct pipe_context *pipe,
   ((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your const'd-ness 
*/
#endif

+   SWR_RECT clear_rect;
+   /* If enabled, clear to scissor; otherwise clear full surface */
+   if (ctx->rasterizer && ctx->rasterizer->scissor) {
+  clear_rect = ctx->swr_scissor;
+   } else {
+  clear_rect = {0, 0, (int32_t)fb->width, (int32_t)fb->height};
+   }
+
   for (unsigned i = 0; i < layers; ++i) {
  swr_update_draw_context(ctx);
  SwrClearRenderTarget(ctx->swrContext, clearMask, i,
   color->f, depth, stencil,
-   ctx->swr_scissor);
+   clear_rect);

  // Mask out the attachments that are out of layers.
  if (fb->zsbuf &&
--
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/3] nir: implement GLSL.std.450 NMax, NMin and NClamp operations

2017-06-26 Thread tournier.elie
On 26 June 2017 at 11:15, Juan A. Suarez Romero  wrote:
> On Tue, 2017-06-13 at 11:14 +0200, Juan A. Suarez Romero wrote:
>> v2: NIR fmax/fmin already handles NaN (Connor).
>> ---
>
> Implemented the functions using fmax/fmin.
>
> Could you review it? Thank you!

Sorry, I missed this one.
Reviewed-by: Elie Tournier 
>
>
> J.A.
>
>>  src/compiler/spirv/vtn_glsl450.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/src/compiler/spirv/vtn_glsl450.c 
>> b/src/compiler/spirv/vtn_glsl450.c
>> index 96e3407dee..1d7e2b8d95 100644
>> --- a/src/compiler/spirv/vtn_glsl450.c
>> +++ b/src/compiler/spirv/vtn_glsl450.c
>> @@ -433,9 +433,11 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 
>> opcode)
>> case GLSLstd450Log2:  return nir_op_flog2;
>> case GLSLstd450Sqrt:  return nir_op_fsqrt;
>> case GLSLstd450InverseSqrt:   return nir_op_frsq;
>> +   case GLSLstd450NMin:  return nir_op_fmin;
>> case GLSLstd450FMin:  return nir_op_fmin;
>> case GLSLstd450UMin:  return nir_op_umin;
>> case GLSLstd450SMin:  return nir_op_imin;
>> +   case GLSLstd450NMax:  return nir_op_fmax;
>> case GLSLstd450FMax:  return nir_op_fmax;
>> case GLSLstd450UMax:  return nir_op_umax;
>> case GLSLstd450SMax:  return nir_op_imax;
>> @@ -537,6 +539,7 @@ handle_glsl450_alu(struct vtn_builder *b, enum 
>> GLSLstd450 entrypoint,
>>return;
>>
>> case GLSLstd450FClamp:
>> +   case GLSLstd450NClamp:
>>val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
>>return;
>> case GLSLstd450UClamp:
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/vcn: enable h264 decode extension support

2017-06-26 Thread Christian König

Am 26.06.2017 um 15:29 schrieb Leo Liu:

It's enabled through message buffer for UVD

Signed-off-by: Leo Liu 


Acked-by: Christian König 


---
  src/gallium/drivers/radeon/radeon_vcn_dec.c | 1 +
  src/gallium/drivers/radeon/radeon_vcn_dec.h | 2 ++
  2 files changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c 
b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index 82dfa71..bd93b84 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -111,6 +111,7 @@ static rvcn_dec_message_avc_t get_h264_msg(struct 
radeon_decoder *dec,
result.sps_info_flags |= pic->pps->sps->mb_adaptive_frame_field_flag << 
1;
result.sps_info_flags |= pic->pps->sps->frame_mbs_only_flag << 2;
result.sps_info_flags |= pic->pps->sps->delta_pic_order_always_zero_flag 
<< 3;
+   result.sps_info_flags |= 1 << 
RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT;
  
  	result.bit_depth_luma_minus8 = pic->pps->sps->bit_depth_luma_minus8;

result.bit_depth_chroma_minus8 = pic->pps->sps->bit_depth_chroma_minus8;
diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.h 
b/src/gallium/drivers/radeon/radeon_vcn_dec.h
index d5516b6..accffef 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.h
@@ -103,6 +103,8 @@
  
  #define RDECODE_FEEDBACK_PROFILING			0x0001
  
+#define RDECODE_SPS_INFO_H264_EXTENSION_SUPPORT_FLAG_SHIFT	7

+
  typedef struct rvcn_dec_message_index_s {
unsigned intmessage_id;
unsigned intoffset;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] swr: set an explicit clear_rect if scissor is not enabled.

2017-06-26 Thread Bruce Cherniak
Fix regression of "no rendering" on simple apps like glxgears by
setting an explicit full surface clear_rect when scissor is not
enabled.

This regressed with commit 00173d91 "st/mesa: don't set 16
scissors and 16 viewports if they're unused" due to an assumption
that a default scissor rect is always set, which was the case prior
to this optimization.
---
 src/gallium/drivers/swr/swr_clear.cpp | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/swr_clear.cpp 
b/src/gallium/drivers/swr/swr_clear.cpp
index 53f4e02d45..3a35805a7a 100644
--- a/src/gallium/drivers/swr/swr_clear.cpp
+++ b/src/gallium/drivers/swr/swr_clear.cpp
@@ -68,11 +68,19 @@ swr_clear(struct pipe_context *pipe,
((union pipe_color_union *)color)->f[3] = 1.0; /* cast off your 
const'd-ness */
 #endif
 
+   SWR_RECT clear_rect;
+   /* If enabled, clear to scissor; otherwise clear full surface */
+   if (ctx->rasterizer && ctx->rasterizer->scissor) {
+  clear_rect = ctx->swr_scissor;
+   } else {
+  clear_rect = {0, 0, (int32_t)fb->width, (int32_t)fb->height};
+   }
+
for (unsigned i = 0; i < layers; ++i) {
   swr_update_draw_context(ctx);
   SwrClearRenderTarget(ctx->swrContext, clearMask, i,
color->f, depth, stencil,
-   ctx->swr_scissor);
+   clear_rect);
 
   // Mask out the attachments that are out of layers.
   if (fb->zsbuf &&
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 1/2] i965: Fix broxton 2x6 l3 config

2017-06-26 Thread Andres Gomez
Anuj, this depends on:
https://cgit.freedesktop.org/mesa/mesa/commit/src/mesa?id=eb23be1d97da290073d76c2510b8999b250f0139

Which didn't make it for -stable. Should we cherry-pick that too?

On Mon, 2017-06-12 at 10:01 -0700, Anuj Phogat wrote:
> The new table added in this patch matches with the table
> in gfxspecs. We were programming the wrong values earlier.
> 
> Signed-off-by: Anuj Phogat 
> Cc: Francisco Jerez 
> Cc: "17.1" 
> ---
>  src/intel/common/gen_device_info.c |  1 +
>  src/intel/common/gen_device_info.h |  1 +
>  src/intel/common/gen_l3_config.c   | 19 +++
>  3 files changed, 21 insertions(+)
> 
> diff --git a/src/intel/common/gen_device_info.c 
> b/src/intel/common/gen_device_info.c
> index 75284a6..eccb464 100644
> --- a/src/intel/common/gen_device_info.c
> +++ b/src/intel/common/gen_device_info.c
> @@ -502,6 +502,7 @@ static const struct gen_device_info gen_device_info_bxt = 
> {
>  
>  static const struct gen_device_info gen_device_info_bxt_2x6 = {
> GEN9_LP_FEATURES_2X6,
> +   .is_broxton_2x6 = 1,
> .l3_banks = 1,
>  };
>  /*
> diff --git a/src/intel/common/gen_device_info.h 
> b/src/intel/common/gen_device_info.h
> index 6207630..4fe1b21 100644
> --- a/src/intel/common/gen_device_info.h
> +++ b/src/intel/common/gen_device_info.h
> @@ -41,6 +41,7 @@ struct gen_device_info
> bool is_haswell;
> bool is_cherryview;
> bool is_broxton;
> +   bool is_broxton_2x6;
> bool is_kabylake;
>  
> bool has_hiz_and_separate_stencil;
> diff --git a/src/intel/common/gen_l3_config.c 
> b/src/intel/common/gen_l3_config.c
> index ae31d08..e17994b 100644
> --- a/src/intel/common/gen_l3_config.c
> +++ b/src/intel/common/gen_l3_config.c
> @@ -102,6 +102,23 @@ static const struct gen_l3_config chv_l3_configs[] = {
>  };
>  
>  /**
> + * BXT 2x6 validated L3 configurations.  \sa ivb_l3_configs.
> + * Number of ways =
> + *Allocation in KB for SKU / (Way size per bank * Number of banks).
> + * For BXT 2x6: Banks = 1, Way size per bank = 4.
> + */
> +static const struct gen_l3_config bxt_2x6_l3_configs[] = {
> +   /*SLM URB  All DC  RO  IS   C   T */
> +   {{  0, 32, 48,  0,  0,  0,  0,  0 }},
> +   {{  0, 32,  0,  8, 40,  0,  0,  0 }},
> +   {{  0, 32,  0, 32, 16,  0,  0,  0 }},
> +   {{ 16, 16, 48,  0,  0,  0,  0,  0 }},
> +   {{ 16, 16,  0, 40,  8,  0,  0,  0 }},
> +   {{ 16, 16,  0, 16, 32,  0,  0,  0 }},
> +   {{ 0 }}
> +};
> +
> +/**
>   * Return a zero-terminated array of validated L3 configurations for the
>   * specified device.
>   */
> @@ -117,6 +134,8 @@ get_l3_configs(const struct gen_device_info *devinfo)
>  
> case 9:
> case 10:
> +  if (devinfo->is_broxton_2x6)
> + return bxt_2x6_l3_configs;
>return chv_l3_configs;
>  
> default:
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Explicit sync tests on android

2017-06-26 Thread Marathe, Yogesh
Can someone please confirm if we can claim explicit sync support on android 
with mesa today? 
If yes, which tests can be used to verify this other than flatland? 

Sorry for the typo in my last email below (corrected).

Regards,
Yogesh.


From: mesa-dev [mailto:mesa-dev-boun...@lists.freedesktop.org] On Behalf Of 
Marathe, Yogesh
Sent: Friday, June 23, 2017 3:50 PM
To: mesa-dev@lists.freedesktop.org
Subject: [Mesa-dev] Explicit sync tests on android

Hi Rob,

Is there any test other than _flatland_ on android with which we can confirm 
explicit sync support?

Regards,
Yogesh.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3 v2] r600g: take into account offset to system inputs at tgsi_interp_egcm()

2017-06-26 Thread Constantine Kharlamov
On 26.06.2017 16:58, Emil Velikov wrote:
> Hi Constantine,
> 
> Thanks for giving r600 some much needed love.
> 
> While the patch has landed, just going to share some generic comments.
> Most of which are documented here 
> https://www.mesa3d.org/submittingpatches.html.

Thanks for the feedback!

> On 24 June 2017 at 15:06, Constantine Kharlamov  wrote:
>> Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=100785
> s/Fixes/Bugzilla/ and keep this above the s-o-b/r-b/other tags.
> 
> Should this fix land in the stable releases - see
> https://www.mesa3d.org/submittingpatches.html#nominations for the
> specifics.

This is a good question. AFAIK it's working in stable, even though by pure 
coincidence.
The bug was revealed by recent TGSI changes.

Tbh ofc there could've been real world situations to reveal the bug in some 
other way.
I'd recommend it for stable once I've piglit-tested it, just to be on the safe 
side. I
mean, I'm sure it should be okay, but due to not very good familiarity with the 
code
yet I'm a little paranoid. I've just tried building stable, but it fails due to 
some
llvm changes. And if it built, I'm not sure I won't fall into the hang
https://bugs.freedesktop.org/show_bug.cgi?id=101575

Additionally: α) the bug is specific to interpolateAt* which are supported 
since 4.00,
whilst the core version of half the r600g cards is stuck at 3.3 due to missing hw 
support
for some 64 operations, β) from a discussion a month ago on IRC I got that the 
function
is rarely used, usually when an app is doing something funky, and γ) out of curiosity 
I grepped
through Wine sources for "interpolateAt", and indeed it's not used anywhere.

All in all, I think it's just not worth the hassle.

>>
>> v2: I was too much twiddling whether to initialize nsys_inputs at the 
>> beginning of shader initialization or for allocation of system values, and 
>> by the time I decided to go with the first one, I forgot to change it back.
>>
> Mentioning in the commit message why you opted for the current
> location might be a good idea?
> 
> In either way, please keep the text within ~72 columns.

You're right. I was thinking that if allocate_system_value_inputs() wasn't 
called for
some reason, the nsys_inputs could have a junk value. AFAIK nothing bad can 
happen ATM,
but it might change in the future.

>> Signed-off-by: Constantine Kharlamov 
>> ---
>>  src/gallium/drivers/r600/r600_shader.c | 8 
>>  src/gallium/drivers/r600/r600_shader.h | 5 +++--
>>  2 files changed, 7 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c 
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 156dba085d..2eb8187341 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -1134,9 +1134,10 @@ static int allocate_system_value_inputs(struct 
>> r600_shader_ctx *ctx, int gpr_off
> 
>> -   k = ctx->shader->ninput ++;
>> +   k = ctx->shader->ninput++;
> 
> [snip]
> 
>> -   unsignedinterpolate_location; //  
>> TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE
>> +   unsignedinterpolate_location; //  
>> TGSI_INTERPOLATE_LOC_CENTER, CENTROID, SAMPLE
>> unsignedlds_pos; /* for evergreen */
>> unsignedback_color_input;
>> unsignedwrite_mask;
>> -   int ring_offset;
>> +   int ring_offset;
> 
> These seem like unrelated white space changes. Please try to keep
> those separate patches since they only distract from the important
> parts of the commit.
> 
> -Emil
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 22/31] mesa: don't flag _NEW_TRANSFORM for st/mesa if possible

2017-06-26 Thread Cherniak, Bruce

> On Jun 23, 2017, at 7:50 PM, Marek Olšák  wrote:
> 
> Thanks. It can only be reproduced with process isolation enabled in
> piglit, which is not something I test.

I don't do anything special with process isolation.  For me this is enough to 
reproduce:

bin/shader_runner 
tests/spec/glsl-1.20/execution/clipping/fixed-clip-enables.shader_test -auto 
-fbo

Regards,
Bruce

> Marek
> 
> On Fri, Jun 23, 2017 at 9:16 PM, Cherniak, Bruce
>  wrote:
>> 
>>> On Jun 22, 2017, at 2:23 AM, Michel Dänzer  wrote:
>>> 
>>> On 13/06/17 01:55 AM, Marek Olšák wrote:
 From: Marek Olšák 
>>> 
>>> This broke piglit spec@glsl-1.20@execution@clipping@fixed-clip-enables
>>> on my Kaveri:
>> 
>> I am seeing this same regression on llvmpipe and swr renderers.  Anyone else?
>> (bisected to this specific change)
>> 
>>> Probe color at (200,75)
>>> Expected: 0.00 0.00 0.00 0.00
>>> Observed: 1.00 1.00 1.00 1.00
>>> Test failure on line 85
>>> Probe color at (50,75)
>>> Expected: 0.00 0.00 0.00 0.00
>>> Observed: 1.00 1.00 1.00 1.00
>>> Test failure on line 89
>>> Probe color at (200,75)
>>> Expected: 0.00 0.00 0.00 0.00
>>> Observed: 1.00 1.00 1.00 1.00
>>> Test failure on line 111
>>> Probe color at (125,200)
>>> Expected: 0.00 0.00 0.00 0.00
>>> Observed: 1.00 1.00 1.00 1.00
>>> Test failure on line 113
>>> Probe color at (50,75)
>>> Expected: 0.00 0.00 0.00 0.00
>>> Observed: 1.00 1.00 1.00 1.00
>>> Test failure on line 141
>>> PIGLIT: {"result": "fail" }
>>> 
>>> 
>>> --
>>> Earthling Michel Dänzer   |   http://www.amd.com
>>> Libre software enthusiast | Mesa and X developer
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] i965/vec4/generator: use 1-Oword Block Read/Write messages for DF scratch writes/reads

2017-06-26 Thread Samuel Iglesias Gonsálvez
On Fri, 2017-06-23 at 11:06 -0700, Francisco Jerez wrote:
> Samuel Iglesias Gonsálvez  writes:
> 
> > On Thu, 2017-06-22 at 16:25 -0700, Francisco Jerez wrote:
> > > Samuel Iglesias Gonsálvez  writes:
> > > 
> > > > Signed-off-by: Samuel Iglesias Gonsálvez 
> > > > ---
> > > >  src/intel/compiler/brw_eu_defines.h  |   2 +
> > > >  src/intel/compiler/brw_shader.cpp|   5 +
> > > >  src/intel/compiler/brw_vec4.cpp  |   7 ++
> > > >  src/intel/compiler/brw_vec4.h|   8 ++
> > > >  src/intel/compiler/brw_vec4_generator.cpp| 136
> > > > +++
> > > >  src/intel/compiler/brw_vec4_reg_allocate.cpp |   6 +-
> > > >  src/intel/compiler/brw_vec4_visitor.cpp  |  49 ++
> > > >  7 files changed, 212 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/src/intel/compiler/brw_eu_defines.h
> > > > b/src/intel/compiler/brw_eu_defines.h
> > > > index 1af835d47e..3c148de0fa 100644
> > > > --- a/src/intel/compiler/brw_eu_defines.h
> > > > +++ b/src/intel/compiler/brw_eu_defines.h
> > > > @@ -436,6 +436,8 @@ enum opcode {
> > > > VEC4_OPCODE_PICK_HIGH_32BIT,
> > > > VEC4_OPCODE_SET_LOW_32BIT,
> > > > VEC4_OPCODE_SET_HIGH_32BIT,
> > > > +   VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_LOW,
> > > > +   VEC4_OPCODE_GEN4_SCRATCH_READ_1OWORD_HIGH,
> > > >  
> > > 
> > > What's the point of introducing two different opcodes with
> > > essentially
> > > the same semantics (read 32B worth of data) as the current
> > > SHADER_OPCODE_GEN4_SCRATCH_READ?
> > 
> > Originally I had only SHADER_OPCODE_GEN4_SCRATCH_READ but I changed
> > it
> > to avoid allocating more registers than needed when doing scratch
> > write
> > of a partial DF write. Let me explain it:
> > 
> > When doing spilling, as DF instructions are both split and
> > scalarized,
> > we read the existing contents in scratch memory, overwrite them
> > with
> > the destination of the instruction, then emit scratch write.
> > Together
> > with the fact that I am not shuffling DF data, we only need to
> > allocate
> > 1 GRF to do so, instead of 2 (if I had emitted
> > SHADER_OPCODE_GEN4_SCRATCH_READ), when doing spilling on partial DF
> > writes.
> > 
> 
> Why would you need to allocate more GRFs for
> SHADER_OPCODE_GEN4_SCRATCH_READ?  It also only reads one register,
> which
> should be sufficient for a single scalarized instruction as long as
> you
> don't shuffle data around -- Have a look at how the FS back-end
> addresses this problem.
> 

OK

> > >   Is there any downside from using the
> > > current opcode with force_writemask_all?  If anything it would
> > > give
> > > you
> > > better performance because you'd only have to set up one header
> > > (which
> > > stalls the EU pipeline twice), send down one message to the
> > > dataport,
> > > and avoid stalling to shuffle the data around in the return
> > > payload
> > > (which prevents your two 1OWORD messages from being pipelined at
> > > all).
> > > 
> > 
> > Sorry, I am confused here. Do you mean using
> > SHADER_OPCODE_GEN4_SCRATCH_READ as-is, which emits a "OWord Dual
> > Block
> > Read" message (so only one message)?
> > 
> > If that's the case, then I should shuffle the destination data of
> > the
> > partial DF write, change the 1-Oword block write offsets and so
> > on...
> 
> Why would you need to shuffle any spilled data?  I don't think
> there's
> much of a benefit from shuffling since scratch overwrites need to read
> the
> original data for the most part anyway because of writemasking.  In
> fact
> shuffling DF data is probably the reason things blow up right now
> whenever you have mixed DF and single-precision reads or writes to
> the
> same spilled variable, which I guess is the reason you need to look
> for
> those cases and mark them as no_spill...
> 

Right, I don't need to shuffle data for the scratch write.

> > in order to save it inside scratch memory in the proper place to
> > make
> > OWord Dual Block Read work. That would require some extra
> > instructions, but I don't know if this would give better
> > performance
> > against current implementation or not.
> > 
> 
> I expect the most serious performance issue with the approach of this
> patch will be the sequence of non-pipelined single-oword reads, which
> means you get to pay for the EU-dataport roundtrip latency twice
> instead
> of once.
> 
> > Then, why do I need force_writemask=true when emitting
> > SHADER_OPCODE_GEN4_SCRATCH_READ?
> > 
> 
> Because you probably don't want to shuffle data in your scratch
> buffer,
> and you don't want the dataport to apply bogus 16B channel enables to
> your reads and writes.
> 

If we save the dvec4 data of a vertex all together in consecutive 32
bytes in scratch memory (i.e. no need for shuffling and we use
force_writemask_all as you said), then we need to create a special case
for IVB and partial DF reads on HSW+ when unspilling the data.
What I 

Re: [Mesa-dev] [PATCH] swr/rast: adjust std::string usage to fix build

2017-06-26 Thread Eric Engestrom
On Monday, 2017-06-26 09:03:13 -0500, Tim Rowley wrote:
> Some combinations of c++ compilers and standard libraries had problems
> with the string::replace code we were using previously.
> 
> This should fix the travis-ci system.

Yup, confirmed: https://travis-ci.org/1ace/mesa/builds/247116248

Tested-by: Eric Engestrom 

Thanks!

> ---
>  .../drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp   | 12 
> +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git 
> a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp 
> b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
> index 0527bf3..e109fd2 100644
> --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
> @@ -141,21 +141,27 @@ extern GlobalKnobs g_GlobalKnobs;
>  void KnobBase::autoExpandEnvironmentVariables(std::string )
>  {
>  {
> +// unix style variable replacement
>  static std::regex env("\\$\\{([^}]+)\\}");
>  std::smatch match;
>  while (std::regex_search(text, match, env))
>  {
>  const std::string var = GetEnv(match[1].str());
> -text.replace(match[0].first, match[0].second, var);
> +// certain combinations of gcc/libstd++ have problems with this
> +// text.replace(match[0].first, match[0].second, var);
> +text.replace(match.prefix().length(), match[0].length(), var);
>  }
>  }
>  {
> +// win32 style variable replacement
>  static std::regex env("\\%([^}]+)\\%");
>  std::smatch match;
>  while (std::regex_search(text, match, env))
>  {
>  const std::string var = GetEnv(match[1].str());
> -text.replace(match[0].first, match[0].second, var);
> +// certain combinations of gcc/libstd++ have problems with this
> +// text.replace(match[0].first, match[0].second, var);
> +text.replace(match.prefix().length(), match[0].length(), var);
>  }
>  }
>  }
> @@ -232,4 +238,4 @@ std::string GlobalKnobs::ToString(const char* 
> optPerLinePrefix)
>  return ' '*(max_len - name_len)
>  
>  
> -%>
> \ No newline at end of file
> +%>
> -- 
> 2.7.4
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/marshal: add custom marshalling for glNamedBuffer(Sub)Data

2017-06-26 Thread Marc Dietrich
Hi Roland,

Am Montag, 26. Juni 2017, 15:51:12 CEST schrieb Marc Dietrich:
> Am Montag, 26. Juni 2017, 15:35:15 CEST schrieb Grigori Goronzy:
> > On 2017-06-26 15:11, Marc Dietrich wrote:
> > > unfortunately, this change broke vmware/vmplayer here (bisected).
> > > Windows
> > > guest on linux host. Sig 11 in SVGA driver. All good if
> > > mesa_glthread=false.
> > 
> > Can you provide instructions how to reproduce this problem? A backtrace
> > might help, too.
> 
> well, this is all proprietary software, so the backtrace doesn't really tell
> anything.
> 
> > I don't really get it, by the way. Isn't the SVGA driver for Linux
> > guests?
> 
> I think the windows driver is named the same. Here is a paste of vmware.log:
> 
> https://pastebin.com/X3CS7rCP
> 
> I also have a core dump, maybe only useful for VMWARE staff...

can you help?

Marc

> > Best regards
> > Grigori
> > 
> > >> > Best regards
> > >> > Grigori
> > >> > 
> > >> >> [1]
> > >> >> https://lists.freedesktop.org/archives/mesa-dev/2017-June/160329.html
> > >> >> 
> > >> >> On 25/06/17 02:59, Grigori Goronzy wrote:
> > >> >>> These entry points are used by Alien Isolation and caused
> > >> >>> synchronization with glthread. The async marshalling implementation
> > >> >>> is similar to glBuffer(Sub)Data.
> > >> >>> 
> > >> >>> Results in an approximately 6x drop in glthread synchronizations
> > >> >>> and
> > >> >>> a
> > >> >>> ~30% FPS jump in Alien Isolation (Medium preset, Athlon 860K, RX
> > >> >>> 480).
> > >> >>> 
> > >> >>> This does not care about the EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
> > >> >>> special
> > >> >>> case like the Buffer(Sub)Data marshalling functions.
> > >> >>> ---
> > >> >>> I'm not a fan of the code duplication and I'll try to address that
> > >> >>> in
> > >> >>> further changes to glthread/marshalling, but the improvement is so
> > >> >>> noticeable that I'd like to share it. Alien Isolation is now
> > >> >>> playable on
> > >> >>> my system while it wasn't before.
> > >> >>> 
> > >> >>>   src/mapi/glapi/gen/ARB_direct_state_access.xml |   4 +-
> > >> >>>   src/mesa/main/marshal.c| 108
> > >> >>> 
> > >> >>> +
> > >> >>> 
> > >> >>>   src/mesa/main/marshal.h|  18 +
> > >> >>>   3 files changed, 128 insertions(+), 2 deletions(-)
> > >> >>> 
> > >> >>> diff --git a/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > >> >>> b/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > >> >>> index cb24d79..d3d2246 100644
> > >> >>> --- a/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > >> >>> +++ b/src/mapi/glapi/gen/ARB_direct_state_access.xml
> > >> >>> @@ -61,14 +61,14 @@
> > >> >>> 
> > >> >>> 
> > >> >>>  
> > >> >>>  
> > >> >>>   
> > >> >>>   -   
> > >> >>> 
> > >> >>> +   
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>>  
> > >> >>>  
> > >> >>>   
> > >> >>>   -   
> > >> >>> 
> > >> >>> +> >> >>> marshal="custom">
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>> 
> > >> >>> diff --git a/src/mesa/main/marshal.c b/src/mesa/main/marshal.c
> > >> >>> index 4840f32..1fddf8e 100644
> > >> >>> --- a/src/mesa/main/marshal.c
> > >> >>> +++ b/src/mesa/main/marshal.c
> > >> >>> @@ -408,6 +408,114 @@ _mesa_marshal_BufferSubData(GLenum target,
> > >> >>> GLintptr offset, GLsizeiptr size,
> > >> >>> 
> > >> >>>  }
> > >> >>>   
> > >> >>>   }
> > >> >>>   +/* NamedBufferData: marshalled asynchronously */
> > >> >>> 
> > >> >>> +struct marshal_cmd_NamedBufferData
> > >> >>> +{
> > >> >>> +   struct marshal_cmd_base cmd_base;
> > >> >>> +   GLuint name;
> > >> >>> +   GLsizei size;
> > >> >>> +   GLenum usage;
> > >> >>> +   /* Next size bytes are GLubyte data[size] */
> > >> >>> +};
> > >> >>> +
> > >> >>> +void
> > >> >>> +_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
> > >> >>> +const struct
> > >> >>> marshal_cmd_NamedBufferData *cmd)
> > >> >>> +{
> > >> >>> +   const GLuint name = cmd->name;
> > >> >>> +   const GLsizei size = cmd->size;
> > >> >>> +   const GLenum usage = cmd->usage;
> > >> >>> +   const void *data = (const void *) (cmd + 1);
> > >> >>> +
> > >> >>> +   CALL_NamedBufferData(ctx->CurrentServerDispatch,
> > >> >>> +  (name, size, data, usage));
> > >> >>> +}
> > >> >>> +
> > >> >>> +void GLAPIENTRY
> > >> >>> +_mesa_marshal_NamedBufferData(GLuint buffer, GLsizeiptr size,
> > >> >>> +  const GLvoid * data, GLenum usage)
> > >> >>> +{
> > >> >>> +   GET_CURRENT_CONTEXT(ctx);
> > >> >>> +   size_t cmd_size = sizeof(struct marshal_cmd_NamedBufferData) +
> > >> >>> size;
> > >> >>> +
> > >> >>> +   debug_print_marshal("NamedBufferData");
> > >> >>> +   if (unlikely(size < 0)) {
> > >> >>> +  _mesa_glthread_finish(ctx);
> > >> >>> +  _mesa_error(ctx, GL_INVALID_VALUE, "NamedBufferData(size <
> > >> >>> 0)");
> > >> 

Re: [Mesa-dev] [PATCH 2/4] etnaviv: add support for swizzled texture formats

2017-06-26 Thread Wladimir J. van der Laan
On Sat, Jun 24, 2017 at 10:41:58AM +0200, Christian Gmeiner wrote:
> Hi
> 
> 2017-06-22 14:39 GMT+02:00 Wladimir J. van der Laan :
> > On Wed, Jun 21, 2017 at 10:36:46PM +0200, Christian Gmeiner wrote:
> >> Passes all ext_texture_swizzle piglits.
> >
> > You seem to have dropped the check in etnaviv_screen.c for HALTI0 when 
> > texture
> > formats have implicit non-identity swizzle.
> > I do think that is necessary as those formats don't work on  >
> 
> Yeah I totally overlooked it :( What do you think about something like this:

Reviewed-By: Wladimir J. van der Laan 

> 
> >8---
> 
> From ad780e77bfac7ed5c5427ad4b850d3b596558f0a Mon Sep 17 00:00:00 2001
> From: Christian Gmeiner 
> Date: Fri, 16 Jun 2017 17:02:29 +0200
> Subject: [PATCH] etnaviv: add support for swizzled texture formats
> 
> Passes all ext_texture_swizzle piglits.
> 
> Signed-off-by: Christian Gmeiner 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_format.c  | 103 
> ++
>  src/gallium/drivers/etnaviv/etnaviv_format.h  |   7 ++
>  src/gallium/drivers/etnaviv/etnaviv_screen.c  |  19 +++--
>  src/gallium/drivers/etnaviv/etnaviv_texture.c |   9 +--
>  4 files changed, 99 insertions(+), 39 deletions(-)
> 
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_format.c
> b/src/gallium/drivers/etnaviv/etnaviv_format.c
> index e9cd1040b5..ee50b52962 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_format.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
> @@ -40,6 +40,7 @@ struct etna_format {
> unsigned tex;
> unsigned rs;
> boolean present;
> +   const unsigned char tex_swiz[4];
>  };
> 
>  #define RS_FORMAT_NONE ~0
> @@ -51,22 +52,31 @@ struct etna_format {
>  #define RS_FORMAT_X8B8G8R8(RS_FORMAT_X8R8G8B8 | RS_FORMAT_RB_SWAP)
>  #define RS_FORMAT_A8B8G8R8(RS_FORMAT_A8R8G8B8 | RS_FORMAT_RB_SWAP)
> 
> +#define SWIZ(x,y,z,w) {\
> +   PIPE_SWIZZLE_##x,   \
> +   PIPE_SWIZZLE_##y,   \
> +   PIPE_SWIZZLE_##z,   \
> +   PIPE_SWIZZLE_##w\
> +}
> +
>  /* vertex + texture */
> -#define VT(pipe, vtxfmt, texfmt, rsfmt)   \
> +#define VT(pipe, vtxfmt, texfmt, texswiz, rsfmt)  \
> [PIPE_FORMAT_##pipe] = {   \
>.vtx = VIVS_FE_VERTEX_ELEMENT_CONFIG_TYPE_##vtxfmt, \
>.tex = TEXTURE_FORMAT_##texfmt, \
>.rs = RS_FORMAT_##rsfmt,\
>.present = 1,   \
> +  .tex_swiz = texswiz,\
> }
> 
>  /* texture-only */
> -#define _T(pipe, fmt, rsfmt)   \
> +#define _T(pipe, fmt, swiz, rsfmt) \
> [PIPE_FORMAT_##pipe] = {\
>.vtx = ETNA_NO_MATCH,\
>.tex = TEXTURE_FORMAT_##fmt, \
>.rs = RS_FORMAT_##rsfmt, \
>.present = 1,\
> +  .tex_swiz = swiz,\
> }
> 
>  /* vertex-only */
> @@ -87,9 +97,9 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
> V_(R8_USCALED, UNSIGNED_BYTE, NONE),
> V_(R8_SSCALED, BYTE,  NONE),
> 
> -   _T(A8_UNORM, A8, NONE),
> -   _T(L8_UNORM, L8, NONE),
> -   _T(I8_UNORM, I8, NONE),
> +   _T(A8_UNORM, A8, SWIZ(X, Y, Z, W), NONE),
> +   _T(L8_UNORM, L8, SWIZ(X, Y, Z, W), NONE),
> +   _T(I8_UNORM, I8, SWIZ(X, Y, Z, W), NONE),
> 
> /* 16-bit */
> V_(R16_UNORM,   UNSIGNED_SHORT, NONE),
> @@ -100,15 +110,15 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
> V_(R16_SSCALED, SHORT,  NONE),
> V_(R16_FLOAT,   HALF_FLOAT, NONE),
> 
> -   _T(B4G4R4A4_UNORM, A4R4G4B4, A4R4G4B4),
> -   _T(B4G4R4X4_UNORM, X4R4G4B4, X4R4G4B4),
> +   _T(B4G4R4A4_UNORM, A4R4G4B4, SWIZ(X, Y, Z, W), A4R4G4B4),
> +   _T(B4G4R4X4_UNORM, X4R4G4B4, SWIZ(X, Y, Z, W), X4R4G4B4),
> 
> -   _T(L8A8_UNORM, A8L8, NONE),
> +   _T(L8A8_UNORM, A8L8, SWIZ(X, Y, Z, W), NONE),
> 
> -   _T(Z16_UNORM,  D16,  A4R4G4B4),
> -   _T(B5G6R5_UNORM,   R5G6B5,   R5G6B5),
> -   _T(B5G5R5A1_UNORM, A1R5G5B5, A1R5G5B5),
> -   _T(B5G5R5X1_UNORM, X1R5G5B5, X1R5G5B5),
> +   _T(Z16_UNORM,  D16,  SWIZ(X, Y, Z, W), A4R4G4B4),
> +   _T(B5G6R5_UNORM,   R5G6B5,   SWIZ(X, Y, Z, W), R5G6B5),
> +   _T(B5G5R5A1_UNORM, A1R5G5B5, SWIZ(X, Y, Z, W), A1R5G5B5),
> +   _T(B5G5R5X1_UNORM, X1R5G5B5, SWIZ(X, Y, Z, W), X1R5G5B5),
> 
> V_(R8G8_UNORM,   UNSIGNED_BYTE,  NONE),
> V_(R8G8_SNORM,   BYTE,   NONE),
> @@ -147,25 +157,25 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
> 
> V_(R8G8B8A8_UNORM,   UNSIGNED_BYTE, A8B8G8R8),
> V_(R8G8B8A8_SNORM,   BYTE,  A8B8G8R8),
> -   _T(R8G8B8X8_UNORM,   X8B8G8R8,  X8B8G8R8),
> +   _T(R8G8B8X8_UNORM,   X8B8G8R8,  SWIZ(X, Y, Z, W), X8B8G8R8),
> V_(R8G8B8A8_UINT,UNSIGNED_BYTE, A8B8G8R8),
> V_(R8G8B8A8_SINT,BYTE,  A8B8G8R8),
> V_(R8G8B8A8_USCALED, UNSIGNED_BYTE, A8B8G8R8),
> 

Re: [Mesa-dev] [PATCH 6/9] glsl/blob: add valgrind checks that written data is defined

2017-06-26 Thread Emil Velikov
On 26 June 2017 at 10:40, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Undefined data will eventually trigger a valgrind error while computing
> its CRC32 while writing it into the disk cache, but at that point, it is
> basically impossible to track down where the undefined data came from.
>
> With this change, finding the origin of undefined data becomes easy.
> ---
>  src/compiler/Makefile.am |  2 ++
>  src/compiler/glsl/blob.c | 12 
>  2 files changed, 14 insertions(+)
>
> diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am
> index d52da91..dcbd63e 100644
> --- a/src/compiler/Makefile.am
> +++ b/src/compiler/Makefile.am
> @@ -32,24 +32,26 @@ AM_CPPFLAGS = \
> -I$(top_srcdir)/src/compiler/glsl\
> -I$(top_srcdir)/src/compiler/glsl/glcpp\
> -I$(top_builddir)/src/compiler/nir \
> -I$(top_srcdir)/src/compiler/nir \
> -I$(top_srcdir)/src/gallium/include \
> -I$(top_srcdir)/src/gallium/auxiliary \
> -I$(top_srcdir)/src/gtest/include \
> $(DEFINES)
>
>  AM_CFLAGS = \
> +   $(VALGRIND_CFLAGS) \
You can move this to AM_CPPFLAGS above and drop the duplicate line in CXXFLAGS.

> $(VISIBILITY_CFLAGS) \
> $(MSVC2013_COMPAT_CFLAGS)
>
>  AM_CXXFLAGS = \
> +   $(VALGRIND_CFLAGS) \
> $(VISIBILITY_CXXFLAGS) \
> $(MSVC2013_COMPAT_CXXFLAGS)
>

With the above, patch is
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 83/92] ac/nir: add always_vector argument to ac_build_gather_values_extended

2017-06-26 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This simplifies a bunch of places that no longer need special treatment
of value_count == 1. We rely on LLVM to optimize away the 1-element vector
types.

This fixes a bunch of bugs where 1-element arrays are indexed indirectly.
---
 src/amd/common/ac_llvm_build.c  |  7 ---
 src/amd/common/ac_llvm_build.h  |  3 ++-
 src/amd/common/ac_nir_to_llvm.c | 22 +++---
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 9d78b12..0a3cc8a 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -175,27 +175,28 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char 
*buf, unsigned bufsize)
snprintf(buf, bufsize, "f64");
break;
}
 }
 
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
-   bool load)
+   bool load,
+   bool always_vector)
 {
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef vec = NULL;
unsigned i;
 
-   if (value_count == 1) {
+   if (value_count == 1 && !always_vector) {
if (load)
return LLVMBuildLoad(builder, values[0], "");
return values[0];
} else if (!value_count)
unreachable("value_count is 0");
 
for (i = 0; i < value_count; i++) {
LLVMValueRef value = values[i * value_stride];
if (load)
value = LLVMBuildLoad(builder, value, "");
@@ -206,21 +207,21 @@ ac_build_gather_values_extended(struct ac_llvm_context 
*ctx,
vec = LLVMBuildInsertElement(builder, vec, value, index, "");
}
return vec;
 }
 
 LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
   LLVMValueRef *values,
   unsigned value_count)
 {
-   return ac_build_gather_values_extended(ctx, values, value_count, 1, 
false);
+   return ac_build_gather_values_extended(ctx, values, value_count, 1, 
false, false);
 }
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
  LLVMValueRef num,
  LLVMValueRef den)
 {
LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
 
if (!LLVMIsConstant(ret))
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index b9aeacd..9ad13cc 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -72,21 +72,22 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char 
*name,
   LLVMTypeRef return_type, LLVMValueRef *params,
   unsigned param_count, unsigned attrib_mask);
 
 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned 
bufsize);
 
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count,
unsigned value_stride,
-   bool load);
+   bool load,
+   bool always_vector);
 LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
   LLVMValueRef *values,
   unsigned value_count);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
  LLVMValueRef num,
  LLVMValueRef den);
 
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index f428d7c..e24ad65 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1032,25 +1032,20 @@ static LLVMValueRef trim_vector(struct ac_llvm_context 
*ctx,
 static void
 build_store_values_extended(struct ac_llvm_context *ac,
 LLVMValueRef *values,
 unsigned value_count,
 unsigned value_stride,
 LLVMValueRef vec)
 {
LLVMBuilderRef builder = ac->builder;
unsigned i;
 
-   if (value_count == 1) {
-   LLVMBuildStore(builder, vec, values[0]);
-   return;
-   }
-
for (i = 0; i < value_count; i++) {
LLVMValueRef ptr = values[i * value_stride];
LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
LLVMValueRef value = LLVMBuildExtractElement(builder, vec, 
index, "");
LLVMBuildStore(builder, value, ptr);
}
 }
 
 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
 const nir_ssa_def *def)
@@ -2918,58 +2913,58 @@ static LLVMValueRef 

[Mesa-dev] [PATCH 89/92] ac/nir, radeonsi: add and use ac_shader_abi::param_frag_pos

2017-06-26 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/amd/common/ac_nir_to_llvm.c  | 30 +++---
 src/amd/common/ac_shader_abi.h   |  1 +
 src/gallium/drivers/radeonsi/si_shader.c |  1 +
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 1cb920c..0457d43 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -60,20 +60,21 @@ struct ac_nir_context {
 
LLVMValueRef main_function;
LLVMBasicBlockRef continue_block;
LLVMBasicBlockRef break_block;
 
LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
 
int num_locals;
LLVMValueRef *locals;
 
+   LLVMValueRef frag_pos[4];
LLVMValueRef ddxy_lds;
 
struct nir_to_llvm_context *nctx; /* TODO get rid of this */
 };
 
 struct nir_to_llvm_context {
struct ac_llvm_context ac;
const struct ac_nir_compiler_options *options;
struct ac_shader_variant_info *shader_info;
struct ac_shader_abi abi;
@@ -121,21 +122,20 @@ struct nir_to_llvm_context {
 
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring;
LLVMValueRef hs_ring_tess_offchip;
LLVMValueRef hs_ring_tess_factor;
 
LLVMValueRef prim_mask;
LLVMValueRef sample_pos_offset;
LLVMValueRef persp_sample, persp_center, persp_centroid;
LLVMValueRef linear_sample, linear_center, linear_centroid;
-   LLVMValueRef frag_pos[4];
 
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
LLVMTypeRef v2i32;
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
LLVMTypeRef v8i32;
@@ -822,24 +822,25 @@ static void create_function(struct nir_to_llvm_context 
*ctx)
add_user_sgpr_argument(&args, ctx->i32, 
&ctx->sample_pos_offset); /* sample position offset */
add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim 
mask */
add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_sample); /* 
persp sample */
add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_center); /* 
persp center */
add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_centroid); /* 
persp centroid */
add_vgpr_argument(&args, ctx->v3i32, NULL); /* persp pull model 
*/
add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_sample); /* 
linear sample */
add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_center); /* 
linear center */
add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_centroid); /* 
linear centroid */
add_vgpr_argument(&args, ctx->f32, NULL);  /* line stipple tex 
*/
-   add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[0]);  /* pos 
x float */
-   add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[1]);  /* pos 
y float */
-   add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[2]);  /* pos 
z float */
-   add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[3]);  /* pos 
w float */
+   ctx->abi.param_frag_pos =
+   add_vgpr_argument(&args, ctx->f32, NULL);  /* pos x 
float */
+   add_vgpr_argument(&args, ctx->f32, NULL);  /* pos y float */
+   add_vgpr_argument(&args, ctx->f32, NULL);  /* pos z float */
+   add_vgpr_argument(&args, ctx->f32, NULL);  /* pos w float */
ctx->abi.param_front_face =
add_vgpr_argument(&args, ctx->i32, NULL);  /* front 
face */
ctx->abi.param_ancillary =
add_vgpr_argument(&args, ctx->i32, NULL);  /* ancillary 
*/
ctx->abi.param_sample_coverage =
add_vgpr_argument(&args, ctx->i32, NULL);  /* sample 
coverage */
add_vgpr_argument(&args, ctx->i32, NULL);  /* fixed pt */
break;
default:
unreachable("Shader stage not implemented");
@@ -3244,21 +3245,21 @@ static LLVMValueRef get_image_coords(struct 
ac_nir_context *ctx,
int chan;
 
fmask_load_address[0] = 
LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
fmask_load_address[1] = 
LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
if (glsl_sampler_type_is_array(type))
fmask_load_address[2] = 
LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
else
fmask_load_address[2] = NULL;
if (add_frag_pos) {
for (chan = 0; chan < 2; ++chan)
-   fmask_load_address[chan] = 
LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], 
LLVMBuildFPToUI(ctx->ac.builder, ctx->nctx->frag_pos[chan], ctx->ac.i32, ""), 
"");
+   fmask_load_address[chan] = 
LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], 
LLVMBuildFPToUI(ctx->ac.builder, ctx->frag_pos[chan], ctx->ac.i32, ""), "");

Re: [Mesa-dev] [PATCH] mesa: don't set _NEW_PROGRAM_CONSTANTS for non-bindless opaque uniforms

2017-06-26 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jun 23, 2017 at 12:44 AM, Timothy Arceri  wrote:
> v2: rebase on new _mesa_flush_vertices_for_uniforms() helper
> ---
>  src/mesa/main/uniform_query.cpp | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
> index 5eb0efc..5fe63e0 100644
> --- a/src/mesa/main/uniform_query.cpp
> +++ b/src/mesa/main/uniform_query.cpp
> @@ -999,20 +999,26 @@ validate_uniform(GLint location, GLsizei count, const 
> GLvoid *values,
>}
> }
>
> return uni;
>  }
>
>  void
>  _mesa_flush_vertices_for_uniforms(struct gl_context *ctx,
>const struct gl_uniform_storage *uni)
>  {
> +
> +   if (!uni->is_bindless && uni->type->contains_opaque()) {
> +  FLUSH_VERTICES(ctx, 0);
> +  return;
> +   }
> +
> uint64_t new_driver_state = 0;
> unsigned mask = uni->active_shader_mask;
>
> while (mask) {
>unsigned index = u_bit_scan(&mask);
>
>assert(index < MESA_SHADER_STAGES);
>new_driver_state |= ctx->DriverFlags.NewShaderConstants[index];
> }
>
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 90/92] ac/nir: pass ac_llvm_context to unpack_param

2017-06-26 Thread Nicolai Hähnle
From: Nicolai Hähnle 

---
 src/amd/common/ac_nir_to_llvm.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 0457d43..156b685 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -489,42 +489,42 @@ static int get_elem_bits(struct ac_llvm_context *ctx, 
LLVMTypeRef type)
if (type == ctx->f16)
return 16;
if (type == ctx->f32)
return 32;
if (type == ctx->f64)
return 64;
 
unreachable("Unhandled type kind in get_elem_bits");
 }
 
-static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
+static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
 LLVMValueRef param, unsigned rshift,
 unsigned bitwidth)
 {
LLVMValueRef value = param;
if (rshift)
value = LLVMBuildLShr(ctx->builder, value,
  LLVMConstInt(ctx->i32, rshift, false), 
"");
 
if (rshift + bitwidth < 32) {
unsigned mask = (1 << bitwidth) - 1;
value = LLVMBuildAnd(ctx->builder, value,
 LLVMConstInt(ctx->i32, mask, false), "");
}
return value;
 }
 
 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
 {
switch (ctx->stage) {
case MESA_SHADER_TESS_CTRL:
-   return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
+   return unpack_param(&ctx->ac, ctx->tcs_rel_ids, 0, 8);
case MESA_SHADER_TESS_EVAL:
return ctx->tes_rel_patch_id;
break;
default:
unreachable("Illegal stage");
}
 }
 
 /* Tessellation shaders pass outputs to the next shader using LDS.
  *
@@ -543,48 +543,48 @@ static LLVMValueRef get_rel_patch_id(struct 
nir_to_llvm_context *ctx)
  * - TCS outputs for patch 2= get_tcs_out_current_patch_offset (if 
RelPatchID==2)
  * - Per-patch TCS outputs for patch 2  = 
get_tcs_out_current_patch_data_offset (if RelPatchID==2)
  * - ...
  *
  * All three shaders VS(LS), TCS, TES share the same LDS space.
  */
 static LLVMValueRef
 get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
 {
if (ctx->stage == MESA_SHADER_VERTEX)
-   return unpack_param(ctx, ctx->ls_out_layout, 0, 13);
+   return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
else if (ctx->stage == MESA_SHADER_TESS_CTRL)
-   return unpack_param(ctx, ctx->tcs_in_layout, 0, 13);
+   return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
else {
assert(0);
return NULL;
}
 }
 
 static LLVMValueRef
 get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
 {
-   return unpack_param(ctx, ctx->tcs_out_layout, 0, 13);
+   return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
 }
 
 static LLVMValueRef
 get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
 {
return LLVMBuildMul(ctx->builder,
-   unpack_param(ctx, ctx->tcs_out_offsets, 0, 16),
+   unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
LLVMConstInt(ctx->i32, 4, false), "");
 }
 
 static LLVMValueRef
 get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
 {
return LLVMBuildMul(ctx->builder,
-   unpack_param(ctx, ctx->tcs_out_offsets, 16, 16),
+   unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 
16),
LLVMConstInt(ctx->i32, 4, false), "");
 }
 
 static LLVMValueRef
 get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
 {
LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
 
return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
@@ -2567,22 +2567,22 @@ lds_store(struct nir_to_llvm_context *ctx,
  * Note that every attribute has 4 components.
  */
 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
LLVMValueRef vertex_index,
LLVMValueRef param_index)
 {
LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
LLVMValueRef param_stride, constant16;
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
 
-   vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6);
-   num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9);
+   vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 
6);
+   num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
 

[Mesa-dev] [PATCH 86/92] radeonsi: tweak next-shader assumptions when streamout is used

2017-06-26 Thread Nicolai Hähnle
From: Nicolai Hähnle 

VS with streamout is always a HW VS.
---
 src/gallium/drivers/radeonsi/si_state_shaders.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index afa78eb..af19cfc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1730,40 +1730,42 @@ static int si_shader_select(struct pipe_context *ctx,
 {
struct si_context *sctx = (struct si_context *)ctx;
struct si_shader_key key;
 
si_shader_selector_key(ctx, state->cso, &key);
return si_shader_select_with_key(sctx->screen, state, compiler_state,
 &key, -1);
 }
 
 static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
+ bool streamout,
  struct si_shader_key *key)
 {
unsigned next_shader = info->properties[TGSI_PROPERTY_NEXT_SHADER];
 
switch (info->processor) {
case PIPE_SHADER_VERTEX:
switch (next_shader) {
case PIPE_SHADER_GEOMETRY:
key->as_es = 1;
break;
case PIPE_SHADER_TESS_CTRL:
case PIPE_SHADER_TESS_EVAL:
key->as_ls = 1;
break;
default:
-   /* If POSITION isn't written, it can't be a HW VS.
-* Assume that it's a HW LS. (the next shader is TCS)
+   /* If POSITION isn't written, it can only be a HW VS
+* if streamout is used. If streamout isn't used,
+* assume that it's a HW LS. (the next shader is TCS)
 * This heuristic is needed for separate shader objects.
 */
-   if (!info->writes_position)
+   if (!info->writes_position && !streamout)
key->as_ls = 1;
}
break;
 
case PIPE_SHADER_TESS_EVAL:
if (next_shader == PIPE_SHADER_GEOMETRY ||
!info->writes_position)
key->as_es = 1;
break;
}
@@ -1798,21 +1800,23 @@ void si_init_shader_selector_async(void *job, int 
thread_index)
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *tgsi_binary = NULL;
 
if (!shader) {
fprintf(stderr, "radeonsi: can't allocate a main shader 
part\n");
return;
}
 
shader->selector = sel;
-   si_parse_next_shader_property(&sel->info, &shader->key);
+   si_parse_next_shader_property(&sel->info,
+ sel->so.num_outputs != 0,
+ &shader->key);
 
if (sel->tokens)
tgsi_binary = si_get_tgsi_binary(sel);
 
/* Try to load the shader from the shader cache. */
mtx_lock(&sscreen->shader_cache_mutex);
 
if (tgsi_binary &&
si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) {
mtx_unlock(&sscreen->shader_cache_mutex);
@@ -1880,21 +1884,23 @@ void si_init_shader_selector_async(void *job, int 
thread_index)
}
}
}
 
/* Pre-compilation. */
if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
struct si_shader_key key;
 
memset(&key, 0, sizeof(key));
-   si_parse_next_shader_property(&sel->info, &key);
+   si_parse_next_shader_property(&sel->info,
+ sel->so.num_outputs != 0,
+ &key);
 
/* Set reasonable defaults, so that the shader key doesn't
 * cause any code to be eliminated.
 */
switch (sel->type) {
case PIPE_SHADER_TESS_CTRL:
key.part.tcs.epilog.prim_mode = PIPE_PRIM_TRIANGLES;
break;
case PIPE_SHADER_FRAGMENT:
key.part.ps.prolog.bc_optimize_for_persp =
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   >