[Mesa-dev] [PATCH 3/3] st/mesa: don't flush the front buffer if it's a pbuffer

2019-04-26 Thread Marek Olšák
From: Marek Olšák 

This is the best guess I can make here.
---
 src/mesa/state_tracker/st_manager.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_manager.c 
b/src/mesa/state_tracker/st_manager.c
index 5efbd1fa1d2..bee1f6b1366 100644
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -1123,23 +1123,33 @@ st_api_destroy(struct st_api *stapi)
 
 /**
  * Flush the front buffer if the current context renders to the front buffer.
  */
 void
 st_manager_flush_frontbuffer(struct st_context *st)
 {
struct st_framebuffer *stfb = st_ws_framebuffer(st->ctx->DrawBuffer);
struct st_renderbuffer *strb = NULL;
 
-   if (stfb)
-  strb = st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].
- Renderbuffer);
+   if (!stfb)
+  return;
+
+   /* If the context uses a doublebuffered visual, but the buffer is
+* single-buffered, guess that it's a pbuffer, which doesn't need
+* flushing.
+*/
+   if (st->ctx->Visual.doubleBufferMode &&
+   !stfb->Base.Visual.doubleBufferMode)
+  return;
+
+   strb = st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].
+  Renderbuffer);
 
/* Do we have a front color buffer and has it been drawn to since last
 * frontbuffer flush?
 */
if (strb && strb->defined) {
   stfb->iface->flush_front(&st->iface, stfb->iface,
ST_ATTACHMENT_FRONT_LEFT);
   strb->defined = GL_FALSE;
 
   /* Trigger an update of strb->defined on next draw */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] mesa: fix pbuffers because internally they are front buffers

2019-04-26 Thread Marek Olšák
From: Marek Olšák 

This fixes the egl_ext_device_base piglit test, which uses EGL pbuffers.
---
 src/mesa/main/buffers.c | 56 ++---
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 2148fa1316c..1ac0d5d0798 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -86,46 +86,62 @@ supported_buffer_bitmask(const struct gl_context *ctx,
 
 
 /**
  * Helper routine used by glDrawBuffer and glDrawBuffersARB.
  * Given a GLenum naming one or more color buffers (such as
  * GL_FRONT_AND_BACK), return the corresponding bitmask of BUFFER_BIT_* flags.
  */
 static GLbitfield
 draw_buffer_enum_to_bitmask(const struct gl_context *ctx, GLenum buffer)
 {
+   /* If the front buffer is the only buffer, GL_BACK and all other flags
+* that include BACK select the front buffer for drawing. There are
+* several reasons we want to do this.
+*
+* 1) OpenGL ES 3.0 requires it:
+*
+*   Page 181 (page 192 of the PDF) in section 4.2.1 of the OpenGL
+*   ES 3.0.1 specification says:
+*
+* "When draw buffer zero is BACK, color values are written
+* into the sole buffer for single-buffered contexts, or into
+* the back buffer for double-buffered contexts."
+*
+*   We also do this for GLES 1 and 2 because those APIs have no
+*   concept of selecting the front and back buffer anyway and it's
+*   convenient to be able to maintain the magic behaviour of
+*   GL_BACK in that case.
+*
+* 2) Pbuffers are back buffers from the application point of view,
+*but they are front buffers from the Mesa point of view,
+*because they are always single buffered.
+*/
+   if (!ctx->DrawBuffer->Visual.doubleBufferMode) {
+  switch (buffer) {
+  case GL_BACK:
+ buffer = GL_FRONT;
+ break;
+  case GL_BACK_RIGHT:
+ buffer = GL_FRONT_RIGHT;
+ break;
+  case GL_BACK_LEFT:
+ buffer = GL_FRONT_LEFT;
+ break;
+  }
+   }
+
switch (buffer) {
   case GL_NONE:
  return 0;
   case GL_FRONT:
  return BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT;
   case GL_BACK:
- if (_mesa_is_gles(ctx)) {
-/* Page 181 (page 192 of the PDF) in section 4.2.1 of the OpenGL
- * ES 3.0.1 specification says:
- *
- * "When draw buffer zero is BACK, color values are written
- * into the sole buffer for single-buffered contexts, or into
- * the back buffer for double-buffered contexts."
- *
- * Since there is no stereo rendering in ES 3.0, only return the
- * LEFT bits.  This also satisfies the "n must be 1" requirement.
- *
- * We also do this for GLES 1 and 2 because those APIs have no
- * concept of selecting the front and back buffer anyway and it's
- * convenient to be able to maintain the magic behaviour of
- * GL_BACK in that case.
- */
-if (ctx->DrawBuffer->Visual.doubleBufferMode)
-   return BUFFER_BIT_BACK_LEFT;
-return BUFFER_BIT_FRONT_LEFT;
- }
  return BUFFER_BIT_BACK_LEFT | BUFFER_BIT_BACK_RIGHT;
   case GL_RIGHT:
  return BUFFER_BIT_FRONT_RIGHT | BUFFER_BIT_BACK_RIGHT;
   case GL_FRONT_RIGHT:
  return BUFFER_BIT_FRONT_RIGHT;
   case GL_BACK_RIGHT:
  return BUFFER_BIT_BACK_RIGHT;
   case GL_BACK_LEFT:
  return BUFFER_BIT_BACK_LEFT;
   case GL_FRONT_AND_BACK:
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] mesa: rework error handling in glDrawBuffers

2019-04-26 Thread Marek Olšák
From: Marek Olšák 

It's needed by the next pbuffer fix, which changes the behavior of
draw_buffer_enum_to_bitmask, so it can't be used to help with error
checking.
---
 src/mesa/main/buffers.c | 53 ++---
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index d98c015bb24..2148fa1316c 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -430,65 +430,70 @@ draw_buffers(struct gl_context *ctx, struct 
gl_framebuffer *fb, GLsizei n,
  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid buffers)", caller);
  return;
   }
}
 
supportedMask = supported_buffer_bitmask(ctx, fb);
usedBufferMask = 0x0;
 
/* complicated error checking... */
for (output = 0; output < n; output++) {
-  destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
-
   if (!no_error) {
- /* From the OpenGL 3.0 specification, page 258:
-  * "Each buffer listed in bufs must be one of the values from tables
-  *  4.5 or 4.6.  Otherwise, an INVALID_ENUM error is generated.
-  */
- if (destMask[output] == BAD_MASK) {
-_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
-caller, _mesa_enum_to_string(buffers[output]));
-return;
- }
-
  /* From the OpenGL 4.5 specification, page 493 (page 515 of the PDF)
   * "An INVALID_ENUM error is generated if any value in bufs is FRONT,
   * LEFT, RIGHT, or FRONT_AND_BACK . This restriction applies to both
   * the default framebuffer and framebuffer objects, and exists because
   * these constants may themselves refer to multiple buffers, as shown
   * in table 17.4."
   *
-  * And on page 492 (page 514 of the PDF):
+  * From the OpenGL 4.5 specification, page 492 (page 514 of the PDF):
   * "If the default framebuffer is affected, then each of the constants
   * must be one of the values listed in table 17.6 or the special value
   * BACK. When BACK is used, n must be 1 and color values are written
   * into the left buffer for single-buffered contexts, or into the back
   * left buffer for double-buffered contexts."
   *
   * Note "special value BACK". GL_BACK also refers to multiple buffers,
   * but it is consider a special case here. This is a change on 4.5.
   * For OpenGL 4.x we check that behaviour. For any previous version we
   * keep considering it wrong (as INVALID_ENUM).
   */
- if (util_bitcount(destMask[output]) > 1) {
-if (_mesa_is_winsys_fbo(fb) && ctx->Version >= 40 &&
-buffers[output] == GL_BACK) {
-   if (n != 1) {
-  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(with GL_BACK n 
must be 1)",
-  caller);
-  return;
-   }
-} else {
-   _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
-   caller, _mesa_enum_to_string(buffers[output]));
+ if (buffers[output] == GL_BACK &&
+ _mesa_is_winsys_fbo(fb) &&
+ _mesa_is_desktop_gl(ctx) &&
+ ctx->Version >= 40) {
+if (n != 1) {
+   _mesa_error(ctx, GL_INVALID_OPERATION, "%s(with GL_BACK n must 
be 1)",
+   caller);
return;
 }
+ } else if (buffers[output] == GL_FRONT ||
+buffers[output] == GL_LEFT ||
+buffers[output] == GL_RIGHT ||
+buffers[output] == GL_FRONT_AND_BACK) {
+_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
+caller, _mesa_enum_to_string(buffers[output]));
+return;
+ }
+  }
+
+  destMask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
+
+  if (!no_error) {
+ /* From the OpenGL 3.0 specification, page 258:
+  * "Each buffer listed in bufs must be one of the values from tables
+  *  4.5 or 4.6.  Otherwise, an INVALID_ENUM error is generated.
+  */
+ if (destMask[output] == BAD_MASK) {
+_mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid buffer %s)",
+caller, _mesa_enum_to_string(buffers[output]));
+return;
  }
 
  /* Section 4.2 (Whole Framebuffer Operations) of the OpenGL ES 3.0
   * specification says:
   *
   * "If the GL is bound to a draw framebuffer object, the ith
   * buffer listed in bufs must be COLOR_ATTACHMENTi or NONE .
   * Specifying a buffer out of order, BACK , or COLOR_ATTACHMENTm
   * where m is greater than or equal to the value of MAX_-
   * COLOR_ATTACHMENTS , will generate 

[Mesa-dev] [PATCH 0/3] Pbuffer fixes

2019-04-26 Thread Marek Olšák
Hi,

This series fixes pbuffers for EGL as exercised by the
egl_ext_device_base piglit test.

It passes piglit, GL-CTS, dEQP, and The Hitchhiker's Guide to the Galaxy,
but I didn't test GLX, so things might still break horribly there.

Rbs welcome,

Thanks,
Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrosti/ci: Initial commit

2019-04-26 Thread Alyssa Rosenzweig
> Hopefully just current expected fails get stored in git.

ATM it looks like both passes and fails are there. Also, we're failing
thousands of tests in GLES2 alone...

> VK-GL-CTS is the official conformance suite, and it includes dEQP.  You
> need to use a release tag, or you'll have extra garbage tests expecting
> nonstandardized behavior being run.  Same for dEQP master.

Ah-ha, gotcha, thank you!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110525] [CTS] dEQP-VK.api.invariance.random crashes

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110525

--- Comment #1 from Bas Nieuwenhuizen  ---
Can't reproduce:

[bas@bas-workstation vulkan]$
VK_CD_FILENAMES=/home/bas/projects/mesa-build/src/amd/vulkan/dev_icd.json
./deqp-vk -n dEQP-VK.api.invariance.random --deqp-vk-device-id=1
Writing test log into TestResults.qpa
dEQP Core git-5deebf4709ed3e2c3aaad3b8bb8703f236309b97 (0x5deebf47) starting..
  target implementation = 'Default'

Test case 'dEQP-VK.api.invariance.random'..
  Pass (Pass)

DONE!

Test run totals:
  Passed:1/1 (100.0%)
  Failed:0/1 (0.0%)
  Not supported: 0/1 (0.0%)
  Warnings:  0/1 (0.0%)
[bas@bas-workstation vulkan]$
VK_CD_FILENAMES=/home/bas/projects/mesa-build/src/amd/vulkan/dev_icd.json
./deqp-vk -n dEQP-VK.api.invariance.random --deqp-vk-device-id=2
Writing test log into TestResults.qpa
dEQP Core git-5deebf4709ed3e2c3aaad3b8bb8703f236309b97 (0x5deebf47) starting..
  target implementation = 'Default'

Test case 'dEQP-VK.api.invariance.random'..
  Pass (Pass)

DONE!

Test run totals:
  Passed:1/1 (100.0%)
  Failed:0/1 (0.0%)
  Not supported: 0/1 (0.0%)
  Warnings:  0/1 (0.0%)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radeonsi: use new atomic LLVM helpers

2019-04-26 Thread Marek Olšák
From: Marek Olšák 

This depends on "ac,ac/nir: use a better sync scope for shared atomics"
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index eb90bfb10ff..5e540fc5098 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -776,38 +776,36 @@ static void store_emit(
emit_data->output[emit_data->chan] =
ac_build_image_opcode(&ctx->ac, &args);
}
 }
 
 static void atomic_emit_memory(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data) {
LLVMBuilderRef builder = ctx->ac.builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
LLVMValueRef ptr, result, arg;
+   const char *sync_scope = HAVE_LLVM >= 0x0900 ? "workgroup-one-as" : 
"workgroup";
 
ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
 
   arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
   arg = ac_to_integer(&ctx->ac, arg);
 
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
LLVMValueRef new_data;
new_data = lp_build_emit_fetch(&ctx->bld_base,
   inst, 3, 0);
 
new_data = ac_to_integer(&ctx->ac, new_data);
 
-   result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
-  LLVMAtomicOrderingSequentiallyConsistent,
-  LLVMAtomicOrderingSequentiallyConsistent,
-  false);
-
+   result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data,
+ sync_scope);
result = LLVMBuildExtractValue(builder, result, 0, "");
} else {
LLVMAtomicRMWBinOp op;
 
switch(inst->Instruction.Opcode) {
case TGSI_OPCODE_ATOMUADD:
op = LLVMAtomicRMWBinOpAdd;
break;
case TGSI_OPCODE_ATOMXCHG:
op = LLVMAtomicRMWBinOpXchg;
@@ -830,23 +828,21 @@ static void atomic_emit_memory(struct si_shader_context 
*ctx,
case TGSI_OPCODE_ATOMIMIN:
op = LLVMAtomicRMWBinOpMin;
break;
case TGSI_OPCODE_ATOMIMAX:
op = LLVMAtomicRMWBinOpMax;
break;
default:
unreachable("unknown atomic opcode");
}
 
-   result = LLVMBuildAtomicRMW(builder, op, ptr, arg,
-  LLVMAtomicOrderingSequentiallyConsistent,
-  false);
+   result = ac_build_atomic_rmw(&ctx->ac, op, ptr, arg, 
sync_scope);
}
emit_data->output[emit_data->chan] =
LLVMBuildBitCast(builder, result, ctx->f32, "");
 }
 
 static void atomic_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radeonsi: set sampler state and view functions for compute-only contexts

2019-04-26 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pipe.c  |  2 +-
 src/gallium/drivers/radeonsi/si_state.c | 17 ++---
 src/gallium/drivers/radeonsi/si_state.h |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index aaf5138a3a2..b0e0ca7af05 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -496,30 +496,30 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
if (!sctx->border_color_map)
goto fail;
 
/* Initialize context functions used by graphics and compute. */
sctx->b.emit_string_marker = si_emit_string_marker;
sctx->b.set_debug_callback = si_set_debug_callback;
sctx->b.set_log_context = si_set_log_context;
sctx->b.set_context_param = si_set_context_param;
sctx->b.get_device_reset_status = si_get_reset_status;
sctx->b.set_device_reset_callback = si_set_device_reset_callback;
-   sctx->b.memory_barrier = si_memory_barrier;
 
si_init_all_descriptors(sctx);
si_init_buffer_functions(sctx);
si_init_clear_functions(sctx);
si_init_blit_functions(sctx);
si_init_compute_functions(sctx);
si_init_compute_blit_functions(sctx);
si_init_debug_functions(sctx);
si_init_fence_functions(sctx);
+   si_init_state_compute_functions(sctx);
 
if (sscreen->debug_flags & DBG(FORCE_DMA))
sctx->b.resource_copy_region = sctx->dma_copy;
 
/* Initialize graphics-only context functions. */
if (sctx->has_graphics) {
si_init_context_texture_functions(sctx);
si_init_query_functions(sctx);
si_init_msaa_functions(sctx);
si_init_shader_functions(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 2266b0a0824..876a993b158 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4713,21 +4713,21 @@ static void si_texture_barrier(struct pipe_context 
*ctx, unsigned flags)
 
/* Multisample surfaces are flushed in si_decompress_textures. */
if (sctx->framebuffer.uncompressed_cb_mask) {
si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
   
sctx->framebuffer.CB_has_shader_readable_metadata,
   
sctx->framebuffer.all_DCC_pipe_aligned);
}
 }
 
 /* This only ensures coherency for shader image/buffer stores. */
-void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
+static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 {
struct si_context *sctx = (struct si_context *)ctx;
 
if (!(flags & ~PIPE_BARRIER_UPDATE))
return;
 
/* Subsequent commands must wait for all shader invocations to
 * complete. */
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 SI_CONTEXT_CS_PARTIAL_FLUSH;
@@ -4778,20 +4778,29 @@ static void *si_create_blend_custom(struct si_context 
*sctx, unsigned mode)
struct pipe_blend_state blend;
 
memset(&blend, 0, sizeof(blend));
blend.independent_blend_enable = true;
blend.rt[0].colormask = 0xf;
return si_create_blend_state_mode(&sctx->b, &blend, mode);
 }
 
 static void si_init_config(struct si_context *sctx);
 
+void si_init_state_compute_functions(struct si_context *sctx)
+{
+   sctx->b.create_sampler_state = si_create_sampler_state;
+   sctx->b.delete_sampler_state = si_delete_sampler_state;
+   sctx->b.create_sampler_view = si_create_sampler_view;
+   sctx->b.sampler_view_destroy = si_sampler_view_destroy;
+   sctx->b.memory_barrier = si_memory_barrier;
+}
+
 void si_init_state_functions(struct si_context *sctx)
 {
sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
sctx->atoms.s.blend_color.emit = si_emit_blend_color;
@@ -4816,26 +4825,20 @@ void si_init_state_functions(struct si_context *sctx)
sctx->custom_blend_resolve = si_create_blend_custom(sctx, 
V_028808_CB_RESOLVE);
sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, 
V_028808_CB_FMASK_DECOMPRESS);
sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, 
V_028808_CB_ELIMINATE_FAST_CLEAR);
sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, 
V_028808_CB_DCC_DECOMPRESS);
 

Re: [Mesa-dev] [PATCH] panfrosti/ci: Initial commit

2019-04-26 Thread Eric Anholt
Alyssa Rosenzweig  writes:

>> We start by building a container in Docker that contains a suitable
>> rootfs and kernel for the DUT, deqp and all dependencies for building
>> Mesa itself.
>
> Out of curiosity, what's the performance impact of this? If there are no
> changes to the kernel or to deqp (but mesa had a commit somewhere in
> Panfrost space), do we have to rebuild the former two? Does ccache maybe
> pick that up? I'm trying to get a sense for how long it takes between
> pushing a commit and getting a CI answer, and maybe if that can be
> shortened.
>
>> the expectations that are stored
>> in git.
>
> Might it be better to track this outside so we don't pollute mesa with
> changes to that largely autogenerated file? Or I guess that's
> problematic since then we lose branch information / etc.

Hopefully just current expected fails get stored in git.

> Is there an automated way to do this based on the results of LAVA/CI?
>> +  git clone --depth 1 https://github.com/KhronosGroup/VK-GL-CTS.git .   
>> && \
>
> Is this the right repo? I recall getting deqp source from Google's
> servers (Chromium git). I suppose it's the same.

VK-GL-CTS is the official conformance suite, and it includes dEQP.  You
need to use a release tag, or you'll have extra garbage tests expecting
nonstandardized behavior being run.  Same for dEQP master.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/dri: decrease input lag by syncing sooner in SwapBuffers

2019-04-26 Thread Marek Olšák
On Fri, Apr 26, 2019 at 12:56 PM Axel Davy  wrote:

> On 26/04/2019 10:08, Michel Dänzer wrote:
> > On 2019-04-26 4:06 a.m., Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >> It's done by:
> >> - decrease the number of frames in flight by 1
> >> - flush before throttling in SwapBuffers
> >>(instead of wait-then-flush, do flush-then-wait)
> >>
> >> The improvement is apparent with Unigine Heaven.
> >>
> >> Previously:
> >>  draw frame 2
> >>  wait frame 0
> >>  flush frame 2
> >>  present frame 2
> >>
> >>  The input lag is 2 frames.
> >>
> >> Now:
> >>  draw frame 2
> >>  flush frame 2
> >>  wait frame 1
> >>  present frame 2
> >>
> >>  The input lag is 1 frame. Flushing is done before waiting, because
> >>  otherwise the device would be idle after waiting.
> > Nice idea. Not sure offhand about all ramifications, but certainly worth
> > a go.
> >
> >
> >> Nine is affected because it also uses the pipe cap.
> >> ---
> >>   src/gallium/auxiliary/util/u_screen.c |  2 +-
> >>   src/gallium/state_trackers/dri/dri_drawable.c | 20 +--
> >>   2 files changed, 11 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/src/gallium/auxiliary/util/u_screen.c
> b/src/gallium/auxiliary/util/u_screen.c
> >> index 27f51e0898e..410f17421e6 100644
> >> --- a/src/gallium/auxiliary/util/u_screen.c
> >> +++ b/src/gallium/auxiliary/util/u_screen.c
> >> @@ -349,21 +349,21 @@ u_pipe_screen_get_param_defaults(struct
> pipe_screen *pscreen,
> >>  case PIPE_CAP_MAX_VARYINGS:
> >> return 8;
> >>
> >>  case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
> >> return 0;
> >>
> >>  case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
> >> return 0;
> >>
> >>  case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
> >> -  return 2;
> >> +  return 1;
> > This might be slightly misleading, as there can still be two frames in
> > flight (on the GPU) at the same time. Might be better to leave this at 2
> > (so Nine isn't affected) and adjust its treatment in
> > src/gallium/state_trackers/dri/dri_drawable.c .
> >
> >
> Checking what gallium nine does currently, it seems we already do flush
> then wait,
> however we call swap_fences_pop_front and swap_fences_push_back in the
> reverse order compared to your patch.
> We compensate by taking PIPE_CAP_MAX_FRAMES_IN_FLIGHT + 1
>
> In conclusion, with the proposed patch, gl and nine should have the same
> behaviour (and thus if gl benefits from a value of 1, nine should as well).
> I haven't noticed input lag; I guess I have to test on Heaven if
> you see a difference.
> How can I slow down my GPU to test that? I used to use the
> /sys/kernel/debug/dri/0/ vars to force low dpm, but it doesn't seem to
> be possible anymore as the related files are gone (rx480)?
>

I set maximum settings, windowed, resolution: custom, and I type in the 4K
resolution (I don't have a 4K monitor). When it's running, I enable
wireframe. It should be pretty slow.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/dri: decrease input lag by syncing sooner in SwapBuffers

2019-04-26 Thread Marek Olšák
On Fri, Apr 26, 2019 at 4:08 AM Michel Dänzer  wrote:

> On 2019-04-26 4:06 a.m., Marek Olšák wrote:
> > From: Marek Olšák 
> >
> > It's done by:
> > - decrease the number of frames in flight by 1
> > - flush before throttling in SwapBuffers
> >   (instead of wait-then-flush, do flush-then-wait)
> >
> > The improvement is apparent with Unigine Heaven.
> >
> > Previously:
> > draw frame 2
> > wait frame 0
> > flush frame 2
> > present frame 2
> >
> > The input lag is 2 frames.
> >
> > Now:
> > draw frame 2
> > flush frame 2
> > wait frame 1
> > present frame 2
> >
> > The input lag is 1 frame. Flushing is done before waiting, because
> > otherwise the device would be idle after waiting.
>
> Nice idea. Not sure offhand about all ramifications, but certainly worth
> a go.
>
>
> > Nine is affected because it also uses the pipe cap.
> > ---
> >  src/gallium/auxiliary/util/u_screen.c |  2 +-
> >  src/gallium/state_trackers/dri/dri_drawable.c | 20 +--
> >  2 files changed, 11 insertions(+), 11 deletions(-)
> >
> > diff --git a/src/gallium/auxiliary/util/u_screen.c
> b/src/gallium/auxiliary/util/u_screen.c
> > index 27f51e0898e..410f17421e6 100644
> > --- a/src/gallium/auxiliary/util/u_screen.c
> > +++ b/src/gallium/auxiliary/util/u_screen.c
> > @@ -349,21 +349,21 @@ u_pipe_screen_get_param_defaults(struct
> pipe_screen *pscreen,
> > case PIPE_CAP_MAX_VARYINGS:
> >return 8;
> >
> > case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
> >return 0;
> >
> > case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
> >return 0;
> >
> > case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
> > -  return 2;
> > +  return 1;
>
> This might be slightly misleading, as there can still be two frames in
> flight (on the GPU) at the same time. Might be better to leave this at 2
> (so Nine isn't affected) and adjust its treatment in
> src/gallium/state_trackers/dri/dri_drawable.c .
>

When dri_flush is waiting, there can be at most 2 frames submitted and not
finished (that's why it's waiting). When dri_flush is done, there is at
most 1 frame submitted and not finished. The CAP is enforced by dri_flush.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] st: require compatible driver in autotools

2019-04-26 Thread Alyssa Ross
> if test -n "$with_gallium_drivers" -a "x$with_gallium_drivers" != xswrast; 
> then
> if test "x$enable_xvmc" = xauto -a "x$have_xvmc_platform" = xyes; then
> PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes],
> [enable_xvmc=no])
> fi
> ...
> fi
>
> Thus auto-detection will disable xvmc and other VL state-trackers,
> when no gallium drivers or swrast only is set.
> Thus the NEED_GALLIUM_VL_WINSYS shortly afterwards is set to
> no/disabled, and vl_winsys_dri.c et al. are not built.

Perhaps I wasn't clear in my initial message. It's not an autodetection
issue, it's that there is no error when trying to use, say,
--enable-omx-bellagio when there's only an swrast driver.

> A random old checkout commit 7be26976b8e8bc34fa7d0014197ed2af488f
> seems happy with the following:
>
> mkdir aa; cd aa;
> ../autogen.sh  --enable-autotools \
>  --with-platforms=x11 \
>  --with-dri-drivers= \
>  --with-gallium-drivers=swrast \
>  --disable-glx \
>  --disable-dri3 \
>  --disable-gbm
>
> Am I missing something?

If you try adding --enable-omx-bellagio, I think you should see the
configure script run successfully, despite the incompatibility between
--with-gallium-drivers=swrast and --enable-omx-bellagio that it should
have caught.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110459] Escape from Tarkov on DXVK renders wrong windows reflection unless RADV_DEBUG=nohiz is passed

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110459

--- Comment #5 from Rhys Perry  ---
Try running

WINEPREFIX=/home/faalagorn/Games/escape-from-tarkov renderdoccmd capture
/home/faalagorn/.local/share/lutris/runners/wine/wine-tkg-git-escapefromtarkov/bin/wine64
/home/faalagorn/Games/escape-from-tarkov/drive_c/Battlestate
Games/BsgLauncher/BsgLauncher.exe

instead (environment variables should go before the "renderdoccmd", not the
program you're capturing).

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/dri: decrease input lag by syncing sooner in SwapBuffers

2019-04-26 Thread Axel Davy

On 26/04/2019 10:08, Michel Dänzer wrote:

On 2019-04-26 4:06 a.m., Marek Olšák wrote:

From: Marek Olšák 

It's done by:
- decrease the number of frames in flight by 1
- flush before throttling in SwapBuffers
   (instead of wait-then-flush, do flush-then-wait)

The improvement is apparent with Unigine Heaven.

Previously:
 draw frame 2
 wait frame 0
 flush frame 2
 present frame 2

 The input lag is 2 frames.

Now:
 draw frame 2
 flush frame 2
 wait frame 1
 present frame 2

 The input lag is 1 frame. Flushing is done before waiting, because
 otherwise the device would be idle after waiting.

Nice idea. Not sure offhand about all ramifications, but certainly worth
a go.



Nine is affected because it also uses the pipe cap.
---
  src/gallium/auxiliary/util/u_screen.c |  2 +-
  src/gallium/state_trackers/dri/dri_drawable.c | 20 +--
  2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_screen.c 
b/src/gallium/auxiliary/util/u_screen.c
index 27f51e0898e..410f17421e6 100644
--- a/src/gallium/auxiliary/util/u_screen.c
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -349,21 +349,21 @@ u_pipe_screen_get_param_defaults(struct pipe_screen 
*pscreen,
 case PIPE_CAP_MAX_VARYINGS:
return 8;
  
 case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:

return 0;
  
 case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:

return 0;
  
 case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:

-  return 2;
+  return 1;

This might be slightly misleading, as there can still be two frames in
flight (on the GPU) at the same time. Might be better to leave this at 2
(so Nine isn't affected) and adjust its treatment in
src/gallium/state_trackers/dri/dri_drawable.c .


Checking what gallium nine does currently, it seems we already do flush 
then wait,
however we call swap_fences_pop_front and swap_fences_push_back in the 
reverse order compared to your patch.

We compensate by taking PIPE_CAP_MAX_FRAMES_IN_FLIGHT + 1

In conclusion, with the proposed patch, gl and nine should have the same 
behaviour (and thus if gl benefits from a value of 1, nine should as well).
I haven't noticed input lag; I guess I have to test on Heaven to see if 
you see a difference.
How can I slow down my GPU to test that? I used to use the 
/sys/kernel/debug/dri/0/ vars to force low dpm, but it doesn't seem to 
be possible anymore as the related files are gone (rx480)?



Axel

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110350] DOOM 2016 crash + severe artifacting on RADV + Vega VII

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110350

--- Comment #8 from Samuel Pitoiset  ---
Got a Vega20, I tried to reproduce the problem with mesa-git, llvm-git and the
same settings as you, but it doesn't happen.

I also tried some older LLVM commits, no graphic glitches so far.

Are you still able to reproduce the problem?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] shader-db MR: run: set NULL as initial value for binding_list

2019-04-26 Thread apinheiro
Without it, under some specific compilation options, it can be 
initialized to NULL or to garbage. On the latter case, if the shader 
doesn't require a binding_list, would cause a crash later when it 
attempts to be used.


https://gitlab.freedesktop.org/mesa/shader-db/merge_requests/7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] panfrosti/ci: Initial commit

2019-04-26 Thread Alyssa Rosenzweig
> We start by building a container in Docker that contains a suitable
> rootfs and kernel for the DUT, deqp and all dependencies for building
> Mesa itself.

Out of curiosity, what's the performance impact of this? If there are no
changes to the kernel or to deqp (but mesa had a commit somewhere in
Panfrost space), do we have to rebuild the former two? Does ccache maybe
pick that up? I'm trying to get a sense for how long it takes between
pushing a commit and getting a CI answer, and maybe if that can be
shortened.

> the expectations that are stored
> in git.

Might it be better to track this outside so we don't pollute mesa with
changes to that largely autogenerated file? Or I guess that's
problematic since then we lose branch information / etc.

> Any code that changes the expectations (hopefully tests are
> fixed) needs to also update the expectations file.

Is there an automated way to do this based on the results of LAVA/CI?
> +  git clone --depth 1 https://github.com/KhronosGroup/VK-GL-CTS.git .
>&& \

Is this the right repo? I recall getting deqp source from Google's
servers (Chromium git). I suppose it's the same.

> +  git clone --depth 1 https://gitlab.freedesktop.org/tomeu/mesa.git -b 
> panfrost-ci . && \

U

> +# To prevent memory leaks from slowing throughput, restart everything 
> between batches

*blush*
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] MR: Some softpipe fixes

2019-04-26 Thread Gert Wollny
Hi Roland, 

I did some softpipe patches, maybe you could take a look?

Fixing LOD evaluation, cube face selection, and add support for 
explicit derivatives: 
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/702

On top of that (i.e. the MR includes the above) 

Softpipe add support for TGSI_OPCODE_INTERP_* and raise GLSL level to
4.0: 
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/725

I'm not sure whether we really want to raise the GLSL 4.0 before we
land real MSAA support, but it also exposes OES_geometry_shader and a
few other things that are of interest. I think the fixes for texturing
should definitely land before doing this, that's why I put the series
that does that on top of !702.  

Many thanks, 
Gert


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] ac, ac/nir: use a better sync scope for shared atomics

2019-04-26 Thread Samuel Pitoiset
Thanks for your investigations Rhys. As discussed over IRC, this makes 
sense to me.


Reviewed-by: Samuel Pitoiset 

On 4/26/19 3:48 PM, Rhys Perry wrote:

https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed
the meaning of the "system" sync scope, making it no longer restricted to
the memory operation's address space. So a single address space sync scope
is needed for shared atomic operations (such as "system-one-as" or
"workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions
can be created at each shared atomic operation.

This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg
to allow for more sync scopes and uses the new functions in ac->nir with
the "workgroup-one-as" or "workgroup" sync scopes.

   F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
  Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
 RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
   RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
  Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
  Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)

Signed-off-by: Rhys Perry 
---
  src/amd/common/ac_llvm_build.h| 10 +-
  src/amd/common/ac_llvm_helper.cpp | 59 +++
  src/amd/common/ac_nir_to_llvm.c   | 12 +++
  3 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f4cee667153..98f856106d6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -26,7 +26,7 @@
  #define AC_LLVM_BUILD_H
  
  #include <stdbool.h>

-#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Core.h>
  #include "compiler/nir/nir.h"
  #include "amd_family.h"
  
@@ -694,6 +694,14 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);

  LLVMValueRef
  ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
  
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,

+LLVMValueRef ptr, LLVMValueRef val,
+const char *sync_scope);
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
LLVMValueRef ptr,
+ LLVMValueRef cmp, LLVMValueRef val,
+ const char *sync_scope);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index dcfb8008546..e5030c6f472 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,6 +31,7 @@
  
  #include "ac_binary.h"

  #include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
  
  #include 

  #include 
@@ -167,3 +168,61 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm)
  {
   reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
  }
+
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, 
LLVMAtomicRMWBinOp op,
+LLVMValueRef ptr, LLVMValueRef val,
+const char *sync_scope) {
+   llvm::AtomicRMWInst::BinOp binop;
+   switch (op) {
+   case LLVMAtomicRMWBinOpXchg:
+   binop = llvm::AtomicRMWInst::Xchg;
+   break;
+   case LLVMAtomicRMWBinOpAdd:
+   binop = llvm::AtomicRMWInst::Add;
+   break;
+   case LLVMAtomicRMWBinOpSub:
+   binop = llvm::AtomicRMWInst::Sub;
+   break;
+   case LLVMAtomicRMWBinOpAnd:
+   binop = llvm::AtomicRMWInst::And;
+   break;
+   case LLVMAtomicRMWBinOpNand:
+   binop = llvm::AtomicRMWInst::Nand;
+   break;
+   case LLVMAtomicRMWBinOpOr:
+   binop = llvm::AtomicRMWInst::Or;
+   break;
+   case LLVMAtomicRMWBinOpXor:
+   binop = llvm::AtomicRMWInst::Xor;
+   break;
+   case LLVMAtomicRMWBinOpMax:
+   binop = llvm::AtomicRMWInst::Max;
+   break;
+   case LLVMAtomicRMWBinOpMin:
+   binop = llvm::AtomicRMWInst::Min;
+   break;
+   case LLVMAtomicRMWBinOpUMax:
+   binop = llvm::AtomicRMWInst::UMax;
+   break;
+   case LLVMAtomicRMWBinOpUMin:
+   binop = llvm::AtomicRMWInst::UMin;
+   break;
+   default:
+   unreachable(!"invalid LLVMAtomicRMWBinOp");
+  break;
+   }
+   unsigned SSID = 
llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+   return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
+   binop, llvm::unwrap(ptr), llvm::unwrap(val),
+   llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+}
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
LLVMValueRef ptr,
+ 

Re: [Mesa-dev] [PATCH] vulkan/wsi: don't use DUMB_CLOSE for normal GEM handles

2019-04-26 Thread Emil Velikov
On Thu, 25 Apr 2019 at 11:44, Bas Nieuwenhuizen  
wrote:
>
> r-b
>
Thank you Bas. Pushed both fixes to master.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac, ac/nir: use a better sync scope for shared atomics

2019-04-26 Thread Rhys Perry
https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed
the meaning of the "system" sync scope, making it no longer restricted to
the memory operation's address space. So a single address space sync scope
is needed for shared atomic operations (such as "system-one-as" or
"workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions
can be created at each shared atomic operation.

This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg
to allow for more sync scopes and uses the new functions in ac->nir with
the "workgroup-one-as" or "workgroup" sync scopes.

  F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
 Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
  RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
 Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
 Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)

Signed-off-by: Rhys Perry 
---
 src/amd/common/ac_llvm_build.h| 10 +-
 src/amd/common/ac_llvm_helper.cpp | 59 +++
 src/amd/common/ac_nir_to_llvm.c   | 12 +++
 3 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index f4cee667153..98f856106d6 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -26,7 +26,7 @@
 #define AC_LLVM_BUILD_H
 
 #include <stdbool.h>
-#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Core.h>
 #include "compiler/nir/nir.h"
 #include "amd_family.h"
 
@@ -694,6 +694,14 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, 
LLVMValueRef interp_ij);
 LLVMValueRef
 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
 
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, 
LLVMAtomicRMWBinOp op,
+LLVMValueRef ptr, LLVMValueRef val,
+const char *sync_scope);
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
LLVMValueRef ptr,
+ LLVMValueRef cmp, LLVMValueRef val,
+ const char *sync_scope);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index dcfb8008546..e5030c6f472 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,6 +31,7 @@
 
 #include "ac_binary.h"
 #include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
 
 #include 
 #include 
@@ -167,3 +168,61 @@ void ac_enable_global_isel(LLVMTargetMachineRef tm)
 {
    reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
 }
+
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, 
LLVMAtomicRMWBinOp op,
+LLVMValueRef ptr, LLVMValueRef val,
+const char *sync_scope) {
+   llvm::AtomicRMWInst::BinOp binop;
+   switch (op) {
+   case LLVMAtomicRMWBinOpXchg:
+   binop = llvm::AtomicRMWInst::Xchg;
+   break;
+   case LLVMAtomicRMWBinOpAdd:
+   binop = llvm::AtomicRMWInst::Add;
+   break;
+   case LLVMAtomicRMWBinOpSub:
+   binop = llvm::AtomicRMWInst::Sub;
+   break;
+   case LLVMAtomicRMWBinOpAnd:
+   binop = llvm::AtomicRMWInst::And;
+   break;
+   case LLVMAtomicRMWBinOpNand:
+   binop = llvm::AtomicRMWInst::Nand;
+   break;
+   case LLVMAtomicRMWBinOpOr:
+   binop = llvm::AtomicRMWInst::Or;
+   break;
+   case LLVMAtomicRMWBinOpXor:
+   binop = llvm::AtomicRMWInst::Xor;
+   break;
+   case LLVMAtomicRMWBinOpMax:
+   binop = llvm::AtomicRMWInst::Max;
+   break;
+   case LLVMAtomicRMWBinOpMin:
+   binop = llvm::AtomicRMWInst::Min;
+   break;
+   case LLVMAtomicRMWBinOpUMax:
+   binop = llvm::AtomicRMWInst::UMax;
+   break;
+   case LLVMAtomicRMWBinOpUMin:
+   binop = llvm::AtomicRMWInst::UMin;
+   break;
+   default:
+   unreachable(!"invalid LLVMAtomicRMWBinOp");
+  break;
+   }
+   unsigned SSID = 
llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+   return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
+   binop, llvm::unwrap(ptr), llvm::unwrap(val),
+   llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+}
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, 
LLVMValueRef ptr,
+ LLVMValueRef cmp, LLVMValueRef val,
+ const char *sync_scope) {
+   unsigned SSID = 

Re: [Mesa-dev] [PATCH] radv: implement a workaround for VK_EXT_conditional_rendering

2019-04-26 Thread Samuel Pitoiset


On 4/26/19 1:58 PM, Samuel Pitoiset wrote:

Per the Vulkan spec 1.1.107, the predicate is a 32-bit value. Though
the AMD hardware treats it as a 64-bit value which means it might
fail to discard.

I don't know why this extension has been drafted like that but this
definitely does not fit with AMD. The hardware doesn't seem to support
a 32-bit value for the predicate, so we need to implement a workaround.

This fixes an issue when DXVK enables conditional rendering with RADV.

This also fixes the Sasha conditionalrender demo.


Fixes: e45ba51ea45 ("radv: add support for VK_EXT_conditional_rendering")
Reported-by: Philip Rebohle 
Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
---
  src/amd/vulkan/radv_cmd_buffer.c | 46 +---
  1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7ee5a5ca7dc..9d9f2577f75 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4932,8 +4932,11 @@ void radv_CmdBeginConditionalRenderingEXT(
  {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, 
pConditionalRenderingBegin->buffer);
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
bool draw_visible = true;
-   uint64_t va;
+   uint64_t pred_value = 0;
+   uint64_t va, new_va;
+   unsigned pred_offset;
  
  	va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
  
@@ -4949,13 +4952,50 @@ void radv_CmdBeginConditionalRenderingEXT(
  
  	si_emit_cache_flush(cmd_buffer);
  
+	/* From the Vulkan spec 1.1.107:

+*
+* "If the 32-bit value at offset in buffer memory is zero, then the
+*  rendering commands are discarded, otherwise they are executed as
+*  normal. If the value of the predicate in buffer memory changes while
+*  conditional rendering is active, the rendering commands may be
+*  discarded in an implementation-dependent way. Some implementations
+*  may latch the value of the predicate upon beginning conditional
+*  rendering while others may read it before every rendering command."
+*
+* But, the AMD hardware treats the predicate as a 64-bit value which
+* means we need a workaround in the driver. Luckily, it's not required
+* to support if the value changes when predication is active.
+*
+* The workaround is as follows:
+* 1) allocate a 64-bit value in the upload BO and initialize it to 0
+* 2) copy the 32-bit predicate value to the upload BO
+* 3) use the new allocated VA address for predication
+*
+* Based on the conditionalrender demo, it's faster to do the COPY_DATA
+* in ME  (+ sync PFP) instead of PFP.
+*/
+   radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, 
&pred_offset);
+
+   new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+   COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   radeon_emit(cs, new_va);
+   radeon_emit(cs, new_va >> 32);
+
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+
/* Enable predication for this command buffer. */
-   si_emit_set_predication_state(cmd_buffer, draw_visible, va);
+   si_emit_set_predication_state(cmd_buffer, draw_visible, new_va);
cmd_buffer->state.predicating = true;
  
  	/* Store conditional rendering user info. */

cmd_buffer->state.predication_type = draw_visible;
-   cmd_buffer->state.predication_va = va;
+   cmd_buffer->state.predication_va = new_va;
  }
  
  void radv_CmdEndConditionalRenderingEXT(

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: implement a workaround for VK_EXT_conditional_rendering

2019-04-26 Thread Samuel Pitoiset
Per the Vulkan spec 1.1.107, the predicate is a 32-bit value. Though
the AMD hardware treats it as a 64-bit value which means it might
fail to discard.

I don't know why this extension has been drafted like that but this
definitely does not fit with AMD. The hardware doesn't seem to support
a 32-bit value for the predicate, so we need to implement a workaround.

This fixes an issue when DXVK enables conditional rendering with RADV.

Fixes: e45ba51ea45 ("radv: add support for VK_EXT_conditional_rendering")
Reported-by: Philip Rebohle 
Signed-off-by: Samuel Pitoiset 
Reviewed-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_cmd_buffer.c | 46 +---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7ee5a5ca7dc..9d9f2577f75 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4932,8 +4932,11 @@ void radv_CmdBeginConditionalRenderingEXT(
 {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, buffer, 
pConditionalRenderingBegin->buffer);
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
bool draw_visible = true;
-   uint64_t va;
+   uint64_t pred_value = 0;
+   uint64_t va, new_va;
+   unsigned pred_offset;
 
va = radv_buffer_get_va(buffer->bo) + 
pConditionalRenderingBegin->offset;
 
@@ -4949,13 +4952,50 @@ void radv_CmdBeginConditionalRenderingEXT(
 
si_emit_cache_flush(cmd_buffer);
 
+   /* From the Vulkan spec 1.1.107:
+*
+* "If the 32-bit value at offset in buffer memory is zero, then the
+*  rendering commands are discarded, otherwise they are executed as
+*  normal. If the value of the predicate in buffer memory changes while
+*  conditional rendering is active, the rendering commands may be
+*  discarded in an implementation-dependent way. Some implementations
+*  may latch the value of the predicate upon beginning conditional
+*  rendering while others may read it before every rendering command."
+*
+* But, the AMD hardware treats the predicate as a 64-bit value which
+* means we need a workaround in the driver. Luckily, it's not required
+* to support if the value changes when predication is active.
+*
+* The workaround is as follows:
+* 1) allocate a 64-bit value in the upload BO and initialize it to 0
+* 2) copy the 32-bit predicate value to the upload BO
+* 3) use the new allocated VA address for predication
+*
+* Based on the conditionalrender demo, it's faster to do the COPY_DATA
+* in ME  (+ sync PFP) instead of PFP.
+*/
+   radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, 
&pred_offset);
+
+   new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+
+   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+   COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   radeon_emit(cs, new_va);
+   radeon_emit(cs, new_va >> 32);
+
+   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+   radeon_emit(cs, 0);
+
/* Enable predication for this command buffer. */
-   si_emit_set_predication_state(cmd_buffer, draw_visible, va);
+   si_emit_set_predication_state(cmd_buffer, draw_visible, new_va);
cmd_buffer->state.predicating = true;
 
/* Store conditional rendering user info. */
cmd_buffer->state.predication_type = draw_visible;
-   cmd_buffer->state.predication_va = va;
+   cmd_buffer->state.predication_va = new_va;
 }
 
 void radv_CmdEndConditionalRenderingEXT(
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] ac: tidy up ac_build_llvm8_tbuffer_{load, store}

2019-04-26 Thread Samuel Pitoiset
For consistency with ac_build_llvm8_buffer_{load,store}_common
helpers and that will help a bit for removing the vec3 restriction.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/common/ac_llvm_build.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 4fdf73c99ba..22b771db774 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1512,18 +1512,17 @@ ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
args[idx++] = soffset ? soffset : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-   unsigned func = CLAMP(num_channels, 1, 3) - 1;
-
-   LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
-   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   unsigned func = num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
-   char name[256];
+   char name[256], type_name[8];
+
+   LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
 
snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
-indexing_kind, type_names[func]);
+indexing_kind, type_name);
 
-   return ac_build_intrinsic(ctx, name, types[func], args,
- idx,
+   return ac_build_intrinsic(ctx, name, type, args, idx,
  ac_get_load_intr_attribs(can_speculate));
 }
 
@@ -1699,14 +1698,15 @@ ac_build_llvm8_tbuffer_store(struct ac_llvm_context 
*ctx,
args[idx++] = soffset ? soffset : ctx->i32_0;
args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
-   unsigned func = CLAMP(num_channels, 1, 3) - 1;
-
-   const char *type_names[] = {"i32", "v2i32", "v4i32"};
+   unsigned func = num_channels == 3 ? 4 : num_channels;
const char *indexing_kind = structurized ? "struct" : "raw";
-   char name[256];
+   char name[256], type_name[8];
+
+   LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+   ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
 
snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
-indexing_kind, type_names[func]);
+indexing_kind, type_name);
 
ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
   ac_get_store_intr_attribs(writeonly_memory));
-- 
2.21.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] vulkan/wsi: check if the display_fd given is master

2019-04-26 Thread Eric Engestrom

On 2019-04-19 at 16:01, Emil Velikov  wrote:
> From: Emil Velikov 
> 
> As effectively required by the extension, we need to ensure we're master
> 
> Currently drivers employ vendor specific solutions, which check if the
> device behind the fd is capable*, yet none of them do the master check.
> 
> *In the radv case, if acceleration is available.
> 
> Instead of duplicating the check in each driver, keep it where it's
> needed and used.
> 
> Note this copies libdrm's drmIsMaster() to avoid depending on bleeding
> edge version of the library.
> 
> v2: set the fd to -1 if not master (Bas)
> 
> Cc: Keith Packard 
> Cc: Jason Ekstrand 
> Cc: Bas Nieuwenhuizen 
> Cc: Andres Rodriguez 
> Reported-by: Andres Rodriguez 
> Fixes: da997ebec92 ("vulkan: Add KHR_display extension using DRM [v10]")
> Signed-off-by: Emil Velikov 
> ---
>  src/vulkan/wsi/wsi_common_display.c | 27 +++
>  1 file changed, 27 insertions(+)
> 
> diff --git a/src/vulkan/wsi/wsi_common_display.c 
> b/src/vulkan/wsi/wsi_common_display.c
> index 74ed36ed646..2be20e85046 100644
> --- a/src/vulkan/wsi/wsi_common_display.c
> +++ b/src/vulkan/wsi/wsi_common_display.c
> @@ -1812,6 +1812,30 @@ fail_attr_init:
> return ret;
>  }
>  
> +
> +/*
> + * Local version of the libdrm helper. Added to avoid depending on bleeding
> + * edge version of the library.

Could you specify the actual version in this comment, so that we can drop this 
copy
once we start depending on that version anyway?

With that:
Acked-by: Eric Engestrom 

> + */
> +static int
> +local_drmIsMaster(int fd)
> +{
> +   /* Detect master by attempting something that requires master.
> +*
> +* Authenticating magic tokens requires master and 0 is an
> +* internal kernel detail which we could use. Attempting this on
> +* a master fd would therefore fail with EINVAL because 0
> +* is invalid.
> +*
> +* A non-master fd will fail with EACCES, as the kernel checks
> +* for master before attempting to do anything else.
> +*
> +* Since we don't want to leak implementation details, use
> +* EACCES.
> +*/
> +   return drmAuthMagic(fd, 0) != -EACCES;
> +}
> +
>  VkResult
>  wsi_display_init_wsi(struct wsi_device *wsi_device,
>   const VkAllocationCallbacks *alloc,
> @@ -1827,6 +1851,9 @@ wsi_display_init_wsi(struct wsi_device *wsi_device,
> }
>  
> wsi->fd = display_fd;
> +   if (wsi->fd != -1 && !local_drmIsMaster(wsi->fd))
> +  wsi->fd = -1;
> +
> wsi->alloc = alloc;
>  
> list_inithead(&wsi->connectors);
> -- 
> 2.21.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110530] [CTS] dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420* reports VM faults on Vega10

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110530

Bug ID: 110530
   Summary: [CTS] dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420*
reports VM faults on Vega10
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: samuel.pitoi...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

$ ./deqp-vk
--deqp-case=dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420_unorm.tess_eval_linear_array
Writing test log into TestResults.qpa
dEQP Core git-5deebf4709ed3e2c3aaad3b8bb8703f236309b97 (0x5deebf47) starting..
  target implementation = 'Default'

Test case
'dEQP-VK.ycbcr.format.g8_b8_r8_3plane_420_unorm.tess_eval_linear_array'..
  Fail (Got invalid results)

DONE!

Test run totals:
  Passed:0/1 (0.0%)
  Failed:1/1 (100.0%)
  Not supported: 0/1 (0.0%)
  Warnings:  0/1 (0.0%)

[ 7661.527948] amdgpu :03:00.0: [gfxhub] VMC page fault (src_id:0 ring:56
vmid:4 pasid:32775, for process deqp-vk pid 21982 thread deqp-vk pid 21982)
[ 7661.527950] amdgpu :03:00.0:   in page starting at address
0x800100088000 from 27
[ 7661.527951] amdgpu :03:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x004C0071
[ 7661.527955] amdgpu :03:00.0: [gfxhub] VMC page fault (src_id:0 ring:56
vmid:4 pasid:32775, for process deqp-vk pid 21982 thread deqp-vk pid 21982)
[ 7661.527956] amdgpu :03:00.0:   in page starting at address
0x800100088000 from 27
[ 7661.527957] amdgpu :03:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x
[ 7661.527960] amdgpu :03:00.0: [gfxhub] VMC page fault (src_id:0 ring:56
vmid:4 pasid:32775, for process deqp-vk pid 21982 thread deqp-vk pid 21982)
[ 7661.527961] amdgpu :03:00.0:   in page starting at address
0x800100088000 from 27
[ 7661.527962] amdgpu :03:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x
[ 7661.527965] amdgpu :03:00.0: [gfxhub] VMC page fault (src_id:0 ring:56
vmid:4 pasid:32775, for process deqp-vk pid 21982 thread deqp-vk pid 21982)
[ 7661.527966] amdgpu :03:00.0:   in page starting at address
0x800100088000 from 27
[ 7661.527967] amdgpu :03:00.0: VM_L2_PROTECTION_FAULT_STATUS:0x

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-26 Thread Emil Velikov
On Thu, 25 Apr 2019 at 19:24, Gustaw Smolarczyk  wrote:
>
> czw., 25 kwi 2019 o 20:11 Gustaw Smolarczyk  napisał(a):
> >
> > czw., 25 kwi 2019 o 19:42 Emil Velikov  
> > napisał(a):
> > >
> > > The function is analogous to lp_fence_wait() while taking a timeout
> > > (ns) parameter, as needed for EGL fence/sync.
> > >
> > > v2:
> > >  - use absolute UTC time, as per spec (Gustaw)
> > >  - bail out on cnd_timedwait() failure (Gustaw)
> > >
> > > Cc: Gustaw Smolarczyk 
> > > Cc: Roland Scheidegger 
> > > Signed-off-by: Emil Velikov 
> > > Reviewed-by: Roland Scheidegger  (v1)
> > > ---
> > >  src/gallium/drivers/llvmpipe/lp_fence.c | 30 +
> > >  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
> > >  2 files changed, 33 insertions(+)
> > >
> > > diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> > > b/src/gallium/drivers/llvmpipe/lp_fence.c
> > > index 20cd91cd63d..b79d773bf6c 100644
> > > --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> > > +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> > > @@ -125,3 +125,33 @@ lp_fence_wait(struct lp_fence *f)
> > >  }
> > >
> > >
> > > +boolean
> > > +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> > > +{
> > > +   struct timespec ts;
> > > +   int ret;
> > > +
> > > +   timespec_get(&ts, TIME_UTC);
> > > +
> > > +   ts.tv_nsec += timeout % 1000000000L;
> > > +   ts.tv_sec += timeout / 1000000000L;
> > > +   if (ts.tv_nsec >= 1000000000L) {
> > > +  ts.tv_sec++;
> > > +  ts.tv_nsec -= 1000000000L;
> > > +   }
> > > +
> > > +   if (LP_DEBUG & DEBUG_FENCE)
> > > +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> > > +
> > > +   mtx_lock(&f->mutex);
> > > +   assert(f->issued);
> > > +   while (f->count < f->rank) {
> > > +  ret = cnd_timedwait(&f->signalled, &f->mutex, &ts);
> > > +  if (ret != thrd_success)
> > > + break;
> > > +   }
> > > +   mtx_unlock(&f->mutex);
> > > +   return (f->count >= f->rank && ret == thrd_success);
>
> Hmm, you are reading from the fence object outside of the critical
> section, which doesn't sound safe. Maybe compute the return value
> before the mutex is unlocked?
>
>const boolean result = (f->count >= f->rank);
>mtx_unlock(&f->mutex);
>return result;
>
> Since f->rank is immutable and f->count never decreases, it might
> still be ok without this change, though it is racy.
>
In all fairness it shouldn't matter that much but it is the correct
thing regardless.
Will fixup and push in a bit. Thanks for the help Gustaw!

Aside: nearly all of mesa gets this and the while loop around
cnd_timedwait wrong :-\
We should really fix that one of these days.

Bonus points: our c11 thread.h has a typo in thrd_timeDout (missing d)
and doesn't set/use it where needed.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110526] [CTS] dEQP-VK.ycbcr.{conversion, format}.* fail

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110526

Bug ID: 110526
   Summary: [CTS] dEQP-VK.ycbcr.{conversion,format}.* fail
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: samuel.pitoi...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

The full list of CTS fails on Polaris10:

dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.chroma_reconstruction.nearest.default_nearest_midpoint_tiling_optimal,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.chroma_reconstruction.nearest.default_nearest_midpoint_tiling_optimal_swapped_chroma,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.rgb_identity_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_2020_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_2020_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_601_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_601_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_709_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_709_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_identity_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.b8g8r8g8_422_unorm.color_conversion.ycbcr_identity_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.chroma_reconstruction.nearest.default_nearest_midpoint_tiling_optimal,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.chroma_reconstruction.nearest.default_nearest_midpoint_tiling_optimal_swapped_chroma,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.rgb_identity_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_2020_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_2020_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_601_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_601_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_709_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_709_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_identity_itu_full_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.conversion.g8b8g8r8_422_unorm.color_conversion.ycbcr_identity_itu_narrow_tiling_optimal_midpoint,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.compute_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.compute_optimal_array,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.fragment_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.fragment_optimal_array,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.geometry_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.geometry_optimal_array,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.tess_control_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.tess_control_optimal_array,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.tess_eval_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.tess_eval_optimal_array,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.vertex_optimal,Fail
dEQP-VK.ycbcr.format.b8g8r8g8_422_unorm.vertex_optimal_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.compute_linear_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.compute_linear_array_mapped,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.compute_optimal_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.fragment_linear_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.fragment_linear_array_mapped,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.fragment_optimal_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.geometry_linear_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.geometry_linear_array_mapped,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.geometry_optimal_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.tess_control_linear_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.tess_control_linear_array_mapped,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.tess_control_optimal_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.tess_eval_linear_array,Fail
dEQP-VK.ycbcr.format.g16_b16_r16_3plane_420_unorm.tess_eval_linear_array_mapped,Fail

Re: [Mesa-dev] [PATCH] radeonsi: don't ignore PIPE_FLUSH_ASYNC

2019-04-26 Thread Michel Dänzer
On 2019-04-26 4:06 a.m., Marek Olšák wrote:
> From: Marek Olšák 
> 
> ---
>  src/gallium/drivers/radeonsi/si_fence.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
> b/src/gallium/drivers/radeonsi/si_fence.c
> index 3d23597413c..ffda98d2834 100644
> --- a/src/gallium/drivers/radeonsi/si_fence.c
> +++ b/src/gallium/drivers/radeonsi/si_fence.c
> @@ -566,21 +566,21 @@ static void si_flush_from_st(struct pipe_context *ctx,
>   multi_fence->fine = fine;
>   fine.buf = NULL;
>  
>   if (flags & TC_FLUSH_ASYNC) {
>   util_queue_fence_signal(&multi_fence->ready);
>   
> tc_unflushed_batch_token_reference(&multi_fence->tc_token, NULL);
>   }
>   }
>   assert(!fine.buf);
>  finish:
> - if (!(flags & PIPE_FLUSH_DEFERRED)) {
> + if (!(flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC))) {
>   if (sctx->dma_cs)
>   ws->cs_sync_flush(sctx->dma_cs);
>   ws->cs_sync_flush(sctx->gfx_cs);
>   }
>  }
>  
>  static void si_fence_server_signal(struct pipe_context *ctx,
>  struct pipe_fence_handle *fence)
>  {
>   struct si_context *sctx = (struct si_context *)ctx;
> 

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 110525] [CTS] dEQP-VK.api.invariance.random crashes

2019-04-26 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=110525

Bug ID: 110525
   Summary: [CTS] dEQP-VK.api.invariance.random crashes
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/Vulkan/radeon
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: samuel.pitoi...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

$ ./deqp-vk --deqp-case=dEQP-VK.api.invariance.random
Writing test log into TestResults.qpa
dEQP Core git-4e663ef25fe639f467d583e0a145edee66cddd82 (0x4e663ef2) starting..
  target implementation = 'Default'

Test case 'dEQP-VK.api.invariance.random'..
deqp-vk: ../src/amd/vulkan/radv_image.c:1038: radv_image_create: Assertion
`info.width % desc->width_divisor == 0' failed.
Aborted (core dumped)

git bisect start
# good: [055f6281d410aa55ac56169973897000d0e0cd42] intel/fs: Don't handle
texop_tex for shaders without implicit LOD
git bisect good 055f6281d410aa55ac56169973897000d0e0cd42
# bad: [934f178341f7ec3bb10e3fa1499198e7988b086f] anv/descriptor_set: Don't
fully destroy sets in pool destroy/reset
git bisect bad 934f178341f7ec3bb10e3fa1499198e7988b086f
# bad: [5f942db190ef2154fb6512bcac3f42b4df45e0f5] panfrost/midgard: Copy prop
for texture registers
git bisect bad 5f942db190ef2154fb6512bcac3f42b4df45e0f5
# good: [5564c38212ac3d3aa2fdfca4da03f10db79a07a4] radv: Update descriptor sets
for multiple planes.
git bisect good 5564c38212ac3d3aa2fdfca4da03f10db79a07a4
# bad: [fc9248e13e20ed49e9e672b6cb82fa5b05c48e61] radv: Enable YCBCR conversion
feature.
git bisect bad fc9248e13e20ed49e9e672b6cb82fa5b05c48e61
# good: [b769a549ee6325e3759a60af19de034e5666ffdb] radv: Add hashing for the
ycbcr samplers.
git bisect good b769a549ee6325e3759a60af19de034e5666ffdb
# bad: [379b82daced4efd90fbf2df26fb99b0a7ba43e05] radv: Add ycbcr subsampled &
multiplane formats to csv.
git bisect bad 379b82daced4efd90fbf2df26fb99b0a7ba43e05
# good: [52c1adda21bb9b1dfb06934279866014accd09d1] radv: Add ycbcr format
features.
git bisect good 52c1adda21bb9b1dfb06934279866014accd09d1
# first bad commit: [379b82daced4efd90fbf2df26fb99b0a7ba43e05] radv: Add ycbcr
subsampled & multiplane formats to csv.

commit 379b82daced4efd90fbf2df26fb99b0a7ba43e05 (refs/bisect/bad)
Author: Bas Nieuwenhuizen 
Date:   Tue Apr 16 01:05:29 2019 +0200

radv: Add ycbcr subsampled & multiplane formats to csv.

Reviewed-by: Samuel Pitoiset 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] st/dri: decrease input lag by syncing sooner in SwapBuffers

2019-04-26 Thread Michel Dänzer
On 2019-04-26 4:06 a.m., Marek Olšák wrote:
> From: Marek Olšák 
> 
> It's done by:
> - decrease the number of frames in flight by 1
> - flush before throttling in SwapBuffers
>   (instead of wait-then-flush, do flush-then-wait)
> 
> The improvement is apparent with Unigine Heaven.
> 
> Previously:
> draw frame 2
> wait frame 0
> flush frame 2
> present frame 2
> 
> The input lag is 2 frames.
> 
> Now:
> draw frame 2
> flush frame 2
> wait frame 1
> present frame 2
> 
> The input lag is 1 frame. Flushing is done before waiting, because
> otherwise the device would be idle after waiting.

Nice idea. Not sure offhand about all ramifications, but certainly worth
a go.


> Nine is affected because it also uses the pipe cap.
> ---
>  src/gallium/auxiliary/util/u_screen.c |  2 +-
>  src/gallium/state_trackers/dri/dri_drawable.c | 20 +--
>  2 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/util/u_screen.c 
> b/src/gallium/auxiliary/util/u_screen.c
> index 27f51e0898e..410f17421e6 100644
> --- a/src/gallium/auxiliary/util/u_screen.c
> +++ b/src/gallium/auxiliary/util/u_screen.c
> @@ -349,21 +349,21 @@ u_pipe_screen_get_param_defaults(struct pipe_screen 
> *pscreen,
> case PIPE_CAP_MAX_VARYINGS:
>return 8;
>  
> case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
>return 0;
>  
> case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:
>return 0;
>  
> case PIPE_CAP_MAX_FRAMES_IN_FLIGHT:
> -  return 2;
> +  return 1;

This might be slightly misleading, as there can still be two frames in
flight (on the GPU) at the same time. Might be better to leave this at 2
(so Nine isn't affected) and adjust its treatment in
src/gallium/state_trackers/dri/dri_drawable.c .


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/8] etnaviv: add 2D GPU YUV->RGB blitter

2019-04-26 Thread Lucas Stach
Am Freitag, den 26.04.2019, 08:24 +0200 schrieb Christian Gmeiner:
> Am Fr., 12. Apr. 2019 um 19:38 Uhr schrieb Lucas Stach 
> :
> > 
> > This adds a blit path using the 2D GPU for a linear YUV to tiled RGB
> > blit. This allows to implement importing of planar YUV textures with
> > a single copy.
> > 
> > > > Signed-off-by: Lucas Stach 
> > ---
[...]
> > +
> > +   switch (blit_info->src.format) {
> > +   case PIPE_FORMAT_NV12:
> > +  src_format = DE_FORMAT_NV12;
> > +  break;
> > +   case PIPE_FORMAT_YUYV:
> > +  src_format = DE_FORMAT_YUY2;
> > +  break;
> 
> Do you need to support YUYV in your ultra fast video-path with
> glReadPixels(..)?

Yes, as this is the output format of the VDOA. Using the VPU together
with the VDOA is the highest performing video decode path on i.MX6.

Regards,
Lucas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/6] Add support for NV12

2019-04-26 Thread Lucas Stach
Am Freitag, den 26.04.2019, 09:41 +0200 schrieb Christian Gmeiner:
> Hi Lucas
> 
> > 
> > Am Mittwoch, den 24.04.2019, 08:36 +0200 schrieb Christian Gmeiner:
> > > This patch series goes a completely different route than the one from
> > > Lucas Stach. I am using the integrated YUV tiler instead of using
> > > the 2D core for format conversion. I am reusing some patches from
> > > Lucas and this series sits on-top of Lucas "st/dri: YUV" patches.
> > 
> > We specifically opted to use the 2D GPU to do a format conversion, as
> > this yields a RGB internal representation, which means the texture has
> > the same properties as a normal GL texture (e.g. glReadPixels works).
> > This way we can expose YUV format imports as non-external textures.
> > 
> 
> Do you know off the top of your head if rs can handle yuv? If that would be the case
> the conversion from yuv -> rgb could also be done this way and you have
> the same internal RGB representation. But I have the feeling
> YUY2_RENDER_TARGET (aka. RS_FORMAT_YUY2) needs to be supported.

I think the RS can do some of the YUY2 conversions, but it certainly
can not do any of the planar formats.

> Is glReadPixels(..) and friends really a use-case for YUV textures? To be
> honest I am not that deep in that topic.

The point isn't that we need glReadPixels for anything, but that the
fact that it works properly allows us to expose the import as a
regular non-external glTexture without having to lie or cheat. Non-
external textures have some benefits in the lifetime handling that make
the upper parts of the stack more performant.

> Maybe you can add 1-2 sentences about the reasoning for using the 2D core
> somewhere in a commit message?
> 
> > This provides a number of benefits in texture lifetime handling in the
> > upper layers of the stack, which are used to drive those video use-
> > cases, like GStreamer. I don't really care what the blob does, but I do
> > care about having the highest performing solution, which is to have the
> > 2D GPU work in parallel with the 3D GPU and allow efficient texture
> > imports with GStreamer.
> > 
> 
> As the branch point is coming I am okay with this. But I have the feeling
> that we need to touch this area in near future for our lovely imx8 based gpus.

My guess is that the BLT engine can actually do a similar CSC like we
do on the 2D now. As the BLT engine can be driven asynchronously, we
should have almost the same behavior as with the 2D GPU based solution.

> Btw. do you have some numbers regarding the speed-up you get by using the
> 2D core in parallel?

I don't have any numbers for the speedup, but we have some use-cases
where even with the fast-path on the new GC320 on i.MX6QP, which can use a
regular bitblit that does 2 pixels/clock and has a relatively high
clock frequency, we reach a GPU load of 25% for the texture import.
Moving this load away from the 3D GPU seems worthwhile.

Regards,
Lucas

> > I would really appreciate a review of my patch series.
> > 
> > Regards,
> > Lucas
> > 
> > > Christian Gmeiner (3):
> > >   etnaviv: direct YUYV/UYVY support
> > >   etnaviv: update headers from rnndb
> > >   etnaviv: add multi-planar YUV support
> > > 
> > > Lucas Stach (3):
> > >   etnaviv: clear out next pointer when allocating resource
> > >   etnaviv: remember data offset into BO
> > >   etnaviv: improve PIPE_BIND_LINEAR handling
> > > 
> > >  .../drivers/etnaviv/etnaviv_clear_blit.c  |   2 +-
> > >  src/gallium/drivers/etnaviv/etnaviv_format.c  |   5 +-
> > >  .../drivers/etnaviv/etnaviv_resource.c|  24 +++-
> > >  src/gallium/drivers/etnaviv/etnaviv_rs.c  |   5 +
> > >  src/gallium/drivers/etnaviv/etnaviv_screen.c  |   4 +
> > >  src/gallium/drivers/etnaviv/etnaviv_texture.c |   8 ++
> > >  src/gallium/drivers/etnaviv/etnaviv_yuv.c | 123
> > > ++
> > >  src/gallium/drivers/etnaviv/etnaviv_yuv.h |  44 +++
> > >  src/gallium/drivers/etnaviv/hw/common.xml.h   |   2 +-
> > >  .../drivers/etnaviv/hw/common_3d.xml.h|   2 +-
> > >  src/gallium/drivers/etnaviv/hw/state.xml.h|   4 +-
> > >  src/gallium/drivers/etnaviv/hw/state_3d.xml.h |  35 +++--
> > >  .../drivers/etnaviv/hw/state_blt.xml.h|   4 +-
> > >  .../drivers/etnaviv/hw/texdesc_3d.xml.h   |   2 +-
> > >  src/gallium/drivers/etnaviv/meson.build   |   2 +
> > >  15 files changed, 240 insertions(+), 26 deletions(-)
> > >  create mode 100644 src/gallium/drivers/etnaviv/etnaviv_yuv.c
> > >  create mode 100644 src/gallium/drivers/etnaviv/etnaviv_yuv.h
> > > 
> 
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/8] etnaviv: export etna_submit_rs_state

2019-04-26 Thread Christian Gmeiner
Am Fr., 12. Apr. 2019 um 19:38 Uhr schrieb Lucas Stach :
>
> The new 2D YUV blit needs this in some cases, so make it available.
>
> Signed-off-by: Lucas Stach 

Reviewed-by: Christian Gmeiner 

> ---
>  src/gallium/drivers/etnaviv/etnaviv_rs.c | 2 +-
>  src/gallium/drivers/etnaviv/etnaviv_rs.h | 4 
>  2 files changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_rs.c 
> b/src/gallium/drivers/etnaviv/etnaviv_rs.c
> index a9d3872ad41b..fcc2342aedc3 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_rs.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_rs.c
> @@ -171,7 +171,7 @@ etna_modify_rs_clearbits(struct compiled_rs_state *cs, 
> uint32_t clear_bits)
>
>  /* submit RS state, without any processing and no dependence on context
>   * except TS if this is a source-to-destination blit. */
> -static void
> +void
>  etna_submit_rs_state(struct etna_context *ctx,
>   const struct compiled_rs_state *cs)
>  {
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_rs.h 
> b/src/gallium/drivers/etnaviv/etnaviv_rs.h
> index 125a13a9ad34..81ef05955a79 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_rs.h
> +++ b/src/gallium/drivers/etnaviv/etnaviv_rs.h
> @@ -84,6 +84,10 @@ void
>  etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
>const struct rs_state *rs);
>
> +void
> +etna_submit_rs_state(struct etna_context *ctx,
> + const struct compiled_rs_state *cs);
> +
>  /* Context initialization for RS clear_blit functions. */
>  void
>  etna_clear_blit_rs_init(struct pipe_context *pctx);
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] winsys/amdgpu: Restrict allocation to GTT for small vram size

2019-04-26 Thread Michel Dänzer
On 2019-04-26 9:35 a.m., Christian König wrote:
> Am 25.04.19 um 13:37 schrieb Agrawal, Akshu:
>> To avoid evictions, use GTT only for allocation on devices with
>> small vram size.
>>
>> Signed-off-by: Akshu Agrawal 
>> ---
>>   src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 9 -
>>   1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
>> b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
>> index 09cf9247755..aab801b6337 100644
>> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
>> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
>> @@ -486,8 +486,15 @@ static struct amdgpu_winsys_bo
>> *amdgpu_create_bo(struct amdgpu_winsys *ws,
>>  * shared with the OS, allow VRAM placements too. The idea is
>> not to use
>>  * VRAM usefully, but to use it so that it's not unused and
>> wasted.
>>  */
>> -  if (!ws->info.has_dedicated_vram)
>> +   if (!ws->info.has_dedicated_vram) {
>> +  /* For devices having small VRAM size use GTT only to
>> +   * avoid evictions.
>> +   */
>> +  if (ws->info.vram_size <= 16777216)
>> + request.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
> 
> Well that will certainly cause problems because it would result in
> scanout BOs to be forced into GTT.

The patch is also lacking a rationale, e.g. what problem does it solve?


-- 
Earthling Michel Dänzer   |  https://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/6] Add support for NV12

2019-04-26 Thread Christian Gmeiner
Hi Lucas

>
> Am Mittwoch, den 24.04.2019, 08:36 +0200 schrieb Christian Gmeiner:
> > This patch series goes a completely different route than the one from
> > Lucas Stach. I am using the integrated YUV tiler instead of using
> > the 2D core for format conversion. I am reusing some patches from
> > Lucas and this series sits on-top of Lucas "st/dri: YUV" patches.
>
> We specifically opted to use the 2D GPU to do a format conversion, as
> this yields a RGB internal representation, which means the texture has
> the same properties as a normal GL texture (e.g. glReadPixels works).
> This way we can expose YUV format imports as non-external textures.
>

Do you know off the top of your head if rs can handle yuv? If that would be the case
the conversion from yuv -> rgb could also be done this way and you have
the same internal RGB representation. But I have the feeling
YUY2_RENDER_TARGET (aka. RS_FORMAT_YUY2) needs to be supported.

Is glReadPixels(..) and friends really a use-case for YUV textures? To be
honest I am not that deep in that topic.

Maybe you can add 1-2 sentences about the reasoning for using the 2D core
somewhere in a commit message?

> This provides a number of benefits in texture lifetime handling in the
> upper layers of the stack, which are used to drive those video use-
> cases, like GStreamer. I don't really care what the blob does, but I do
> care about having the highest performing solution, which is to have the
> 2D GPU work in parallel with the 3D GPU and allow efficient texture
> imports with GStreamer.
>

As the branch point is coming I am okay with this. But I have the feeling
that we need to touch this area in near future for our lovely imx8 based gpus.

Btw. do you have some numbers regarding the speed-up you get by using the
2D core in parallel?

> I would really appreciate a review of my patch series.
>
> Regards,
> Lucas
>
> > Christian Gmeiner (3):
> >   etnaviv: direct YUYV/UYVY support
> >   etnaviv: update headers from rnndb
> >   etnaviv: add multi-planar YUV support
> >
> > Lucas Stach (3):
> >   etnaviv: clear out next pointer when allocating resource
> >   etnaviv: remember data offset into BO
> >   etnaviv: improve PIPE_BIND_LINEAR handling
> >
> >  .../drivers/etnaviv/etnaviv_clear_blit.c  |   2 +-
> >  src/gallium/drivers/etnaviv/etnaviv_format.c  |   5 +-
> >  .../drivers/etnaviv/etnaviv_resource.c|  24 +++-
> >  src/gallium/drivers/etnaviv/etnaviv_rs.c  |   5 +
> >  src/gallium/drivers/etnaviv/etnaviv_screen.c  |   4 +
> >  src/gallium/drivers/etnaviv/etnaviv_texture.c |   8 ++
> >  src/gallium/drivers/etnaviv/etnaviv_yuv.c | 123
> > ++
> >  src/gallium/drivers/etnaviv/etnaviv_yuv.h |  44 +++
> >  src/gallium/drivers/etnaviv/hw/common.xml.h   |   2 +-
> >  .../drivers/etnaviv/hw/common_3d.xml.h|   2 +-
> >  src/gallium/drivers/etnaviv/hw/state.xml.h|   4 +-
> >  src/gallium/drivers/etnaviv/hw/state_3d.xml.h |  35 +++--
> >  .../drivers/etnaviv/hw/state_blt.xml.h|   4 +-
> >  .../drivers/etnaviv/hw/texdesc_3d.xml.h   |   2 +-
> >  src/gallium/drivers/etnaviv/meson.build   |   2 +
> >  15 files changed, 240 insertions(+), 26 deletions(-)
> >  create mode 100644 src/gallium/drivers/etnaviv/etnaviv_yuv.c
> >  create mode 100644 src/gallium/drivers/etnaviv/etnaviv_yuv.h
> >



-- 
greets
--
Christian Gmeiner, MSc

https://christian-gmeiner.info
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] winsys/amdgpu: Restrict allocation to GTT for small vram size

2019-04-26 Thread Christian König

Am 25.04.19 um 13:37 schrieb Agrawal, Akshu:

To avoid evictions, use GTT only for allocation on devices with
small vram size.

Signed-off-by: Akshu Agrawal 
---
  src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 9 -
  1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 09cf9247755..aab801b6337 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -486,8 +486,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
 * shared with the OS, allow VRAM placements too. The idea is not to use
 * VRAM usefully, but to use it so that it's not unused and wasted.
 */
-  if (!ws->info.has_dedicated_vram)
+   if (!ws->info.has_dedicated_vram) {
+  /* For devices having small VRAM size use GTT only to
+   * avoid evictions.
+   */
+  if (ws->info.vram_size <= 16777216)
+ request.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;


Well that will certainly cause problems because it would result in 
scanout BOs to be forced into GTT.


Christian.


+  else
   request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+  }
 }
  
 if (initial_domain & RADEON_DOMAIN_GTT)


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Mesa 3D

2019-04-26 Thread Rain_Kuper
Hello, does Mesa 3D have support for NVIDIA Tegra K1 SoC and Android?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] winsys/amdgpu: Restrict allocation to GTT for small vram size

2019-04-26 Thread Agrawal, Akshu
To avoid evictions, use GTT only for allocation on devices with
small vram size.

Signed-off-by: Akshu Agrawal 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 09cf9247755..aab801b6337 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -486,8 +486,15 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
* shared with the OS, allow VRAM placements too. The idea is not to use
* VRAM usefully, but to use it so that it's not unused and wasted.
*/
-  if (!ws->info.has_dedicated_vram)
+   if (!ws->info.has_dedicated_vram) {
+  /* For devices having small VRAM size use GTT only to
+   * avoid evictions.
+   */
+  if (ws->info.vram_size <= 16777216)
+ request.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
+  else
  request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+  }
}
 
if (initial_domain & RADEON_DOMAIN_GTT)
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/8] etnaviv: use filter blit for 2D YUV import on old GC320

2019-04-26 Thread Christian Gmeiner
Am Fr., 12. Apr. 2019 um 19:38 Uhr schrieb Lucas Stach :
>
> The GC320 without the 2D tiling feature doesn't support regular blits
> with YUV input, as well as the tiled output. So on those cores we need
> to do a filter blit for the YUV->RGB conversion to a temporary
> linear buffer and then do a tiling blit into the texture buffer using
> the RS engine on the 3D core.
>
> Not the most efficient path, but at least gives us the same level of
> functionality as on the newer GC320 cores and looks the same to the
> application.
>
> Signed-off-by: Lucas Stach 
> ---
>  src/gallium/drivers/etnaviv/etnaviv_2d.c | 198 ---
>  1 file changed, 180 insertions(+), 18 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_2d.c 
> b/src/gallium/drivers/etnaviv/etnaviv_2d.c
> index 457fa4e0cbd0..31b6bf4313dd 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_2d.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_2d.c
> @@ -25,13 +25,16 @@
>  #include "etnaviv_context.h"
>  #include "etnaviv_emit.h"
>  #include "etnaviv_screen.h"
> +#include "etnaviv_rs.h"
>
>  #include "pipe/p_state.h"
>  #include "util/u_format.h"
>
>  #include "hw/state_2d.xml.h"
> +#include "hw/common.xml.h"
>

Sort includes in alphabetic order.


>  #include 
> +#include <math.h>
>
>  #define EMIT_STATE(state_name, src_value) \
> etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)
> @@ -39,15 +42,85 @@
>  #define EMIT_STATE_RELOC(state_name, src_value) \
> etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)
>
> +/* stolen from xf86-video-armada */
> +#define KERNEL_ROWS 17
> +#define KERNEL_INDICES  9
> +#define KERNEL_SIZE (KERNEL_ROWS * KERNEL_INDICES)
> +#define KERNEL_STATE_SZ ((KERNEL_SIZE + 1) / 2)
> +
> +static bool filter_kernel_initialized;
> +static uint32_t filter_kernel[KERNEL_STATE_SZ];
> +
> +static inline float
> +sinc (float x)
> +{
> +  return x != 0.0 ? sinf (x) / x : 1.0;
> +}
> +
> +static void
> +etnaviv_init_filter_kernel(void)
> +{
> +   unsigned row, idx, i;
> +   int16_t kernel_val[KERNEL_STATE_SZ * 2];
> +   float row_ofs = 0.5;
> +   float radius = 4.0;
> +
> +   /* Compute lanczos filter kernel */
> +   for (row = i = 0; row < KERNEL_ROWS; row++) {
> +  float kernel[KERNEL_INDICES] = { 0.0 };
> +  float sum = 0.0;
> +
> +  for (idx = 0; idx < KERNEL_INDICES; idx++) {
> + float x = idx - 4.0 + row_ofs;
> +
> + if (fabs (x) <= radius)
> +kernel[idx] = sinc (M_PI * x) * sinc (M_PI * x / radius);
> +
> + sum += kernel[idx];
> +   }
> +
> +   /* normalise the row */
> +   if (sum)
> +  for (idx = 0; idx < KERNEL_INDICES; idx++)
> + kernel[idx] /= sum;
> +
> +   /* convert to 1.14 format */
> +   for (idx = 0; idx < KERNEL_INDICES; idx++) {
> +  int val = kernel[idx] * (float) (1 << 14);
> +
> +  if (val < -0x8000)
> + val = -0x8000;
> +  else if (val > 0x7fff)
> + val = 0x7fff;
> +
> +  kernel_val[i++] = val;
> +   }
> +
> +   row_ofs -= 1.0 / ((KERNEL_ROWS - 1) * 2);
> +   }
> +
> +   kernel_val[KERNEL_SIZE] = 0;
> +
> +   /* Now convert the kernel values into state values */
> +   for (i = 0; i < KERNEL_STATE_SZ * 2; i += 2)
> +  filter_kernel[i / 2] =
> + VIVS_DE_FILTER_KERNEL_COEFFICIENT0 (kernel_val[i]) |
> + VIVS_DE_FILTER_KERNEL_COEFFICIENT1 (kernel_val[i + 1]);
> +}
> +
>  bool etna_try_2d_blit(struct pipe_context *pctx,
>const struct pipe_blit_info *blit_info)
>  {
> struct etna_context *ctx = etna_context(pctx);
> +   struct etna_screen *screen = ctx->screen;
> struct etna_cmd_stream *stream = ctx->stream2d;
> struct etna_coalesce coalesce;
> struct etna_reloc ry, ru, rv, rdst;
> struct pipe_resource *res_y, *res_u, *res_v, *res_dst;
> +   struct etna_bo *temp_bo = NULL;
> uint32_t src_format;
> +   bool ext_blt = VIV_2D_FEATURE(screen, chipMinorFeatures2, 2D_TILING);
> +   uint32_t dst_config;
>
> assert(util_format_is_yuv(blit_info->src.format));
> assert(blit_info->dst.format == PIPE_FORMAT_R8G8B8A8_UNORM);
> @@ -55,6 +128,11 @@ bool etna_try_2d_blit(struct pipe_context *pctx,
> if (!stream)
>return FALSE;
>
> +  if (unlikely(!ext_blt && !filter_kernel_initialized)) {
> +  etnaviv_init_filter_kernel();
> +  filter_kernel_initialized = true;
> +  }
> +
> switch (blit_info->src.format) {
> case PIPE_FORMAT_NV12:
>src_format = DE_FORMAT_NV12;
> @@ -66,6 +144,18 @@ bool etna_try_2d_blit(struct pipe_context *pctx,
>return FALSE;
  return false;
> }
>
> +   res_dst = blit_info->dst.resource;
> +
> +   if (!ext_blt && etna_resource(res_dst)->layout != ETNA_LAYOUT_LINEAR) {
> +  struct etna_resource *dst = etna_resource(blit_info->dst.resource);
> +  unsigned int bo_size = dst->levels[blit_info->dst.level].stride *
> +