Re: [Mesa-dev] [PATCH 5/5] mesa: add KHR_no_error support for glBindImageTextures()

2017-05-24 Thread Timothy Arceri



On 24/05/17 07:45, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mapi/glapi/gen/ARB_multi_bind.xml |  2 +-
  src/mesa/main/shaderimage.c   | 43 +++
  src/mesa/main/shaderimage.h   |  4 
  3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/ARB_multi_bind.xml 
b/src/mapi/glapi/gen/ARB_multi_bind.xml
index f42eaa28e96..d58c2708cb2 100644
--- a/src/mapi/glapi/gen/ARB_multi_bind.xml
+++ b/src/mapi/glapi/gen/ARB_multi_bind.xml
@@ -35,7 +35,7 @@
  
  
  
-

+
  
  
  
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index 494125346b4..df2ad5a8195 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -659,6 +659,49 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint 
level,
  }
  
  void GLAPIENTRY

+_mesa_BindImageTextures_no_error(GLuint first, GLsizei count,
+ const GLuint *textures)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   int i;
+
+   FLUSH_VERTICES(ctx, 0);
+   ctx->NewDriverState |= ctx->DriverFlags.NewImageUnits;
+
+   _mesa_HashLockMutex(ctx->Shared->TexObjects);
+
+   for (i = 0; i < count; i++) {
+  struct gl_image_unit *u = &ctx->ImageUnits[first + i];
+  const GLuint texture = textures ? textures[i] : 0;
+
+  if (texture) {
+ struct gl_texture_object *texObj = u->TexObj;
+ GLenum tex_format;
+
+ if (!texObj || texObj->Name != texture) {
+texObj = _mesa_lookup_texture_locked(ctx, texture);
+ }
+
+ if (texObj->Target == GL_TEXTURE_BUFFER) {
+tex_format = texObj->BufferObjectFormat;
+ } else {
+struct gl_texture_image *image = texObj->Image[0][0];
+
+tex_format = image->InternalFormat;
+ }
+
+ set_image_binding(u, texObj, 0,
+   _mesa_tex_target_is_layered(texObj->Target),
+   0, GL_READ_WRITE, tex_format);
+  } else {
+ set_image_binding(u, NULL, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
+  }
+   }


In order to avoid code duplication I've been creating helpers, e.g.:

static ALWAYS_INLINE void
bind_image_textures(..., bool no_error)

That way we can do a no_error check on error checks and we can avoid 
duplicating all this code. ALWAYS_INLINE will set compiler flags to 
force inlining so the no_error check will get optimised away.


Personally I think it looks a bit messy, but IMO it's also better than 
the alternative which is the no_error function getting out of sync with 
the regular function after code updates.


For now patches 1-4 are:

Reviewed-by: Timothy Arceri 


+
+   _mesa_HashUnlockMutex(ctx->Shared->TexObjects);
+}
+
+void GLAPIENTRY
  _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
  {
 GET_CURRENT_CONTEXT(ctx);
diff --git a/src/mesa/main/shaderimage.h b/src/mesa/main/shaderimage.h
index b2b22bbf863..6a9e3d67e92 100644
--- a/src/mesa/main/shaderimage.h
+++ b/src/mesa/main/shaderimage.h
@@ -90,6 +90,10 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint 
level,
 GLenum format);
  
  void GLAPIENTRY

+_mesa_BindImageTextures_no_error(GLuint first, GLsizei count,
+ const GLuint *textures);
+
+void GLAPIENTRY
  _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures);
  
  #ifdef __cplusplus



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] glsl: disable array splitting for AoA

2017-05-24 Thread Timothy Arceri
While it produces functioning code the pass creates worse code
for arrays of arrays. See the comment added in this patch for more
detail.
---
 src/compiler/glsl/opt_array_splitting.cpp | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/src/compiler/glsl/opt_array_splitting.cpp 
b/src/compiler/glsl/opt_array_splitting.cpp
index e3073b0..525953f 100644
--- a/src/compiler/glsl/opt_array_splitting.cpp
+++ b/src/compiler/glsl/opt_array_splitting.cpp
@@ -133,20 +133,46 @@ 
ir_array_reference_visitor::get_variable_entry(ir_variable *var)
 
if (!(var->type->is_array() || var->type->is_matrix()))
   return NULL;
 
/* If the array hasn't been sized yet, we can't split it.  After
 * linking, this should be resolved.
 */
if (var->type->is_unsized_array())
   return NULL;
 
+   /* FIXME: arrays of arrays are not handled correctly by this pass so we
+* skip it for now. While the pass will create functioning code it actually
+* produces worse code.
+*
+* For example the array:
+*
+*int[3][2] a;
+*
+* ends up being split up into:
+*
+*int[3][2] a_0;
+*int[3][2] a_1;
+*int[3][2] a_2;
+*
+* And we end up referencing each of these new arrays for example:
+*
+*a[0][1] will be turned into a_0[0][1]
+*a[1][0] will be turned into a_1[1][0]
+*a[2][0] will be turned into a_2[2][0]
+*
+* For now we continue to split AoA of matrices to avoid CTS regressions.
+*/
+   if (var->type->is_array() && var->type->fields.array->is_array() &&
+   !var->type->without_array()->is_matrix())
+  return NULL;
+
foreach_in_list(variable_entry, entry, &this->variable_list) {
   if (entry->var == var)
  return entry;
}
 
variable_entry *entry = new(mem_ctx) variable_entry(var);
this->variable_list.push_tail(entry);
return entry;
 }
 
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nir: NULL check lower_copies_to_load_store()

2017-05-24 Thread Timothy Arceri
Allows us to disable array splitting for arrays of arrays without
regressing tests such as:

ES31-CTS.functional.shaders.arrays_of_arrays.return.explicit.struct_3x1x3_fragment
---
 src/compiler/nir/nir_lower_vars_to_ssa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index e5a12eb..31f7e7a 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -434,21 +434,21 @@ register_variable_uses_block(nir_block *block,
return true;
 }
 
 /* Walks over all of the copy instructions to or from the given deref_node
  * and lowers them to load/store intrinsics.
  */
 static bool
 lower_copies_to_load_store(struct deref_node *node,
struct lower_variables_state *state)
 {
-   if (!node->copies)
+   if (!node || !node->copies)
   return true;
 
struct set_entry *copy_entry;
set_foreach(node->copies, copy_entry) {
   nir_intrinsic_instr *copy = (void *)copy_entry->key;
 
   nir_lower_var_copy_instr(copy, state->shader);
 
   for (unsigned i = 0; i < 2; ++i) {
  struct deref_node *arg_node =
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] - Rewrite mesa website in Sphinx

2017-05-24 Thread Jean Hertel
From: Emil Velikov 
Sent: 24 May 2017 04:53   
>I was looking at it yesterday, and should have most of it done some time today.
>
>The TOC "...release notes" solution (as illustrated by the 17.0.4
>ones) seem fine, so until we come with a better idea I'll just go with
>it.
>
>-Emil
 
OK, I will update the remaining release notes with the proposed solution used 
in the 17.0.4;

One more question: Currently we have many new changes to the documentation in 
mesa master (like the new releases).
Should I rebase and port those to Sphinx too?
I'm asking because, if I rebase, the comments made on github will be lost.

Best Regards,
Jean Hertel  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/16] i965: Write-combine mappings and other performance improvements for non-LLC

2017-05-24 Thread Kenneth Graunke
On Wednesday, May 24, 2017 1:04:42 PM PDT Matt Turner wrote:
> The series aims to improve performance on non-LLC platforms like Braswell and
> Broxton.
> 
> Unsynchronized mappings were not actually unsynchronized on non-LLC platforms,
> hurting Unigine Valley performance quite a lot. That's fixed. We also start
> using write-combining, a feature available since Linux v4.0.
> 
> With WC mappings in place, I've also enabled our tiled memcpy fast paths on
> non-LLC platforms. I've done that in three separate patches in order to
> benchmark them independently.
> 
> TODO:
>More benchmarking, include data in commit messages
>Sort out authorship (lots of these patches have chunks split out of a large
> patch from Chris Wilson's brw-batch branch)

Patches 1-7 are:
Reviewed-by: Kenneth Graunke 

I'll look at the rest soon; hopefully Chris can take a look too.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mapi: Enable assembly language API acceleration for PPC64LE

2017-05-24 Thread Ben Crocker
Signed-off-by: Ben Crocker 
---
 configure.ac |  12 +++
 src/mapi/Makefile.sources|   3 +
 src/mapi/entry.c |  12 ++-
 src/mapi/entry_ppc64le_tls.h | 150 +++
 src/mapi/entry_ppc64le_tsd.h | 208 +++
 5 files changed, 383 insertions(+), 2 deletions(-)
 create mode 100644 src/mapi/entry_ppc64le_tls.h
 create mode 100644 src/mapi/entry_ppc64le_tsd.h

diff --git a/configure.ac b/configure.ac
index 70885fb..2dd24eb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -728,6 +728,13 @@ if test "x$enable_asm" = xyes; then
 ;;
 esac
 ;;
+powerpc64le)
+case "$host_os" in
+linux*)
+asm_arch=ppc64le
+;;
+esac
+;;
 esac
 
 case "$asm_arch" in
@@ -743,6 +750,10 @@ if test "x$enable_asm" = xyes; then
 DEFINES="$DEFINES -DUSE_SPARC_ASM"
 AC_MSG_RESULT([yes, sparc])
 ;;
+ppc64le)
+DEFINES="$DEFINES -DUSE_PPC64LE_ASM"
+AC_MSG_RESULT([yes, ppc64le])
+;;
 *)
 AC_MSG_RESULT([no, platform not supported])
 ;;
@@ -2535,6 +2546,7 @@ AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" 
= xyes -o \
 AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = 
xx86_64)
 AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc)
+AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le)
 
 AC_SUBST([NINE_MAJOR], 1)
 AC_SUBST([NINE_MINOR], 0)
diff --git a/src/mapi/Makefile.sources b/src/mapi/Makefile.sources
index 37d6ef3..29327aa 100644
--- a/src/mapi/Makefile.sources
+++ b/src/mapi/Makefile.sources
@@ -15,6 +15,7 @@
 #this mode, compile MAPI_BRIDGE_FILES with MAPI_MODE_BRIDGE defined.
 
 MAPI_UTIL_FILES = \
+   ../util/u_endian.h \
u_current.c \
u_current.h \
u_execmem.c \
@@ -26,6 +27,8 @@ MAPI_BRIDGE_FILES = \
entry_x86-64_tls.h \
entry_x86_tls.h \
entry_x86_tsd.h \
+   entry_ppc64le_tls.h \
+   entry_ppc64le_tsd.h \
mapi_tmp.h
 
 MAPI_FILES = \
diff --git a/src/mapi/entry.c b/src/mapi/entry.c
index 27d0db4..a5e9b60 100644
--- a/src/mapi/entry.c
+++ b/src/mapi/entry.c
@@ -27,6 +27,10 @@
 
 #include "entry.h"
 #include "u_current.h"
+#include "../util/u_endian.h"
+
+#include 
+#include 
 
 #define _U_STRINGIFY(x) #x
 #define U_STRINGIFY(x) _U_STRINGIFY(x)
@@ -49,10 +53,14 @@
 #   endif
 #elif defined(USE_X86_64_ASM) && defined(__GNUC__) && defined(GLX_USE_TLS)
 #   include "entry_x86-64_tls.h"
+#elif defined(USE_PPC64LE_ASM) && defined(__GNUC__) && 
defined(PIPE_ARCH_LITTLE_ENDIAN)
+#   ifdef GLX_USE_TLS
+#  include "entry_ppc64le_tls.h"
+#   else
+#  include "entry_ppc64le_tsd.h"
+#   endif
 #else
 
-#include 
-
 static inline const struct mapi_table *
 entry_current_get(void)
 {
diff --git a/src/mapi/entry_ppc64le_tls.h b/src/mapi/entry_ppc64le_tls.h
new file mode 100644
index 000..9a2671e
--- /dev/null
+++ b/src/mapi/entry_ppc64le_tls.h
@@ -0,0 +1,150 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *Ben Crocker 
+ */
+
+#ifdef HAVE_FUNC_ATTRIBUTE_VISIBILITY
+#define HIDDEN __attribute__((visibility("hidden")))
+#else
+#define HIDDEN
+#endif
+
+// NOTE: These must be powers of two:
+#define PPC64LE_ENTRY_SIZE 64
+#define PPC64LE_PAGE_ALIGN 65536
+#if ((PPC64LE_ENTRY_SIZE & (PPC64LE_ENTRY_SIZE - 1)) != 0)
+#error PPC64LE_ENTRY_SIZE must be a power of two!
+#endif
+#if ((PPC64LE_PAGE_ALIGN & (PPC64LE_PAGE_ALIGN - 1)) != 0)
+#error PPC64LE_PAGE_ALIGN must be a power of two!
+#endif
+
+__asm__(".text\n"
+".balign " U_STRINGIFY(PPC64LE_ENTRY_SIZE) "\n"
+"ppc64le_entry_start:");
+
+#define STUB_ASM_ENTRY(func)

Re: [Mesa-dev] Mesa 17.1.1 release candidate

2017-05-24 Thread Chuck Atkins
>
> We don't do release tarballs for the stable RC. Although it has been
> asked a few times in the past.
> There's nothing stopping us though - will check if we can start doing so.
>

With most codes, I would normally be happy with just using the git tag but
most of the issues I run into tend to be build issues, which in the case of
Mesa, follows a different path for tarballs than for git trees.  I wouldn't
spend too much time on it.  I can always just make the source tarball myself
to test with.

- Chuck
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/16] i965: Replace brw_bo_map_unsynchronized with MAP_ASYNC

2017-05-24 Thread Matt Turner
This way we can let brw_bo_map() choose the best mapping type.

Part of the patch inlines map_gtt() into brw_bo_map_gtt() (and removes
map_gtt()). brw_bo_map_gtt() just wrapped map_gtt() with locking and a
call to set_domain(). map_gtt() is called by brw_bo_map_unsynchronized()
to avoid the call to set_domain(). With the MAP_ASYNC flag, we now have
the same behavior previously provided by brw_bo_map_unsynchronized().
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c   | 91 
 src/mesa/drivers/dri/i965/brw_program_cache.c|  4 +-
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 27 +++
 3 files changed, 25 insertions(+), 97 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index ec9611f..aec07e1 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -689,8 +689,9 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags)
DBG("brw_bo_map_cpu: %d (%s) -> %p\n", bo->gem_handle, bo->name,
bo->map_cpu);
 
-   set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
-  flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0);
+   if (!(flags & MAP_ASYNC))
+  set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
+ flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0);
 
bo_mark_mmaps_incoherent(bo);
VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_cpu, bo->size));
@@ -700,15 +701,17 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo 
*bo, unsigned flags)
 }
 
 static void *
-map_gtt(struct brw_bo *bo)
+brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
 
+   pthread_mutex_lock(&bufmgr->lock);
+
/* Get a mapping of the buffer if we haven't before. */
if (bo->map_gtt == NULL) {
   struct drm_i915_gem_mmap_gtt mmap_arg;
 
-  DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
+  DBG("brw_bo_map_gtt: mmap %d (%s), map_count=%d\n",
   bo->gem_handle, bo->name, bo->map_count);
 
   memclear(mmap_arg);
@@ -719,6 +722,7 @@ map_gtt(struct brw_bo *bo)
   if (ret != 0) {
  DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ pthread_mutex_unlock(&bufmgr->lock);
  return NULL;
   }
 
@@ -729,89 +733,24 @@ map_gtt(struct brw_bo *bo)
  bo->map_gtt = NULL;
  DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ pthread_mutex_unlock(&bufmgr->lock);
  return NULL;
   }
+  bo->map_count++;
}
 
-   DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
+   DBG("brw_bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
bo->map_gtt);
 
-   bo->map_count++;
-   return bo->map_gtt;
-}
-
-static void *
-brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   pthread_mutex_lock(&bufmgr->lock);
-
-   void *map = map_gtt(bo);
-   if (map == NULL) {
-  pthread_mutex_unlock(&bufmgr->lock);
-  return NULL;
-   }
-
-   /* Now move it to the GTT domain so that the GPU and CPU
-* caches are flushed and the GPU isn't actively using the
-* buffer.
-*
-* The pagefault handler does this domain change for us when
-* it has unbound the BO from the GTT, but it's up to us to
-* tell it when we're about to use things if we had done
-* rendering and it still happens to be bound to the GTT.
-*/
-   set_domain(brw, "GTT mapping", bo,
-  I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+   if (!(flags & MAP_ASYNC))
+  set_domain(brw, "GTT mapping", bo,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
 
bo_mark_mmaps_incoherent(bo);
VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size));
pthread_mutex_unlock(&bufmgr->lock);
 
-   return map;
-}
-
-/**
- * Performs a mapping of the buffer object like the normal GTT
- * mapping, but avoids waiting for the GPU to be done reading from or
- * rendering to the buffer.
- *
- * This is used in the implementation of GL_ARB_map_buffer_range: The
- * user asks to create a buffer, then does a mapping, fills some
- * space, runs a drawing command, then asks to map it again without
- * synchronizing because it guarantees that it won't write over the
- * data that the GPU is busy using (or, more specifically, that if it
- * does write over the data, it acknowledges that rendering is
- * undefined).
- */
-
-void *
-brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   /* If the CPU cache isn't coherent with the GTT, then use a
-* regular synchronized mapping.  The problem is that we don't
-* track where the buffer was last used on the CPU side in
-* terms of brw_bo_map_cpu vs brw_bo_map_gtt, so
-* we would potentially 

[Mesa-dev] [PATCH 15/16] i965: Enable tiled memcpy get tex image path on non-LLC

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/intel_tex_image.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 59d5fa4..75b5d6e 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -481,8 +481,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
 * with _mesa_image_row_stride. However, before removing the restrictions
 * we need tests.
 */
-   if (!brw->has_llc ||
-   !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+   if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
  texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/16] i965: Enable tiled memcpy pixel read path on non-LLC

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/intel_pixel_read.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c 
b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index eb3f66f..5085683 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -91,8 +91,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
 * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
 * more types.
 */
-   if (!brw->has_llc ||
-   !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+   if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
pixels == NULL ||
_mesa_is_bufferobj(pack->BufferObj) ||
pack->Alignment > 4 ||
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/16] i965: Add and use brw_bo_map()

2017-05-24 Thread Matt Turner
We can encapsulate the logic for choosing the mapping type. This will
also help when we add WC mappings.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 30 +--
 src/mesa/drivers/dri/i965/brw_bufmgr.h|  5 ++--
 src/mesa/drivers/dri/i965/brw_performance_query.c |  6 ++---
 src/mesa/drivers/dri/i965/brw_program.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c |  6 ++---
 src/mesa/drivers/dri/i965/brw_queryobj.c  |  2 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c |  2 +-
 src/mesa/drivers/dri/i965/gen6_sol.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  4 +--
 src/mesa/drivers/dri/i965/intel_buffer_objects.c  | 13 ++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ++
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_screen.c  |  4 +--
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c|  2 +-
 src/mesa/drivers/dri/i965/intel_upload.c  |  5 +---
 16 files changed, 52 insertions(+), 54 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index b79f566..ec9611f 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -658,7 +658,7 @@ set_domain(struct brw_context *brw, const char *action,
}
 }
 
-void *
+static void *
 brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
@@ -740,7 +740,7 @@ map_gtt(struct brw_bo *bo)
return bo->map_gtt;
 }
 
-void *
+static void *
 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
@@ -814,6 +814,32 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo)
return map;
 }
 
+static bool
+can_map_cpu(struct brw_bo *bo, unsigned flags)
+{
+   if (bo->cache_coherent)
+  return true;
+
+   if (flags & MAP_PERSISTENT)
+  return false;
+
+   if (flags & MAP_COHERENT)
+  return false;
+
+   return !(flags & MAP_WRITE);
+}
+
+void *
+brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
+{
+   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
+  return brw_bo_map_gtt(brw, bo, flags);
+   else if (can_map_cpu(bo, flags))
+  return brw_bo_map_cpu(brw, bo, flags);
+   else
+  return brw_bo_map_gtt(brw, bo, flags);
+}
+
 int
 brw_bo_unmap(struct brw_bo *bo)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 831da69..099afcf 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -137,7 +137,7 @@ struct brw_bo {
  *
  * Buffer objects are not necessarily initially mapped into CPU virtual
  * address space or graphics device aperture.  They must be mapped
- * using brw_bo_map_cpu() or brw_bo_map_gtt() to be used by the CPU.
+ * using brw_bo_map() to be used by the CPU.
  */
 struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
 uint64_t size, uint64_t alignment);
@@ -189,7 +189,7 @@ void brw_bo_unreference(struct brw_bo *bo);
  * This function will block waiting for any existing execution on the
  * buffer to complete, first.  The resulting mapping is returned.
  */
-MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags);
+MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags);
 
 /**
  * Reduces the refcount on the userspace mapping of the buffer
@@ -263,7 +263,6 @@ struct brw_bo *brw_bo_gem_create_from_name(struct 
brw_bufmgr *bufmgr,
unsigned int handle);
 void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
 MUST_CHECK void *brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo);
-MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags);
 
 int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
 
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 2ec070b..1c9ddf5 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw,
if (!read_oa_samples(brw))
   goto error;
 
-   query_buffer = brw_bo_map_cpu(brw, obj->oa.bo, MAP_READ);
+   query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ);
 
start = last = query_buffer;
end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
@@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx,
   MI_RPC_BO_SIZE, 64);
 #ifdef DEBUG
   /* Pre-filling the BO helps debug whether writes landed. */
-  void *map = brw_bo_map_cpu(brw, 

[Mesa-dev] [PATCH 12/16] i965: Use write-combine mappings where available

2017-05-24 Thread Matt Turner
Write-combine mappings give much better performance on writes than
uncached access through the GTT.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 69 ++
 1 file changed, 69 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index aec07e1..bbcda04 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -110,6 +110,7 @@ struct brw_bufmgr {
struct hash_table *handle_table;
 
bool has_llc:1;
+   bool has_mmap_wc:1;
bool bo_reuse:1;
 };
 
@@ -701,6 +702,48 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags)
 }
 
 static void *
+brw_bo_map_wc(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
+{
+   struct brw_bufmgr *bufmgr = bo->bufmgr;
+
+   pthread_mutex_lock(&bufmgr->lock);
+
+   if (!bo->map_wc) {
+  struct drm_i915_gem_mmap mmap_arg;
+
+  DBG("brw_bo_map_wc: %d (%s), map_count=%d\n",
+  bo->gem_handle, bo->name, bo->map_count);
+
+  memclear(mmap_arg);
+  mmap_arg.handle = bo->gem_handle;
+  mmap_arg.size = bo->size;
+  mmap_arg.flags = I915_MMAP_WC;
+  int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
+  if (ret != 0) {
+ ret = -errno;
+ DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
+ __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
+ pthread_mutex_unlock(&bufmgr->lock);
+ return NULL;
+  }
+  bo->map_count++;
+  VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
+  bo->map_wc = (void *) (uintptr_t) mmap_arg.addr_ptr;
+   }
+   DBG("brw_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
+
+   if (!(flags & MAP_ASYNC))
+  set_domain(brw, "WC mapping", bo, I915_GEM_DOMAIN_GTT,
+ flags & MAP_WRITE ? I915_GEM_DOMAIN_GTT : 0);
+
+   bo_mark_mmaps_incoherent(bo);
+   VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_wc, bo->size));
+   pthread_mutex_unlock(&bufmgr->lock);
+
+   return bo->map_wc;
+}
+
+static void *
 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
@@ -771,10 +814,14 @@ can_map_cpu(struct brw_bo *bo, unsigned flags)
 void *
 brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
+   struct brw_bufmgr *bufmgr = bo->bufmgr;
+
if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
   return brw_bo_map_gtt(brw, bo, flags);
else if (can_map_cpu(bo, flags))
   return brw_bo_map_cpu(brw, bo, flags);
+   else if (bufmgr->has_mmap_wc)
+  return brw_bo_map_wc(brw, bo, flags);
else
   return brw_bo_map_gtt(brw, bo, flags);
 }
@@ -1177,6 +1224,27 @@ brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset, 
uint64_t *result)
return ret;
 }
 
+static int
+gem_param(int fd, int name)
+{
+   drm_i915_getparam_t gp;
+   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */
+
+   memset(&gp, 0, sizeof(gp));
+   gp.param = name;
+   gp.value = &v;
+   if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
+  return -1;
+
+   return v;
+}
+
+static bool
+test_has_mmap_wc(int fd)
+{
+   return gem_param(fd, I915_PARAM_MMAP_VERSION) > 0;
+}
+
 /**
  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
  * and manage map buffer objections.
@@ -1209,6 +1277,7 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd, 
int batch_size)
}
 
bufmgr->has_llc = devinfo->has_llc;
+   bufmgr->has_mmap_wc = test_has_mmap_wc(fd);
 
init_cache_buckets(bufmgr);
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/16] i965: Enable tiled memcpy tex image path on non-LLC

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 7acb3d3..2aead41 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -98,8 +98,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
 * with _mesa_image_row_stride. However, before removing the restrictions
 * we need tests.
 */
-   if (!brw->has_llc ||
-   !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+   if (!(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
!(texImage->TexObject->Target == GL_TEXTURE_2D ||
  texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
pixels == NULL ||
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/16] i965: Pass the map-mode along to intel_mipmap_tree_map_raw()

2017-05-24 Thread Matt Turner
From: Chris Wilson 

Since we can distinguish when mapping between READ and WRITE, we can
pass along the map mode to avoid stalls and flushes where possible.

Reviewed-by: Kenneth Graunke 
Reviewed-by: Matt Turner 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 +++
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 049b4c1..82c2de8 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -49,7 +49,8 @@
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
 static void *intel_miptree_map_raw(struct brw_context *brw,
-   struct intel_mipmap_tree *mt);
+   struct intel_mipmap_tree *mt,
+   GLbitfield mode);
 
 static void intel_miptree_unmap_raw(struct intel_mipmap_tree *mt);
 
@@ -2427,7 +2428,9 @@ intel_update_r8stencil(struct brw_context *brw,
 }
 
 static void *
-intel_miptree_map_raw(struct brw_context *brw, struct intel_mipmap_tree *mt)
+intel_miptree_map_raw(struct brw_context *brw,
+  struct intel_mipmap_tree *mt,
+  GLbitfield mode)
 {
/* CPU accesses to color buffers don't understand fast color clears, so
 * resolve any pending fast color clears before we map.
@@ -2454,7 +2457,7 @@ intel_miptree_map_raw(struct brw_context *brw, struct 
intel_mipmap_tree *mt)
if (mt->tiling != I915_TILING_NONE || mt->is_scanout)
   return brw_bo_map_gtt(brw, bo);
else
-  return brw_bo_map(brw, bo, true);
+  return brw_bo_map(brw, bo, mode & GL_MAP_WRITE_BIT);
 }
 
 static void
@@ -2485,7 +2488,7 @@ intel_miptree_map_gtt(struct brw_context *brw,
y /= bh;
x /= bw;
 
-   base = intel_miptree_map_raw(brw, mt) + mt->offset;
+   base = intel_miptree_map_raw(brw, mt, map->mode) + mt->offset;
 
if (base == NULL)
   map->ptr = NULL;
@@ -2548,7 +2551,7 @@ intel_miptree_map_blit(struct brw_context *brw,
   }
}
 
-   map->ptr = intel_miptree_map_raw(brw, map->linear_mt);
+   map->ptr = intel_miptree_map_raw(brw, map->linear_mt, map->mode);
 
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
map->x, map->y, map->w, map->h,
@@ -2610,7 +2613,7 @@ intel_miptree_map_movntdqa(struct brw_context *brw,
image_x += map->x;
image_y += map->y;
 
-   void *src = intel_miptree_map_raw(brw, mt);
+   void *src = intel_miptree_map_raw(brw, mt, map->mode);
if (!src)
   return;
 
@@ -2679,7 +2682,7 @@ intel_miptree_map_s8(struct brw_context *brw,
 */
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
   uint8_t *untiled_s8_map = map->ptr;
-  uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
+  uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_READ_BIT);
   unsigned int image_x, image_y;
 
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
@@ -2716,7 +2719,7 @@ intel_miptree_unmap_s8(struct brw_context *brw,
if (map->mode & GL_MAP_WRITE_BIT) {
   unsigned int image_x, image_y;
   uint8_t *untiled_s8_map = map->ptr;
-  uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt);
+  uint8_t *tiled_s8_map = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT);
 
   intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
 
@@ -2771,7 +2774,7 @@ intel_miptree_unmap_etc(struct brw_context *brw,
image_x += map->x;
image_y += map->y;
 
-   uint8_t *dst = intel_miptree_map_raw(brw, mt)
+   uint8_t *dst = intel_miptree_map_raw(brw, mt, GL_MAP_WRITE_BIT)
 + image_y * mt->pitch
 + image_x * mt->cpp;
 
@@ -2822,8 +2825,8 @@ intel_miptree_map_depthstencil(struct brw_context *brw,
 */
if (!(map->mode & GL_MAP_INVALIDATE_RANGE_BIT)) {
   uint32_t *packed_map = map->ptr;
-  uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
-  uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
+  uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_READ_BIT);
+  uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_READ_BIT);
   unsigned int s_image_x, s_image_y;
   unsigned int z_image_x, z_image_y;
 
@@ -2883,8 +2886,8 @@ intel_miptree_unmap_depthstencil(struct brw_context *brw,
 
if (map->mode & GL_MAP_WRITE_BIT) {
   uint32_t *packed_map = map->ptr;
-  uint8_t *s_map = intel_miptree_map_raw(brw, s_mt);
-  uint32_t *z_map = intel_miptree_map_raw(brw, z_mt);
+  uint8_t *s_map = intel_miptree_map_raw(brw, s_mt, GL_MAP_WRITE_BIT);
+  uint32_t *z_map = intel_miptree_map_raw(brw, z_mt, GL_MAP_WRITE_BIT);
   unsigned int s_image_x, s_image_y;
   unsigned int z_image_x, z_image_y;
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 09/16] i965: Drop MAP_READ from some write-only mappings

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_performance_query.c | 2 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 +-
 src/mesa/drivers/dri/i965/intel_screen.c  | 2 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 4cbd60e..2ec070b 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx,
   MI_RPC_BO_SIZE, 64);
 #ifdef DEBUG
   /* Pre-filling the BO helps debug whether writes landed. */
-  void *map = brw_bo_map_cpu(brw, obj->oa.bo, MAP_READ | MAP_WRITE);
+  void *map = brw_bo_map_cpu(brw, obj->oa.bo, MAP_WRITE);
   memset(map, 0x80, MI_RPC_BO_SIZE);
   brw_bo_unmap(obj->oa.bo);
 #endif
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 4803e36..2a37720 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1385,7 +1385,7 @@ intel_miptree_init_mcs(struct brw_context *brw,
 *
 * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
 */
-   void *map = brw_bo_map_gtt(brw, mt->mcs_buf->bo, MAP_READ | MAP_WRITE);
+   void *map = brw_bo_map_gtt(brw, mt->mcs_buf->bo, MAP_WRITE);
if (unlikely(map == NULL)) {
   fprintf(stderr, "Failed to map mcs buffer into GTT\n");
   brw_bo_unreference(mt->mcs_buf->bo);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 2d50882..477c5c0 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1416,7 +1416,7 @@ intel_detect_pipelined_register(struct intel_screen 
*screen,
if (bo == NULL)
   goto err_results;
 
-   map = brw_bo_map_cpu(NULL, bo, MAP_READ | MAP_WRITE);
+   map = brw_bo_map_cpu(NULL, bo, MAP_WRITE);
if (!map)
   goto err_batch;
 
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 0f75bdf..b0b1a0f 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -146,7 +146,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
   intel_batchbuffer_flush(brw);
}
 
-   void *map = brw_bo_map_cpu(brw, bo, MAP_READ | MAP_WRITE);
+   void *map = brw_bo_map_cpu(brw, bo, MAP_WRITE);
if (map == NULL) {
   DBG("%s: failed to map bo\n", __func__);
   return false;
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/16] i965: Remove brw_bo's virtual member

2017-05-24 Thread Matt Turner
Just return the map from brw_bo_map_*
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 49 +--
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 19 ++---
 src/mesa/drivers/dri/i965/brw_context.h   |  2 +
 src/mesa/drivers/dri/i965/brw_performance_query.c | 10 ++---
 src/mesa/drivers/dri/i965/brw_program.c   |  3 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c | 31 +-
 src/mesa/drivers/dri/i965/brw_queryobj.c  |  3 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c |  3 +-
 src/mesa/drivers/dri/i965/gen6_sol.c  |  3 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 17 
 src/mesa/drivers/dri/i965/intel_buffer_objects.c  | 23 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 +++---
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  8 ++--
 src/mesa/drivers/dri/i965/intel_screen.c  | 15 ---
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  8 ++--
 src/mesa/drivers/dri/i965/intel_tex_subimage.c|  8 ++--
 src/mesa/drivers/dri/i965/intel_upload.c  |  9 +++--
 17 files changed, 102 insertions(+), 121 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 6ea6978..9a65d32 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -468,7 +468,6 @@ brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
 
bo->size = open_arg.size;
bo->offset64 = 0;
-   bo->virtual = NULL;
bo->bufmgr = bufmgr;
bo->gem_handle = open_arg.handle;
bo->name = name;
@@ -658,11 +657,10 @@ set_domain(struct brw_context *brw, const char *action,
}
 }
 
-int
+void *
 brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int write_enable)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
-   int ret;
 
pthread_mutex_lock(&bufmgr->lock);
 
@@ -675,20 +673,19 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
int write_enable)
   memclear(mmap_arg);
   mmap_arg.handle = bo->gem_handle;
   mmap_arg.size = bo->size;
-  ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
+  int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
   if (ret != 0) {
  ret = -errno;
  DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
  pthread_mutex_unlock(&bufmgr->lock);
- return ret;
+ return NULL;
   }
   bo->map_count++;
   VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
   bo->mem_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
}
DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->mem_virtual);
-   bo->virtual = bo->mem_virtual;
 
set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
   write_enable ? I915_GEM_DOMAIN_CPU : 0);
@@ -697,14 +694,13 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
int write_enable)
VG(VALGRIND_MAKE_MEM_DEFINED(bo->mem_virtual, bo->size));
pthread_mutex_unlock(&bufmgr->lock);
 
-   return 0;
+   return bo->mem_virtual;
 }
 
-static int
+static void *
 map_gtt(struct brw_bo *bo)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
-   int ret;
 
/* Get a mapping of the buffer if we haven't before. */
if (bo->gtt_virtual == NULL) {
@@ -717,12 +713,11 @@ map_gtt(struct brw_bo *bo)
   mmap_arg.handle = bo->gem_handle;
 
   /* Get the fake offset back... */
-  ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
+  int ret = drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
   if (ret != 0) {
- ret = -errno;
  DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return ret;
+ return NULL;
   }
 
   /* and mmap it */
@@ -730,34 +725,30 @@ map_gtt(struct brw_bo *bo)
  MAP_SHARED, bufmgr->fd, mmap_arg.offset);
   if (bo->gtt_virtual == MAP_FAILED) {
  bo->gtt_virtual = NULL;
- ret = -errno;
  DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
- return ret;
+ return NULL;
   }
}
 
-   bo->map_count++;
-   bo->virtual = bo->gtt_virtual;
-
DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
bo->gtt_virtual);
 
-   return 0;
+   bo->map_count++;
+   return bo->gtt_virtual;
 }
 
-int
+void *
 brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
-   int ret;
 
pthread_mutex_lock(&bufmgr->lock);
 
-   ret = map_gtt(bo);
-   if (ret) {
+   void *map = map_gtt(bo);
+   if (map == NULL) {
   pthread_mutex_unlock(&bufmgr->lock);
-  return ret;
+  return NULL;
}
 
/* Now move it to the GTT domain so that the GPU and CPU
@@ -776,7 +767,7 @@ 

[Mesa-dev] [PATCH 13/16] i965: Explicitly disallow tiled memcpy path on Gen4 with swizzling.

2017-05-24 Thread Matt Turner
From: Chris Wilson 

The manual detiling paths are not prepared to handle Gen4-G45 with
swizzling enabled, so explicitly disable them.  (They're already
disabled because these platforms don't have LLC but the next patch will
enable this path).

Reviewed-by: Matt Turner 
---
 src/mesa/drivers/dri/i965/intel_pixel_read.c   | 7 +++
 src/mesa/drivers/dri/i965/intel_tex_image.c| 7 +++
 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 7 +++
 3 files changed, 21 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c 
b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 4614279..eb3f66f 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -133,6 +133,13 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
   return false;
}
 
+   /* linear_to_tiled() assumes that if the object is swizzled, it
+* is using I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.
+* This is only true on gen5 and above.
+*/
+   if (brw->gen < 5 && brw->has_swizzling)
+  return false;
+
/* Since we are going to read raw data to the miptree, we need to resolve
 * any pending fast color clears before we start.
 */
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 27c5429..59d5fa4 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -518,6 +518,13 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
   return false;
}
 
+   /* tiled_to_linear() assumes that if the object is swizzled, it
+* is using I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.
+* This is only true on gen5 and above.
+*/
+   if (brw->gen < 5 && brw->has_swizzling)
+  return false;
+
/* Since we are going to write raw data to the miptree, we need to resolve
 * any pending fast color clears before we start.
 */
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 40569e1..7acb3d3 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -134,6 +134,13 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
   return false;
}
 
+   /* tiled_to_linear() assumes that if the object is swizzled, it
+* is using I915_BIT6_SWIZZLE_9_10 for X and I915_BIT6_SWIZZLE_9 for Y.
+* This is only true on gen5 and above.
+*/
+   if (brw->gen < 5 && brw->has_swizzling)
+  return false;
+
/* Since we are going to write raw data to the miptree, we need to resolve
 * any pending fast color clears before we start.
 */
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/16] i965: Remove unused brw_bo_map__* functions

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 105 -
 src/mesa/drivers/dri/i965/brw_bufmgr.h |   4 --
 2 files changed, 109 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 2f17934..6ea6978 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -1221,111 +1221,6 @@ brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t 
offset, uint64_t *result)
return ret;
 }
 
-void *
-brw_bo_map__gtt(struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (bo->gtt_virtual)
-  return bo->gtt_virtual;
-
-   pthread_mutex_lock(&bufmgr->lock);
-   if (bo->gtt_virtual == NULL) {
-  struct drm_i915_gem_mmap_gtt mmap_arg;
-  void *ptr;
-
-  DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
-  bo->gem_handle, bo->name, bo->map_count);
-
-  memclear(mmap_arg);
-  mmap_arg.handle = bo->gem_handle;
-
-  /* Get the fake offset back... */
-  ptr = MAP_FAILED;
-  if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg) == 0) {
- /* and mmap it */
- ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
-MAP_SHARED, bufmgr->fd, mmap_arg.offset);
-  }
-  if (ptr == MAP_FAILED) {
- --bo->map_count;
- ptr = NULL;
-  }
-
-  bo->gtt_virtual = ptr;
-   }
-   pthread_mutex_unlock(&bufmgr->lock);
-
-   return bo->gtt_virtual;
-}
-
-void *
-brw_bo_map__cpu(struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (bo->mem_virtual)
-  return bo->mem_virtual;
-
-   pthread_mutex_lock(&bufmgr->lock);
-   if (!bo->mem_virtual) {
-  struct drm_i915_gem_mmap mmap_arg;
-
-  DBG("bo_map: %d (%s), map_count=%d\n",
-  bo->gem_handle, bo->name, bo->map_count);
-
-  memclear(mmap_arg);
-  mmap_arg.handle = bo->gem_handle;
-  mmap_arg.size = bo->size;
-  if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-  } else {
- bo->map_count++;
- VG(VALGRIND_MALLOCLIKE_BLOCK
-(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
- bo->mem_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
-  }
-   }
-   pthread_mutex_unlock(&bufmgr->lock);
-
-   return bo->mem_virtual;
-}
-
-void *
-brw_bo_map__wc(struct brw_bo *bo)
-{
-   struct brw_bufmgr *bufmgr = bo->bufmgr;
-
-   if (bo->wc_virtual)
-  return bo->wc_virtual;
-
-   pthread_mutex_lock(&bufmgr->lock);
-   if (!bo->wc_virtual) {
-  struct drm_i915_gem_mmap mmap_arg;
-
-  DBG("bo_map: %d (%s), map_count=%d\n",
-  bo->gem_handle, bo->name, bo->map_count);
-
-  memclear(mmap_arg);
-  mmap_arg.handle = bo->gem_handle;
-  mmap_arg.size = bo->size;
-  mmap_arg.flags = I915_MMAP_WC;
-  if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
- DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
- __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
-  } else {
- bo->map_count++;
- VG(VALGRIND_MALLOCLIKE_BLOCK
-(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
- bo->wc_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
-  }
-   }
-   pthread_mutex_unlock(&bufmgr->lock);
-
-   return bo->wc_virtual;
-}
-
 /**
  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
  * and manage map buffer objections.
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 56ec206..d0b88fb 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -261,10 +261,6 @@ void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
 int brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo);
 int brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo);
 
-void *brw_bo_map__cpu(struct brw_bo *bo);
-void *brw_bo_map__gtt(struct brw_bo *bo);
-void *brw_bo_map__wc(struct brw_bo *bo);
-
 int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
 
 uint32_t brw_create_hw_context(struct brw_bufmgr *bufmgr);
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/16] i965: Pass flags to brw_bo_map_*

2017-05-24 Thread Matt Turner
brw_bo_map_cpu() took a write_enable arg, but it wasn't always clear
whether we were also planning to read from the buffer. I kept everything
semantically identical by passing only MAP_READ or MAP_READ | MAP_WRITE
depending on the write_enable argument.

The other flags are not used yet, but MAP_ASYNC for instance, will be
used in a later patch to remove the need for a separate
brw_bo_map_unsynchronized() function.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c|  8 
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 14 --
 src/mesa/drivers/dri/i965/brw_performance_query.c |  6 +++---
 src/mesa/drivers/dri/i965/brw_program.c   |  2 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c |  6 +++---
 src/mesa/drivers/dri/i965/brw_queryobj.c  |  2 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c |  2 +-
 src/mesa/drivers/dri/i965/gen6_sol.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  4 ++--
 src/mesa/drivers/dri/i965/intel_buffer_objects.c  | 16 +++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  6 +++---
 src/mesa/drivers/dri/i965/intel_pixel_read.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_screen.c  |  4 ++--
 src/mesa/drivers/dri/i965/intel_tex_image.c   |  2 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c|  2 +-
 src/mesa/drivers/dri/i965/intel_upload.c  |  4 ++--
 16 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index b36979f..b79f566 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -659,7 +659,7 @@ set_domain(struct brw_context *brw, const char *action,
 }
 
 void *
-brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, int write_enable)
+brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
 
@@ -690,7 +690,7 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
int write_enable)
bo->map_cpu);
 
set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
-  write_enable ? I915_GEM_DOMAIN_CPU : 0);
+  flags & MAP_WRITE ? I915_GEM_DOMAIN_CPU : 0);
 
bo_mark_mmaps_incoherent(bo);
VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_cpu, bo->size));
@@ -741,7 +741,7 @@ map_gtt(struct brw_bo *bo)
 }
 
 void *
-brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo)
+brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
 
@@ -799,7 +799,7 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo)
 * does reasonable things.
 */
if (!bufmgr->has_llc)
-  return brw_bo_map_gtt(brw, bo);
+  return brw_bo_map_gtt(brw, bo, MAP_READ | MAP_WRITE);
 
pthread_mutex_lock(&bufmgr->lock);
 
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 3dbde21..831da69 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -173,13 +173,23 @@ void brw_bo_reference(struct brw_bo *bo);
  */
 void brw_bo_unreference(struct brw_bo *bo);
 
+/* Must match MapBufferRange interface (for convenience) */
+#define MAP_READ    GL_MAP_READ_BIT
+#define MAP_WRITE   GL_MAP_WRITE_BIT
+#define MAP_ASYNC   GL_MAP_UNSYNCHRONIZED_BIT
+#define MAP_PERSISTENT  GL_MAP_PERSISTENT_BIT
+#define MAP_COHERENT    GL_MAP_COHERENT_BIT
+/* internal */
+#define MAP_INTERNAL_MASK   (0xff << 24)
+#define MAP_RAW (0x01 << 24)
+
 /**
  * Maps the buffer into userspace.
  *
  * This function will block waiting for any existing execution on the
  * buffer to complete, first.  The resulting mapping is returned.
  */
-MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
int write_enable);
+MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags);
 
 /**
  * Reduces the refcount on the userspace mapping of the buffer
@@ -253,7 +263,7 @@ struct brw_bo *brw_bo_gem_create_from_name(struct 
brw_bufmgr *bufmgr,
unsigned int handle);
 void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
 MUST_CHECK void *brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo);
-MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo);
+MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, 
unsigned flags);
 
 int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
 
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 2bfd812..4cbd60e 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw,
if (!read_oa_samples(brw))
   goto error;
 

[Mesa-dev] [PATCH 03/16] i965: Remove unused 'use_resource_streamer' field

2017-05-24 Thread Matt Turner
Missing in the resource streamer removal of commit 951f56cd43bc.
---
 src/mesa/drivers/dri/i965/brw_context.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index b99d2a9..f902265 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -772,7 +772,6 @@ struct brw_context
bool has_pln;
bool no_simd8;
bool use_rep_send;
-   bool use_resource_streamer;
 
/**
 * Some versions of Gen hardware don't do centroid interpolation correctly
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/16] i965: Rename brw_bo_map() -> brw_bo_map_cpu()

2017-05-24 Thread Matt Turner
I'm going to make a new function named brw_bo_map() in a later patch
that is responsible for choosing the mapping type, so this patch clears
the way.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 9 +
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 4 ++--
 src/mesa/drivers/dri/i965/brw_performance_query.c | 6 +++---
 src/mesa/drivers/dri/i965/brw_program.c   | 2 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c | 6 +++---
 src/mesa/drivers/dri/i965/brw_queryobj.c  | 2 +-
 src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 +-
 src/mesa/drivers/dri/i965/gen6_sol.c  | 2 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++--
 src/mesa/drivers/dri/i965/intel_buffer_objects.c  | 6 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++--
 src/mesa/drivers/dri/i965/intel_pixel_read.c  | 2 +-
 src/mesa/drivers/dri/i965/intel_screen.c  | 4 ++--
 src/mesa/drivers/dri/i965/intel_tex_image.c   | 2 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c| 2 +-
 src/mesa/drivers/dri/i965/intel_upload.c  | 2 +-
 16 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index fb178e6..b36979f 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -659,7 +659,7 @@ set_domain(struct brw_context *brw, const char *action,
 }
 
 void *
-brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int write_enable)
+brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, int write_enable)
 {
struct brw_bufmgr *bufmgr = bo->bufmgr;
 
@@ -668,7 +668,7 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int 
write_enable)
if (!bo->map_cpu) {
   struct drm_i915_gem_mmap mmap_arg;
 
-  DBG("bo_map: %d (%s), map_count=%d\n",
+  DBG("brw_bo_map_cpu: %d (%s), map_count=%d\n",
   bo->gem_handle, bo->name, bo->map_count);
 
   memclear(mmap_arg);
@@ -686,7 +686,8 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int 
write_enable)
   VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
   bo->map_cpu = (void *) (uintptr_t) mmap_arg.addr_ptr;
}
-   DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_cpu);
+   DBG("brw_bo_map_cpu: %d (%s) -> %p\n", bo->gem_handle, bo->name,
+   bo->map_cpu);
 
set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
   write_enable ? I915_GEM_DOMAIN_CPU : 0);
@@ -793,7 +794,7 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo)
/* If the CPU cache isn't coherent with the GTT, then use a
 * regular synchronized mapping.  The problem is that we don't
 * track where the buffer was last used on the CPU side in
-* terms of brw_bo_map vs brw_bo_map_gtt, so
+* terms of brw_bo_map_cpu vs brw_bo_map_gtt, so
 * we would potentially corrupt the buffer even when the user
 * does reasonable things.
 */
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index ae77e05..3dbde21 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -137,7 +137,7 @@ struct brw_bo {
  *
  * Buffer objects are not necessarily initially mapped into CPU virtual
  * address space or graphics device aperture.  They must be mapped
- * using bo_map() or brw_bo_map_gtt() to be used by the CPU.
+ * using brw_bo_map_cpu() or brw_bo_map_gtt() to be used by the CPU.
  */
 struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
 uint64_t size, uint64_t alignment);
@@ -179,7 +179,7 @@ void brw_bo_unreference(struct brw_bo *bo);
  * This function will block waiting for any existing execution on the
  * buffer to complete, first.  The resulting mapping is returned.
  */
-MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int 
write_enable);
+MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, 
int write_enable);
 
 /**
  * Reduces the refcount on the userspace mapping of the buffer
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 1342ece..2bfd812 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw,
if (!read_oa_samples(brw))
   goto error;
 
-   query_buffer = brw_bo_map(brw, obj->oa.bo, false);
+   query_buffer = brw_bo_map_cpu(brw, obj->oa.bo, false);
 
start = last = query_buffer;
end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
@@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx,
   MI_RPC_BO_SIZE, 64);
 #ifdef DEBUG
   /* Pre-filling the BO helps debug whether writes landed. */
-  void *map = brw_bo_map(brw, 

[Mesa-dev] [PATCH 04/16] i965: Add a cache_coherent field to brw_bo

2017-05-24 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c| 1 +
 src/mesa/drivers/dri/i965/brw_bufmgr.h| 5 +
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 9a65d32..2b42182 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -351,6 +351,7 @@ retry:
bo->name = name;
p_atomic_set(&bo->refcount, 1);
bo->reusable = true;
+   bo->cache_coherent = bufmgr->has_llc;
 
pthread_mutex_unlock(&bufmgr->lock);
 
diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
b/src/mesa/drivers/dri/i965/brw_bufmgr.h
index 1e3e8cf..6ce14bb 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
@@ -123,6 +123,11 @@ struct brw_bo {
 * Boolean of whether this buffer can be re-used
 */
bool reusable;
+
+   /**
+* Boolean of whether this buffer is cache coherent
+*/
+   bool cache_coherent;
 };
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 6c308ff..049b4c1 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -627,6 +627,9 @@ miptree_create(struct brw_context *brw,
   alloc_flags);
}
 
+   if (layout_flags & MIPTREE_LAYOUT_FOR_SCANOUT)
+  mt->bo->cache_coherent = false;
+
return mt;
 }
 
-- 
2.10.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/16] i965: Rename *_virtual -> map_*

2017-05-24 Thread Matt Turner
I think these are better names, and it reduces the delta between
upstream and Chris Wilson's brw-batch branch.
---
 src/mesa/drivers/dri/i965/brw_bufmgr.c | 56 +-
 src/mesa/drivers/dri/i965/brw_bufmgr.h |  6 ++--
 2 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
b/src/mesa/drivers/dri/i965/brw_bufmgr.c
index 2b42182..fb178e6 100644
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -507,16 +507,16 @@ bo_free(struct brw_bo *bo)
struct hash_entry *entry;
int ret;
 
-   if (bo->mem_virtual) {
-  VG(VALGRIND_FREELIKE_BLOCK(bo->mem_virtual, 0));
-  drm_munmap(bo->mem_virtual, bo->size);
+   if (bo->map_cpu) {
+  VG(VALGRIND_FREELIKE_BLOCK(bo->map_cpu, 0));
+  drm_munmap(bo->map_cpu, bo->size);
}
-   if (bo->wc_virtual) {
-  VG(VALGRIND_FREELIKE_BLOCK(bo->wc_virtual, 0));
-  drm_munmap(bo->wc_virtual, bo->size);
+   if (bo->map_wc) {
+  VG(VALGRIND_FREELIKE_BLOCK(bo->map_wc, 0));
+  drm_munmap(bo->map_wc, bo->size);
}
-   if (bo->gtt_virtual) {
-  drm_munmap(bo->gtt_virtual, bo->size);
+   if (bo->map_gtt) {
+  drm_munmap(bo->map_gtt, bo->size);
}
 
if (bo->global_name) {
@@ -541,14 +541,14 @@ static void
 bo_mark_mmaps_incoherent(struct brw_bo *bo)
 {
 #if HAVE_VALGRIND
-   if (bo->mem_virtual)
-  VALGRIND_MAKE_MEM_NOACCESS(bo->mem_virtual, bo->size);
+   if (bo->map_cpu)
+  VALGRIND_MAKE_MEM_NOACCESS(bo->map_cpu, bo->size);
 
-   if (bo->wc_virtual)
-  VALGRIND_MAKE_MEM_NOACCESS(bo->wc_virtual, bo->size);
+   if (bo->map_wc)
+  VALGRIND_MAKE_MEM_NOACCESS(bo->map_wc, bo->size);
 
-   if (bo->gtt_virtual)
-  VALGRIND_MAKE_MEM_NOACCESS(bo->gtt_virtual, bo->size);
+   if (bo->map_gtt)
+  VALGRIND_MAKE_MEM_NOACCESS(bo->map_gtt, bo->size);
 #endif
 }
 
@@ -665,7 +665,7 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int 
write_enable)
 
pthread_mutex_lock(&bufmgr->lock);
 
-   if (!bo->mem_virtual) {
+   if (!bo->map_cpu) {
   struct drm_i915_gem_mmap mmap_arg;
 
   DBG("bo_map: %d (%s), map_count=%d\n",
@@ -684,18 +684,18 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, 
int write_enable)
   }
   bo->map_count++;
   VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
-  bo->mem_virtual = (void *) (uintptr_t) mmap_arg.addr_ptr;
+  bo->map_cpu = (void *) (uintptr_t) mmap_arg.addr_ptr;
}
-   DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->mem_virtual);
+   DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_cpu);
 
set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
   write_enable ? I915_GEM_DOMAIN_CPU : 0);
 
bo_mark_mmaps_incoherent(bo);
-   VG(VALGRIND_MAKE_MEM_DEFINED(bo->mem_virtual, bo->size));
+   VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_cpu, bo->size));
pthread_mutex_unlock(&bufmgr->lock);
 
-   return bo->mem_virtual;
+   return bo->map_cpu;
 }
 
 static void *
@@ -704,7 +704,7 @@ map_gtt(struct brw_bo *bo)
struct brw_bufmgr *bufmgr = bo->bufmgr;
 
/* Get a mapping of the buffer if we haven't before. */
-   if (bo->gtt_virtual == NULL) {
+   if (bo->map_gtt == NULL) {
   struct drm_i915_gem_mmap_gtt mmap_arg;
 
   DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
@@ -722,10 +722,10 @@ map_gtt(struct brw_bo *bo)
   }
 
   /* and mmap it */
-  bo->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
- MAP_SHARED, bufmgr->fd, mmap_arg.offset);
-  if (bo->gtt_virtual == MAP_FAILED) {
- bo->gtt_virtual = NULL;
+  bo->map_gtt = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bufmgr->fd, mmap_arg.offset);
+  if (bo->map_gtt == MAP_FAILED) {
+ bo->map_gtt = NULL;
  DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
  __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
  return NULL;
@@ -733,10 +733,10 @@ map_gtt(struct brw_bo *bo)
}
 
DBG("bo_map_gtt: %d (%s) -> %p\n", bo->gem_handle, bo->name,
-   bo->gtt_virtual);
+   bo->map_gtt);
 
bo->map_count++;
-   return bo->gtt_virtual;
+   return bo->map_gtt;
 }
 
 void *
@@ -765,7 +765,7 @@ brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo)
   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
 
bo_mark_mmaps_incoherent(bo);
-   VG(VALGRIND_MAKE_MEM_DEFINED(bo->gtt_virtual, bo->size));
+   VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size));
pthread_mutex_unlock(>lock);
 
return map;
@@ -805,7 +805,7 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct 
brw_bo *bo)
void *map = map_gtt(bo);
if (map != NULL) {
   bo_mark_mmaps_incoherent(bo);
-  VG(VALGRIND_MAKE_MEM_DEFINED(bo->gtt_virtual, bo->size));
+  VG(VALGRIND_MAKE_MEM_DEFINED(bo->map_gtt, bo->size));
}
 
pthread_mutex_unlock(>lock);
diff --git 

[Mesa-dev] [PATCH 00/16] i965: Write-combine mappings and other performance improvements for non-LLC

2017-05-24 Thread Matt Turner
The series aims to improve performance on non-LLC platforms like Braswell and
Broxton.

Unsynchronized mappings were not actually unsynchronized on non-LLC platforms,
hurting Unigine Valley performance quite a lot. That's fixed. We also start
using write-combining, a feature available since Linux v4.0.

With WC mappings in place, I've also enabled our tiled memcpy fast paths on
non-LLC platforms. I've done that in three separate patches in order to
benchmark them independently.

TODO:
   More benchmarking, include data in commit messages
   Sort out authorship (lots of these patches have chunks split out of a large
patch from Chris Wilson's brw-batch branch)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 1/4] gallium: Add TGSI shader token for ARB_post_depth_coverage

2017-05-24 Thread Lyude
Signed-off-by: Lyude 

Changes since v2:
- Add missing string to tgsi_strings.c
Changes since v3:
- Reorder patch to be before we add the cap for this, since we can't have a cap
  for a TGSI property that doesn't exist yet
- Document the actual property separately from the cap (whoops)

Signed-off-by: Lyude 
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c  | 1 +
 src/gallium/docs/source/tgsi.rst   | 7 +++
 src/gallium/include/pipe/p_shader_tokens.h | 1 +
 3 files changed, 9 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 2640350..7ce12d3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -151,6 +151,7 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
"NUM_CLIPDIST_ENABLED",
"NUM_CULLDIST_ENABLED",
"FS_EARLY_DEPTH_STENCIL",
+   "FS_POST_DEPTH_COVERAGE",
"NEXT_SHADER",
"CS_FIXED_BLOCK_WIDTH",
"CS_FIXED_BLOCK_HEIGHT",
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index e740476..5dcb6ba 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3720,6 +3720,13 @@ DP3, DP4, DPH, DST, LOG, LRP, XPD, and possibly others. 
If there is a
 mismatch between shaders, then it is unspecified whether this behavior
 will be enabled.
 
+FS_POST_DEPTH_COVERAGE
+""""""""""""""""""""""
+
+When enabled, the input for TGSI_SEMANTIC_SAMPLEMASK will exclude samples
+that have failed the depth/stencil tests. As well, this implicitly enables
+FS_EARLY_DEPTH_STENCIL.
+
 
 Texture Sampling and Texture Formats
 
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index cb49e3b..1e08d97 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -293,6 +293,7 @@ enum tgsi_property_name {
TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL,
+   TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE,
TGSI_PROPERTY_NEXT_SHADER,
TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 3/4] st/mesa: Add support for ARB_post_depth_coverage

2017-05-24 Thread Lyude
Changes since v3:
- Add logic to the GL plumbing to implicitly enable early_fragment_tests
  for the fragment shader when post_depth_coverage is enabled.

Signed-off-by: Lyude 
---
 src/mesa/state_tracker/st_extensions.c | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 7 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index f3db702..7674156 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -600,6 +600,7 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY  
},
   { o(ARB_pipeline_statistics_query),
PIPE_CAP_QUERY_PIPELINE_STATISTICS},
   { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE 
},
+  { o(ARB_post_depth_coverage),  PIPE_CAP_POST_DEPTH_COVERAGE  
},
   { o(ARB_query_buffer_object),  PIPE_CAP_QUERY_BUFFER_OBJECT  
},
   { o(ARB_robust_buffer_access_behavior), 
PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR   },
   { o(ARB_sample_shading),   PIPE_CAP_SAMPLE_SHADING   
},
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 76cd4dc..ea95857 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6376,9 +6376,14 @@ st_translate_program(
}
 
if (procType == PIPE_SHADER_FRAGMENT) {
-  if (program->shader->Program->info.fs.early_fragment_tests)
+  if (program->shader->Program->info.fs.early_fragment_tests ||
+  program->shader->Program->info.fs.post_depth_coverage) {
  ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
 
+ if (program->shader->Program->info.fs.post_depth_coverage)
+ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
+  }
+
   if (proginfo->info.inputs_read & VARYING_BIT_POS) {
   /* Must do this after setting up t->inputs. */
   emit_wpos(st_context(ctx), t, proginfo, ureg,
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v4 4/4] nv50/ir, nvc0: Add support for ARB_post_depth_coverage

2017-05-24 Thread Lyude
This adds support for the GL_ARB_post_depth_coverage extension on nvc0
GPUs, GM200 and up. Similar to what we do for early fragment testing,
we only call the method for updating the post depth coverage status for
fragment programs when it changes.

Signed-off-by: Lyude 
---
 docs/relnotes/17.2.0.html | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 3 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h| 2 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c| 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h| 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c  | 5 +
 9 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/17.2.0.html b/docs/relnotes/17.2.0.html
index 426bb72..135d1e8 100644
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -44,6 +44,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+GL_ARB_post_depth_coverage on nvc0 (GM200+)
 GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)
 GL_AMD_vertex_shader_layer on nvc0 (GM200+)
 GL_AMD_vertex_shader_viewport_index on nvc0 (GM200+)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 1962ead..76f08b1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -137,6 +137,7 @@ struct nv50_ir_prog_info
  unsigned numColourResults;
  bool writesDepth;
  bool earlyFragTests;
+ bool postDepthCoverage;
  bool separateFragData;
  bool usesDiscard;
  bool persampleInvocation;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index b583001..1264dd4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1277,6 +1277,9 @@ void Source::scanProperty(const struct tgsi_full_property 
*prop)
case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
   info->prop.fp.earlyFragTests = prop->u[0].Data;
   break;
+   case TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE:
+  info->prop.fp.postDepthCoverage = prop->u[0].Data;
+  break;
case TGSI_PROPERTY_MUL_ZERO_WINS:
   info->io.mul_zero_wins = prop->u[0].Data;
   break;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
index accde94..d7245fb 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -631,6 +631,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #define NVC0_3D_UNK0F00__ESIZE 0x0004
 #define NVC0_3D_UNK0F00__LEN   0x0004
 
+#define NVC0_3D_POST_DEPTH_COVERAGE0x0f1c
+
 #define NVE4_3D_UNK0F20(i0)   (0x0f20 + 
0x4*(i0))
 #define NVE4_3D_UNK0F20__ESIZE 0x0004
 #define NVE4_3D_UNK0F20__LEN   0x0005
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 27740bc..e43a8de 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -487,6 +487,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct 
nv50_ir_prog_info *info)
fp->fp.early_z = info->prop.fp.earlyFragTests;
fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
+   fp->fp.post_depth_coverage = info->prop.fp.postDepthCoverage;
 
/* Mark position xy and layer as read */
if (fp->fp.reads_framebuffer)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 421ca19..b73822e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -50,6 +50,7 @@ struct nvc0_program {
   bool force_persample_interp;
   bool flatshade;
   bool reads_framebuffer;
+  bool post_depth_coverage;
} fp;
struct {
   uint32_t tess_mode; /* ~0 if defined by the other stage */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 4a95e73..b29cd7f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -261,6 +261,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case 

[Mesa-dev] [PATCH v4 2/4] gallium: Add a cap to check if the driver supports ARB_post_depth_coverage

2017-05-24 Thread Lyude
Signed-off-by: Lyude 

Changes since v1:
- Correct documentation in screen.rst for new cap
Changes since v2:
- Fix bogus documentation
Changes since v3:
- Reorder patch to be after we add the TGSI property, since we can't
  have a cap for a property that doesn't exist yet.
- Document the cap separately from the property it's for

Signed-off-by: Lyude 
---
 src/gallium/docs/source/screen.rst   | 2 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 17 files changed, 18 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 871669c..852c31b 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -392,6 +392,8 @@ The integer capabilities:
 * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
   PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
 * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
+* ``PIPE_CAP_POST_DEPTH_COVERAGE``: whether
+  ``TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE`` is supported.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index bf13184..8fd1184 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -256,6 +256,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
   return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 99a6d3c..58fa997 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -314,6 +314,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 1cf9441..a0dbc12 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -277,6 +277,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
   return 0;
 
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 656de49..4e8c110 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -354,6 +354,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index e8d14bf..553ee49 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -218,6 +218,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_BALLOT:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 59afd14..631b2c9 100644
--- 

[Mesa-dev] [PATCH v4 0/4] ARB_post_depth_coverage for nvc0

2017-05-24 Thread Lyude
This is a patch series to add support for ARB_post_depth_coverage in nvc0,
along with the required bits for gallium, tgsi, and mesa's state tracker.

Lyude (4):
  gallium: Add TGSI shader token for ARB_post_depth_coverage
  gallium: Add a cap to check if the driver supports
ARB_post_depth_coverage
  st/mesa: Add support for ARB_post_depth_coverage
  nv50/ir, nvc0: Add support for ARB_post_depth_coverage

 docs/relnotes/17.2.0.html | 1 +
 src/gallium/auxiliary/tgsi/tgsi_strings.c | 1 +
 src/gallium/docs/source/screen.rst| 2 ++
 src/gallium/docs/source/tgsi.rst  | 7 +++
 src/gallium/drivers/etnaviv/etnaviv_screen.c  | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c  | 1 +
 src/gallium/drivers/i915/i915_screen.c| 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 3 +++
 src/gallium/drivers/nouveau/nv30/nv30_screen.c| 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c| 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h| 2 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.h   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c| 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h| 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c  | 5 +
 src/gallium/drivers/r300/r300_screen.c| 1 +
 src/gallium/drivers/r600/r600_pipe.c  | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c| 1 +
 src/gallium/drivers/softpipe/sp_screen.c  | 1 +
 src/gallium/drivers/svga/svga_screen.c| 1 +
 src/gallium/drivers/swr/swr_screen.cpp| 1 +
 src/gallium/drivers/vc4/vc4_screen.c  | 1 +
 src/gallium/drivers/virgl/virgl_screen.c  | 1 +
 src/gallium/include/pipe/p_defines.h  | 1 +
 src/gallium/include/pipe/p_shader_tokens.h| 1 +
 src/mesa/state_tracker/st_extensions.c| 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp| 7 ++-
 30 files changed, 49 insertions(+), 1 deletion(-)

-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 3/4] st/mesa: Add support for ARB_post_depth_coverage

2017-05-24 Thread Marek Olšák
On Wed, May 24, 2017 at 2:52 AM, Lyude  wrote:
> Signed-off-by: Lyude 
> ---
>  src/mesa/state_tracker/st_extensions.c | 1 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 6 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/state_tracker/st_extensions.c 
> b/src/mesa/state_tracker/st_extensions.c
> index f3db702..7674156 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -600,6 +600,7 @@ void st_init_extensions(struct pipe_screen *screen,
>{ o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY
>   },
>{ o(ARB_pipeline_statistics_query),
> PIPE_CAP_QUERY_PIPELINE_STATISTICS},
>{ o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE   
>   },
> +  { o(ARB_post_depth_coverage),  PIPE_CAP_POST_DEPTH_COVERAGE
>   },
>{ o(ARB_query_buffer_object),  PIPE_CAP_QUERY_BUFFER_OBJECT
>   },
>{ o(ARB_robust_buffer_access_behavior), 
> PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR   },
>{ o(ARB_sample_shading),   PIPE_CAP_SAMPLE_SHADING 
>   },
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 76cd4dc..e038206 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -6376,9 +6376,13 @@ st_translate_program(
> }
>
> if (procType == PIPE_SHADER_FRAGMENT) {
> -  if (program->shader->Program->info.fs.early_fragment_tests)
> +  if (program->shader->Program->info.fs.early_fragment_tests) {
>   ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
>
> + if (program->shader->Program->info.fs.post_depth_coverage)
> +ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
> +  }
> +

NAK. It's still wrong and the same as the first version.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] drirc: set force_glsl_version for Alchemist's Awakening

2017-05-24 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Wed, May 24, 2017 at 1:01 AM, Samuel Pitoiset
 wrote:
> A bunch of shaders are missing a version directive. This fixes
> the following compilation error and allows the game to launch.
>
> 0:43(28): error: cannot initialize uniform typeColor in GLSL 1.10 (GLSL 1.20 
> required)
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/mesa/drivers/dri/common/drirc | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/common/drirc 
> b/src/mesa/drivers/dri/common/drirc
> index d820462fad0..111b53646e9 100644
> --- a/src/mesa/drivers/dri/common/drirc
> +++ b/src/mesa/drivers/dri/common/drirc
> @@ -151,5 +151,9 @@ TODO: document the other workarounds.
>   executable="KSP.x86_64">
>  
>  
> +
> +
> +
> +
>  
>  
> --
> 2.13.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 99467] [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)

2017-05-24 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99467

--- Comment #25 from Pavel Bordukov  ---
(In reply to Marcin Bujar from comment #24)
> FYI on archlinux if you apply the following two patches to mesa 17.1 and
> recompile with llvm 4.0 from official repo the game works perfectly, no
> green screen or black textures:
> 
> https://github.com/airlied/mesa/commit/
> cfe39ca4a4d922a063ff120f5c0db53ff347349c
> https://github.com/airlied/mesa/commit/
> a163083b481d924a476cfa2fdf8e3b8f398fb62a
> 
> Much better than messing around with mesa-git and llvm-svn. Now if only
> those patches could be merged ... :)

Thanks! That works!

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Flip CCW in tess.

2017-05-24 Thread Bas Nieuwenhuizen
Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_pipeline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 3282652ddd4..01303d90da5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -686,6 +686,7 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
if (tcs_nir == NULL)
return;
 
+   tes_nir->info.tess.ccw = !tes_nir->info.tess.ccw;
nir_lower_tes_patch_vertices(tes_nir,
 tcs_nir->info.tess.tcs_vertices_out);
 
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] spirv: Flip the tessellation winding order

2017-05-24 Thread Jason Ekstrand
It's not SPIR-V that's backwards from GLSL, it's Vulkan that's backwards
from GL.  Let's make NIR consistent with the source language and do the
flipping inside the Vulkan driver instead.

Cc: Kenneth Graunke 
Cc: Dave Airlie 
---
 src/compiler/spirv/spirv_to_nir.c |  9 ++---
 src/intel/vulkan/genX_pipeline.c  | 13 -
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 0a5eb0e..d7eb940 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2896,17 +2896,12 @@ vtn_handle_execution_mode(struct vtn_builder *b, struct 
vtn_value *entry_point,
case SpvExecutionModeVertexOrderCw:
   assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
  b->shader->stage == MESA_SHADER_TESS_EVAL);
-  /* Vulkan's notion of CCW seems to match the hardware backends,
-   * but be the opposite of OpenGL.  Currently NIR follows GL semantics,
-   * so we set it backwards here.
-   */
-  b->shader->info.tess.ccw = true;
+  b->shader->info.tess.ccw = false;
   break;
case SpvExecutionModeVertexOrderCcw:
   assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
  b->shader->stage == MESA_SHADER_TESS_EVAL);
-  /* Backwards; see above */
-  b->shader->info.tess.ccw = false;
+  b->shader->info.tess.ccw = true;
   break;
case SpvExecutionModePointMode:
   assert(b->shader->stage == MESA_SHADER_TESS_CTRL ||
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 55db533..07abe19 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1217,7 +1217,18 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline)
 
anv_batch_emit(>batch, GENX(3DSTATE_TE), te) {
   te.Partitioning = tes_prog_data->partitioning;
-  te.OutputTopology = tes_prog_data->output_topology;
+
+  /* Vulkan has its winding order backwards from GL so TRI_CCW becomes
+   * TRI_CW and vice versa.
+   */
+  if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
+ te.OutputTopology = OUTPUT_TRI_CW;
+  } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
+ te.OutputTopology = OUTPUT_TRI_CCW;
+  } else {
+ te.OutputTopology = tes_prog_data->output_topology;
+  }
+
   te.TEDomain = tes_prog_data->domain;
   te.TEEnable = true;
   te.MaximumTessellationFactorOdd = 63.0;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: Also signal fence if vkAcquireNextImageKHR returns VK_SUBOPTIMAL_KHR.

2017-05-24 Thread Bas Nieuwenhuizen
It is a successful return.

Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/vulkan/radv_wsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index 3a8617fd8fa..5e866126b91 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -438,7 +438,7 @@ VkResult radv_AcquireNextImageKHR(
VkResult result = swapchain->acquire_next_image(swapchain, timeout, 
semaphore,
pImageIndex);
 
-   if (fence && result == VK_SUCCESS) {
+   if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
fence->submitted = true;
fence->signalled = true;
}
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: update libdrm amdgpu version requirement

2017-05-24 Thread Alex Deucher
On Wed, May 24, 2017 at 12:05 PM, Leo Liu  wrote:
> VCN decode has a new interface, and that depends on the latest libdrm
>
> Signed-off-by: Leo Liu 

Reviewed-by: Alex Deucher 

> ---
>  configure.ac | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/configure.ac b/configure.ac
> index 1da67f5..b57be07 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
>  # in the first entry.
>  LIBDRM_REQUIRED=2.4.75
>  LIBDRM_RADEON_REQUIRED=2.4.71
> -LIBDRM_AMDGPU_REQUIRED=2.4.79
> +LIBDRM_AMDGPU_REQUIRED=2.4.81
>  LIBDRM_INTEL_REQUIRED=2.4.75
>  LIBDRM_NVVIEUX_REQUIRED=2.4.66
>  LIBDRM_NOUVEAU_REQUIRED=2.4.66
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/24] i965/cnl: Add gen10 specific function declarations

2017-05-24 Thread Anuj Phogat
On Mon, May 22, 2017 at 11:23 AM, Anuj Phogat  wrote:
> On Mon, May 22, 2017 at 10:07 AM, Rafael Antognolli
>  wrote:
>> On Fri, May 12, 2017 at 04:38:10PM -0700, Anuj Phogat wrote:
>>> These declarations will help the code start compiling
>>> once we wire up the makefiles for gen10. Later patches
>>> will start using these functions for gen10.
>>>
>>> Signed-off-by: Anuj Phogat 
>>> ---
>>>  src/intel/isl/isl_priv.h  | 12 
>>>  src/mesa/drivers/dri/i965/brw_blorp.h |  2 ++
>>>  src/mesa/drivers/dri/i965/brw_state.h |  1 +
>>>  3 files changed, 15 insertions(+)
>>>
>>> diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h
>>> index 3c4cc1e..04adefa 100644
>>> --- a/src/intel/isl/isl_priv.h
>>> +++ b/src/intel/isl/isl_priv.h
>>> @@ -178,6 +178,10 @@ isl_gen9_surf_fill_state_s(const struct isl_device 
>>> *dev, void *state,
>>> const struct isl_surf_fill_state_info *restrict 
>>> info);
>>>
>>>  void
>>> +isl_gen10_surf_fill_state_s(const struct isl_device *dev, void *state,
>>> +const struct isl_surf_fill_state_info 
>>> *restrict info);
>>> +
>>> +void
>>>  isl_gen4_buffer_fill_state_s(void *state,
>>>   const struct isl_buffer_fill_state_info 
>>> *restrict info);
>>>
>>> @@ -206,6 +210,10 @@ isl_gen9_buffer_fill_state_s(void *state,
>>>   const struct isl_buffer_fill_state_info 
>>> *restrict info);
>>>
>>>  void
>>> +isl_gen10_buffer_fill_state_s(void *state,
>>> +  const struct isl_buffer_fill_state_info 
>>> *restrict info);
>>> +
>>> +void
>>>  isl_gen4_emit_depth_stencil_hiz_s(const struct isl_device *dev, void 
>>> *batch,
>>>const struct 
>>> isl_depth_stencil_hiz_emit_info *restrict info);
>>>
>>> @@ -233,4 +241,8 @@ void
>>>  isl_gen9_emit_depth_stencil_hiz_s(const struct isl_device *dev, void 
>>> *batch,
>>>const struct 
>>> isl_depth_stencil_hiz_emit_info *restrict info);
>>>
>>> +void
>>> +isl_gen10_emit_depth_stencil_hiz_s(const struct isl_device *dev, void 
>>> *batch,
>>> +   const struct 
>>> isl_depth_stencil_hiz_emit_info *restrict info);
>>> +
>>>  #endif /* ISL_PRIV_H */
>>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
>>> b/src/mesa/drivers/dri/i965/brw_blorp.h
>>> index ee4bf3b..d635d79 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>>> @@ -82,6 +82,8 @@ void gen8_blorp_exec(struct blorp_batch *batch,
>>>   const struct blorp_params *params);
>>>  void gen9_blorp_exec(struct blorp_batch *batch,
>>>   const struct blorp_params *params);
>>> +void gen10_blorp_exec(struct blorp_batch *batch,
>>> +  const struct blorp_params *params);
>>>
>>>  #ifdef __cplusplus
>>>  } /* extern "C" */
>>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
>>> b/src/mesa/drivers/dri/i965/brw_state.h
>>> index 4727e2a..4592e3e 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>>> @@ -364,6 +364,7 @@ void gen7_init_atoms(struct brw_context *brw);
>>>  void gen75_init_atoms(struct brw_context *brw);
>>>  void gen8_init_atoms(struct brw_context *brw);
>>>  void gen9_init_atoms(struct brw_context *brw);
>>> +void gen10_init_atoms(struct brw_context *brw);
>>
>> I couldn't find it in the other patches, so pardon me if you did it. But you
>> should also use the gen10_init_atoms inside brw_init_state() -
>> brw_state_upload.c.
>>
> It is in  [PATCH V2 14/24] i965/cnl: Handle gen10 in switch cases
> across the driver
Rafael, do you have any other questions for this patch?

>>>  void upload_gs_state_for_tf(struct brw_context *brw);
>>>
>>> --
>>> 2.9.3
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2] intel/compiler: Make use of IS_INTEGER_DWORD macro

2017-05-24 Thread Anuj Phogat
On Wed, May 24, 2017 at 10:06 AM, Matt Turner  wrote:
> On Wed, May 24, 2017 at 9:21 AM, Anuj Phogat  wrote:
>> This patch makes non-functional changes.
>>
>> V2: Rename IS_DWORD to IS_INTEGER_DWORD
>
>
> To be honest, I don't like IS_DWORD/IS_INTEGER_DWORD -- for the same
> reason Alejandro noted. It's not clear what it means exactly, and in
> most cases it would just be clearer to test against
> BRW_REGISTER_TYPE_D/UD.
>
> I'd prefer we removed IS_DWORD.
If it's not helping, I'm fine with dropping this patch.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2] intel/compiler: Make use of IS_INTEGER_DWORD macro

2017-05-24 Thread Matt Turner
On Wed, May 24, 2017 at 9:21 AM, Anuj Phogat  wrote:
> This patch makes non-functional changes.
>
> V2: Rename IS_DWORD to IS_INTEGER_DWORD


To be honest, I don't like IS_DWORD/IS_INTEGER_DWORD -- for the same
reason Alejandro noted. It's not clear what it means exactly, and in
most cases it would just be clearer to test against
BRW_REGISTER_TYPE_D/UD.

I'd prefer we removed IS_DWORD.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100613] Regression in Mesa 17 on s390x (zSystems)

2017-05-24 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100613

--- Comment #25 from Ben Crocker  ---
(In reply to Ray Strode from comment #24)
...
> Yea, I guess thinking about it more, even if we can get scalar fetch to work
> with sufficient twiddling, that twiddling probably introduces extra
> operations per element, so maybe not a good idea.  I guess we should take
> another crack at attachment 131000 [details] [review] first.

I have been looking at the assembly code for the 3x16 case generated
on both big- and little-endian machines.  This case stems from the
piglit/tests/general/draw-vertices:test_short_vertices function.

First, the LLVM IR I'm focusing on is the IR generated by
lp_build_gather_elem_vec (called by lp_build_gather, called by
lp_build_fetch_rgba_soa, called by fetch_vector...); the IR looks like
this:

  %"lp_build_gather_elem_ptr:72.21" = extractelement <2 x i32>
%"lp_build_fetch_rgba_soa:557.", i32 0
  %"lp_build_gather_elem_ptr:75.22" = getelementptr i8, i8* %map_ptr, i32
%"lp_build_gather_elem_ptr:72.21"
  %"lp_build_gather_elem_vec:189.23" = bitcast i8*
%"lp_build_gather_elem_ptr:75.22" to i48*
  %"lp_build_gather_elem_vec:190.24" = load i48, i48*
%"lp_build_gather_elem_vec:189.23", align 1

where I've used that last parameter to the LLVMBuild* calls, the
parameter that appears as a null string in the production code, to
contain the function name and the line number, which end up getting
inserted in LLVM's result name.

If you prefer the IR without the debug info, here it is:

  %21 = extractelement <2 x i32> %"0, i32 0
  %22 = getelementptr i8, i8* %map_ptr, i32 %21
  %23 = bitcast i8* %22 to i48*
  %24 = load i48, i48* %23, align 1

I've modified the data in the draw-vertices program so the values
stick out in the registers; the data I'm using for this example looks
like this:

(gdb) x/6h $r4
0x1029ca00: 0x0015  0x0011  0x  0x0015  0x0016  0x
X1  Y1  Z1  X1  Y2  Z2

In general, PPC assembly code is a three-operand code where the syntax
is (usually)
OPCODE  target, src1, src2

Load/store syntax is
LOAD    RT, immediate-displacement(RA)
LOADx   RT, RA, RB;; where effective addr is RA + RB (LOADx = load
indexed)
LOADux  RT, RA, RB;; where effective addr is RA + RB (LOADux = load
indexed w/ update; RT <- MEM(RA + RB) AND RA <- RA + RB)
STORE   RS, immediate-displacement(RA)
STOREx  RS, RA, RB;; where effective addr is RA + RB (STOREx = store
indexed)
STOREux RS, RA, RB;; where effective addr is RA + RB (STOREux = store
indexed w/ update)

Both the LOAD and the LOADux variants appear below.

The Rotate instructions have more complex syntax with four operands.
The target is on the left, as usual; the source operands are:
. source register;
. the (immediate) number of bits to shift;
. an (immediate) mask specification M, with the semantics
  "AND the penultimate result with a mask consisting of bits 0:M = 1, M+1:63 =
0"

Please note that these descriptions are over-simplified.

It is important to note that, whether the machine is little-endian or
big-endian, BITS IN A REGISTER ARE NUMBERED FROM THE LEFT.  I.e.,
the most significant bit is bit 0, ... the least significant bit is
bit 63.

On a (little-endian) PPC64LE machine, the assembly code looks like this:

=> 0x3fffab370404:  lwzux   r3,r4,r3;; Load Word w/ zero-extend &
update ; r3 <- 0x11.0015 = Y.X
   0x3fffab370408:  lhz r4,4(r4);; Load halfword w/ zero-extend
; r4 <- 0x = Z
   0x3fffab37040c:  std r2,24(r1)
   0x3fffab370410:  rldicr  r4,r4,32,31 ;; Rotate Left doubleword
immediate & clear right; imm = 32, mask = 0:31 => r4 <- 0x.. =
0.Z.0.0
   0x3fffab370414:  or  r25,r3,r4   ;; r25 <- 0x.0011.0015 =
Z.Y.X

So, the operation of loading a 48-bit int corresponds well with
loading the 3-vector of int16's into the 64-bit target register.

On a big-endian PPC64 machine, the assembly code looks like this:

0x3fffaace0538:  lwzux   r3,r4,r3   ;; r3 = 0, r4 = 0x10273d80; r3 <-
0x15.0011, i.e. X.Y
0x3fffaace053c:  lhz r4,4(r4)   ;; r4 <- 0x = Z
0x3fffaace0540:  rldicr  r3,r3,16,47;; Rotate Left doubleword
immediate & clear right; imm = 16, mask = 0:47 => r3 <- 0x15.0011., i.e. r3
<<= 16 = 0.X.Y.0
...
0x3fffaace0548:  or  r24,r4,r3  ;; r24 <- 0x15.0011., i.e. 0.X.Y.Z

Note that no single operation--shift, justification, "zero-extend" or
anything else--can get the 16-bit fields into the proper order for
subsequent code.

Regarding Ray's specific comment about getting scalar fetch to work
with "sufficient twiddling," I think it's perfectly acceptable to
introduce extra operations, as long as we restrict the extra
operations to the big-endian path.  PPC64 (LE or BE) is fast enough so
that any performance impact will be negligible; S390 is less fast, but
I imagine production machines with more memory than the one we

[Mesa-dev] [PATCH V2] intel/compiler: Make use of IS_INTEGER_DWORD macro

2017-05-24 Thread Anuj Phogat
This patch makes non-functional changes.

V2: Rename IS_DWORD to IS_INTEGER_DWORD

Signed-off-by: Anuj Phogat 
---
 src/intel/compiler/brw_eu_defines.h  |  3 +++
 src/intel/compiler/brw_eu_emit.c | 24 +++-
 src/intel/compiler/brw_fs.cpp|  3 +--
 src/intel/compiler/brw_fs_cmod_propagation.cpp   |  3 +--
 src/intel/compiler/brw_fs_copy_propagation.cpp   |  3 +--
 src/intel/compiler/brw_fs_generator.cpp  |  6 ++
 src/intel/compiler/brw_fs_nir.cpp|  2 +-
 src/intel/compiler/brw_vec4.cpp  |  9 ++---
 src/intel/compiler/brw_vec4_cmod_propagation.cpp |  3 +--
 src/intel/compiler/brw_vec4_generator.cpp|  2 +-
 10 files changed, 20 insertions(+), 38 deletions(-)

diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index ccc838d..423855f 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -48,6 +48,9 @@
 
 #define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> (low))
 #define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## _SHIFT)
+#define IS_INTEGER_DWORD(reg) \
+   (reg.type == BRW_REGISTER_TYPE_UD || \
+reg.type == BRW_REGISTER_TYPE_D)
 
 #define _3DPRIM_POINTLIST 0x01
 #define _3DPRIM_LINELIST  0x02
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 231d6fd..d435271 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -959,8 +959,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
brw_reg dest,
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.type == BRW_REGISTER_TYPE_F  ||
   dest.type == BRW_REGISTER_TYPE_DF ||
-  dest.type == BRW_REGISTER_TYPE_D  ||
-  dest.type == BRW_REGISTER_TYPE_UD);
+  IS_INTEGER_DWORD(dest));
if (devinfo->gen == 6) {
   brw_inst_set_3src_dst_reg_file(devinfo, inst,
  dest.file == BRW_MESSAGE_REGISTER_FILE);
@@ -1158,9 +1157,7 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, 
struct brw_reg src0)
if (devinfo->gen == 7 && !devinfo->is_haswell &&
brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
dest.type == BRW_REGISTER_TYPE_DF &&
-   (src0.type == BRW_REGISTER_TYPE_F ||
-src0.type == BRW_REGISTER_TYPE_D ||
-src0.type == BRW_REGISTER_TYPE_UD) &&
+   (src0.type == BRW_REGISTER_TYPE_F || IS_INTEGER_DWORD(src0)) &&
!has_scalar_region(src0)) {
   assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
  src0.width == BRW_WIDTH_4 &&
@@ -1182,15 +1179,13 @@ brw_ADD(struct brw_codegen *p, struct brw_reg dest,
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
-  assert(src1.type != BRW_REGISTER_TYPE_UD);
-  assert(src1.type != BRW_REGISTER_TYPE_D);
+  assert(!IS_INTEGER_DWORD(src1));
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
-  assert(src0.type != BRW_REGISTER_TYPE_UD);
-  assert(src0.type != BRW_REGISTER_TYPE_D);
+  assert(!IS_INTEGER_DWORD(src0));
}
 
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
@@ -1222,25 +1217,20 @@ brw_MUL(struct brw_codegen *p, struct brw_reg dest,
 struct brw_reg src0, struct brw_reg src1)
 {
/* 6.32.38: mul */
-   if (src0.type == BRW_REGISTER_TYPE_D ||
-   src0.type == BRW_REGISTER_TYPE_UD ||
-   src1.type == BRW_REGISTER_TYPE_D ||
-   src1.type == BRW_REGISTER_TYPE_UD) {
+   if (IS_INTEGER_DWORD(src0) || IS_INTEGER_DWORD(src1)) {
   assert(dest.type != BRW_REGISTER_TYPE_F);
}
 
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
-  assert(src1.type != BRW_REGISTER_TYPE_UD);
-  assert(src1.type != BRW_REGISTER_TYPE_D);
+  assert(!IS_INTEGER_DWORD(src1));
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
-  assert(src0.type != BRW_REGISTER_TYPE_UD);
-  assert(src0.type != BRW_REGISTER_TYPE_D);
+  assert(!IS_INTEGER_DWORD(src0));
}
 
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 329c15b..e1a2d14 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3475,8 +3475,7 @@ fs_visitor::lower_integer_multiplication()
  *
  * FINISHME: Don't use source modifiers on src1.
  */
-assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
-   mul->src[1].type == BRW_REGISTER_TYPE_UD);
+assert(IS_INTEGER_DWORD(mul->src[1]));
 

Re: [Mesa-dev] [PATCH] intel/compiler: Make use of IS_DWORD macro

2017-05-24 Thread Anuj Phogat
On Tue, May 23, 2017 at 10:59 PM, Alejandro Piñeiro
 wrote:
> On 23/05/17 22:51, Anuj Phogat wrote:
>> This patch makes non-functional changes.
>>
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/intel/compiler/brw_eu_defines.h  |  3 +++
>>  src/intel/compiler/brw_eu_emit.c | 24 
>> +++-
>>  src/intel/compiler/brw_fs.cpp|  3 +--
>>  src/intel/compiler/brw_fs_cmod_propagation.cpp   |  3 +--
>>  src/intel/compiler/brw_fs_copy_propagation.cpp   |  3 +--
>>  src/intel/compiler/brw_fs_generator.cpp  |  6 ++
>>  src/intel/compiler/brw_fs_nir.cpp|  2 +-
>>  src/intel/compiler/brw_vec4.cpp  |  5 -
>>  src/intel/compiler/brw_vec4_cmod_propagation.cpp |  3 +--
>>  src/intel/compiler/brw_vec4_generator.cpp|  2 +-
>>  10 files changed, 18 insertions(+), 36 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_eu_defines.h 
>> b/src/intel/compiler/brw_eu_defines.h
>> index ccc838d..9e969f2 100644
>> --- a/src/intel/compiler/brw_eu_defines.h
>> +++ b/src/intel/compiler/brw_eu_defines.h
>> @@ -48,6 +48,9 @@
>>
>>  #define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> 
>> (low))
>>  #define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## 
>> _SHIFT)
>> +#define IS_DWORD(reg) \
>> +   (reg.type == BRW_REGISTER_TYPE_UD || \
>> +reg.type == BRW_REGISTER_TYPE_D)
>
> Technically F  also has a DWORD size. Just saying because ...
>
>>
>>  #define _3DPRIM_POINTLIST 0x01
>>  #define _3DPRIM_LINELIST  0x02
>> diff --git a/src/intel/compiler/brw_eu_emit.c 
>> b/src/intel/compiler/brw_eu_emit.c
>> index 231d6fd..7434610 100644
>> --- a/src/intel/compiler/brw_eu_emit.c
>> +++ b/src/intel/compiler/brw_eu_emit.c
>> @@ -959,8 +959,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
>> brw_reg dest,
>> assert(dest.address_mode == BRW_ADDRESS_DIRECT);
>> assert(dest.type == BRW_REGISTER_TYPE_F  ||
>>dest.type == BRW_REGISTER_TYPE_DF ||
>> -  dest.type == BRW_REGISTER_TYPE_D  ||
>> -  dest.type == BRW_REGISTER_TYPE_UD);
>> +  IS_DWORD(dest));
>
> ... this kind of checks of TYPE_F || IS_DWORD sounds somewhat strange
> (at least to me). And that happens several times.
>
> As I assume that your intention is check for integer dwords types, how
> about IS_INTEGER_DWORD?
>
Yes, that sounds better. I'll send out a V2.

>> if (devinfo->gen == 6) {
>>brw_inst_set_3src_dst_reg_file(devinfo, inst,
>>   dest.file == 
>> BRW_MESSAGE_REGISTER_FILE);
>> @@ -1158,9 +1157,7 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, 
>> struct brw_reg src0)
>> if (devinfo->gen == 7 && !devinfo->is_haswell &&
>> brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
>> dest.type == BRW_REGISTER_TYPE_DF &&
>> -   (src0.type == BRW_REGISTER_TYPE_F ||
>> -src0.type == BRW_REGISTER_TYPE_D ||
>> -src0.type == BRW_REGISTER_TYPE_UD) &&
>> +   (src0.type == BRW_REGISTER_TYPE_F || IS_DWORD(src0)) &&
>> !has_scalar_region(src0)) {
>>assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
>>   src0.width == BRW_WIDTH_4 &&
>> @@ -1182,15 +1179,13 @@ brw_ADD(struct brw_codegen *p, struct brw_reg dest,
>> if (src0.type == BRW_REGISTER_TYPE_F ||
>> (src0.file == BRW_IMMEDIATE_VALUE &&
>>   src0.type == BRW_REGISTER_TYPE_VF)) {
>> -  assert(src1.type != BRW_REGISTER_TYPE_UD);
>> -  assert(src1.type != BRW_REGISTER_TYPE_D);
>> +  assert(!IS_DWORD(src1));
>> }
>>
>> if (src1.type == BRW_REGISTER_TYPE_F ||
>> (src1.file == BRW_IMMEDIATE_VALUE &&
>>   src1.type == BRW_REGISTER_TYPE_VF)) {
>> -  assert(src0.type != BRW_REGISTER_TYPE_UD);
>> -  assert(src0.type != BRW_REGISTER_TYPE_D);
>> +  assert(!IS_DWORD(src0));
>> }
>>
>> return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
>> @@ -1222,25 +1217,20 @@ brw_MUL(struct brw_codegen *p, struct brw_reg dest,
>>  struct brw_reg src0, struct brw_reg src1)
>>  {
>> /* 6.32.38: mul */
>> -   if (src0.type == BRW_REGISTER_TYPE_D ||
>> -   src0.type == BRW_REGISTER_TYPE_UD ||
>> -   src1.type == BRW_REGISTER_TYPE_D ||
>> -   src1.type == BRW_REGISTER_TYPE_UD) {
>> +   if (IS_DWORD(src0) || IS_DWORD(src1)) {
>>assert(dest.type != BRW_REGISTER_TYPE_F);
>> }
>>
>> if (src0.type == BRW_REGISTER_TYPE_F ||
>> (src0.file == BRW_IMMEDIATE_VALUE &&
>>   src0.type == BRW_REGISTER_TYPE_VF)) {
>> -  assert(src1.type != BRW_REGISTER_TYPE_UD);
>> -  assert(src1.type != BRW_REGISTER_TYPE_D);
>> +  assert(!IS_DWORD(src1));
>> }
>>
>> if (src1.type == BRW_REGISTER_TYPE_F ||
>> (src1.file == BRW_IMMEDIATE_VALUE &&
>>   src1.type == BRW_REGISTER_TYPE_VF)) {
>> -  assert(src0.type != BRW_REGISTER_TYPE_UD);
>> -  

[Mesa-dev] [PATCH] configure.ac: update libdrm amdgpu version requirement

2017-05-24 Thread Leo Liu
VCN decode has a new interface, and that depends on the latest libdrm

Signed-off-by: Leo Liu 
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 1da67f5..b57be07 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
 # in the first entry.
 LIBDRM_REQUIRED=2.4.75
 LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.79
+LIBDRM_AMDGPU_REQUIRED=2.4.81
 LIBDRM_INTEL_REQUIRED=2.4.75
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 52/65] radeonsi: implement ARB_bindless_texture

2017-05-24 Thread Samuel Pitoiset



On 05/24/2017 01:25 PM, Marek Olšák wrote:

On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
 wrote:

This implements the Gallium interface. Decompression of resident
textures/images will follow in the next patches.

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/drivers/radeonsi/si_descriptors.c | 340 ++
  src/gallium/drivers/radeonsi/si_pipe.c|  12 +
  src/gallium/drivers/radeonsi/si_pipe.h|  26 ++
  3 files changed, 378 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index abe39de583..a687506f7f 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -60,6 +60,7 @@
  #include "sid.h"
  #include "gfx9d.h"

+#include "util/hash_table.h"
  #include "util/u_format.h"
  #include "util/u_memory.h"
  #include "util/u_upload_mgr.h"
@@ -2193,6 +2194,339 @@ void si_resident_descriptor_slab_free(void *priv, 
struct pb_slab *pslab)
 FREE(slab);
  }

+static int si_add_resident_tex_handle(struct si_context *sctx,
+ struct si_texture_handle *tex_handle)
+{
+   int idx;
+
+   /* New resident handle, check if the backing array is large enough. */
+   if (sctx->num_resident_tex_handles >= sctx->max_resident_tex_handles) {
+   unsigned new_max_handles =
+   MAX2(1, sctx->max_resident_tex_handles * 2);
+   struct si_texture_handle **new_handles =
+   REALLOC(sctx->resident_tex_handles,
+   sctx->num_resident_tex_handles * 
(sizeof(*new_handles)),
+   new_max_handles * sizeof(*new_handles));
+
+   if (new_handles) {
+   sctx->resident_tex_handles = new_handles;
+   sctx->max_resident_tex_handles = new_max_handles;
+   } else {
+   fprintf(stderr, "si_add_resident_tex_handle: "
+   "allocation failed\n");
+   return -1;
+   }
+   }
+
+   idx = sctx->num_resident_tex_handles;
+   sctx->resident_tex_handles[idx] = tex_handle;
+   sctx->num_resident_tex_handles++;
+
+   return 0;
+}
+
+static void si_del_resident_tex_handle(struct si_context *sctx,
+  struct si_texture_handle *tex_handle)
+{
+   unsigned i;
+   int size;
+
+   for (i = 0; i < sctx->num_resident_tex_handles; i++) {
+   if (sctx->resident_tex_handles[i] != tex_handle)
+   continue;
+
+   if (i < sctx->num_resident_tex_handles - 1) {
+   size = sizeof(*sctx->resident_tex_handles) *
+   (sctx->num_resident_tex_handles - 1 - i);
+
+   memmove(&sctx->resident_tex_handles[i],
+   &sctx->resident_tex_handles[i + 1], size);
+   }
+
+   sctx->num_resident_tex_handles--;
+   return;
+   }
+}
+
+static int si_add_resident_img_handle(struct si_context *sctx,
+ struct si_image_handle *img_handle)
+{
+   int idx;
+
+   /* New resident handle, check if the backing array is large enough. */
+   if (sctx->num_resident_img_handles >= sctx->max_resident_img_handles) {
+   unsigned new_max_handles =
+   MAX2(1, sctx->max_resident_img_handles * 2);
+   struct si_image_handle **new_handles =
+   REALLOC(sctx->resident_img_handles,
+   sctx->num_resident_img_handles * 
(sizeof(*new_handles)),
+   new_max_handles * sizeof(*new_handles));
+
+   if (new_handles) {
+   sctx->resident_img_handles = new_handles;
+   sctx->max_resident_img_handles = new_max_handles;
+   } else {
+   fprintf(stderr, "si_add_resident_img_handle: "
+   "allocation failed\n");
+   return -1;
+   }
+   }
+
+   idx = sctx->num_resident_img_handles;
+   sctx->resident_img_handles[idx] = img_handle;
+   sctx->num_resident_img_handles++;
+
+   return 0;
+}
+
+static void si_del_resident_img_handle(struct si_context *sctx,
+  struct si_image_handle *img_handle)
+{
+   unsigned i;
+   int size;
+
+   for (i = 0; i < sctx->num_resident_img_handles; i++) {
+   if (sctx->resident_img_handles[i] != img_handle)
+   continue;
+
+   if (i < sctx->num_resident_img_handles - 1) {
+   size = sizeof(*sctx->resident_img_handles) *
+   (sctx->num_resident_img_handles - 1 - i);
+
+  

Re: [Mesa-dev] [PATCH v3 1/4] gallium: Add a cap to check if the driver supports ARB_post_depth_coverage

2017-05-24 Thread Ilia Mirkin
On Tue, May 23, 2017 at 8:52 PM, Lyude  wrote:
> Signed-off-by: Lyude 
>
> Changes since v1:
>  - Correct documentation in screen.rst for new cap
> Changes since v2:
>  - Fix bogus documentation
>
> Signed-off-by: Lyude 
> ---
>  src/gallium/docs/source/screen.rst   | 3 +++
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>  src/gallium/include/pipe/p_defines.h | 1 +
>  17 files changed, 19 insertions(+)
>
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index 871669c..c8d2b11 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -392,6 +392,9 @@ The integer capabilities:
>  * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
>PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
>  * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
> +* ``PIPE_CAP_POST_DEPTH_COVERAGE``: Whether the input for
> +  ``TGSI_SEMANTIC_MASK`` include samples that have failed the depth/stencil
> +  tests.  As well, this implicitly enables ``FS_EARLY_DEPTH_STENCIL``.

You're not describing the cap here. You're describing the shader
property which is added in another patch. The cap is whether the
property is supported in fragment shaders. And then you need to write
docs for the property, which should talk about this stuff. Separately,
I thought the conclusion was to have the FE have to take care of
flipping on early depth tests, in which case your last comment doesn't
apply.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 05/65] mesa: refuse to update sampler parameters when a handle is allocated

2017-05-24 Thread Samuel Pitoiset



On 05/24/2017 12:20 PM, Nicolai Hähnle wrote:

On 19.05.2017 18:52, Samuel Pitoiset wrote:

The ARB_bindless_texture spec says:

"The error INVALID_OPERATION is generated by SamplerParameter* if
  <sampler> identifies a sampler object referenced by one or more
 texture handles."

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/samplerobj.c | 42 
++

  1 file changed, 42 insertions(+)

diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index ee15c68b4f..cf4bcfce7c 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -801,6 +801,18 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum 
pname, GLint param)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  /* The ARB_bindless_texture spec says:
+   *
+   * "The error INVALID_OPERATION is generated by 
SamplerParameter* if
+   *   <sampler> identifies a sampler object referenced by one or 
more

+   *  texture handles."
+   */
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameteri(immutable sampler)");
+  return;
+   }


Maybe these could go into sampler_parameter_error_check?


Yes, I added a new boolean parameter to sampler_parameter_error_check() 
to differentiate set/get functions and moved the HandleAllocated check 
there.




Cheers,
Nicolai




+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, param);
@@ -884,6 +896,12 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum 
pname, GLfloat param)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterf(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, (GLint) param);
@@ -966,6 +984,12 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum 
pname, const GLint *params)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameteriv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);
@@ -1056,6 +1080,12 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum 
pname, const GLfloat *params)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterfv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, (GLint) params[0]);
@@ -1139,6 +1169,12 @@ _mesa_SamplerParameterIiv(GLuint sampler, 
GLenum pname, const GLint *params)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterIiv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);
@@ -1223,6 +1259,12 @@ _mesa_SamplerParameterIuiv(GLuint sampler, 
GLenum pname, const GLuint *params)

 if (!sampObj)
return;
+   if (sampObj->HandleAllocated) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterIuiv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: Add --with-wayland-scanner-path

2017-05-24 Thread Emil Velikov
Hi Jussi,

On 23 May 2017 at 09:13, Jussi Kukkonen  wrote:
> Modify wayland-scanner lookup: Use the path given by pkg-config
> but offer an option to override the path with
> "--with-wayland-scanner-path=PATH". The latter is useful for
> cross-compile situations.
>
> AC_PATH_PROG is no longer used (if the scanner is installed it should
> get found by pkg-config). AC_SUBST is added so the output variable is
> created when only the configure option is used.
> ---
>
> My goal is to standardize wayland-scanner usage in a way that does not
> require patching when cross-compiling in Yocto (the detailed issue is
> that in Yocto pkg-config will return a "wayland_scanner" variable but
> that will contain a _target path_ when we would like to use a native
> sysroot path instead).
>
> I've sent a similar patch to weston and intend to fix other projects
> if these two patches are well received.
>
I might have misread something, but on a quick look the patch does not
look quite right. Stepping aside for a moment,

Can you explain clearly what's happening/wrong in the whole scenario?
 - Yocto has a stage A where it does X.
 - Then it proceeds to B... at which point $file
foo/wayland-scanner.pc gets picked
 - That results in an error due to variable containing $bar, due to
the $step above

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 47/65] st/mesa: enable ARB_bindless_texture

2017-05-24 Thread Nicolai Hähnle

Patches 44 & 47:

Reviewed-by: Nicolai Hähnle 


On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_extensions.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 2fa7ba7797..80695580cb 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -577,6 +577,7 @@ void st_init_extensions(struct pipe_screen *screen,
  
 static const struct st_extension_cap_mapping cap_mapping[] = {

{ o(ARB_base_instance),PIPE_CAP_START_INSTANCE  
 },
+  { o(ARB_bindless_texture), PIPE_CAP_BINDLESS_TEXTURE 
},
{ o(ARB_buffer_storage),   
PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT   },
{ o(ARB_clear_texture),PIPE_CAP_CLEAR_TEXTURE   
 },
{ o(ARB_clip_control), PIPE_CAP_CLIP_HALFZ  
 },




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 25/65] gallium: add ARB_bindless_texture interface

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/gallium/include/pipe/p_context.h | 16 
  1 file changed, 16 insertions(+)

diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 4b75386a65..83eb1c9b08 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -766,6 +766,22 @@ struct pipe_context {
unsigned last_level,
unsigned first_layer,
unsigned last_layer);
+
+   /**
+* Bindless texture/image handles interface.
+*/
+   uint64_t (*create_texture_handle)(struct pipe_context *ctx,
+ struct pipe_resource *res,
+ struct pipe_sampler_view *view,
+ const struct pipe_sampler_state *state);
+   void (*delete_texture_handle)(struct pipe_context *ctx, uint64_t handle);
+   void (*make_texture_handle_resident)(struct pipe_context *ctx,
+uint64_t handle, bool resident);
+   uint64_t (*create_image_handle)(struct pipe_context *ctx,
+   const struct pipe_image_view *image);
+   void (*delete_image_handle)(struct pipe_context *ctx, uint64_t handle);
+   void (*make_image_handle_resident)(struct pipe_context *ctx, uint64_t 
handle,
+  unsigned access, bool resident);


Should the create/delete functions be pipe_screen functions? Handles are 
shared across a GL share group, so the current implementation with a 
per-context hash of handles is definitely wrong. I still need to go over 
it in detail.


Cheers,
Nicolai
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 43/65] st/mesa: add infrastructure for storing bound texture/image handles

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_context.c |  2 +
  src/mesa/state_tracker/st_context.h | 11 ++
  src/mesa/state_tracker/st_texture.c | 77 +
  src/mesa/state_tracker/st_texture.h |  5 +++
  4 files changed, 95 insertions(+)

diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index c901764668..4dcc160b50 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -292,6 +292,8 @@ st_destroy_context_priv(struct st_context *st, bool 
destroy_pipe)
 st_destroy_drawtex(st);
 st_destroy_perfmon(st);
 st_destroy_pbo_helpers(st);
+   st_destroy_bound_texture_handles(st);
+   st_destroy_bound_image_handles(st);
  
 for (shader = 0; shader < ARRAY_SIZE(st->state.sampler_views); shader++) {

for (i = 0; i < ARRAY_SIZE(st->state.sampler_views[0]); i++) {
diff --git a/src/mesa/state_tracker/st_context.h 
b/src/mesa/state_tracker/st_context.h
index 520cd8d462..16f29669be 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -79,6 +79,12 @@ struct st_bitmap_cache
 ubyte *buffer;
  };
  
+struct st_bound_handle

+{
+   unsigned num_handles;
+   uint64_t *handles;
+};


Not super important, but maybe call this st_bound_handles (plural)? But 
that's a nitpick, and either way, patches 32 (possibly with 
inst->bindless removed) and 34-43 are


Reviewed-by: Nicolai Hähnle 



+
  struct st_context
  {
 struct st_context_iface iface;
@@ -271,6 +277,11 @@ struct st_context
 struct st_perf_monitor_group *perfmon;
  
 enum pipe_reset_status reset_status;

+
+   /* Array of bound texture/image handles which are resident in the context.
+*/
+   struct st_bound_handle bound_texture_handles[PIPE_SHADER_TYPES];
+   struct st_bound_handle bound_image_handles[PIPE_SHADER_TYPES];
  };
  
  
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c

index 65f86f2b4f..cde7759a61 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -421,6 +421,83 @@ st_create_color_map_texture(struct gl_context *ctx)
 return pt;
  }
  
+/**

+ * Destroy bound texture handles for the given stage.
+ */
+static void
+st_destroy_bound_texture_handles_per_stage(struct st_context *st,
+   enum pipe_shader_type shader)
+{
+   struct st_bound_handle *bound_handles = >bound_texture_handles[shader];
+   struct pipe_context *pipe = st->pipe;
+   unsigned i;
+
+   if (likely(!bound_handles->num_handles))
+  return;
+
+   for (i = 0; i < bound_handles->num_handles; i++) {
+  uint64_t handle = bound_handles->handles[i];
+
+  pipe->make_texture_handle_resident(pipe, handle, false);
+  pipe->delete_texture_handle(pipe, handle);
+   }
+   free(bound_handles->handles);
+   bound_handles->num_handles = 0;
+}
+
+
+/**
+ * Destroy all bound texture handles in the context.
+ */
+void
+st_destroy_bound_texture_handles(struct st_context *st)
+{
+   unsigned i;
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+  st_destroy_bound_texture_handles_per_stage(st, i);
+   }
+}
+
+
+/**
+ * Destroy bound image handles for the given stage.
+ */
+static void
+st_destroy_bound_image_handles_per_stage(struct st_context *st,
+ enum pipe_shader_type shader)
+{
+   struct st_bound_handle *bound_handles = >bound_image_handles[shader];
+   struct pipe_context *pipe = st->pipe;
+   unsigned i;
+
+   if (likely(!bound_handles->num_handles))
+  return;
+
+   for (i = 0; i < bound_handles->num_handles; i++) {
+  uint64_t handle = bound_handles->handles[i];
+
+  pipe->make_image_handle_resident(pipe, handle, GL_READ_WRITE, false);
+  pipe->delete_image_handle(pipe, handle);
+   }
+   free(bound_handles->handles);
+   bound_handles->num_handles = 0;
+}
+
+
+/**
+ * Destroy all bound image handles in the context.
+ */
+void
+st_destroy_bound_image_handles(struct st_context *st)
+{
+   unsigned i;
+
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
+  st_destroy_bound_image_handles_per_stage(st, i);
+   }
+}
+
  
  /**

   * Create a texture handle from a texture unit.
diff --git a/src/mesa/state_tracker/st_texture.h 
b/src/mesa/state_tracker/st_texture.h
index 7f8a0cb841..b97814cb16 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -250,6 +250,11 @@ st_texture_image_copy(struct pipe_context *pipe,
  extern struct pipe_resource *
  st_create_color_map_texture(struct gl_context *ctx);
  
+void

+st_destroy_bound_texture_handles(struct st_context *st);
+
+void
+st_destroy_bound_image_handles(struct st_context *st);
  
  bool

  st_etc_fallback(struct st_context *st, struct gl_texture_image *texImage);




--
Lerne, wie die Welt wirklich ist,
Aber vergiss 

Re: [Mesa-dev] [RFC PATCH 33/65] st/glsl_to_tgsi: add support for bindless images

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 78 --
  1 file changed, 63 insertions(+), 15 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 5734d0e456..264b43c10b 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -288,7 +288,7 @@ public:
  
 st_dst_reg dst[2];

 st_src_reg src[4];
-   st_src_reg resource; /**< sampler or buffer register */
+   st_src_reg resource; /**< sampler, image or buffer register */
 st_src_reg *tex_offsets;
  
 /** Pointer to the ir source this tree came from for debugging */

@@ -3765,15 +3765,46 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
 exec_node *param = ir->actual_parameters.get_head();
  
 ir_dereference *img = (ir_dereference *)param;

-   const ir_variable *imgvar = img->variable_referenced();
-   const glsl_type *type = imgvar->type->without_array();
+   const struct glsl_struct_field *struct_field = NULL;
 unsigned sampler_array_size = 1, sampler_base = 0;
+   unsigned memory_coherent, memory_volatile, memory_restrict, image_format;
+   const ir_variable *imgvar;
+   const glsl_type *type;
+
+   if (img->ir_type == ir_type_dereference_record) {
+  ir_dereference_record *r = img->as_dereference_record();
+  const glsl_type *struct_type = r->record->type;
+
+  for (unsigned i = 0; i < struct_type->length; i++) {
+ if (!strcmp(struct_type->fields.structure[i].name, r->field)) {
+struct_field = _type->fields.structure[i];
+break;
+ }
+  }
+  assert(struct_field);
+   }
+
+   imgvar = img->variable_referenced();
+
+   if (struct_field) {
+  type = struct_field->type;
+  memory_coherent = struct_field->memory_coherent;
+  memory_volatile = struct_field->memory_volatile;
+  memory_restrict = struct_field->memory_restrict;
+  image_format = struct_field->image_format;
+   } else {
+  type = imgvar->type->without_array();
+  memory_coherent = imgvar->data.memory_coherent;
+  memory_volatile = imgvar->data.memory_volatile;
+  memory_restrict = imgvar->data.memory_restrict;
+  image_format = imgvar->data.image_format;
+   }


What about

   struct {
  coherent image2D imgs[6];
   } s;

   imageStore(s.imgs[i], ...);

and obviously nested arrays. I think you need to follow the dereferences 
until you hit the first ir_dereference_record or ir_dereference_variable.


Cheers,
Nicolai



 st_src_reg reladdr;
 st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
  
 get_deref_offsets(img, _array_size, _base,

- (uint16_t*), , true);
+ (uint16_t*), , 
!imgvar->is_bindless());
  
 if (reladdr.file != PROGRAM_UNDEFINED) {

image.reladdr = ralloc(mem_ctx, st_src_reg);
@@ -3886,19 +3917,27 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
   inst->dst[0].writemask = WRITEMASK_XYZW;
 }
  
-   inst->resource = image;

-   inst->sampler_array_size = sampler_array_size;
-   inst->sampler_base = sampler_base;
+   if (imgvar->is_bindless()) {
+  img->accept(this);
+  inst->resource = this->result;
+  inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+ SWIZZLE_X, SWIZZLE_Y);
+  inst->bindless = 1;
+   } else {
+  inst->resource = image;
+  inst->sampler_array_size = sampler_array_size;
+  inst->sampler_base = sampler_base;
+   }
  
 inst->tex_target = type->sampler_index();

 inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
- _mesa_get_shader_image_format(imgvar->data.image_format));
+ _mesa_get_shader_image_format(image_format));
  
-   if (imgvar->data.memory_coherent)

+   if (memory_coherent)
inst->buffer_access |= TGSI_MEMORY_COHERENT;
-   if (imgvar->data.memory_restrict)
+   if (memory_restrict)
inst->buffer_access |= TGSI_MEMORY_RESTRICT;
-   if (imgvar->data.memory_volatile)
+   if (memory_volatile)
inst->buffer_access |= TGSI_MEMORY_VOLATILE;
  }
  
@@ -5911,7 +5950,12 @@ compile_tgsi_instruction(struct st_translate *t,

} else if (inst->resource.file == PROGRAM_BUFFER) {
   src[0] = t->buffers[inst->resource.index];
} else {
- src[0] = t->images[inst->resource.index];
+ if (inst->resource.file == PROGRAM_IMAGE) {
+src[0] = t->images[inst->resource.index];
+ } else {
+/* Bindless images. */
+src[0] = translate_src(t, >resource);
+ }
   tex_target = st_translate_texture_target(inst->tex_target, 
inst->tex_shadow);
}
if (inst->resource.reladdr)
@@ -5919,7 +5963,7 @@ compile_tgsi_instruction(struct st_translate *t,

Re: [Mesa-dev] [RFC PATCH 16/65] glsl: add ir_variable::is_bindless()

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/compiler/glsl/ir.h | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 91e665cc1b..c816a327c1 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -475,6 +475,17 @@ public:
 }
  
 /**

+* Return whether this variable is a bindless sampler/image.
+*/
+   inline bool is_bindless() const
+   {
+  if (!this->type->contains_sampler() && !this->type->contains_image())
+ return false;
+
+  return this->data.bindless || this->data.mode != ir_var_uniform;
+   }


This should really be called contains_bindless(), and the comment be 
adjusted accordingly.


Cheers,
Nicolai


+
+   /**
  * Set this->interface_type on a newly created variable.
  */
 void init_interface_type(const struct glsl_type *type)




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/4] gallium: Add a cap to check if the driver supports ARB_post_depth_coverage

2017-05-24 Thread Marek Olšák
On Wed, May 24, 2017 at 2:52 AM, Lyude  wrote:
> Signed-off-by: Lyude 
>
> Changes since v1:
>  - Correct documentation in screen.rst for new cap
> Changes since v2:
>  - Fix bogus documentation
>
> Signed-off-by: Lyude 
> ---
>  src/gallium/docs/source/screen.rst   | 3 +++
>  src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
>  src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/drivers/swr/swr_screen.cpp   | 1 +
>  src/gallium/drivers/vc4/vc4_screen.c | 1 +
>  src/gallium/drivers/virgl/virgl_screen.c | 1 +
>  src/gallium/include/pipe/p_defines.h | 1 +
>  17 files changed, 19 insertions(+)
>
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index 871669c..c8d2b11 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -392,6 +392,9 @@ The integer capabilities:
>  * ``PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX``: Whether a buffer with just
>PIPE_BIND_CONSTANT_BUFFER can be legally passed to set_vertex_buffers.
>  * ``PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION``: As the name says.
> +* ``PIPE_CAP_POST_DEPTH_COVERAGE``: Whether the input for
> +  ``TGSI_SEMANTIC_MASK`` include samples that have failed the depth/stencil
> +  tests.  As well, this implicitly enables ``FS_EARLY_DEPTH_STENCIL``.

It's TGSI_SEMANTIC_SAMPLEMASK. Also, POST_DEPTH_COVERAGE *excludes*
samples (clears the bits to 0) that failed the depth/stencil tests.

Marek

>
>
>  .. _pipe_capf:
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
> b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> index bf13184..8fd1184 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
> @@ -256,6 +256,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> +   case PIPE_CAP_POST_DEPTH_COVERAGE:
>return 0;
>
> /* Stream output. */
> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
> b/src/gallium/drivers/freedreno/freedreno_screen.c
> index 99a6d3c..58fa997 100644
> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
> @@ -314,6 +314,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> +   case PIPE_CAP_POST_DEPTH_COVERAGE:
> return 0;
>
> case PIPE_CAP_MAX_VIEWPORTS:
> diff --git a/src/gallium/drivers/i915/i915_screen.c 
> b/src/gallium/drivers/i915/i915_screen.c
> index 1cf9441..a0dbc12 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -277,6 +277,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
> cap)
> case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
> case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
> case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
> +   case PIPE_CAP_POST_DEPTH_COVERAGE:
>return 0;
>
> case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index 656de49..4e8c110 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -354,6 +354,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
> case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
> case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
> +   case PIPE_CAP_POST_DEPTH_COVERAGE:
>return 0;
> }
> /* should only get here on unhandled cases */
> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
> b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> index e8d14bf..553ee49 100644
> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
> @@ -218,6 +218,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
> pipe_cap param)
> case PIPE_CAP_TGSI_BALLOT:
> case 

Re: [Mesa-dev] [Intel-gfx] [RFC v3] drm/i915: Select engines via class and instance in execbuffer2

2017-05-24 Thread Tvrtko Ursulin


On 18/05/2017 18:00, Chris Wilson wrote:

On Thu, May 18, 2017 at 05:20:38PM +0100, Tvrtko Ursulin wrote:


On 18/05/2017 14:37, Chris Wilson wrote:

On Thu, May 18, 2017 at 02:06:35PM +0100, Tvrtko Ursulin wrote:


But this problem in general can also be solved separately from
class-instance addressing via engine feature masking.


But imo all members of a class should have the same features. That would
be my definition of a class!


That sounds very totalitarian! :)) To me a class is a group of some
entities which share some common characteristics - not necessarily
completely uniform.


The problem otherwise is that we then have to define yet another
interface based on features. To me that sounds like too much
duplication, that we could avoid from the beginning. Curse the hw for
being asymmetrical!


Hm, I don't see a problem with feature-based engine selection on top. 
Do you still see one because of the desire for classes to be equal in features?


To sum up what I (and we) talked about in various parts of the thread(s):

Step 1a: New execbuf engine selection uAPI.

 - execbuf class=VCS instance=1

Step 1b: Engine discovery uAPI.

Same as above but userspace can figure out how many VCS engines there
are without PCI probing.

I didn't get much feedback on this one. :(

Step 2: Feature masks for execbuf.

 - execbuf class=VCS instance=0 features=HEVC = OK
 - execbuf class=VCS instance=1 features=HEVC = FAIL

But userspace can use engine discovery to figure out which are the valid 
combinations.


This could be a simpler, but less featureful and not very elegant 
alternative to step 2.


Otherwise just a prep step for the subsequent steps below.

Step 3a: (One day maybe) userspace selects a class, i915 picks the engine

 - execbuf class=VCS instance=any

Step 3b: userspace selected class and features

 - execbuf class=VCS instance=any features=HEVC

This RFC proposed steps 1a and 1b. The rest we leave for later.

How does that sound? Acceptable?

Is engine discovery useful enough as it is, or what other features could 
we put into it to make it more useful for userspace? Potentially it could enable 
dropping PCI ID probing altogether and let libva/mesa/??? probe 
everything using i915 ioctls.


Regards,

Tvrtko
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 52/65] radeonsi: implement ARB_bindless_texture

2017-05-24 Thread Marek Olšák
On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
 wrote:
> This implements the Gallium interface. Decompression of resident
> textures/images will follow in the next patches.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 340 
> ++
>  src/gallium/drivers/radeonsi/si_pipe.c|  12 +
>  src/gallium/drivers/radeonsi/si_pipe.h|  26 ++
>  3 files changed, 378 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index abe39de583..a687506f7f 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -60,6 +60,7 @@
>  #include "sid.h"
>  #include "gfx9d.h"
>
> +#include "util/hash_table.h"
>  #include "util/u_format.h"
>  #include "util/u_memory.h"
>  #include "util/u_upload_mgr.h"
> @@ -2193,6 +2194,339 @@ void si_resident_descriptor_slab_free(void *priv, 
> struct pb_slab *pslab)
> FREE(slab);
>  }
>
> +static int si_add_resident_tex_handle(struct si_context *sctx,
> + struct si_texture_handle *tex_handle)
> +{
> +   int idx;
> +
> +   /* New resident handle, check if the backing array is large enough. */
> +   if (sctx->num_resident_tex_handles >= sctx->max_resident_tex_handles) 
> {
> +   unsigned new_max_handles =
> +   MAX2(1, sctx->max_resident_tex_handles * 2);
> +   struct si_texture_handle **new_handles =
> +   REALLOC(sctx->resident_tex_handles,
> +   sctx->num_resident_tex_handles * 
> (sizeof(*new_handles)),
> +   new_max_handles * sizeof(*new_handles));
> +
> +   if (new_handles) {
> +   sctx->resident_tex_handles = new_handles;
> +   sctx->max_resident_tex_handles = new_max_handles;
> +   } else {
> +   fprintf(stderr, "si_add_resident_tex_handle: "
> +   "allocation failed\n");
> +   return -1;
> +   }
> +   }
> +
> +   idx = sctx->num_resident_tex_handles;
> +   sctx->resident_tex_handles[idx] = tex_handle;
> +   sctx->num_resident_tex_handles++;
> +
> +   return 0;
> +}
> +
> +static void si_del_resident_tex_handle(struct si_context *sctx,
> +  struct si_texture_handle *tex_handle)
> +{
> +   unsigned i;
> +   int size;
> +
> +   for (i = 0; i < sctx->num_resident_tex_handles; i++) {
> +   if (sctx->resident_tex_handles[i] != tex_handle)
> +   continue;
> +
> +   if (i < sctx->num_resident_tex_handles - 1) {
> +   size = sizeof(*sctx->resident_tex_handles) *
> +   (sctx->num_resident_tex_handles - 1 - i);
> +
> +   memmove(>resident_tex_handles[i],
> +   >resident_tex_handles[i + 1], size);
> +   }
> +
> +   sctx->num_resident_tex_handles--;
> +   return;
> +   }
> +}
> +
> +static int si_add_resident_img_handle(struct si_context *sctx,
> + struct si_image_handle *img_handle)
> +{
> +   int idx;
> +
> +   /* New resident handle, check if the backing array is large enough. */
> +   if (sctx->num_resident_img_handles >= sctx->max_resident_img_handles) 
> {
> +   unsigned new_max_handles =
> +   MAX2(1, sctx->max_resident_img_handles * 2);
> +   struct si_image_handle **new_handles =
> +   REALLOC(sctx->resident_img_handles,
> +   sctx->num_resident_img_handles * 
> (sizeof(*new_handles)),
> +   new_max_handles * sizeof(*new_handles));
> +
> +   if (new_handles) {
> +   sctx->resident_img_handles = new_handles;
> +   sctx->max_resident_img_handles = new_max_handles;
> +   } else {
> +   fprintf(stderr, "si_add_resident_img_handle: "
> +   "allocation failed\n");
> +   return -1;
> +   }
> +   }
> +
> +   idx = sctx->num_resident_img_handles;
> +   sctx->resident_img_handles[idx] = img_handle;
> +   sctx->num_resident_img_handles++;
> +
> +   return 0;
> +}
> +
> +static void si_del_resident_img_handle(struct si_context *sctx,
> +  struct si_image_handle *img_handle)
> +{
> +   unsigned i;
> +   int size;
> +
> +   for (i = 0; i < sctx->num_resident_img_handles; i++) {
> +   if (sctx->resident_img_handles[i] != img_handle)
> +   continue;
> +
> +   if (i < 

Re: [Mesa-dev] Mesa 17.1.1 release candidate

2017-05-24 Thread Emil Velikov
On 22 May 2017 at 19:45, Chuck Atkins  wrote:
>> The candidate for the Mesa 17.1.1 is now available.
>
>
> Excellent!
>
>
>> From build perspective - SWR now ships its final generated header, thus
>> Python/mako is no longer required.
>
>
> Just what I was looking for, thanks!
>
> Is a source tarball available that I can test the build with? I would just
> use the git tag but building from the source tarball was the problem I
> encountered in the first place.
>
We don't do release tarballs for the stable RC, although it has been
asked for a few times in the past.
There's nothing stopping us though - will check if we can start doing so.

FWIW the release will be out shortly, within an hour or so, once I get
a stable internet connection.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 45/65] st/mesa: do not release sampler views for resident textures

2017-05-24 Thread Marek Olšák
On Wed, May 24, 2017 at 1:06 PM, Marek Olšák  wrote:
> On Wed, May 24, 2017 at 10:50 AM, Samuel Pitoiset
>  wrote:
>>
>>
>> On 05/23/2017 10:11 PM, Marek Olšák wrote:
>>>
>>> I don't think this change is necessary. Releasing sampler views
>>> doesn't release textures.
>>
>>
>> This issue has been reported by Feral directly. It happens in the following
>> scenario:
>>
>> 1) create a texture
>> 2) get a texture handle
>> 3) make it resident
>> 4) bind the texture to a unit
>> 5) draw
>>
>> The sampler views are then released and this ended up with a use-after-free
>> at decompression time for the resident texture handle.
>
> It seems that you keep a pointer to some object that is released by
> releasing sampler views. You need to increment the reference counter
> for that object, so that releasing sampler views won't release the
> object.

I'll reply on patch 52.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 45/65] st/mesa: do not release sampler views for resident textures

2017-05-24 Thread Marek Olšák
On Wed, May 24, 2017 at 10:50 AM, Samuel Pitoiset
 wrote:
>
>
> On 05/23/2017 10:11 PM, Marek Olšák wrote:
>>
>> I don't think this change is necessary. Releasing sampler views
>> doesn't release textures.
>
>
> This issue has been reported by Feral directly. It happens in the following
> scenario:
>
> 1) create a texture
> 2) get a texture handle
> 3) make it resident
> 4) bind the texture to a unit
> 5) draw
>
> The sampler views are then released and this ended up with a use-after-free
> at decompression time for the resident texture handle.

It seems that you keep a pointer to some object that is released by
releasing sampler views. You need to increment the reference counter
for that object, so that releasing sampler views won't release the
object.

Marek

>
>
>>
>> Marek
>>
>> On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
>>  wrote:
>>>
>>> When a texture is referenced by one or more texture handles,
>>> it might be resident and we shouldn't release the sampler views.
>>>
>>> Signed-off-by: Samuel Pitoiset 
>>> ---
>>>   src/mesa/state_tracker/st_sampler_view.c | 6 ++
>>>   1 file changed, 6 insertions(+)
>>>
>>> diff --git a/src/mesa/state_tracker/st_sampler_view.c
>>> b/src/mesa/state_tracker/st_sampler_view.c
>>> index c78a987486..690b50087c 100644
>>> --- a/src/mesa/state_tracker/st_sampler_view.c
>>> +++ b/src/mesa/state_tracker/st_sampler_view.c
>>> @@ -115,6 +115,12 @@ st_texture_release_all_sampler_views(struct
>>> st_context *st,
>>>   {
>>>  GLuint i;
>>>
>>> +   if (stObj->base.HandleAllocated) {
>>> +  /* Do not release sampler views when a texture is referenced by
>>> one or
>>> +   * more texture handles because the texture might be resident. */
>>> +  return;
>>> +   }
>>> +
>>>  /* XXX This should use sampler_views[i]->pipe, not st->pipe */
>>>  for (i = 0; i < stObj->num_sampler_views; ++i)
>>> pipe_sampler_view_release(st->pipe, >sampler_views[i]);
>>> --
>>> 2.13.0
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 21/65] mesa: associate uniform storage to bindless samplers/images

2017-05-24 Thread Nicolai Hähnle

Patch 3, 4, 7-21:

Reviewed-by: Nicolai Hähnle 


On 19.05.2017 18:52, Samuel Pitoiset wrote:

When a bindless sampler/image is bound to a texture/image unit,
we have to overwrite the constant value by the resident handle
directly in the constant buffer before the next draw.

One solution is to keep track of a pointer to the data.

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/program/ir_to_mesa.cpp | 25 +
  1 file changed, 25 insertions(+)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 5e6304036d..0c3ffd2e22 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2541,6 +2541,7 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
  bool propagate_to_storage)
  {
 struct gl_program_parameter_list *params = prog->Parameters;
+   gl_shader_stage shader_type = prog->info.stage;
  
 /* After adding each uniform to the parameter list, connect the storage for

  * the parameter with the tracking structure used by the API for the
@@ -2623,6 +2624,30 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
   format,
   >ParameterValues[i]);
  
+ /* When a bindless sampler/image is bound to a texture/image unit, we

+  * have to overwrite the constant value by the resident handle
+  * directly in the constant buffer before the next draw. One solution
+  * is to keep track of a pointer to the base of the data.
+  */
+ if (storage->is_bindless && (prog->sh.NumBindlessSamplers ||
+  prog->sh.NumBindlessImages)) {
+unsigned array_elements = MAX2(1, storage->array_elements);
+
+for (unsigned j = 0; j < array_elements; ++j) {
+   unsigned unit = storage->opaque[shader_type].index + j;
+
+   if (storage->type->without_array()->is_sampler()) {
+  assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers);
+  prog->sh.BindlessSamplers[unit].data =
+ >ParameterValues[i] + j;
+   } else if (storage->type->without_array()->is_image()) {
+  assert(unit >= 0 && unit < prog->sh.NumBindlessImages);
+  prog->sh.BindlessImages[unit].data =
+ >ParameterValues[i] + j;
+   }
+}
+ }
+
   /* After attaching the driver's storage to the uniform, propagate any
* data from the linker's backing store.  This will cause values from
* initializers in the source code to be copied over.




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 22/65] mesa: handle bindless uniforms bound to texture/image units

2017-05-24 Thread Nicolai Hähnle
Something else just occurred to me. Do you have a test case where an 
array of bindless sampler uniforms is set at once with glUniform1iv?


That should have a 32- vs. 64-bit mismatch when the user-supplied array 
is directly memcpy()'d to uni->storage. I think you need to fix that 
mismatch. On the plus side, that probably makes patch #23 unnecessary.


Cheers,
Nicolai


On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/uniform_query.cpp | 122 ++--
  1 file changed, 116 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index cc145f29e9..be04e48d53 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -993,9 +993,25 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid 
*values,
   bool changed = false;
   for (int j = 0; j < count; j++) {
  unsigned unit = uni->opaque[i].index + offset + j;
-if (sh->Program->SamplerUnits[unit] != ((unsigned *) values)[j]) {
-   sh->Program->SamplerUnits[unit] = ((unsigned *) values)[j];
-   changed = true;
+unsigned value = ((unsigned *)values)[j];
+
+if (uni->is_bindless) {
+   struct gl_bindless_sampler *sampler =
+  >Program->sh.BindlessSamplers[unit];
+
+   /* Mark this bindless sampler as bound to a texture unit.
+*/
+   if (sampler->unit != value) {
+  sampler->unit = value;
+  changed = true;
+   }
+   sampler->bound = true;
+   sh->Program->sh.HasBoundBindlessSampler = true;
+} else {
+   if (sh->Program->SamplerUnits[unit] != value) {
+  sh->Program->SamplerUnits[unit] = value;
+  changed = true;
+   }
  }
   }
  
@@ -1024,9 +1040,23 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,

   if (!uni->opaque[i].active)
  continue;
  
- for (int j = 0; j < count; j++)

-sh->Program->sh.ImageUnits[uni->opaque[i].index + offset + j] =
-   ((GLint *) values)[j];
+ for (int j = 0; j < count; j++) {
+unsigned unit = uni->opaque[i].index + offset + j;
+unsigned value = ((unsigned *)values)[j];
+
+if (uni->is_bindless) {
+   struct gl_bindless_image *image =
+  >Program->sh.BindlessImages[unit];
+
+   /* Mark this bindless image as bound to an image unit.
+*/
+   image->unit = value;
+   image->bound = true;
+   sh->Program->sh.HasBoundBindlessImage = true;
+} else {
+   sh->Program->sh.ImageUnits[unit] = value;
+}
+ }
}
  
ctx->NewDriverState |= ctx->DriverFlags.NewImageUnits;

@@ -1173,6 +1203,40 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
 _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
  }
  
+static void

+update_bound_bindless_sampler_flag(struct gl_program *prog)
+{
+   unsigned i;
+
+   if (likely(!prog->sh.HasBoundBindlessSampler))
+  return;
+
+   for (i = 0; i < prog->sh.NumBindlessSamplers; i++) {
+  struct gl_bindless_sampler *sampler = >sh.BindlessSamplers[i];
+
+  if (sampler->bound)
+ return;
+   }
+   prog->sh.HasBoundBindlessSampler = false;
+}
+
+static void
+update_bound_bindless_image_flag(struct gl_program *prog)
+{
+   unsigned i;
+
+   if (likely(!prog->sh.HasBoundBindlessImage))
+  return;
+
+   for (i = 0; i < prog->sh.NumBindlessImages; i++) {
+  struct gl_bindless_image *image = >sh.BindlessImages[i];
+
+  if (image->bound)
+ return;
+   }
+   prog->sh.HasBoundBindlessImage = false;
+}
+
  /**
   * Called via glUniformHandleui64*ARB() functions.
   */
@@ -1236,6 +1300,52 @@ _mesa_uniform_handle(GLint location, GLsizei count, 
const GLvoid *values,
sizeof(uni->storage[0]) * components * count * size_mul);
  
 _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);

+
+   if (uni->type->is_sampler()) {
+  /* Mark this bindless sampler as not bound to a texture unit because
+   * it refers to a texture handle.
+   */
+  for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_linked_shader *const sh = shProg->_LinkedShaders[i];
+
+ /* If the shader stage doesn't use the sampler uniform, skip this. */
+ if (!uni->opaque[i].active)
+continue;
+
+ for (int j = 0; j < count; j++) {
+unsigned unit = uni->opaque[i].index + offset + j;
+struct gl_bindless_sampler *sampler =
+   >Program->sh.BindlessSamplers[unit];
+
+sampler->bound = false;
+ }
+
+ 

Re: [Mesa-dev] [RFC PATCH 23/65] mesa: get rid of a workaround for bindless in _mesa_get_uniform()

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

The ARB_bindless_texture spec says:

"When a sampler or image uniform's value is queried via any
 of the GetUniform* commands, the returned value will reflect
 the most recently set value through either UniformHandle* or
 Uniform1i*, converted to the requested type."


This is probably unnecessary, depending on what happens with the 
previous patch.


Cheers,
Nicolai



Signed-off-by: Samuel Pitoiset 
---
  src/compiler/glsl/ir_uniform.h |  6 ++
  src/compiler/glsl/shader_cache.cpp |  2 ++
  src/mesa/main/uniform_query.cpp| 22 +-
  3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/compiler/glsl/ir_uniform.h b/src/compiler/glsl/ir_uniform.h
index 9841df8cde..f375d8359d 100644
--- a/src/compiler/glsl/ir_uniform.h
+++ b/src/compiler/glsl/ir_uniform.h
@@ -207,6 +207,12 @@ struct gl_uniform_storage {
  * layout qualifier as specified by ARB_bindless_texture.
  */
 bool is_bindless;
+
+   /**
+* Whether this uniform variable is declared with the bindless_sampler or
+* bindless_image and used with a texture/image handle.
+*/
+   bool is_bindless_handle;
  };
  
  #ifdef __cplusplus

diff --git a/src/compiler/glsl/shader_cache.cpp 
b/src/compiler/glsl/shader_cache.cpp
index 6811cb2f50..3fe3135bd7 100644
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -579,6 +579,7 @@ write_uniforms(struct blob *metadata, struct 
gl_shader_program *prog)
blob_write_uint32(metadata, 
prog->data->UniformStorage[i].matrix_stride);
blob_write_uint32(metadata, prog->data->UniformStorage[i].row_major);
blob_write_uint32(metadata, prog->data->UniformStorage[i].is_bindless);
+  blob_write_uint32(metadata, 
prog->data->UniformStorage[i].is_bindless_handle);
blob_write_uint32(metadata,
  
prog->data->UniformStorage[i].num_compatible_subroutines);
blob_write_uint32(metadata,
@@ -644,6 +645,7 @@ read_uniforms(struct blob_reader *metadata, struct 
gl_shader_program *prog)
uniforms[i].matrix_stride = blob_read_uint32(metadata);
uniforms[i].row_major = blob_read_uint32(metadata);
uniforms[i].is_bindless = blob_read_uint32(metadata);
+  uniforms[i].is_bindless_handle = blob_read_uint32(metadata);
uniforms[i].num_compatible_subroutines = blob_read_uint32(metadata);
uniforms[i].top_level_array_size = blob_read_uint32(metadata);
uniforms[i].top_level_array_stride = blob_read_uint32(metadata);
diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index be04e48d53..78a852377d 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -322,12 +322,20 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, 
GLint location,
  
 {

unsigned elements = uni->type->components();
-  /* XXX: Remove the sampler/image check workarounds when bindless is fully
-   * implemented.
-   */
-  const int dmul =
- (uni->type->is_64bit() && !uni->type->is_sampler() && 
!uni->type->is_image()) ? 2 : 1;
const int rmul = glsl_base_type_is_64bit(returnType) ? 2 : 1;
+  int dmul = uni->type->is_64bit() ? 2 : 1;
+
+  if ((uni->type->is_sampler() || uni->type->is_image()) &&
+  !uni->is_bindless_handle) {
+ /* The ARB_bindless_texture spec says:
+  *
+  * "When a sampler or image uniform's value is queried via any of the
+  *  GetUniform* commands, the returned value will reflect the most
+  *  recently set value through either UniformHandle* or Uniform1i*,
+  *  converted to the requested type."
+  */
+ dmul = 1;
+  }
  
/* Calculate the source base address *BEFORE* modifying elements to

 * account for the size of the user's buffer.
@@ -1007,6 +1015,7 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid 
*values,
 }
 sampler->bound = true;
 sh->Program->sh.HasBoundBindlessSampler = true;
+   uni->is_bindless_handle = false;
  } else {
 if (sh->Program->SamplerUnits[unit] != value) {
sh->Program->SamplerUnits[unit] = value;
@@ -1053,6 +1062,7 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid 
*values,
 image->unit = value;
 image->bound = true;
 sh->Program->sh.HasBoundBindlessImage = true;
+   uni->is_bindless_handle = false;
  } else {
 sh->Program->sh.ImageUnits[unit] = value;
  }
@@ -1318,6 +1328,7 @@ _mesa_uniform_handle(GLint location, GLsizei count, const 
GLvoid *values,
 >Program->sh.BindlessSamplers[unit];
  
  sampler->bound = false;

+uni->is_bindless_handle = true;
   }
  
   

Re: [Mesa-dev] [RFC PATCH 22/65] mesa: handle bindless uniforms bound to texture/image units

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/uniform_query.cpp | 122 ++--
  1 file changed, 116 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp
index cc145f29e9..be04e48d53 100644
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -993,9 +993,25 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid 
*values,
   bool changed = false;
   for (int j = 0; j < count; j++) {
  unsigned unit = uni->opaque[i].index + offset + j;
-if (sh->Program->SamplerUnits[unit] != ((unsigned *) values)[j]) {
-   sh->Program->SamplerUnits[unit] = ((unsigned *) values)[j];
-   changed = true;
+unsigned value = ((unsigned *)values)[j];
+
+if (uni->is_bindless) {
+   struct gl_bindless_sampler *sampler =
+  >Program->sh.BindlessSamplers[unit];
+
+   /* Mark this bindless sampler as bound to a texture unit.
+*/
+   if (sampler->unit != value) {
+  sampler->unit = value;
+  changed = true;
+   }
+   sampler->bound = true;


changed also needs to be set to true when sampler->bound was previously false.

As a side note, I'm a bit surprised at how eagerly this code calls 
_mesa_update_shader_textures_used, which is a pretty heavy loop. We 
should be able to eliminate that and only update before a draw call, 
since it's quite common to switch multiple textures between draws.


Also, the update_bound_bindless_{sampler,image}_flag could be done 
lazily once before the draw, since at least for samplers we have various 
update loops already anyway. But these optimizations could be done 
separately.


Cheers,
Nicolai



+   sh->Program->sh.HasBoundBindlessSampler = true;
+} else {
+   if (sh->Program->SamplerUnits[unit] != value) {
+  sh->Program->SamplerUnits[unit] = value;
+  changed = true;
+   }
  }
   }
  
@@ -1024,9 +1040,23 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,

   if (!uni->opaque[i].active)
  continue;
  
- for (int j = 0; j < count; j++)

-sh->Program->sh.ImageUnits[uni->opaque[i].index + offset + j] =
-   ((GLint *) values)[j];
+ for (int j = 0; j < count; j++) {
+unsigned unit = uni->opaque[i].index + offset + j;
+unsigned value = ((unsigned *)values)[j];
+
+if (uni->is_bindless) {
+   struct gl_bindless_image *image =
+  >Program->sh.BindlessImages[unit];
+
+   /* Mark this bindless image as bound to an image unit.
+*/
+   image->unit = value;
+   image->bound = true;
+   sh->Program->sh.HasBoundBindlessImage = true;
+} else {
+   sh->Program->sh.ImageUnits[unit] = value;
+}
+ }
}
  
ctx->NewDriverState |= ctx->DriverFlags.NewImageUnits;

@@ -1173,6 +1203,40 @@ _mesa_uniform_matrix(GLint location, GLsizei count,
 _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);
  }
  
+static void

+update_bound_bindless_sampler_flag(struct gl_program *prog)
+{
+   unsigned i;
+
+   if (likely(!prog->sh.HasBoundBindlessSampler))
+  return;
+
+   for (i = 0; i < prog->sh.NumBindlessSamplers; i++) {
+  struct gl_bindless_sampler *sampler = >sh.BindlessSamplers[i];
+
+  if (sampler->bound)
+ return;
+   }
+   prog->sh.HasBoundBindlessSampler = false;
+}
+
+static void
+update_bound_bindless_image_flag(struct gl_program *prog)
+{
+   unsigned i;
+
+   if (likely(!prog->sh.HasBoundBindlessImage))
+  return;
+
+   for (i = 0; i < prog->sh.NumBindlessImages; i++) {
+  struct gl_bindless_image *image = >sh.BindlessImages[i];
+
+  if (image->bound)
+ return;
+   }
+   prog->sh.HasBoundBindlessImage = false;
+}
+
  /**
   * Called via glUniformHandleui64*ARB() functions.
   */
@@ -1236,6 +1300,52 @@ _mesa_uniform_handle(GLint location, GLsizei count, 
const GLvoid *values,
sizeof(uni->storage[0]) * components * count * size_mul);
  
 _mesa_propagate_uniforms_to_driver_storage(uni, offset, count);

+
+   if (uni->type->is_sampler()) {
+  /* Mark this bindless sampler as not bound to a texture unit because
+   * it refers to a texture handle.
+   */
+  for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_linked_shader *const sh = shProg->_LinkedShaders[i];
+
+ /* If the shader stage doesn't use the sampler uniform, skip this. */
+ if (!uni->opaque[i].active)
+continue;
+
+ for (int j = 0; j < count; j++) {
+unsigned unit 

Re: [Mesa-dev] [PATCH v13 07/36] egl: implement eglQueryDmaBufModifiersEXT

2017-05-24 Thread Emil Velikov
On 23 May 2017 at 18:02, Daniel Stone  wrote:
> Hi Varad,
>
> On 23 May 2017 at 14:19, Varad Gautam  wrote:
>> On Mon, 2017-05-22 at 14:03 +0100, Emil Velikov wrote:
>>> On 19 May 2017 at 10:37, Daniel Stone  wrote:
>>> > +   if (external_only != NULL) {
>>> > +  for (i = 0; i < *count && i < max; i++)
>>> > + external_only[i] = EGL_TRUE;
>>>
>>> If I'm correctly understanding the spec, each bool represents if the
>>> respective format/modifier combo.
This should read: "... combo is supported".

>>> Do we want to unconditionally enable all here?
>>
>> Yes, bool external_only[i] corresponds to the <format, modifier> combination.
>>
>> I am uncertain on what using modifiers with non-external images would mean,
>> since modifiers are only specific to the dmabufs path, which bind as external
>> textures.
>>
>> Would we ever have modifiers with non-GL_TEXTURE_EXTERNAL_OES images? (should
>> external_only be false perhaps for MOD_INVALID?)
>
> We definitely will. YUV images are probably going to require binding
> as TEXTURE_EXTERNAL, but if you're importing an RGB image (which may
> even be pre-tiled), that can almost certainly just be bound to
> TEXTURE_2D. So maybe the pipe callbacks need to gain an external_only
> query flag.
>
> Sorry I missed this whilst picking up the series.
>
Ack. In that case please add a TODO, FINISHME or XXX note.

If we have an external_only[i] == false case that comes to mind, do
mention it in the comment so that one can address it with a follow-up
series.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 11/65] mesa: add infrastructure for bindless samplers/images bound to units

2017-05-24 Thread Nicolai Hähnle

On 24.05.2017 12:28, Nicolai Hähnle wrote:

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Yes, ARB_bindless_texture allows to do this. In other words, in
a situation like:

layout (bindless_sampler) uniform sampler2D tex;

The 'tex' sampler uniform can be either set with glUniform1()
(old-style bound samplers) or with glUniformHandleui() (resident
handles).

When glUniform1() is used, we have to somehow make the texture
resident "under the hood". This is done by requesting a texture
handle to the driver, making the handle resident in the current
context and overwriting the value directly in the constant buffer.

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/mtypes.h | 52 
++

  1 file changed, 52 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 70865b373d..1d168e8c68 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1988,6 +1988,42 @@ struct gl_perf_query_state
  /**
+ * A bindless sampler object.
+ */
+struct gl_bindless_sampler
+{
+   /** Texture unit (set by glUniform1()). */
+   GLubyte unit;
+
+   /** Texture Target (TEXTURE_1D/2D/3D/etc_INDEX). */
+   gl_texture_index target;
+
+   /** Whether this bindless sampler is bound to a unit. */
+   GLboolean bound;
+
+   /** Pointer to the base of the data. */
+   GLvoid *data;
+};
+
+/**
+ * A bindless image object.
+ */
+struct gl_bindless_image
+{
+   /** Image unit (set by glUniform1()). */
+   GLubyte unit;
+
+   /** Access qualifier (GL_READ_WRITE, GL_READ_ONLY, GL_WRITE_ONLY) */
+   GLenum access;
+
+   /** Whether this bindless image is bound to a unit. */
+   GLboolean bound;
+
+   /** Pointer to the base of the data. */
+   GLvoid *data;
+};
+
+/**
   * Names of the various vertex/fragment program register files, etc.
   *
   * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c)
@@ -2121,6 +2157,22 @@ struct gl_program
*/
   gl_texture_index SamplerTargets[MAX_SAMPLERS];
+ /**
+  * Number of samplers declared with the bindless_sampler layout
+  * qualifier as specified by ARB_bindless_texture.
+  */
+ GLuint NumBindlessSamplers;
+ GLboolean HasBoundBindlessSampler;
+ struct gl_bindless_sampler *BindlessSamplers;
+
+ /**
+  * Number of images declared with the bindless_image layout 
qualifier

+  * as specified by ARB_bindless_texture.
+  */
+ GLuint NumBindlessImages;
+ GLboolean HasBoundBindlessImage;
+ struct gl_bindless_image *BindlessImages;
+


This could go into the sh part of the union below, couldn't it?


Ah, that's where it already is. Never mind my confusion...



Cheers,
Nicolai



   union {
  struct {
 /**







--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 11/65] mesa: add infrastructure for bindless samplers/images bound to units

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

Yes, ARB_bindless_texture allows to do this. In other words, in
a situation like:

layout (bindless_sampler) uniform sampler2D tex;

The 'tex' sampler uniform can be either set with glUniform1()
(old-style bound samplers) or with glUniformHandleui() (resident
handles).

When glUniform1() is used, we have to somehow make the texture
resident "under the hood". This is done by requesting a texture
handle to the driver, making the handle resident in the current
context and overwriting the value directly in the constant buffer.

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/mtypes.h | 52 ++
  1 file changed, 52 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 70865b373d..1d168e8c68 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1988,6 +1988,42 @@ struct gl_perf_query_state
  
  
  /**

+ * A bindless sampler object.
+ */
+struct gl_bindless_sampler
+{
+   /** Texture unit (set by glUniform1()). */
+   GLubyte unit;
+
+   /** Texture Target (TEXTURE_1D/2D/3D/etc_INDEX). */
+   gl_texture_index target;
+
+   /** Whether this bindless sampler is bound to a unit. */
+   GLboolean bound;
+
+   /** Pointer to the base of the data. */
+   GLvoid *data;
+};
+
+/**
+ * A bindless image object.
+ */
+struct gl_bindless_image
+{
+   /** Image unit (set by glUniform1()). */
+   GLubyte unit;
+
+   /** Access qualifier (GL_READ_WRITE, GL_READ_ONLY, GL_WRITE_ONLY) */
+   GLenum access;
+
+   /** Whether this bindless image is bound to a unit. */
+   GLboolean bound;
+
+   /** Pointer to the base of the data. */
+   GLvoid *data;
+};
+
+/**
   * Names of the various vertex/fragment program register files, etc.
   *
   * NOTE: first four tokens must fit into 2 bits (see t_vb_arbprogram.c)
@@ -2121,6 +2157,22 @@ struct gl_program
*/
   gl_texture_index SamplerTargets[MAX_SAMPLERS];
  
+ /**

+  * Number of samplers declared with the bindless_sampler layout
+  * qualifier as specified by ARB_bindless_texture.
+  */
+ GLuint NumBindlessSamplers;
+ GLboolean HasBoundBindlessSampler;
+ struct gl_bindless_sampler *BindlessSamplers;
+
+ /**
+  * Number of images declared with the bindless_image layout qualifier
+  * as specified by ARB_bindless_texture.
+  */
+ GLuint NumBindlessImages;
+ GLboolean HasBoundBindlessImage;
+ struct gl_bindless_image *BindlessImages;
+


This could go into the sh part of the union below, couldn't it?

Cheers,
Nicolai



   union {
  struct {
 /**




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 06/65] mesa: refuse to update tex parameters when a handle is allocated

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

The ARB_bindless_texture spec says:

"The error INVALID_OPERATION
 is generated by TexImage*, CopyTexImage*, CompressedTexImage*,
 TexBuffer*, TexParameter*, as well as other functions defined in
 terms of these, if the texture object to be modified is referenced
 by one or more texture or image handles."

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/texparam.c | 61 
  1 file changed, 61 insertions(+)

diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index 0156bbd275..c73cf8bf83 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1026,6 +1026,9 @@ _mesa_TexParameterf(GLenum target, GLenum pname, GLfloat 
param)
 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;


You're not actually setting an error here.

Also, I think it'd be best to move the check into set_tex_parameter{i,f}.

There's some weird special handling of GL_TEXTURE_BORDER_COLOR in 
_mesa_texture_parameterI(u)iv that would have to be streamlined to go 
via set_tex_parameterf, but apart from that it should work fine.


Cheers,
Nicolai



+
 _mesa_texture_parameterf(ctx, texObj, pname, param, false);
  }
  
@@ -1039,6 +1042,9 @@ _mesa_TexParameterfv(GLenum target, GLenum pname, const GLfloat *params)

 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;
+
 _mesa_texture_parameterfv(ctx, texObj, pname, params, false);
  }
  
@@ -1052,6 +1058,9 @@ _mesa_TexParameteri(GLenum target, GLenum pname, GLint param)

 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;
+
 _mesa_texture_parameteri(ctx, texObj, pname, param, false);
  }
  
@@ -1065,6 +1074,9 @@ _mesa_TexParameteriv(GLenum target, GLenum pname, const GLint *params)

 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;
+
 _mesa_texture_parameteriv(ctx, texObj, pname, params, false);
  }
  
@@ -1083,6 +1095,9 @@ _mesa_TexParameterIiv(GLenum target, GLenum pname, const GLint *params)

 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;
+
 _mesa_texture_parameterIiv(ctx, texObj, pname, params, false);
  }
  
@@ -1101,6 +1116,9 @@ _mesa_TexParameterIuiv(GLenum target, GLenum pname, const GLuint *params)

 if (!texObj)
return;
  
+   if (texObj->HandleAllocated)

+  return;
+
 _mesa_texture_parameterIuiv(ctx, texObj, pname, params, false);
  }
  
@@ -1118,6 +1136,19 @@ _mesa_TextureParameterfv(GLuint texture, GLenum pname, const GLfloat *params)

return;
 }
  
+   if (texObj->HandleAllocated) {

+  /* The ARB_bindless_texture spec says:
+   *
+   * "The error INVALID_OPERATION is generated by TexImage*, CopyTexImage*,
+   *  CompressedTexImage*, TexBuffer*, TexParameter*, as well as other
+   *  functions defined in terms of these, if the texture object to be
+   *  modified is referenced by one or more texture or image handles."
+   */
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTextureParameterfv(immutable texture)");
+  return;
+   }
+
 _mesa_texture_parameterfv(ctx, texObj, pname, params, true);
  }
  
@@ -1134,6 +1165,12 @@ _mesa_TextureParameterf(GLuint texture, GLenum pname, GLfloat param)

return;
 }
  
+   if (texObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTextureParameterf(immutable texture)");
+  return;
+   }
+
 _mesa_texture_parameterf(ctx, texObj, pname, param, true);
  }
  
@@ -1150,6 +1187,12 @@ _mesa_TextureParameteri(GLuint texture, GLenum pname, GLint param)

return;
 }
  
+   if (texObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTextureParameteri(immutable texture)");
+  return;
+   }
+
 _mesa_texture_parameteri(ctx, texObj, pname, param, true);
  }
  
@@ -1167,6 +1210,12 @@ _mesa_TextureParameteriv(GLuint texture, GLenum pname,

return;
 }
  
+   if (texObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTextureParameteriv(immutable texture)");
+  return;
+   }
+
 _mesa_texture_parameteriv(ctx, texObj, pname, params, true);
  }
  
@@ -1185,6 +1234,12 @@ _mesa_TextureParameterIiv(GLuint texture, GLenum pname, const GLint *params)

return;
 }
  
+   if (texObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glTextureParameterIiv(immutable texture)");
+  return;
+   }
+
 _mesa_texture_parameterIiv(ctx, texObj, pname, params, true);
  }
  
@@ -1202,6 +1257,12 @@ _mesa_TextureParameterIuiv(GLuint texture, GLenum pname, const GLuint *params)

return;
 }
  
+   if (texObj->HandleAllocated) {

+  _mesa_error(ctx, 

Re: [Mesa-dev] [RFC PATCH 05/65] mesa: refuse to update sampler parameters when a handle is allocated

2017-05-24 Thread Nicolai Hähnle

On 19.05.2017 18:52, Samuel Pitoiset wrote:

The ARB_bindless_texture spec says:

"The error INVALID_OPERATION is generated by SamplerParameter* if
 <sampler> identifies a sampler object referenced by one or more
 texture handles."

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/main/samplerobj.c | 42 ++
  1 file changed, 42 insertions(+)

diff --git a/src/mesa/main/samplerobj.c b/src/mesa/main/samplerobj.c
index ee15c68b4f..cf4bcfce7c 100644
--- a/src/mesa/main/samplerobj.c
+++ b/src/mesa/main/samplerobj.c
@@ -801,6 +801,18 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum pname, 
GLint param)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  /* The ARB_bindless_texture spec says:
+   *
+   * "The error INVALID_OPERATION is generated by SamplerParameter* if
+   *  <sampler> identifies a sampler object referenced by one or more
+   *  texture handles."
+   */
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameteri(immutable sampler)");
+  return;
+   }


Maybe these could go into sampler_parameter_error_check?

Cheers,
Nicolai




+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, param);
@@ -884,6 +896,12 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum pname, 
GLfloat param)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterf(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, (GLint) param);
@@ -966,6 +984,12 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum pname, 
const GLint *params)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameteriv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);
@@ -1056,6 +1080,12 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum pname, 
const GLfloat *params)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterfv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, (GLint) params[0]);
@@ -1139,6 +1169,12 @@ _mesa_SamplerParameterIiv(GLuint sampler, GLenum pname, 
const GLint *params)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterIiv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);
@@ -1223,6 +1259,12 @@ _mesa_SamplerParameterIuiv(GLuint sampler, GLenum pname, 
const GLuint *params)
 if (!sampObj)
return;
  
+   if (sampObj->HandleAllocated) {

+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glSamplerParameterIuiv(immutable sampler)");
+  return;
+   }
+
 switch (pname) {
 case GL_TEXTURE_WRAP_S:
res = set_sampler_wrap_s(ctx, sampObj, params[0]);




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 02/65] mesa: implement ARB_bindless_texture

2017-05-24 Thread Nicolai Hähnle

[snip]

+static GLuint64
+get_texture_handle(struct gl_context *ctx, struct gl_texture_object 
*texObj,

+   struct gl_sampler_object *sampObj)
+{
+   struct gl_texture_handle_object *handleObj;
+   struct hash_entry *entry;
+   GLuint64 handle;
+
+   handleObj = CALLOC_STRUCT(gl_texture_handle_object);
+   if (!handleObj) {
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexture*HandleARB()");
+  return 0;
+   }
+
+   handleObj->texObj = texObj;
+   if (>Sampler != sampObj)
+  handleObj->sampObj = sampObj;
+
+   /* The ARB_bindless_texture spec says:
+*
+* "The handle for each texture or texture/sampler pair is 
unique; the same
+*  handle will be returned if GetTextureHandleARB is called 
multiple times
+*  for the same texture or if GetTextureSamplerHandleARB is 
called multiple

+*  times for the same texture/sampler pair."
+*/
+   mtx_lock(>Shared->HandlesMutex);
+   entry = _mesa_hash_table_search(texObj->SamplerHandles, handleObj);
+   if (entry) {
+  mtx_unlock(>Shared->HandlesMutex);
+  free(handleObj);
+  return (uint64_t)entry->data;
+   }
+
+   /* Ask the driver for a new handle and store it. */
+   handle = ctx->Driver.NewTextureHandle(ctx, texObj, sampObj);
+   if (!handle) {
+  mtx_unlock(>Shared->HandlesMutex);
+  free(handleObj);
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexture*HandleARB()");
+  return 0;
+   }
+
+   _mesa_hash_table_insert(texObj->SamplerHandles, handleObj, (void 
*)handle);

+
+   if (>Sampler != sampObj) {
+  _mesa_hash_table_insert(sampObj->Handles, handleObj,
+  (void *)handle);
+   }
+
+   /* When referenced by one or more handles, texture objects are 
immutable. */

+   texObj->HandleAllocated = true;
+   sampObj->HandleAllocated = true;
+   if (texObj->Target == GL_TEXTURE_BUFFER)
+  texObj->BufferObject->HandleAllocated = true;
+
+   /* Store the handle in the shared state for all contexts. */
+   _mesa_hash_table_insert(ctx->Shared->TextureHandles,
+   (void *)handle, handleObj);


This won't work on 32-bit systems (same for image handles).


Because the handle is 64-bit? Mmh...


Right.


[snip]

+void
+_mesa_make_texture_handles_non_resident(struct gl_context *ctx,
+struct gl_texture_object 
*texObj)

+{
+   struct hash_entry *entry;
+   GLuint64 handle;
+
+   mtx_lock(>Shared->HandlesMutex);
+
+   hash_table_foreach(texObj->SamplerHandles, entry) {
+  struct gl_texture_handle_object *handleObj =
+ (struct gl_texture_handle_object *)entry->key;
+
+  handle = (uint64_t)entry->data;
+  if (is_texture_handle_resident(ctx, handle))
+ make_texture_handle_resident(ctx, handleObj, handle, false);
+   }
+
+   hash_table_foreach(texObj->ImageHandles, entry) {
+  struct gl_image_handle_object *handleObj =
+ (struct gl_image_handle_object *)entry->key;
+
+  handle = (uint64_t)entry->data;
+  if (is_image_handle_resident(ctx, handle))
+ make_image_handle_resident(ctx, handleObj, handle, 
GL_READ_ONLY, false);

+   }


So... this also needs to loop through all other contexts and make the 
handle non-resident in them, right? Otherwise you might end up with 
dangling pointers (or at least dangling handles).


No. Resident handles are per-context. Though, I'm not very happy myself 
with the way I managed the handles. I'm open to any better suggestions.


Right, resident handles are per-context, but the handles of a texture 
might be resident in multiple contexts simultaneously. What happens if 
they are, and then the texture object is deleted?


It seems to me that right now, you end up with dangling resident handles 
in the "other" contexts. At the very least, this can cause incorrect 
errors when the handle is re-used by the driver for another texture, so 
*something* needs to loop over all contexts, it seems...


Cheers,
Nicolai
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 01/15] st/dri: refactor multi-planar YUV import path

2017-05-24 Thread Varad Gautam
Hi Lucas,

On Tue, May 23, 2017 at 9:10 PM, Lucas Stach  wrote:
> Hi Varad,
>
> Am Dienstag, den 23.05.2017, 14:40 +0530 schrieb Varad Gautam:
>> Hi Lucas,
>>
>> On Mon, May 22, 2017 at 11:16 PM, Lucas Stach  wrote:
>> > Am Mittwoch, den 10.05.2017, 23:15 +0530 schrieb Varad Gautam:
>> >> From: Varad Gautam 
>> >>
>> >> we currently ignore the plane count when converting from
>> >> __DRI_IMAGE_FORMAT* tokens to __DRI_IMAGE_FOURCC* for multiplanar
>> >> images, and only return the first plane's simplified fourcc.
>> >>
>> >> this adds a fourcc to __DRI_IMAGE_FORMAT_* mapping to dri, allowing
>> >> us to return the correct fourcc format from DRIimage queries, and
>> >> simplifies the multiplane import logic.
>> >>
>> >> Signed-off-by: Varad Gautam 
>> >> ---
>> >>  src/gallium/state_trackers/dri/dri2.c   | 288 
>> >> +++-
>> >>  src/gallium/state_trackers/dri/dri_screen.h |  13 ++
>> >>  2 files changed, 168 insertions(+), 133 deletions(-)
>> >>
>> >> diff --git a/src/gallium/state_trackers/dri/dri2.c 
>> >> b/src/gallium/state_trackers/dri/dri2.c
>> >> index ed6004f..0c5783c 100644
>> >> --- a/src/gallium/state_trackers/dri/dri2.c
>> >> +++ b/src/gallium/state_trackers/dri/dri2.c
>> >> @@ -52,93 +52,133 @@
>> >>  #include "dri_query_renderer.h"
>> >>  #include "dri2_buffer.h"
>> >>
>> >> -static int convert_fourcc(int format, int *dri_components_p)
>> >> +/* format list taken from intel_screen.c */
>> >> +static struct image_format image_formats[] = {
>> >> +   { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_ABGR, __DRI_IMAGE_COMPONENTS_RGBA, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR, 4 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_SARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_XRGB, __DRI_IMAGE_COMPONENTS_RGB, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB, 4 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_XBGR, __DRI_IMAGE_COMPONENTS_RGB, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR, 4 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_ARGB1555, __DRI_IMAGE_COMPONENTS_RGBA, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB1555, 2 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_R8, __DRI_IMAGE_COMPONENTS_R, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_R16, __DRI_IMAGE_COMPONENTS_R, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R16, 1 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_GR88, __DRI_IMAGE_COMPONENTS_RG, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR88, 2 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_GR1616, __DRI_IMAGE_COMPONENTS_RG, 1,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_GR1616, 2 }, } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YUV410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YUV411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YUV420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 1, 1, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YUV422, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 1, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YUV444, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YVU410, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 2, 2, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YVU411, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 1, 2, 0, __DRI_IMAGE_FORMAT_R8, 1 } } },
>> >> +
>> >> +   { __DRI_IMAGE_FOURCC_YVU420, __DRI_IMAGE_COMPONENTS_Y_U_V, 3,
>> >> + { { 0, 0, 0, __DRI_IMAGE_FORMAT_R8, 1 },
>> >> +   { 2, 1, 1, 

Re: [Mesa-dev] [RFC PATCH 00/65] ARB_bindless_texture for RadeonSI

2017-05-24 Thread Samuel Pitoiset



On 05/24/2017 12:02 AM, Marek Olšák wrote:

On Tue, May 23, 2017 at 12:33 AM, Dieter Nützel  wrote:

Hello Samuel,

running this on radeonsi/RX580.
Never saw better numbers for Wine/TS2017 (TrainSimulator 2017) before.
But I haven't figured out whether it (can) use bindless (with Wine/staging). Do we
have a knob to toggle bindless on and off?
A key from Feral for DiRT Rally or the like would be very nice (my son would
go crazy). - He (nine years old) loves (car) simulators. ~50 € is a lot for
'testing'...

So got my

Tested-by: Dieter Nützel 

Here come some output from 'ogl-samples'

/opt/ogl-samples> ./build/release/gl-420-primitive-bindless-nv
ATTENTION: default value of option mesa_glthread overridden by environment.
OpenGL Version Needed 4.2 ( 4.5 Found )
Failed to find Extension: "GL_NV_shader_buffer_load"
Running Test
Test Ended
/opt/ogl-samples> ./build/release/gl-420-texture-bindless-nv
ATTENTION: default value of option mesa_glthread overridden by environment.
OpenGL Version Needed 4.2 ( 4.5 Found )
Failed to find Extension: "GL_NV_bindless_texture"
Running Test
Test Ended

GL_ARB_bindless_texture 'faking' needed?


The test doesn't use ARB_bindless_texture. It only uses the NV
variant, which we don't support.


Yeah, NV_bindless_texture is a different one.



Marek


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 46/65] st/mesa: disable per-context seamless cubemap when using texture handles

2017-05-24 Thread Samuel Pitoiset



On 05/23/2017 10:21 PM, Marek Olšák wrote:

What if a texture is used both as bindless and non-bindless? Wouldn't
HandleAllocated be true all the time? Instead perhaps
st_convert_sampler needs to know whether the current use is bindless,
not whether there is an existing handle somewhere in the context.


Yes, handles are only removed when the underlying texture is deleted. 
You are probably right.




Marek

On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
 wrote:

The ARB_bindless_texture spec says:

"If ARB_seamless_cubemap (or OpenGL 4.0, which includes it) is
 supported, the per-context seamless cubemap enable is ignored
 and treated as disabled when using texture handles."

"If AMD_seamless_cubemap_per_texture is supported, the seamless
 cube map texture parameter of the underlying texture does apply
 when texture handles are used."

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_atom_sampler.c | 18 --
  1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c 
b/src/mesa/state_tracker/st_atom_sampler.c
index c6d992fbb0..116c5380cf 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -226,8 +226,22 @@ st_convert_sampler(const struct st_context *st,
sampler->compare_func = st_compare_func_to_pipe(msamp->CompareFunc);
 }

-   sampler->seamless_cube_map =
-  ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless;
+   if (msamp->HandleAllocated) {
+  /* The ARB_bindless_texture spec says:
+   *
+   * "If ARB_seamless_cubemap (or OpenGL 4.0, which includes it) is
+   *  supported, the per-context seamless cubemap enable is ignored and
+   *  treated as disabled when using texture handles."
+   *
+   * "If AMD_seamless_cubemap_per_texture is supported, the seamless cube
+   *  map texture parameter of the underlying texture does apply when
+   *  texture handles are used."
+   */
+  sampler->seamless_cube_map = msamp->CubeMapSeamless;
+   } else {
+  sampler->seamless_cube_map =
+ ctx->Texture.CubeMapSeamless || msamp->CubeMapSeamless;
+   }
  }

  /**
--
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 45/65] st/mesa: do not release sampler views for resident textures

2017-05-24 Thread Samuel Pitoiset



On 05/23/2017 10:11 PM, Marek Olšák wrote:

I don't think this change is necessary. Releasing sampler views
doesn't release textures.


This issue has been reported by Feral directly. It happens in the 
following scenario:


1) create a texture
2) get a texture handle
3) make it resident
4) bind the texture to a unit
5) draw

The sampler views are then released and this ended up with a 
use-after-free at decompression time for the resident texture handle.




Marek

On Fri, May 19, 2017 at 6:52 PM, Samuel Pitoiset
 wrote:

When a texture is referenced by one or more texture handles,
it might be resident and we shouldn't release the sampler views.

Signed-off-by: Samuel Pitoiset 
---
  src/mesa/state_tracker/st_sampler_view.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/mesa/state_tracker/st_sampler_view.c 
b/src/mesa/state_tracker/st_sampler_view.c
index c78a987486..690b50087c 100644
--- a/src/mesa/state_tracker/st_sampler_view.c
+++ b/src/mesa/state_tracker/st_sampler_view.c
@@ -115,6 +115,12 @@ st_texture_release_all_sampler_views(struct st_context *st,
  {
 GLuint i;

+   if (stObj->base.HandleAllocated) {
+  /* Do not release sampler views when a texture is referenced by one or
+   * more texture handles because the texture might be resident. */
+  return;
+   }
+
 /* XXX This should use sampler_views[i]->pipe, not st->pipe */
 for (i = 0; i < stObj->num_sampler_views; ++i)
pipe_sampler_view_release(st->pipe, >sampler_views[i]);
--
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: Add --with-wayland-scanner-path

2017-05-24 Thread Jussi Kukkonen
On 23 May 2017 at 11:13, Jussi Kukkonen  wrote:
>
> Modify wayland-scanner lookup: Use the path given by pkg-config
> but offer an option to override the path with
> "--with-wayland-scanner-path=PATH". The latter is useful for
> cross-compile situations.
>
> AC_PATH_PROG is no longer used (if the scanner is installed it should
> get found by pkg-config). AC_SUBST is added so the output variable is
> created when only the configure option is used.
> ---
>
> My goal is to standardize wayland-scanner usage in a way that does not
> require patching when cross-compiling in Yocto (the detailed issue is
> that in Yocto pkg-config will return a "wayland_scanner" variable but
> that will contain a _target path_ when we would like to use a native
> sysroot path instead).
>
> I've sent a similar patch to weston and intend to fix other projects
> if these two patches are well received.


wayland-devel is clearly not 100% convinced about this approach so I
suggest dropping this patch as well: Feedback is still welcome.

Cheers,
 Jussi
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 86195] Lightswork video editor segfaults

2017-05-24 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=86195

Samuel Pitoiset  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #6 from Samuel Pitoiset  ---
Quite old bug (almost 3 years ago). The AMD open source stack has largely
evolved since 2014. Closing.

-- 
You are receiving this mail because:
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 99467] [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)

2017-05-24 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=99467

--- Comment #24 from Marcin  ---
FYI on archlinux if you apply the following two patches to mesa 17.1 and
recompile with llvm 4.0 from official repo the game works perfectly, no green
screen or black textures:

https://github.com/airlied/mesa/commit/cfe39ca4a4d922a063ff120f5c0db53ff347349c
https://github.com/airlied/mesa/commit/a163083b481d924a476cfa2fdf8e3b8f398fb62a

Much better than messing around with mesa-git and llvm-svn. Now if only those
patches could be merged ... :)

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] - Rewrite mesa website in Sphinx

2017-05-24 Thread Emil Velikov
On 23 May 2017 at 11:50, Jean Hertel  wrote:
> Hello,
>
> Any update on this?
> Is someone validating the github repository?
>
I was looking at it yesterday, and should have most of it done some time today.

The TOC "...release notes" solution (as illustrated by the 17.0.4
ones) seems fine, so until we come up with a better idea I'll just go with
it.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] gallium: Add renderonly-based support for pl111+vc4.

2017-05-24 Thread Christian Gmeiner
2017-05-11 1:06 GMT+02:00 Eric Anholt :
> This follows the model of imx (display) and etnaviv (render): pl111 is a
> display-only device, so when asked to do GL for it, we see if we have a
> vc4 renderer, make the vc4 screen, and have vc4 call back to pl111 to do
> scanout allocations.
>
> The difference from etnaviv is that we share the same BO between vc4 and
> pl111, rather than having a vc4 bo and a pl111 bo and copies between the
> two.  The only mismatch between their requirements is that vc4 requires
> 4-pixel (at 32bpp) stride alignment, while pl111 requires that stride
> match width.  The kernel will reject any modesets to an incorrect stride,
> so the 3D driver doesn't need to worry about that.


With Emil's comment (regarding  drmOpen* API) taken care of

Reviewed-by: Christian Gmeiner 

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] etnaviv: Only use renderonly_get_handle for GEM handles.

2017-05-24 Thread Christian Gmeiner
2017-05-11 1:06 GMT+02:00 Eric Anholt :
> Note that for requests for Prime FDs or flink names, we return handles to
> the etnaviv BO, not the scanout BO.  This is at least better than previous
> behavior of returning GEM handles for a request for an FD or flink name.
>
> And add an assert that renderonly_get_handle is only used for getting the
> GEM handle.
>
> Signed-off-by: Eric Anholt 

Reviewed-by: Christian Gmeiner 

greets
--
Christian Gmeiner, MSc

https://www.youtube.com/user/AloryOFFICIAL
https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] aubinator: fix double free

2017-05-24 Thread Iago Toral
All 3 patches are:

Reviewed-by: Iago Toral Quiroga 

On Wed, 2017-05-24 at 02:19 +0100, Lionel Landwerlin wrote:
> Free previously allocated filename outside the for loop.
> 
> CID: 1405014
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/tools/aubinator_error_decode.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/intel/tools/aubinator_error_decode.c
> b/src/intel/tools/aubinator_error_decode.c
> index 37c66ec0f68..506d39012b8 100644
> --- a/src/intel/tools/aubinator_error_decode.c
> +++ b/src/intel/tools/aubinator_error_decode.c
> @@ -874,8 +874,8 @@ main(int argc, char *argv[])
>    file = fopen(filename, "r");
>    if (!file) {
>   int minor;
> + free(filename);
>   for (minor = 0; minor < 64; minor++) {
> -free(filename);
>  ret = asprintf(, "%s/%d/i915_error_state",
> path, minor);
>  assert(ret > 0);
>  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] intel/compiler: Make use of IS_DWORD macro

2017-05-24 Thread Alejandro Piñeiro
On 23/05/17 22:51, Anuj Phogat wrote:
> This patch makes non-functional changes.
>
> Signed-off-by: Anuj Phogat 
> ---
>  src/intel/compiler/brw_eu_defines.h  |  3 +++
>  src/intel/compiler/brw_eu_emit.c | 24 
> +++-
>  src/intel/compiler/brw_fs.cpp|  3 +--
>  src/intel/compiler/brw_fs_cmod_propagation.cpp   |  3 +--
>  src/intel/compiler/brw_fs_copy_propagation.cpp   |  3 +--
>  src/intel/compiler/brw_fs_generator.cpp  |  6 ++
>  src/intel/compiler/brw_fs_nir.cpp|  2 +-
>  src/intel/compiler/brw_vec4.cpp  |  5 -
>  src/intel/compiler/brw_vec4_cmod_propagation.cpp |  3 +--
>  src/intel/compiler/brw_vec4_generator.cpp|  2 +-
>  10 files changed, 18 insertions(+), 36 deletions(-)
>
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index ccc838d..9e969f2 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -48,6 +48,9 @@
>  
>  #define GET_BITS(data, high, low) ((data & INTEL_MASK((high), (low))) >> 
> (low))
>  #define GET_FIELD(word, field) (((word)  & field ## _MASK) >> field ## 
> _SHIFT)
> +#define IS_DWORD(reg) \
> +   (reg.type == BRW_REGISTER_TYPE_UD || \
> +reg.type == BRW_REGISTER_TYPE_D)

Technically F  also has a DWORD size. Just saying because ...

>  
>  #define _3DPRIM_POINTLIST 0x01
>  #define _3DPRIM_LINELIST  0x02
> diff --git a/src/intel/compiler/brw_eu_emit.c 
> b/src/intel/compiler/brw_eu_emit.c
> index 231d6fd..7434610 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -959,8 +959,7 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct 
> brw_reg dest,
> assert(dest.address_mode == BRW_ADDRESS_DIRECT);
> assert(dest.type == BRW_REGISTER_TYPE_F  ||
>dest.type == BRW_REGISTER_TYPE_DF ||
> -  dest.type == BRW_REGISTER_TYPE_D  ||
> -  dest.type == BRW_REGISTER_TYPE_UD);
> +  IS_DWORD(dest));

... this kind of check, TYPE_F || IS_DWORD, sounds somewhat strange
(at least to me). And that happens several times.

As I assume that your intention is to check for integer dword types, how
about IS_INTEGER_DWORD?

> if (devinfo->gen == 6) {
>brw_inst_set_3src_dst_reg_file(devinfo, inst,
>   dest.file == BRW_MESSAGE_REGISTER_FILE);
> @@ -1158,9 +1157,7 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, 
> struct brw_reg src0)
> if (devinfo->gen == 7 && !devinfo->is_haswell &&
> brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
> dest.type == BRW_REGISTER_TYPE_DF &&
> -   (src0.type == BRW_REGISTER_TYPE_F ||
> -src0.type == BRW_REGISTER_TYPE_D ||
> -src0.type == BRW_REGISTER_TYPE_UD) &&
> +   (src0.type == BRW_REGISTER_TYPE_F || IS_DWORD(src0)) &&
> !has_scalar_region(src0)) {
>assert(src0.vstride == BRW_VERTICAL_STRIDE_4 &&
>   src0.width == BRW_WIDTH_4 &&
> @@ -1182,15 +1179,13 @@ brw_ADD(struct brw_codegen *p, struct brw_reg dest,
> if (src0.type == BRW_REGISTER_TYPE_F ||
> (src0.file == BRW_IMMEDIATE_VALUE &&
>   src0.type == BRW_REGISTER_TYPE_VF)) {
> -  assert(src1.type != BRW_REGISTER_TYPE_UD);
> -  assert(src1.type != BRW_REGISTER_TYPE_D);
> +  assert(!IS_DWORD(src1));
> }
>  
> if (src1.type == BRW_REGISTER_TYPE_F ||
> (src1.file == BRW_IMMEDIATE_VALUE &&
>   src1.type == BRW_REGISTER_TYPE_VF)) {
> -  assert(src0.type != BRW_REGISTER_TYPE_UD);
> -  assert(src0.type != BRW_REGISTER_TYPE_D);
> +  assert(!IS_DWORD(src0));
> }
>  
> return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
> @@ -1222,25 +1217,20 @@ brw_MUL(struct brw_codegen *p, struct brw_reg dest,
>  struct brw_reg src0, struct brw_reg src1)
>  {
> /* 6.32.38: mul */
> -   if (src0.type == BRW_REGISTER_TYPE_D ||
> -   src0.type == BRW_REGISTER_TYPE_UD ||
> -   src1.type == BRW_REGISTER_TYPE_D ||
> -   src1.type == BRW_REGISTER_TYPE_UD) {
> +   if (IS_DWORD(src0) || IS_DWORD(src1)) {
>assert(dest.type != BRW_REGISTER_TYPE_F);
> }
>  
> if (src0.type == BRW_REGISTER_TYPE_F ||
> (src0.file == BRW_IMMEDIATE_VALUE &&
>   src0.type == BRW_REGISTER_TYPE_VF)) {
> -  assert(src1.type != BRW_REGISTER_TYPE_UD);
> -  assert(src1.type != BRW_REGISTER_TYPE_D);
> +  assert(!IS_DWORD(src1));
> }
>  
> if (src1.type == BRW_REGISTER_TYPE_F ||
> (src1.file == BRW_IMMEDIATE_VALUE &&
>   src1.type == BRW_REGISTER_TYPE_VF)) {
> -  assert(src0.type != BRW_REGISTER_TYPE_UD);
> -  assert(src0.type != BRW_REGISTER_TYPE_D);
> +  assert(!IS_DWORD(src0));
> }
>  
> assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 329c15b..aad8d8d