[Mesa-dev] [PATCH] tgsi: Recognize RET in main for tgsi_transform

2018-02-12 Thread sroland
From: Roland Scheidegger 

Shaders coming from dx10 state trackers have a RET before the END.
And the epilog needs to be placed before the RET (otherwise it will
get ignored).
Hence figure out if a RET is in main, in this case we'll place
the epilog there rather than before the END.
(At a closer look, there actually seem to be problems with control
flow in general with output redirection, that would need another
look. It's enough however to fix draw's aa line emulation in some
internal bug - lines tend to be drawn with trivial shaders, moving
either a constant color or a vertex color directly to the output).
---
 src/gallium/auxiliary/tgsi/tgsi_transform.c | 50 ++---
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c 
b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ffdad13..94d872c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -110,6 +110,8 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
 {
uint procType;
boolean first_instruction = TRUE;
+   boolean epilog_emitted = FALSE;
+   int stack_size = 0;
 
/* input shader */
struct tgsi_parse_context parse;
@@ -166,22 +168,60 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
  {
 struct tgsi_full_instruction *fullinst
= &parse.FullToken.FullInstruction;
+unsigned opcode = fullinst->Instruction.Opcode;
 
 if (first_instruction && ctx->prolog) {
ctx->prolog(ctx);
 }
 
-/* XXX Note: we may also want to look for a main/top-level
- * TGSI_OPCODE_RET instruction in the future.
+/*
+ * XXX Note: we handle the case of ret in main.
+ * However, the output redirections done by transform
+ * have their limits with control flow and will generally
+ * not work correctly. e.g.
+ * if (cond) {
+ *oColor = x;
+ *ret;
+ * }
+ * oColor = y;
+ * end;
+ * If the color output is redirected to a temp and modified
+ * by a transform, this will not work (the oColor assignment
+ * in the conditional will never make it to the actual output).
  */
-if (fullinst->Instruction.Opcode == TGSI_OPCODE_END
-&& ctx->epilog) {
+if ((opcode == TGSI_OPCODE_END ||
+ (opcode == TGSI_OPCODE_RET && stack_size == 0))
+&& ctx->epilog && !epilog_emitted) {
/* Emit caller's epilog */
ctx->epilog(ctx);
-   /* Emit END */
+   epilog_emitted = TRUE;
+   /* Emit END (or RET) */
+   if (opcode == TGSI_OPCODE_END) {
+  assert(stack_size == 0);
+   }
ctx->emit_instruction(ctx, fullinst);
 }
 else {
+   switch (opcode) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_SWITCH:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+  stack_size++;
+  break;
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDSWITCH:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+  assert(stack_size > 0);
+  stack_size--;
+  break;
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_RET:
+   default:
+  break;
+   }
if (ctx->transform_instruction)
   ctx->transform_instruction(ctx, fullinst);
else
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: add glsl version query (v3)

2018-02-12 Thread Brian Paul

One more thing: version checks...


On 02/08/2018 02:26 AM, Vadym Shovkoplias wrote:

Add support for GL_NUM_SHADING_LANGUAGE_VERSIONS
and glGetStringi for GL_SHADING_LANGUAGE_VERSION

v2:
   - Combine similar functionality into
 _mesa_get_shading_language_version() function.
   - Change GLSL version return mechanism.
v3:
   - Add return of empty string for GLSL ver 1.10.
   - Move _mesa_get_shading_language_version() function
 to src/mesa/main/version.c.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104915
Signed-off-by: Andriy Khulap 
Signed-off-by: Vadym Shovkoplias 
---
  src/mapi/glapi/gen/GL4x.xml  |  1 +
  src/mesa/main/get.c  |  4 +++
  src/mesa/main/get_hash_params.py |  3 ++
  src/mesa/main/getstring.c| 12 
  src/mesa/main/version.c  | 64 
  src/mesa/main/version.h  |  5 
  6 files changed, 89 insertions(+)

diff --git a/src/mapi/glapi/gen/GL4x.xml b/src/mapi/glapi/gen/GL4x.xml
index cd2e3b831e..2116286b35 100644
--- a/src/mapi/glapi/gen/GL4x.xml
+++ b/src/mapi/glapi/gen/GL4x.xml
@@ -42,6 +42,7 @@
  
  


+  



diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 516e8d174c..9a677a18d9 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1084,6 +1084,10 @@ find_custom_value(struct gl_context *ctx, const struct 
value_desc *d, union valu
   v->value_int = 0;
}
break;
+   /* GL 4.3 */
+   case GL_NUM_SHADING_LANGUAGE_VERSIONS:


I think we need to test for GL 4.3 here:

   if (!_mesa_is_desktop_gl(ctx) || ctx->Version < 43)
  goto invalid_enum;



+  v->value_int = _mesa_get_shading_language_version(ctx, -1, NULL);
+  break;
 /* GL_ARB_draw_indirect */
 case GL_DRAW_INDIRECT_BUFFER_BINDING:
v->value_int = ctx->DrawIndirectBuffer->Name;
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index df082af207..be716f6f6e 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -543,6 +543,9 @@ descriptor=[
  
# GL_ARB_texture_cube_map_array

[ "TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB", "LOC_CUSTOM, TYPE_INT, 
TEXTURE_CUBE_ARRAY_INDEX, extra_ARB_texture_cube_map_array_OES_texture_cube_map_array" ],
+
+  # GL_NUM_SHADING_LANGUAGE_VERSIONS
+  [ "NUM_SHADING_LANGUAGE_VERSIONS", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ],


Instead of NO_EXTRA, I think we need EXTRA_VERSION_43.



  ]},
  
  # Enums in OpenGL Core profile and ES 3.0

diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 931f6a476c..b2109ac3d0 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -32,6 +32,7 @@
  #include "extensions.h"
  #include "mtypes.h"
  #include "macros.h"
+#include "version.h"
  
  /**

   * Return the string for a glGetString(GL_SHADING_LANGUAGE_VERSION) query.
@@ -186,6 +187,17 @@ _mesa_GetStringi(GLenum name, GLuint index)
   return (const GLubyte *) 0;
}
return _mesa_get_enabled_extension(ctx, index);
+   case GL_SHADING_LANGUAGE_VERSION:


Again, I think we need to test for GL 4.3 here and return 
GL_INVALID_ENUM if the version is older.


-Brian



+  {
+ char *version;
+ int num = _mesa_get_shading_language_version(ctx, index, &version);
+ if (index >= num) {
+_mesa_error(ctx, GL_INVALID_VALUE,
+   "glGetStringi(GL_SHADING_LANGUAGE_VERSION, index=%d)", index);
+return (const GLubyte *) 0;
+ }
+ return (const GLubyte *) version;
+  }
 default:
_mesa_error(ctx, GL_INVALID_ENUM, "glGetStringi");
return (const GLubyte *) 0;
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 1fce8fe7ca..d26baab820 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -665,3 +665,67 @@ _mesa_get_device_uuid(struct gl_context *ctx, GLint *uuid)
  {
 ctx->Driver.GetDeviceUuid(ctx, (char*) uuid);
  }
+
+/**
+ * Get the i-th GLSL version string.  If index=0, return the most recent
+ * supported version.
+ * \param ctx context to query
+ * \param index  which version string to return, or -1 if none
+ * \param versionOut returns the version string
+ * \return total number of shading language versions.
+ */
+int
+_mesa_get_shading_language_version(const struct gl_context *ctx,
+   int index,
+   char **versionOut)
+{
+   int n = 0;
+
+#define GLSL_VERSION(S) \
+   if (n++ == index) \
+  *versionOut = S
+
+   /* GLSL core */
+   if (ctx->Const.GLSLVersion >= 460)
+  GLSL_VERSION("460");
+   if (ctx->Const.GLSLVersion >= 450)
+  GLSL_VERSION("450");
+   if (ctx->Const.GLSLVersion >= 440)
+  GLSL_VERSION("440");
+   if (ctx->Const.GLSLVersion >= 430)
+  GLSL_VERSION("430");
+   if (ctx->Const.GLSLVersion >= 420)
+  GLSL_VERSION("420");
+   if 

Re: [Mesa-dev] [PATCH] tgsi: Recognize RET in main for tgsi_transform

2018-02-12 Thread Brian Paul

LGTM.

Reviewed-by: Brian Paul 


On 02/12/2018 09:10 PM, srol...@vmware.com wrote:

From: Roland Scheidegger 

Shaders coming from dx10 state trackers have a RET before the END.
And the epilog needs to be placed before the RET (otherwise it will
get ignored).
Hence figure out if a RET is in main, in this case we'll place
the epilog there rather than before the END.
(At a closer look, there actually seem to be problems with control
flow in general with output redirection, that would need another
look. It's enough however to fix draw's aa line emulation in some
internal bug - lines tend to be drawn with trivial shaders, moving
either a constant color or a vertex color directly to the output).
---
  src/gallium/auxiliary/tgsi/tgsi_transform.c | 50 ++---
  1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c 
b/src/gallium/auxiliary/tgsi/tgsi_transform.c
index ffdad13..94d872c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c
@@ -110,6 +110,8 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
  {
 uint procType;
 boolean first_instruction = TRUE;
+   boolean epilog_emitted = FALSE;
+   int stack_size = 0;
  
 /* input shader */

 struct tgsi_parse_context parse;
@@ -166,22 +168,60 @@ tgsi_transform_shader(const struct tgsi_token *tokens_in,
   {
  struct tgsi_full_instruction *fullinst
     = &parse.FullToken.FullInstruction;
+unsigned opcode = fullinst->Instruction.Opcode;
  
  if (first_instruction && ctx->prolog) {

 ctx->prolog(ctx);
  }
  
-/* XXX Note: we may also want to look for a main/top-level

- * TGSI_OPCODE_RET instruction in the future.
+/*
+ * XXX Note: we handle the case of ret in main.
+ * However, the output redirections done by transform
+ * have their limits with control flow and will generally
+ * not work correctly. e.g.
+ * if (cond) {
+ *oColor = x;
+ *ret;
+ * }
+ * oColor = y;
+ * end;
+ * If the color output is redirected to a temp and modified
+ * by a transform, this will not work (the oColor assignment
+ * in the conditional will never make it to the actual output).
   */
-if (fullinst->Instruction.Opcode == TGSI_OPCODE_END
-&& ctx->epilog) {
+if ((opcode == TGSI_OPCODE_END ||
+ (opcode == TGSI_OPCODE_RET && stack_size == 0))
+&& ctx->epilog && !epilog_emitted) {
 /* Emit caller's epilog */
 ctx->epilog(ctx);
-   /* Emit END */
+   epilog_emitted = TRUE;
+   /* Emit END (or RET) */
+   if (opcode == TGSI_OPCODE_END) {
+  assert(stack_size == 0);
+   }
 ctx->emit_instruction(ctx, fullinst);
  }
  else {
+   switch (opcode) {
+   case TGSI_OPCODE_IF:
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_SWITCH:
+   case TGSI_OPCODE_BGNLOOP:
+   case TGSI_OPCODE_CAL:
+  stack_size++;
+  break;
+   case TGSI_OPCODE_ENDIF:
+   case TGSI_OPCODE_ENDSWITCH:
+   case TGSI_OPCODE_ENDLOOP:
+   case TGSI_OPCODE_ENDSUB:
+  assert(stack_size > 0);
+  stack_size--;
+  break;
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_RET:
+   default:
+  break;
+   }
 if (ctx->transform_instruction)
ctx->transform_instruction(ctx, fullinst);
 else



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105067] Tesselation broken for dEQP tests

2018-02-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105067

Bug ID: 105067
   Summary: Tesselation broken for dEQP tests
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mar...@gmail.com
  Reporter: mark.a.ja...@intel.com
QA Contact: mesa-dev@lists.freedesktop.org
CC: bri...@vmware.com

dozens of tests regressed on each i965 platform, eg:
dEQP-GLES31.functional.tessellation_geometry_interaction.feedback.tessellation_output_isolines_geometry_output_points

Standard Output

Testing isolines->lines primitive conversion with and without transform
feedback.
Sending a patch of 4 vertices (2x2 uniform grid) to tessellation control
shader.
Control shader emits a patch of 9 vertices (3x3 uniform grid).
Setting outer tessellation level = 3, inner = 3.
Primitive generator emits isolines
Geometry shader transforms emitted primitives to points
Reading back vertex positions of generated primitives using transform feedback.
Verifying rendered image and feedback vertices are consistent.
Rendering scene again with identical shader program, but without setting
feedback varying. Expecting similar output image.
Rendering with transform feedback
Begin transform feedback with mode GL_POINTS
Calling drawArrays with mode GL_PATCHES
Verifying GL_PRIMITIVES_GENERATED, expecting 18
Error, GL_PRIMITIVES_GENERATED was 0


Bisected to:
commit e149a0253c12d103805230bc7bc0a36887c3b8df
Author: Marek Olšák 
Date:   Thu Nov 16 04:29:35 2017 +0100
mesa,glsl,nir: reduce gl_state_index size to 2 bytes

Let's use the new gl_state_index16 type everywhere and remove
the typecasts.

This helps reduce the size of gl_program_parameter.

Reviewed-by: Brian Paul 

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: fix tessellation regressions with gl_state_index16

2018-02-12 Thread Dave Airlie
From: Dave Airlie 

Looks like one conversion was missed.

Fixes: e149a0253 (mesa,glsl,nir: reduce gl_state_index size to 2 bytes)
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105067
Signed-off-by: Dave Airlie 
---
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp 
b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 9c4fb22..10a4ff4 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -251,7 +251,7 @@ brw_nir_lower_patch_vertices_in_to_uniform(nir_shader *nir)
   if (var->data.location != SYSTEM_VALUE_VERTICES_IN)
  continue;
 
-  gl_state_index tokens[STATE_LENGTH] = {
+  gl_state_index16 tokens[STATE_LENGTH] = {
  STATE_INTERNAL,
  nir->info.stage == MESA_SHADER_TESS_CTRL ?
 STATE_TCS_PATCH_VERTICES_IN : STATE_TES_PATCH_VERTICES_IN,
-- 
2.9.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/7] vulkan: Add EXT_acquire_xlib_display

2018-02-12 Thread Dylan Baker
Quoting Keith Packard (2018-02-09 20:45:12)
> This extension adds the ability to borrow an X RandR output for
> temporary use directly by a Vulkan application. For DRM, we use the
> Linux resource leasing mechanism.
> 
> Signed-off-by: Keith Packard 
> ---
>  configure.ac   |  25 ++
>  meson.build|  17 ++
>  meson_options.txt  |   7 +
>  src/amd/vulkan/Makefile.am |   7 +
>  src/amd/vulkan/meson.build |   7 +
>  src/amd/vulkan/radv_extensions.py  |  11 +-
>  src/amd/vulkan/radv_wsi_display.c  |  30 +++
>  src/intel/Makefile.vulkan.am   |   7 +
>  src/intel/vulkan/anv_extensions.py |   1 +
>  src/intel/vulkan/anv_extensions_gen.py |  10 +-
>  src/intel/vulkan/anv_wsi_display.c |  30 +++
>  src/intel/vulkan/meson.build   |   7 +
>  src/vulkan/Makefile.am |   5 +
>  src/vulkan/wsi/meson.build |   7 +
>  src/vulkan/wsi/wsi_common_display.c| 472 
> +
>  src/vulkan/wsi/wsi_common_display.h|  17 ++
>  16 files changed, 650 insertions(+), 10 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 46318365603..9effd15e8c5 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1547,6 +1547,7 @@ AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = 
> xyes -a "x$dri_platform" = x
>  AM_CONDITIONAL(HAVE_LMSENSORS, test "x$enable_lmsensors" = xyes )
>  AM_CONDITIONAL(HAVE_GALLIUM_EXTRA_HUD, test "x$enable_gallium_extra_hud" = 
> xyes )
>  AM_CONDITIONAL(HAVE_WINDOWSDRI, test "x$enable_dri" = xyes -a 
> "x$dri_platform" = xwindows )
> +AM_CONDITIONAL(HAVE_XLEASE, test "x$have_xlease" = xyes )
>  
>  AC_ARG_ENABLE([shared-glapi],
>  [AS_HELP_STRING([--enable-shared-glapi],
> @@ -1846,6 +1847,11 @@ if test x"$enable_dri3" = xyes; then
>  PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
>  fi
>  
> +if test x"$have_xlease" = xyes; then
> +randr_modules="x11-xcb xcb-randr"
> +PKG_CHECK_MODULES([XCB_RANDR], [$randr_modules])
> +fi
> +
>  AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
>  AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
>  AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
> @@ -1853,6 +1859,25 @@ AM_CONDITIONAL(HAVE_PLATFORM_DISPLAY, echo 
> "$platforms" | grep -q 'drm')
>  AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 
> 'surfaceless')
>  AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
>  
> +AC_ARG_ENABLE(xlib-lease,
> +[AS_HELP_STRING([--enable-xlib-lease]
> +[enable VK_acquire_xlib_display using X leases])],
> +[enable_xlib_lease=$enableval], [enable_xlib_lease=auto])
> +case "x$enable_xlib_lease" in
> +xyes)
> +;;
> +xno)
> +;;
> +*)
> +if echo "$platforms" | grep -q 'x11' && echo "$platforms" | grep -q 
> 'drm';
> +enable_xlib_lease=yes
> +else
> +enable_xlib_lease=no
> +fi
> +esac
> +
> +AM_CONDITIONAL(HAVE_XLIB_LEASE, test "x$enable_xlib_lease" = xyes)
> +
>  dnl
>  dnl More DRI setup
>  dnl
> diff --git a/meson.build b/meson.build
> index aeb7f5e2917..595b0f66cd7 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -262,6 +262,19 @@ if _platforms != ''
>egl_native_platform = _split[0]
>  endif
>  
> +with_xlib_lease = get_option('xlib-lease')
> +if with_xlib_lease == 'auto'
> +  if with_platform_x11 and with_platform_display
> +with_xlib_lease = true
> +  else
> +with_xlib_lease = false
> +  endif

You could simplify this to
with_xlib_lease = with_platform_x11 and with_platform_display

> +elif with_xlib_lease == 'true'

We should probably error here if we don't have the correct platforms.

> +  with_xlib_lease = true
> +else
> +  with_xlib_lease = false
> +endif
> +
>  with_glx = get_option('glx')
>  if with_glx == 'auto'
>if with_dri
> @@ -1151,6 +1164,7 @@ dep_xcb_present = []
>  dep_xcb_sync = []
>  dep_xcb_xfixes = []
>  dep_xshmfence = []
> +dep_xcb_xrandr = []
>  if with_platform_x11
>if with_glx == 'xlib' or with_glx == 'gallium-xlib'
>  dep_x11 = dependency('x11')
> @@ -1190,6 +1204,9 @@ if with_platform_x11
>if with_egl
>  dep_xcb_xfixes = dependency('xcb-xfixes')
>endif
> +  if with_xlib_lease
> +dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
> +  endif
>  endif
>  
>  if get_option('gallium-extra-hud')
> diff --git a/meson_options.txt b/meson_options.txt
> index 7fafe2deaac..d38c9aa6149 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -286,3 +286,10 @@ option(
>value : '',
>description : 'Comma delimited list of tools to build. choices : 
> freedreno,glsl,intel,nir,nouveau or all'
>  )
> +option(
> +  'xlib-lease',
> +  type : 'combo',
> +  value : 'auto',
> +  choices : ['auto', 'true', 'false'],
> +  description : 'Enable VK_EXT_acquire_xlib_display.'
> +)
> diff --git 

Re: [Mesa-dev] [PATCH 6/7] vulkan: Add new VK_MESA_query_timestamp extension

2018-02-12 Thread Dylan Baker
Quoting Keith Packard (2018-02-09 20:45:15)
> This extension adds a single function to query the current GPU
> timestamp, just like glGetInteger64v(GL_TIMESTAMP, &timestamp). This
> function is needed to complete the implementation of
> GOOGLE_display_timing, which needs to be able to correlate GPU and CPU
> timestamps.
> 
> Signed-off-by: Keith Packard 
> ---
>  include/vulkan/vulkan.h |  6 ++
>  src/Makefile.am |  1 +
>  src/amd/vulkan/Makefile.am  |  3 +++
>  src/amd/vulkan/meson.build  |  8 
>  src/amd/vulkan/radv_device.c|  8 
>  src/amd/vulkan/radv_extensions.py   |  1 +
>  src/intel/Makefile.vulkan.am|  7 +++
>  src/intel/vulkan/anv_extensions.py  |  1 +
>  src/intel/vulkan/anv_gem.c  | 13 +
>  src/intel/vulkan/anv_private.h  |  1 +
>  src/intel/vulkan/genX_query.c   | 15 +++
>  src/intel/vulkan/meson.build| 12 ++--
>  src/vulkan/meson.build  |  1 +
>  src/vulkan/registry/vk_mesa_query_timestamp.xml | 22 ++
>  14 files changed, 89 insertions(+), 10 deletions(-)
>  create mode 100644 src/vulkan/registry/vk_mesa_query_timestamp.xml
> 
> diff --git a/include/vulkan/vulkan.h b/include/vulkan/vulkan.h
> index d3e2e246cf3..5523eb7586f 100644
> --- a/include/vulkan/vulkan.h
> +++ b/include/vulkan/vulkan.h
> @@ -7025,6 +7025,12 @@ VKAPI_ATTR VkResult VKAPI_CALL 
> vkGetMemoryHostPointerPropertiesEXT(
>  VkMemoryHostPointerPropertiesEXT*   
> pMemoryHostPointerProperties);
>  #endif
>  
> +typedef VkResult (VKAPI_PTR *PFN_vkQueryCurrentTimestampMESA)(VkDevice 
> device, uint64_t *timestamp);
> +
> +VKAPI_ATTR VkResult VKAPI_CALL vkQueryCurrentTimestampMESA(
> +VkDevice_device,
> +uint64_t*timestamp);
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 014ffaf3e29..74ff305d7c6 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -68,6 +68,7 @@ endif
>  
>  EXTRA_DIST += vulkan/registry/vk.xml
>  EXTRA_DIST += vulkan/registry/vk_android_native_buffer.xml
> +EXTRA_DIST += vulkan/registry/vk_mesa_query_timestamp.xml
>  
>  if HAVE_AMD_DRIVERS
>  SUBDIRS += amd
> diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am
> index 94ece06e99e..0626fa2b3b3 100644
> --- a/src/amd/vulkan/Makefile.am
> +++ b/src/amd/vulkan/Makefile.am
> @@ -129,12 +129,14 @@ libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)
>  
>  vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
>  vk_android_native_buffer_xml = 
> $(top_srcdir)/src/vulkan/registry/vk_android_native_buffer.xml
> +vk_mesa_query_timestamp_xml = 
> $(top_srcdir)/src/vulkan/registry/vk_mesa_query_timestamps.xml
>  
>  radv_entrypoints.c: radv_entrypoints_gen.py radv_extensions.py 
> $(vulkan_api_xml)
> $(MKDIR_GEN)
> $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py \
> --xml $(vulkan_api_xml) \
> --xml $(vk_android_native_buffer_xml) \
> +   --xml $(vk_mesa_query_timestamp_xml) \
> --outdir $(builddir)
>  radv_entrypoints.h: radv_entrypoints.c
>  
> @@ -144,6 +146,7 @@ radv_extensions.c: radv_extensions.py \
> $(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
> --xml $(vulkan_api_xml) \
> --xml $(vk_android_native_buffer_xml) \
> +   --xml $(vk_mesa_query_timestamp_xml) \
> --out $@
>  
>  vk_format_table.c: vk_format_table.py \
> diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
> index 0b92a1763a1..34f578476c0 100644
> --- a/src/amd/vulkan/meson.build
> +++ b/src/amd/vulkan/meson.build
> @@ -20,10 +20,10 @@
>  
>  radv_entrypoints = custom_target(
>'radv_entrypoints.[ch]',
> -  input : ['radv_entrypoints_gen.py', vk_api_xml],
> +  input : ['radv_entrypoints_gen.py', vk_api_xml, 
> vk_android_native_buffer_xml, vk_mesa_query_timestamp_xml],

Some of these lines look a little long; consider wrapping them like this:
input : [
'radv_entrypoints_gen.py', vk_api_xml, vk_android_native_buffer_xml,
vk_mesa_query_timestamp_xml,
],

>output : ['radv_entrypoints.h', 'radv_entrypoints.c'],
>command : [
> -prog_python2, '@INPUT0@', '--xml', '@INPUT1@', '--outdir',
> +prog_python2, '@INPUT0@', '--xml', '@INPUT1@', '--xml', '@INPUT2@', 
> '--xml', '@INPUT3@', '--outdir',
>  meson.current_build_dir()
>],
>depend_files : files('radv_extensions.py'),
> @@ -31,10 +31,10 @@ radv_entrypoints = custom_target(
>  
>  radv_extensions_c = custom_target(
>'radv_extensions.c',
> -  input : ['radv_extensions.py', vk_api_xml, vk_android_native_buffer_xml],
> +  input : ['radv_extensions.py', vk_api_xml, 

[Mesa-dev] [PATCH 1/2] intel/isl: Add an isl_color_value_is_zero helper

2018-02-12 Thread Jason Ekstrand
Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/isl/isl.c | 20 
 src/intel/isl/isl.h |  3 +++
 2 files changed, 23 insertions(+)

diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
index 59f512f..f4b0502 100644
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -269,6 +269,26 @@ isl_tiling_get_info(enum isl_tiling tiling,
 }
 
 bool
+isl_color_value_is_zero(union isl_color_value value,
+enum isl_format format)
+{
+   const struct isl_format_layout *fmtl = isl_format_get_layout(format);
+
+#define RETURN_FALSE_IF_NOT_0(c, i) \
+   if (fmtl->channels.c.bits && value.u32[i] != 0) \
+  return false
+
+   RETURN_FALSE_IF_NOT_0(r, 0);
+   RETURN_FALSE_IF_NOT_0(g, 1);
+   RETURN_FALSE_IF_NOT_0(b, 2);
+   RETURN_FALSE_IF_NOT_0(a, 3);
+
+#undef RETURN_FALSE_IF_NOT_0
+
+   return true;
+}
+
+bool
 isl_color_value_is_zero_one(union isl_color_value value,
 enum isl_format format)
 {
diff --git a/src/intel/isl/isl.h b/src/intel/isl/isl.h
index fda2411..209769a 100644
--- a/src/intel/isl/isl.h
+++ b/src/intel/isl/isl.h
@@ -1692,6 +1692,9 @@ isl_extent4d(uint32_t width, uint32_t height, uint32_t 
depth,
return e;
 }
 
+bool isl_color_value_is_zero(union isl_color_value value,
+ enum isl_format format);
+
 bool isl_color_value_is_zero_one(union isl_color_value value,
  enum isl_format format);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] anv: Be more careful about fast-clear colors

2018-02-12 Thread Jason Ekstrand
Previously, we just used all the channels regardless of the format.
This is less than ideal because some channels may have undefined values
and this should be ok from the client's perspective.  Even though the
driver should do the correct thing regardless of what is in the
undefined value, it makes things less deterministic.  In particular, the
driver may choose to fast-clear or not based on undefined values.  This
level of nondeterminism is bad.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/intel/vulkan/genX_cmd_buffer.c | 47 --
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 99854eb..a574024 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -202,24 +202,6 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
}
 }
 
-static bool
-color_is_zero_one(VkClearColorValue value, enum isl_format format)
-{
-   if (isl_format_has_int_channel(format)) {
-  for (unsigned i = 0; i < 4; i++) {
- if (value.int32[i] != 0 && value.int32[i] != 1)
-return false;
-  }
-   } else {
-  for (unsigned i = 0; i < 4; i++) {
- if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
-return false;
-  }
-   }
-
-   return true;
-}
-
 static void
 color_attachment_compute_aux_usage(struct anv_device * device,
struct anv_cmd_state * cmd_state,
@@ -294,13 +276,26 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
 
assert(iview->image->planes[0].aux_surface.isl.usage & 
ISL_SURF_USAGE_CCS_BIT);
 
+   const struct isl_format_layout *view_fmtl =
+  isl_format_get_layout(iview->planes[0].isl.format);
+   union isl_color_value clear_color = {};
+
+#define COPY_CLEAR_COLOR_CHANNEL(c, i) \
+   if (view_fmtl->channels.c.bits) \
+  clear_color.u32[i] = att_state->clear_value.color.uint32[i]
+
+   COPY_CLEAR_COLOR_CHANNEL(r, 0);
+   COPY_CLEAR_COLOR_CHANNEL(g, 1);
+   COPY_CLEAR_COLOR_CHANNEL(b, 2);
+   COPY_CLEAR_COLOR_CHANNEL(a, 3);
+
+#undef COPY_CLEAR_COLOR_CHANNEL
+
att_state->clear_color_is_zero_one =
-  color_is_zero_one(att_state->clear_value.color, 
iview->planes[0].isl.format);
+  isl_color_value_is_zero_one(*fast_clear_color,
+  iview->planes[0].isl.format);
att_state->clear_color_is_zero =
-  att_state->clear_value.color.uint32[0] == 0 &&
-  att_state->clear_value.color.uint32[1] == 0 &&
-  att_state->clear_value.color.uint32[2] == 0 &&
-  att_state->clear_value.color.uint32[3] == 0;
+  isl_color_value_is_zero(*fast_clear_color, iview->planes[0].isl.format);
 
if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
   /* Start by getting the fast clear type.  We use the first subpass
@@ -358,10 +353,8 @@ color_attachment_compute_aux_usage(struct anv_device * 
device,
"LOAD_OP_CLEAR.  Only fast-clearing the first slice");
   }
 
-  if (att_state->fast_clear) {
- memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
-sizeof(fast_clear_color->u32));
-  }
+  if (att_state->fast_clear)
+ *fast_clear_color = clear_color;
} else {
   att_state->fast_clear = false;
}
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 19/29] anv/cmd_buffer: Move the rest of clear_subpass into begin_subpass

2018-02-12 Thread Nanley Chery
On Thu, Feb 08, 2018 at 05:23:21PM -0800, Jason Ekstrand wrote:
> On Thu, Feb 8, 2018 at 5:20 PM, Jason Ekstrand  wrote:
> 
> > On Fri, Jan 12, 2018 at 2:45 PM, Nanley Chery 
> > wrote:
> >
> >> On Mon, Nov 27, 2017 at 07:06:09PM -0800, Jason Ekstrand wrote:
> >> > ---
> >> >  src/intel/vulkan/anv_blorp.c   | 243 --
> >> ---
> >> >  src/intel/vulkan/anv_private.h |  17 ++-
> >> >  src/intel/vulkan/genX_cmd_buffer.c |  68 ++-
> >> >  3 files changed, 188 insertions(+), 140 deletions(-)
> >> >
> >> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> >> > index 7401234..45d7b12 100644
> >> > --- a/src/intel/vulkan/anv_blorp.c
> >> > +++ b/src/intel/vulkan/anv_blorp.c
> >> > @@ -1132,143 +1132,6 @@ enum subpass_stage {
> >> > SUBPASS_STAGE_RESOLVE,
> >> >  };
> >> >
> >> > -static bool
> >> > -subpass_needs_clear(const struct anv_cmd_buffer *cmd_buffer)
> >> > -{
> >> > -   const struct anv_cmd_state *cmd_state = _buffer->state;
> >> > -   uint32_t ds = cmd_state->subpass->depth_sten
> >> cil_attachment.attachment;
> >> > -
> >> > -   if (ds != VK_ATTACHMENT_UNUSED) {
> >> > -  assert(ds < cmd_state->pass->attachment_count);
> >> > -  if (cmd_state->attachments[ds].pending_clear_aspects)
> >> > - return true;
> >> > -   }
> >> > -
> >> > -   return false;
> >> > -}
> >> > -
> >> > -void
> >> > -anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer)
> >> > -{
> >> > -   const struct anv_cmd_state *cmd_state = _buffer->state;
> >> > -   const VkRect2D render_area = cmd_buffer->state.render_area;
> >> > -
> >> > -
> >> > -   if (!subpass_needs_clear(cmd_buffer))
> >> > -  return;
> >> > -
> >> > -   /* Because this gets called within a render pass, we tell blorp not
> >> to
> >> > -* trash our depth and stencil buffers.
> >> > -*/
> >> > -   struct blorp_batch batch;
> >> > -   blorp_batch_init(_buffer->device->blorp, , cmd_buffer,
> >> > -BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> >> > -
> >> > -   VkClearRect clear_rect = {
> >> > -  .rect = cmd_buffer->state.render_area,
> >> > -  .baseArrayLayer = 0,
> >> > -  .layerCount = cmd_buffer->state.framebuffer->layers,
> >> > -   };
> >> > -
> >> > -   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> >> > -
> >> > -   const uint32_t ds = cmd_state->subpass->depth_sten
> >> cil_attachment.attachment;
> >> > -   assert(ds == VK_ATTACHMENT_UNUSED || ds <
> >> cmd_state->pass->attachment_count);
> >> > -
> >> > -   if (ds != VK_ATTACHMENT_UNUSED &&
> >> > -   cmd_state->attachments[ds].pending_clear_aspects) {
> >> > -
> >> > -  VkClearAttachment clear_att = {
> >> > - .aspectMask = cmd_state->attachments[ds].pen
> >> ding_clear_aspects,
> >> > - .clearValue = cmd_state->attachments[ds].clear_value,
> >> > -  };
> >> > -
> >> > -
> >> > -  const uint8_t gen = cmd_buffer->device->info.gen;
> >> > -  bool clear_with_hiz = gen >= 8 && 
> >> > cmd_state->attachments[ds].aux_usage
> >> ==
> >> > -ISL_AUX_USAGE_HIZ;
> >> > -  const struct anv_image_view *iview = fb->attachments[ds];
> >> > -
> >> > -  if (clear_with_hiz) {
> >> > - const bool clear_depth = clear_att.aspectMask &
> >> > -  VK_IMAGE_ASPECT_DEPTH_BIT;
> >> > - const bool clear_stencil = clear_att.aspectMask &
> >> > -VK_IMAGE_ASPECT_STENCIL_BIT;
> >> > -
> >> > - /* Check against restrictions for depth buffer clearing. A
> >> great GPU
> >> > -  * performance benefit isn't expected when using the HZ
> >> sequence for
> >> > -  * stencil-only clears. Therefore, we don't emit a HZ op
> >> sequence for
> >> > -  * a stencil clear in addition to using the BLORP-fallback
> >> for depth.
> >> > -  */
> >> > - if (clear_depth) {
> >> > -if (!blorp_can_hiz_clear_depth(gen,
> >> iview->planes[0].isl.format,
> >> > -   iview->image->samples,
> >> > -   render_area.offset.x,
> >> > -   render_area.offset.y,
> >> > -   render_area.offset.x +
> >> > -   render_area.extent.width,
> >> > -   render_area.offset.y +
> >> > -   render_area.extent.height))
> >> {
> >> > -   clear_with_hiz = false;
> >> > -} else if (clear_att.clearValue.depthStencil.depth !=
> >> > -   ANV_HZ_FC_VAL) {
> >> > -   /* Don't enable fast depth clears for any color not
> >> equal to
> >> > -* ANV_HZ_FC_VAL.
> >> > -*/
> >> > -   clear_with_hiz = false;
> >> > -

Re: [Mesa-dev] [PATCH 2/2] anv: Be more careful about fast-clear colors

2018-02-12 Thread Nanley Chery
On Mon, Feb 12, 2018 at 04:35:20PM -0800, Jason Ekstrand wrote:
> Previously, we just used all the channels regardless of the format.
> This is less than ideal because some channels may have undefined values
> and this should be ok from the client's perspective.  Even though the
> driver should do the correct thing regardless of what is in the
> undefined value, it makes things less deterministic.  In particular, the
> driver may choose to fast-clear or not based on undefined values.  This
> level of nondeterminism is bad.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 47 
> --
>  1 file changed, 20 insertions(+), 27 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 99854eb..a574024 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -202,24 +202,6 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
> }
>  }
>  
> -static bool
> -color_is_zero_one(VkClearColorValue value, enum isl_format format)
> -{
> -   if (isl_format_has_int_channel(format)) {
> -  for (unsigned i = 0; i < 4; i++) {
> - if (value.int32[i] != 0 && value.int32[i] != 1)
> -return false;
> -  }
> -   } else {
> -  for (unsigned i = 0; i < 4; i++) {
> - if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
> -return false;
> -  }
> -   }
> -
> -   return true;
> -}
> -
>  static void
>  color_attachment_compute_aux_usage(struct anv_device * device,
> struct anv_cmd_state * cmd_state,
> @@ -294,13 +276,26 @@ color_attachment_compute_aux_usage(struct anv_device * 
> device,
>  
> assert(iview->image->planes[0].aux_surface.isl.usage & 
> ISL_SURF_USAGE_CCS_BIT);
>  
> +   const struct isl_format_layout *view_fmtl =
> +  isl_format_get_layout(iview->planes[0].isl.format);
> +   union isl_color_value clear_color = {};

Is this initializer valid?

> +
> +#define COPY_CLEAR_COLOR_CHANNEL(c, i) \
> +   if (view_fmtl->channels.c.bits) \
> +  clear_color.u32[i] = att_state->clear_value.color.uint32[i]
> +
> +   COPY_CLEAR_COLOR_CHANNEL(r, 0);
> +   COPY_CLEAR_COLOR_CHANNEL(g, 1);
> +   COPY_CLEAR_COLOR_CHANNEL(b, 2);
> +   COPY_CLEAR_COLOR_CHANNEL(a, 3);
> +
> +#undef COPY_CLEAR_COLOR_CHANNEL
> +
> att_state->clear_color_is_zero_one =
> -  color_is_zero_one(att_state->clear_value.color, 
> iview->planes[0].isl.format);
> +  isl_color_value_is_zero_one(*fast_clear_color,

Should this be clear_color?

> +  iview->planes[0].isl.format);
> att_state->clear_color_is_zero =
> -  att_state->clear_value.color.uint32[0] == 0 &&
> -  att_state->clear_value.color.uint32[1] == 0 &&
> -  att_state->clear_value.color.uint32[2] == 0 &&
> -  att_state->clear_value.color.uint32[3] == 0;
> +  isl_color_value_is_zero(*fast_clear_color, 
> iview->planes[0].isl.format);
>  

Should this be clear_color?


-Nanley

> if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
>/* Start by getting the fast clear type.  We use the first subpass
> @@ -358,10 +353,8 @@ color_attachment_compute_aux_usage(struct anv_device * 
> device,
> "LOAD_OP_CLEAR.  Only fast-clearing the first slice");
>}
>  
> -  if (att_state->fast_clear) {
> - memcpy(fast_clear_color->u32, att_state->clear_value.color.uint32,
> -sizeof(fast_clear_color->u32));
> -  }
> +  if (att_state->fast_clear)
> + *fast_clear_color = clear_color;
> } else {
>att_state->fast_clear = false;
> }
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] virgl: Support v2 caps struct (v2)

2018-02-12 Thread Dave Airlie
From: Stéphane Marchesin 

This struct allows us to report:
- accurate max point size/line width.
- accurate texel and texture gather offsets
- vertex/geometry limits.

Signed-off-by: Dave Airlie 
---
 src/gallium/drivers/virgl/virgl_hw.h| 28 ++
 src/gallium/drivers/virgl/virgl_screen.c| 29 +++
 src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 38 -
 3 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/virgl/virgl_hw.h 
b/src/gallium/drivers/virgl/virgl_hw.h
index e3c56db2ac6..833ab91eee7 100644
--- a/src/gallium/drivers/virgl/virgl_hw.h
+++ b/src/gallium/drivers/virgl/virgl_hw.h
@@ -232,6 +232,11 @@ struct virgl_caps_bool_set1 {
 unsigned poly_stipple:1; /* not in GL 3.1 core profile */
 unsigned mirror_clamp:1;
 unsigned texture_query_lod:1;
+unsigned has_fp64:1;
+unsigned has_tessellation_shaders:1;
+unsigned has_indirect_draw:1;
+unsigned has_sample_shading:1;
+unsigned has_cull:1;
 };
 
 /* endless expansion capabilites - current gallium has 252 formats */
@@ -259,9 +264,32 @@ struct virgl_caps_v1 {
 uint32_t max_texture_gather_components;
 };
 
+struct virgl_caps_v2 {
+struct virgl_caps_v1 v1;
+float min_aliased_point_size;
+float max_aliased_point_size;
+float min_smooth_point_size;
+float max_smooth_point_size;
+float min_aliased_line_width;
+float max_aliased_line_width;
+float min_smooth_line_width;
+float max_smooth_line_width;
+float max_texture_lod_bias;
+uint32_t max_geom_output_vertices;
+uint32_t max_geom_total_output_components;
+uint32_t max_vertex_outputs;
+uint32_t max_vertex_attribs;
+uint32_t max_shader_patch_varyings;
+int32_t min_texel_offset;
+int32_t max_texel_offset;
+int32_t min_texture_gather_offset;
+int32_t max_texture_gather_offset;
+};
+
 union virgl_caps {
 uint32_t max_version;
 struct virgl_caps_v1 v1;
+struct virgl_caps_v2 v2;
 };
 
 enum virgl_errors {
diff --git a/src/gallium/drivers/virgl/virgl_screen.c 
b/src/gallium/drivers/virgl/virgl_screen.c
index ca73b90e0fd..72dce08c286 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -113,11 +113,13 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
   return vscreen->caps.caps.v1.max_texture_array_layers;
case PIPE_CAP_MIN_TEXEL_OFFSET:
+  return vscreen->caps.caps.v2.min_texel_offset;
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
-  return -8;
+  return vscreen->caps.caps.v2.min_texture_gather_offset;
case PIPE_CAP_MAX_TEXEL_OFFSET:
+  return vscreen->caps.caps.v2.max_texel_offset;
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-  return 7;
+  return vscreen->caps.caps.v2.max_texture_gather_offset;
case PIPE_CAP_CONDITIONAL_RENDER:
   return vscreen->caps.caps.v1.bset.conditional_render;
case PIPE_CAP_TEXTURE_BARRIER:
@@ -182,9 +184,9 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap 
param)
case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
   return 0;
case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
-  return 256;
+  return vscreen->caps.caps.v2.max_geom_output_vertices;
case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
-  return 16384;
+  return vscreen->caps.caps.v2.max_geom_total_output_components;
case PIPE_CAP_TEXTURE_QUERY_LOD:
   return vscreen->caps.caps.v1.bset.texture_query_lod;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
@@ -310,11 +312,13 @@ virgl_get_shader_param(struct pipe_screen *screen,
  return 1;
   case PIPE_SHADER_CAP_MAX_INPUTS:
  if (vscreen->caps.caps.v1.glsl_level < 150)
-return 16;
+return vscreen->caps.caps.v2.max_vertex_attribs;
  return (shader == PIPE_SHADER_VERTEX ||
- shader == PIPE_SHADER_GEOMETRY) ? 16 : 32;
+ shader == PIPE_SHADER_GEOMETRY) ? 
vscreen->caps.caps.v2.max_vertex_attribs : 32;
   case PIPE_SHADER_CAP_MAX_OUTPUTS:
- return 32;
+ if (shader == PIPE_SHADER_FRAGMENT)
+return vscreen->caps.caps.v1.max_render_targets;
+ return vscreen->caps.caps.v2.max_vertex_outputs;
  // case PIPE_SHADER_CAP_MAX_CONSTS:
  //return 4096;
   case PIPE_SHADER_CAP_MAX_TEMPS:
@@ -350,19 +354,20 @@ virgl_get_shader_param(struct pipe_screen *screen,
 static float
 virgl_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
 {
+   struct virgl_screen *vscreen = virgl_screen(screen);
switch (param) {
case PIPE_CAPF_MAX_LINE_WIDTH:
-  /* fall-through */
+  return vscreen->caps.caps.v2.max_aliased_line_width;
case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-  

[Mesa-dev] [PATCH shaderdb 0/3] offline building of shader program for a desired target

2018-02-12 Thread Dongwon Kim
This series of changes turns shaderdb into a complete standalone compiler that
can create a shader program in binary form (using glGetProgramBinary), which
can later be loaded on the target system specified by the user
(glProgramBinary).

As a prerequisite, the patch "run: new '--pci-id' option for overriding pci-id"
was written to add support for other GEN architectures that are not listed in
run.c.

The first patch, "intel_stub: override pci-id only if INTEL_DEVID_OVERRIDE is
set", fixes a segfault that occurs when ./intel_run is executed without
INTEL_DEVID_OVERRIDE being set.

Dongwon Kim (3):
  intel_stub: override pci-id only if INTEL_DEVID_OVERRIDE is set
  run: new '--pci-id' option for overriding pci-id
  run: shader program file created via GetProgramBinary

 intel_stub.c | 10 ++-
 run.c| 88 +---
 2 files changed, 94 insertions(+), 4 deletions(-)

-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH shaderdb 1/3] intel_stub: override pci-id only if INTEL_DEVID_OVERRIDE is set

2018-02-12 Thread Dongwon Kim
To prevent a segfault, pci-id is set only if INTEL_DEVID_OVERRIDE exists.

Signed-off-by: Dongwon Kim 
---
 intel_stub.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/intel_stub.c b/intel_stub.c
index ea88400..cf9ddff 100644
--- a/intel_stub.c
+++ b/intel_stub.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -174,6 +175,7 @@ ioctl(int fd, unsigned long request, ...)
va_list args;
void *argp;
struct stat buf;
+   char *pci_id;
 
va_start(args, request);
argp = va_arg(args, void *);
@@ -199,7 +201,13 @@ ioctl(int fd, unsigned long request, ...)
 *getparam->value = 1;
 break;
 case I915_PARAM_CHIPSET_ID:
-*getparam->value = 
strtod(getenv("INTEL_DEVID_OVERRIDE"), NULL);
+pci_id = getenv("INTEL_DEVID_OVERRIDE");
+
+if (pci_id)
+*getparam->value = strtod(pci_id, NULL);
+else
+return -EINVAL;
+
 break;
 case I915_PARAM_CMD_PARSER_VERSION:
 *getparam->value = 9;
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH shaderdb 3/3] run: shader program file created via GetProgramBinary

2018-02-12 Thread Dongwon Kim
Extract the linked program binary to a file using glGetProgramBinary.
This file is intended to be loaded with glProgramBinary by the graphics
application running on the target system.

A new option, '--out=' is available to be used for specifying
the output file name.

Signed-off-by: Dongwon Kim 
---
 run.c | 46 --
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/run.c b/run.c
index d066567..54575e1 100644
--- a/run.c
+++ b/run.c
@@ -358,18 +358,20 @@ const struct platform platforms[] = {
 enum
 {
 PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1,
+OUT_PROGRAM_OPTION,
 };
 
 const struct option const long_options[] =
 {
 {"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION},
+{"out", required_argument, NULL, OUT_PROGRAM_OPTION},
 {NULL, 0, NULL, 0}
 };
 
 void print_usage(const char *prog_name)
 {
 fprintf(stderr,
-"Usage: %s [-d ] [-j ] [-o ] [-p 
] [--pciid=] \n",
+"Usage: %s [-d ] [-j ] [-o ] [-p 
] [--pciid=] [--out=] \n",
 prog_name);
 }
 
@@ -450,6 +452,7 @@ main(int argc, char **argv)
 int opt;
 bool platf_overridden = 0;
 bool pci_id_overridden = 0;
+char out_file[64] = {0};
 
 max_threads = omp_get_max_threads();
 
@@ -518,6 +521,13 @@ main(int argc, char **argv)
 setenv("INTEL_DEVID_OVERRIDE", optarg, 1);
 pci_id_overridden = 1;
 break;
+case OUT_PROGRAM_OPTION:
+if (optarg[0] == 0) {
+  fprintf(stderr, "Output file name is empty.\n");
+  return -1;
+}
+strncpy(out_file, optarg, 64);
+break;
 default:
 fprintf(stderr, "Unknown option: %x\n", opt);
 print_usage(argv[0]);
@@ -858,13 +868,13 @@ main(int argc, char **argv)
 }
 } else if (type == TYPE_CORE || type == TYPE_COMPAT || type == 
TYPE_ES) {
 GLuint prog = glCreateProgram();
+GLint param;
 
 for (unsigned i = 0; i < num_shaders; i++) {
 GLuint s = glCreateShader(shader[i].type);
 glShaderSource(s, 1, [i].text, [i].length);
 glCompileShader(s);
 
-GLint param;
 glGetShaderiv(s, GL_COMPILE_STATUS, );
 if (unlikely(!param)) {
 GLchar log[4096];
@@ -879,6 +889,38 @@ main(int argc, char **argv)
 }
 
 glLinkProgram(prog);
+
+glGetProgramiv(prog, GL_LINK_STATUS, );
+if (unlikely(!param)) {
+   GLchar log[4096];
+   GLsizei length;
+   glGetProgramInfoLog(prog, 4096, , log);
+
+   fprintf(stderr, "ERROR: failed to link progam:\n%s\n",
+   log);
+} else {
+   if (out_file[0] != 0) {
+  char *prog_buf = (char *)malloc(10*1024*1024);
+  GLenum format;
+  GLsizei length;
+  FILE *fp;
+
+  glGetProgramBinary(prog, 10*1024*1024, , , 
prog_buf);
+
+  param = glGetError();
+  if (param != GL_NO_ERROR) {
+ fprintf(stderr, "ERROR: failed to get Program 
Binary\n");
+  } else {
+ fp = fopen(out_file, "wb");
+ fprintf(stdout, "Binary program is generated (%d 
Byte).\n", length);
+ fprintf(stdout, "Binary Format is %d\n", format);
+ fprintf(stdout, "Now writing to the file\n");
+ fwrite(prog_buf, sizeof(char), length, fp);
+ fclose(fp);
+  }
+  free(prog_buf);
+   }
+}
 glDeleteProgram(prog);
 } else {
 for (unsigned i = 0; i < num_shaders; i++) {
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH shaderdb 2/3] run: new '--pci-id' option for overriding pci-id

2018-02-12 Thread Dongwon Kim
Add a new option, '--pciid', to override the PCI ID of the target arch
to support cross-architecture shader compilation. Unlike the "-p" option,
it accepts any GFX device supported by the driver.

Setting both "-p" and "--pciid" is blocked to avoid conflict.

Signed-off-by: Dongwon Kim 
---
 run.c | 44 ++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/run.c b/run.c
index 23d2b07..d066567 100644
--- a/run.c
+++ b/run.c
@@ -36,6 +36,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -353,10 +355,21 @@ const struct platform platforms[] = {
 "skl",  "0x191D",
 };
 
+enum
+{
+PCI_ID_OVERRIDE_OPTION = CHAR_MAX + 1,
+};
+
+const struct option const long_options[] =
+{
+{"pciid", required_argument, NULL, PCI_ID_OVERRIDE_OPTION},
+{NULL, 0, NULL, 0}
+};
+
 void print_usage(const char *prog_name)
 {
 fprintf(stderr,
-"Usage: %s [-d ] [-j ] [-o ] [-p 
] \n",
+"Usage: %s [-d ] [-j ] [-o ] [-p 
] [--pciid=] \n",
 prog_name);
 }
 
@@ -435,10 +448,13 @@ main(int argc, char **argv)
 char device_path[64];
 int device_id = 0;
 int opt;
+bool platf_overridden = 0;
+bool pci_id_overridden = 0;
 
 max_threads = omp_get_max_threads();
 
-while ((opt = getopt(argc, argv, "d:j:o:p:")) != -1) {
+while ((opt = getopt_long(argc, argv, "d:j:o:p:",
+  long_options, NULL)) != -1) {
 switch(opt) {
 case 'd': {
 char *endptr;
@@ -456,6 +472,13 @@ main(int argc, char **argv)
 break;
 case 'p': {
 const struct platform *platform = NULL;
+
+if (pci_id_overridden) {
+unsetenv("INTEL_DEVID_OVERRIDE");
+fprintf(stderr, "'-p' and '--pciid' can't be used 
together.\n");
+return -1;
+}
+
 for (unsigned i = 0; i < ARRAY_SIZE(platforms); i++) {
 if (strcmp(optarg, platforms[i].name) == 0) {
 platform = platforms + i;
@@ -473,11 +496,28 @@ main(int argc, char **argv)
 
 printf("### Compiling for %s ###\n", platform->name);
 setenv("INTEL_DEVID_OVERRIDE", platform->pci_id, 1);
+platf_overridden = 1;
 break;
 }
 case 'j':
 max_threads = atoi(optarg);
 break;
+case PCI_ID_OVERRIDE_OPTION:
+if (platf_overridden) {
+unsetenv("INTEL_DEVID_OVERRIDE");
+fprintf(stderr, "'-p' and '--pciid' can't be used 
together.\n");
+return -1;
+}
+
+if (optarg[0] != '0' || optarg[1] != 'x') {
+  fprintf(stderr, "pci-id should be a hex number starting with 
'0x'\n");
+  return -1;
+}
+
+printf("### Compiling for GEN arch with PCI_ID=%s ###\n", optarg);
+setenv("INTEL_DEVID_OVERRIDE", optarg, 1);
+pci_id_overridden = 1;
+break;
 default:
 fprintf(stderr, "Unknown option: %x\n", opt);
 print_usage(argv[0]);
-- 
2.16.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv: Be more careful about fast-clear colors

2018-02-12 Thread Jason Ekstrand
On Mon, Feb 12, 2018 at 5:11 PM, Nanley Chery  wrote:

> On Mon, Feb 12, 2018 at 04:35:20PM -0800, Jason Ekstrand wrote:
> > Previously, we just used all the channels regardless of the format.
> > This is less than ideal because some channels may have undefined values
> > and this should be ok from the client's perspective.  Even though the
> > driver should do the correct thing regardless of what is in the
> > undefined value, it makes things less deterministic.  In particular, the
> > driver may choose to fast-clear or not based on undefined values.  This
> > level of nondeterminism is bad.
> >
> > Cc: mesa-sta...@lists.freedesktop.org
> > ---
> >  src/intel/vulkan/genX_cmd_buffer.c | 47 --
> 
> >  1 file changed, 20 insertions(+), 27 deletions(-)
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 99854eb..a574024 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -202,24 +202,6 @@ add_image_view_relocs(struct anv_cmd_buffer
> *cmd_buffer,
> > }
> >  }
> >
> > -static bool
> > -color_is_zero_one(VkClearColorValue value, enum isl_format format)
> > -{
> > -   if (isl_format_has_int_channel(format)) {
> > -  for (unsigned i = 0; i < 4; i++) {
> > - if (value.int32[i] != 0 && value.int32[i] != 1)
> > -return false;
> > -  }
> > -   } else {
> > -  for (unsigned i = 0; i < 4; i++) {
> > - if (value.float32[i] != 0.0f && value.float32[i] != 1.0f)
> > -return false;
> > -  }
> > -   }
> > -
> > -   return true;
> > -}
> > -
> >  static void
> >  color_attachment_compute_aux_usage(struct anv_device * device,
> > struct anv_cmd_state * cmd_state,
> > @@ -294,13 +276,26 @@ color_attachment_compute_aux_usage(struct
> anv_device * device,
> >
> > assert(iview->image->planes[0].aux_surface.isl.usage &
> ISL_SURF_USAGE_CCS_BIT);
> >
> > +   const struct isl_format_layout *view_fmtl =
> > +  isl_format_get_layout(iview->planes[0].isl.format);
> > +   union isl_color_value clear_color = {};
>
> Is this initializer valid?
>

It's a GCC extension (also supported by clang), but yes.


> > +
> > +#define COPY_CLEAR_COLOR_CHANNEL(c, i) \
> > +   if (view_fmtl->channels.c.bits) \
> > +  clear_color.u32[i] = att_state->clear_value.color.uint32[i]
> > +
> > +   COPY_CLEAR_COLOR_CHANNEL(r, 0);
> > +   COPY_CLEAR_COLOR_CHANNEL(g, 1);
> > +   COPY_CLEAR_COLOR_CHANNEL(b, 2);
> > +   COPY_CLEAR_COLOR_CHANNEL(a, 3);
> > +
> > +#undef COPY_CLEAR_COLOR_CHANNEL
> > +
> > att_state->clear_color_is_zero_one =
> > -  color_is_zero_one(att_state->clear_value.color,
> iview->planes[0].isl.format);
> > +  isl_color_value_is_zero_one(*fast_clear_color,
>
> Should this be clear_color?
>

Yes it should.  Fixed locally.


> > +  iview->planes[0].isl.format);
> > att_state->clear_color_is_zero =
> > -  att_state->clear_value.color.uint32[0] == 0 &&
> > -  att_state->clear_value.color.uint32[1] == 0 &&
> > -  att_state->clear_value.color.uint32[2] == 0 &&
> > -  att_state->clear_value.color.uint32[3] == 0;
> > +  isl_color_value_is_zero(*fast_clear_color,
> iview->planes[0].isl.format);
> >
>
> Should this be clear_color?
>

Yes it should.  Fixed locally.  This caused a lot of fails.  I don't know
how I didn't catch it. :(

Do you want a v2?

--Jason


>
> -Nanley
>
> > if (att_state->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
> >/* Start by getting the fast clear type.  We use the first subpass
> > @@ -358,10 +353,8 @@ color_attachment_compute_aux_usage(struct
> anv_device * device,
> > "LOAD_OP_CLEAR.  Only fast-clearing the first
> slice");
> >}
> >
> > -  if (att_state->fast_clear) {
> > - memcpy(fast_clear_color->u32, att_state->clear_value.color.
> uint32,
> > -sizeof(fast_clear_color->u32));
> > -  }
> > +  if (att_state->fast_clear)
> > + *fast_clear_color = clear_color;
> > } else {
> >att_state->fast_clear = false;
> > }
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2018-02-12 Thread rainer.hochec...@onlinehome.de

Hi Christian,

 

We have enabled vsync, hence we wait for page flip and renderer will block.

 

I prefer having control at the application level. If all goes easy and you don't

push the system to its limits, all fine. The fun part starts if you have to skip

decoding cycles or drop frames to catch up from being late. Or playing i.e.

60fps material on a 30hz screen.

I want to make sure that decoding has finished before I schedule a frame for

rendering.

 

Regards,

Rainer

 

Gesendet: Sonntag, 11. Februar 2018 um 19:23 Uhr
Von: "Christian König" 
An: "rainer.hochec...@onlinehome.de" 
Cc: "Philipp Kerling" , s...@jkqxz.net, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2



Hi Rainer,

well at the end of the pipeline the page flip waits for all previous operations on a buffer to be completed before displaying it. But even that wait is asynchronous.

So as long as you don't wait for the page flip your render thread won't be blocked.

Using vaSyncSurface is only good in two occasions:
1. You want to access the data with the CPU. And even then it is not necessary most of the time because of how Mesa is designed.
2. You need to limit how many jobs are in flight. Depending on how you do resource management, that is necessary to prevent the application from scheduling a massive number of jobs and running away with all the resources for that.

Regards,
Christian.

Am 11.02.2018 um 13:26 schrieb rainer.hochec...@onlinehome.de:



Hi Christian,

 

Finally something in the render pipeline has to wait until decoding is finished or the

frame can't be rendered and this will result in blocking the render thread.

 

Regards,

Rainer

 


Gesendet: Sonntag, 11. Februar 2018 um 13:13 Uhr
Von: "Christian König" 
An: "rainer.hochec...@onlinehome.de" 
Cc: "Philipp Kerling" , s...@jkqxz.net, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2



Hi Rainer,

the render thread doesn't wait either.


See when you dispatch some work the AMD drivers always wait for prerequisites in the background, not in the foreground.

The older radeon driver uses hardware semaphores for this while amdgpu has a GPU scheduler which handles that stuff.

This is very important because when you hold back rendering work in the application the driver stack won't know about it and power management starts to stutter.

Not so important for 1920p because power management should be able to compensate the work peaks, but for 4K that's something mandatory to let the driver be able to estimate future load.

We even discussed in our multimedia meeting if we shouldn't limit 3D power management when UVD/VCN decoding is active because of that problem. But I'm not very keen about those workarounds because it's really counterproductive for transcode use cases to have the 3D engine idling around with high clocks.

Regards,
Christian.

Am 11.02.2018 um 12:51 schrieb rainer.hochec...@onlinehome.de:



Hi Christian,

 

For Kodi it is better to wait on the thread that does decoding than later 

by the render thread. Means it is desired to call it.

 

Does vaSyncSurface block as stated by the docs?

 

Regards,

Rainer

 

Gesendet: Sonntag, 11. Februar 2018 um 11:02 Uhr
Von: "Christian König" 
An: "Philipp Kerling" , s...@jkqxz.net
Cc: rainer.hochec...@onlinehome.de, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

Am 09.02.2018 um 21:35 schrieb Philipp Kerling:
> Hi,
>
> resurrecting this thread again since there's been some progress on the
> Kodi side.
>
>> For the EGL part, see 
>> and .
> We recently started testing vaExportSurfaceHandle support, so we will
> have this covered soon.
>
>> I have been testing with mpv and ffmpeg; any thoughts from the
>> Kodi point of view would be most welcome.
> It generally works quite well, but we still have the unresolved
> vaSyncSurface problem.
> To recap: vaExportSurfaceHandle requires calling vaSyncSurface to make
> sure that the decode is actually finished and the buffer is usable
> before rendering the frame. However, vaSyncSurface was largely
> unimplemented on mesa back then and it was unclear how to proceed with
> regard to decode (VAAPI)/present (EGL+GL) synchronization.
>
> So on to the question: Is this still the case, or has there been
> progress on implementing vaSyncSurface in mesa? In either case, 

Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2018-02-12 Thread rainer.hochec...@onlinehome.de
Hi Christian,

 

Finally something in the render pipeline has to wait until decoding is finished or the

frame can't be rendered and this will result in blocking the render thread.

 

Regards,

Rainer

 


Gesendet: Sonntag, 11. Februar 2018 um 13:13 Uhr
Von: "Christian König" 
An: "rainer.hochec...@onlinehome.de" 
Cc: "Philipp Kerling" , s...@jkqxz.net, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2



Hi Rainer,

the render thread doesn't wait either.


See when you dispatch some work the AMD drivers always wait for prerequisites in the background, not in the foreground.

The older radeon driver uses hardware semaphores for this while amdgpu has a GPU scheduler which handles that stuff.

This is very important because when you hold back rendering work in the application the driver stack won't know about it and power management starts to stutter.

Not so important for 1920p because power management should be able to compensate the work peaks, but for 4K that's something mandatory to let the driver be able to estimate future load.

We even discussed in our multimedia meeting if we shouldn't limit 3D power management when UVD/VCN decoding is active because of that problem. But I'm not very keen about those workarounds because it's really counterproductive for transcode use cases to have the 3D engine idling around with high clocks.

Regards,
Christian.

Am 11.02.2018 um 12:51 schrieb rainer.hochec...@onlinehome.de:



Hi Christian,

 

For Kodi it is better to wait on the thread that does decoding than later 

by the render thread. Means it is desired to call it.

 

Does vaSyncSurface block as stated by the docs?

 

Regards,

Rainer

 

Gesendet: Sonntag, 11. Februar 2018 um 11:02 Uhr
Von: "Christian König" 
An: "Philipp Kerling" , s...@jkqxz.net
Cc: rainer.hochec...@onlinehome.de, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

Am 09.02.2018 um 21:35 schrieb Philipp Kerling:
> Hi,
>
> resurrecting this thread again since there's been some progress on the
> Kodi side.
>
>> For the EGL part, see 
>> and .
> We recently started testing vaExportSurfaceHandle support, so we will
> have this covered soon.
>
>> I have been testing with mpv and ffmpeg; any thoughts from the
>> Kodi point of view would be most welcome.
> It generally works quite well, but we still have the unresolved
> vaSyncSurface problem.
> To recap: vaExportSurfaceHandle requires calling vaSyncSurface to make
> sure that the decode is actually finished and the buffer is usable
> before rendering the frame. However, vaSyncSurface was largely
> unimplemented on mesa back then and it was unclear how to proceed with
> regard to decode (VAAPI)/present (EGL+GL) synchronization.
>
> So on to the question: Is this still the case, or has there been
> progress on implementing vaSyncSurface in mesa? In either case, do we
> need that support or does this syncing work implicitly somehow on AMD?
>
> I've noticed that mpv does not seem to call vaSyncSurface, although it
> technically should.

Actually the mpv approach is correct.

Calling vaSyncSurface is unnecessary and undesired for AMD hardware
because it moves synchronization to the CPU while it should happen on
the GPU and/or GPU scheduler.

E.g. our 3D pipeline can wait for hardware video decoding to finish
before starting the rendering. We even have some implementations which
allow the 3D pipeline to start when only the first half of the picture
is decoded etc.

If we don't do this the 3D pipeline runs dry between frame decoding
which leads to problems with power management.

We should probably add a flag or bit or feature or something like this
to note that the application explicitly should NOT call vaSyncSurface
before exporting the surface.

Regards,
Christian.

>
> Best regards,
> Philipp
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
 







___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

2018-02-12 Thread rainer.hochec...@onlinehome.de
Hi Christian,

 

For Kodi it is better to wait on the thread that does decoding than later 

by the render thread. Means it is desired to call it.

 

Does vaSyncSurface block as stated by the docs?

 

Regards,

Rainer

 

Gesendet: Sonntag, 11. Februar 2018 um 11:02 Uhr
Von: "Christian König" 
An: "Philipp Kerling" , s...@jkqxz.net
Cc: rainer.hochec...@onlinehome.de, peter.fruehber...@gmail.com, mic...@daenzer.net, mesa-dev@lists.freedesktop.org, harry.wentl...@amd.com, lru...@libreelec.tv
Betreff: Re: [Mesa-dev] 10bit HEVC decoding for RadeonSI v2

Am 09.02.2018 um 21:35 schrieb Philipp Kerling:
> Hi,
>
> resurrecting this thread again since there's been some progress on the
> Kodi side.
>
>> For the EGL part, see 
>> and .
> We recently started testing vaExportSurfaceHandle support, so we will
> have this covered soon.
>
>> I have been testing with mpv and ffmpeg; any thoughts from the
>> Kodi point of view would be most welcome.
> It generally works quite well, but we still have the unresolved
> vaSyncSurface problem.
> To recap: vaExportSurfaceHandle requires calling vaSyncSurface to make
> sure that the decode is actually finished and the buffer is usable
> before rendering the frame. However, vaSyncSurface was largely
> unimplemented on mesa back then and it was unclear how to proceed with
> regard to decode (VAAPI)/present (EGL+GL) synchronization.
>
> So on to the question: Is this still the case, or has there been
> progress on implementing vaSyncSurface in mesa? In either case, do we
> need that support or does this syncing work implicitly somehow on AMD?
>
> I've noticed that mpv does not seem to call vaSyncSurface, although it
> technically should.

Actually the mpv approach is correct.

Calling vaSyncSurface is unnecessary and undesired for AMD hardware
because it moves synchronization to the CPU while it should happen on
the GPU and/or GPU scheduler.

E.g. our 3D pipeline can wait for hardware video decoding to finish
before starting the rendering. We even have some implementations which
allow the 3D pipeline to start when only the first half of the picture
is decoded etc.

If we don't do this the 3D pipeline runs dry between frame decoding
which leads to problems with power management.

We should probably add a flag or bit or feature or something like this
to note that the application explicitly should NOT call vaSyncSurface
before exporting the surface.

Regards,
Christian.

>
> Best regards,
> Philipp
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
 


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/5] i965: ASTC5x5 workaround

2018-02-12 Thread Tapani Pälli



On 02/12/2018 09:44 AM, Tapani Pälli wrote:

Hi;

On 02/08/2018 09:50 AM, Rogovin, Kevin wrote:

Hi,

I gave it a whirl, always setting the .mocs field passed to 
isl_surf_fill_state() to 0. Sadly CarChase GLES continued to hang 
(whereas the GL version did not, because it does not use ASTC). This makes 
sense since MOCS (at least last time I looked at it) only really 
controls cache usage for L3 and eLLC (please anyone correct me if I am 
wrong in this), whereas the issue is that the samplers mess up how they 
deal with their own (private) cache.


It really is nasty that (as of now) this complicated 
workaround appears to be needed and must somehow be re-implemented in anv as 
well.


It seems surrounding code has changed so that these patches need some 
changes. Kevin, are you planning to rebase/refactor these changes?


FYI I've rebased the patches and did additional porting (because of 
commit df13588d21) here:


https://cgit.freedesktop.org/~tpalli/mesa/log/?h=astc5x5

Let me know if this looks OK for you.




-Kevin

*From:*Jason Ekstrand [mailto:ja...@jlekstrand.net]
*Sent:* Thursday, February 8, 2018 2:47 AM
*To:* Rogovin, Kevin 
*Cc:* ML mesa-dev 
*Subject:* Re: [Mesa-dev] [PATCH v2 0/5] i965: ASTC5x5 workaround

Random thought:

Nanley and I were talking about this just now and I was complaining 
about how much I hate the fact that this workaround exists because we 
can't implement it in Vulkan.  Then I got an idea.  What would happen 
if we just set MOCS to zero (uncached) for ASTC 5x5 textures?  Does 
that make the hang go away?  How bad is the car chase performance with 
that compared to this series?  It's a bit of a big hammer but has the 
advantage of simplicity.  If it causes performance to tank on anything 
then the more complex solution is probably worth it but I thought 
it was worth a try.


--Jason

On Thu, Dec 14, 2017 at 9:39 AM, > wrote:


    From: Kevin Rogovin >

    This patch series implements a needed workaround for Gen9 for ASTC5x5
    sampler reads. The crux of the workaround is to make sure that the
    sampler does not read an ASTC5x5 texture and a surface with an auxiliary
    buffer without having a texture cache invalidate and command streamer
    stall between such accesses.

    With this patch series applied to the (current) master branch of mesa,
    carchase works on my SKL GT4.

    v2:
   Rename workaround functions from brw_ to gen9_
   (suggested/requested by Topi Pohjolainen).

   Place texture resolve to avoid using auxiliary surface
   when ASTC5x5 is detected in brw_predraw_resolve_inputs()
   instead of another detected function; doing so allows
   one to avoid walking the textures again.
   (suggested/requested by Topi Pohjolainen).

   Emit command streamer stall in addition to texture
   invalidate.
   (original short-coming caught by Jason Ekstrand)

   Place workaround function in (new) dedicated file.

   Minor patch re-ordering to accommodate changes.

    Kevin Rogovin (5):
   i965: define astx5x5 workaround infrastructure
   i965: set ASTC5x5 workaround texture type tracking on texture
    validate
   i965: use ASTC5x5 workaround in brw_draw
   i965: use ASTC5x5 workaround in brw_compute
   i965: ASTC5x5 workaround logic for blorp

  src/mesa/drivers/dri/i965/Makefile.sources       |  1 +
  src/mesa/drivers/dri/i965/brw_compute.c          |  6 
  src/mesa/drivers/dri/i965/brw_context.c          |  6 
  src/mesa/drivers/dri/i965/brw_context.h          | 24 


  src/mesa/drivers/dri/i965/brw_draw.c             | 16 +--
  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 
  src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c      | 36
    
  src/mesa/drivers/dri/i965/genX_blorp_exec.c      |  5 
  src/mesa/drivers/dri/i965/intel_batchbuffer.c    |  1 +
  src/mesa/drivers/dri/i965/intel_tex_image.c      | 16 ---
  src/mesa/drivers/dri/i965/intel_tex_validate.c   | 13 +
  src/mesa/drivers/dri/i965/meson.build            |  1 +
  12 files changed, 124 insertions(+), 6 deletions(-)
  create mode 100644 src/mesa/drivers/dri/i965/gen9_astc5x5_wa.c

    --
    2.7.4

    ___
    mesa-dev mailing list
    mesa-dev@lists.freedesktop.org 


    https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

Re: [Mesa-dev] [PATCH 1/6] intel/vulkan: Support INTEL_NO_HW environment variable

2018-02-12 Thread Samuel Iglesias Gonsálvez
Series is,

Reviewed-by: Samuel Iglesias Gonsálvez 

Sam

On 10/02/18 09:06, Jordan Justen wrote:
> Signed-off-by: Jordan Justen 
> ---
>  src/intel/vulkan/anv_device.c  | 3 +++
>  src/intel/vulkan/anv_private.h | 2 ++
>  src/intel/vulkan/anv_queue.c   | 2 +-
>  3 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 86c1bdc1d51..59ea073803d 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -294,6 +294,8 @@ anv_physical_device_init(struct anv_physical_device 
> *device,
> assert(strlen(path) < ARRAY_SIZE(device->path));
> strncpy(device->path, path, ARRAY_SIZE(device->path));
>  
> +   device->no_hw = getenv("INTEL_NO_HW") != NULL;
> +
> device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
> if (!device->chipset_id) {
>result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
> @@ -1371,6 +1373,7 @@ VkResult anv_CreateDevice(
> device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
> device->instance = physical_device->instance;
> device->chipset_id = physical_device->chipset_id;
> +   device->no_hw = physical_device->no_hw;
> device->lost = false;
>  
> if (pAllocator)
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index d38dd9e4220..b2d9bf785e7 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -749,6 +749,7 @@ struct anv_physical_device {
>  
>  struct anv_instance *   instance;
>  uint32_tchipset_id;
> +boolno_hw;
>  charpath[20];
>  const char *name;
>  struct gen_device_info  info;
> @@ -852,6 +853,7 @@ struct anv_device {
>  
>  struct anv_instance *   instance;
>  uint32_tchipset_id;
> +boolno_hw;
>  struct gen_device_info  info;
>  struct isl_device   isl_dev;
>  int context_id;
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index c6b2e01c628..b0dcc882edc 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -39,7 +39,7 @@ anv_device_execbuf(struct anv_device *device,
> struct drm_i915_gem_execbuffer2 *execbuf,
> struct anv_bo **execbuf_bos)
>  {
> -   int ret = anv_gem_execbuffer(device, execbuf);
> +   int ret = device->no_hw ? 0 : anv_gem_execbuffer(device, execbuf);
> if (ret != 0) {
>/* We don't know the real error. */
>device->lost = true;



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl/linker: fix bug when checking precision qualifier

2018-02-12 Thread Samuel Iglesias Gonsálvez
This patch is still unreviewed.

Sam


On 30/01/18 08:50, Samuel Iglesias Gonsálvez wrote:
> According to GLSL ES 3.2 spec, see table in 9.2.1 "Linked Shaders"
> section, the precision qualifier should match for uniform variables.
> This also applies to previous GLSL ES 3.x specs.
>
> This 'if' checks the condition for uniform variables, while for UBOs
> it is checked in link_interface_blocks.cpp.
>
> Fixes: b50b82b8a553
> ("glsl/es31: precision qualifier doesn't need to match in shader interface 
> block members")
>
> Signed-off-by: Samuel Iglesias Gonsálvez 
> ---
>  src/compiler/glsl/linker.cpp | 12 
>  1 file changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
> index ce101935b01..050b2906f6b 100644
> --- a/src/compiler/glsl/linker.cpp
> +++ b/src/compiler/glsl/linker.cpp
> @@ -1134,15 +1134,11 @@ cross_validate_globals(struct gl_shader_program *prog,
>  
>  return;
>   }
> - /* Only in GLSL ES 3.10, the precision qualifier should not match
> -  * between block members defined in matched block names within a
> -  * shader interface.
> -  *
> -  * In GLSL ES 3.00 and ES 3.20, precision qualifier for each block
> -  * member should match.
> +
> + /* Check the precision qualifier matches for uniform variables. For 
> UBOs,
> +  * it is checked in link_interface_blocks.cpp.
>*/
> - if (prog->IsES && (prog->data->Version != 310 ||
> -!var->get_interface_type()) &&
> + if (prog->IsES && !var->get_interface_type() &&
>   existing->data.precision != var->data.precision) {
>  if ((existing->data.used && var->data.used) || 
> prog->data->Version >= 300) {
> linker_error(prog, "declarations for %s `%s` have "




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105068] vulkaninfo gives an VK_ERROR_INITIALIZATION_FAILED

2018-02-12 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105068

leigh scott  changed:

   What|Removed |Added

   See Also||https://github.com/KhronosG
   ||roup/Vulkan-LoaderAndValida
   ||tionLayers/issues/2152

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH shaderdb 1/3] intel_stub: override pci-id only if INTEL_DEVID_OVERRIDE is set

2018-02-12 Thread Kenneth Graunke
On Monday, February 12, 2018 5:26:14 PM PST Dongwon Kim wrote:
> To prevent a segfault, pci-id is set only if INTEL_DEVID_OVERRIDE exists.
> 
> Signed-off-by: Dongwon Kim 
> ---
>  intel_stub.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/intel_stub.c b/intel_stub.c
> index ea88400..cf9ddff 100644
> --- a/intel_stub.c
> +++ b/intel_stub.c
> @@ -28,6 +28,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -174,6 +175,7 @@ ioctl(int fd, unsigned long request, ...)
>   va_list args;
>   void *argp;
>   struct stat buf;
> + char *pci_id;
>  
>   va_start(args, request);
>   argp = va_arg(args, void *);
> @@ -199,7 +201,13 @@ ioctl(int fd, unsigned long request, ...)
>  *getparam->value = 1;
>  break;
>  case I915_PARAM_CHIPSET_ID:
> -*getparam->value = 
> strtod(getenv("INTEL_DEVID_OVERRIDE"), NULL);
> +pci_id = getenv("INTEL_DEVID_OVERRIDE");
> +
> +if (pci_id)
> +*getparam->value = strtod(pci_id, NULL);
> +else
> +return -EINVAL;
> +
>  break;
>  case I915_PARAM_CMD_PARSER_VERSION:
>  *getparam->value = 9;
> 

This patch is:

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] loader_dri3/glx/egl: Reinstate the loader_dri3_vtable get_dri_screen callback

2018-02-12 Thread Thomas Hellstrom

Hi!

It would be good if somebody could review this so we can get the fix in.

Thanks,
Thomas


On 02/09/2018 09:37 AM, Thomas Hellstrom wrote:

Removing this callback caused rendering corruption in some multi-screen cases,
so it is reinstated but without the drawable argument which was never used
by implementations and was confusing since the drawable could have been
created with another screen.

Cc: "17.3" mesa-sta...@lists.freedesktop.org
Fixes: 5198e48a0d (loader_dri3/glx/egl: Remove the loader_dri3_vtable 
get_dri_screen callback)
Bugzilla: 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D105013=DwIGaQ=uilaK90D4TOVoH58JNXRgQ=wnSlgOCqfpNS4d02vP68_E9q2BNMCwfD2OZ_6dCFVQQ=XWphEAw-49womV9tmv90qPaEgOeyVd98mULQPxCMsy0=EC7q92tLLJ_BOkb9bWoQEHpl07D4zKI-gaOq3y87TcE=
Reported-by: Daniel van Vugt 
Signed-off-by: Thomas Hellstrom 
---
  src/egl/drivers/dri2/platform_x11_dri3.c | 12 
  src/glx/dri3_glx.c   | 11 +++
  src/loader/loader_dri3_helper.c  | 12 +++-
  src/loader/loader_dri3_helper.h  |  1 +
  4 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c 
b/src/egl/drivers/dri2/platform_x11_dri3.c
index 6e40eaa596..060b5f83a3 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -75,6 +75,17 @@ egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
 return dri2_ctx->dri_context;
  }
  
+static __DRIscreen *

+egl_dri3_get_dri_screen(void)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   struct dri2_egl_context *dri2_ctx;
+   if (!ctx)
+  return NULL;
+   dri2_ctx = dri2_egl_context(ctx);
+   return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen;
+}
+
  static void
  egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
  {
@@ -88,6 +99,7 @@ static const struct loader_dri3_vtable egl_dri3_vtable = {
 .set_drawable_size = egl_dri3_set_drawable_size,
 .in_current_context = egl_dri3_in_current_context,
 .get_dri_context = egl_dri3_get_dri_context,
+   .get_dri_screen = egl_dri3_get_dri_screen,
 .flush_drawable = egl_dri3_flush_drawable,
 .show_fps = NULL,
  };
diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index f280a8cef7..016f91b196 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -116,6 +116,16 @@ glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
 return (gc != ) ? dri3Ctx->driContext : NULL;
  }
  
+static __DRIscreen *

+glx_dri3_get_dri_screen(void)
+{
+   struct glx_context *gc = __glXGetCurrentContext();
+   struct dri3_context *pcp = (struct dri3_context *) gc;
+   struct dri3_screen *psc = (struct dri3_screen *) pcp->base.psc;
+
+   return (gc !=  && psc) ? psc->driScreen : NULL;
+}
+
  static void
  glx_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
  {
@@ -150,6 +160,7 @@ static const struct loader_dri3_vtable glx_dri3_vtable = {
 .set_drawable_size = glx_dri3_set_drawable_size,
 .in_current_context = glx_dri3_in_current_context,
 .get_dri_context = glx_dri3_get_dri_context,
+   .get_dri_screen = glx_dri3_get_dri_screen,
 .flush_drawable = glx_dri3_flush_drawable,
 .show_fps = glx_dri3_show_fps,
  };
diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index fbda3d635c..2e3b6c619e 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -1311,6 +1311,7 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, 
unsigned int format,
 xcb_sync_fence_t sync_fence;
 struct xshmfence *shm_fence;
 int  fence_fd;
+   __DRIscreen  *cur_screen;
  
 if (buffer)

return buffer;
@@ -1341,8 +1342,17 @@ dri3_get_pixmap_buffer(__DRIdrawable *driDrawable, 
unsigned int format,
 if (!bp_reply)
goto no_image;
  
+   /* Get the currently-bound screen or revert to using the drawable's screen if

+* no contexts are currently bound. The latter case is at least necessary 
for
+* obs-studio, when using Window Capture (Xcomposite) as a Source.
+*/
+   cur_screen = draw->vtable->get_dri_screen();
+   if (!cur_screen) {
+   cur_screen = draw->dri_screen;
+   }
+
 buffer->image = loader_dri3_create_image(draw->conn, bp_reply, format,
-draw->dri_screen, draw->ext->image,
+cur_screen, draw->ext->image,
  buffer);
 if (!buffer->image)
goto no_image;
diff --git a/src/loader/loader_dri3_helper.h b/src/loader/loader_dri3_helper.h
index 4ce98b8c59..839cba30df 100644
--- a/src/loader/loader_dri3_helper.h
+++ b/src/loader/loader_dri3_helper.h
@@ -99,6 +99,7 @@ struct loader_dri3_vtable {
 

[Mesa-dev] [PATCH] ac: implement nir_intrinsic_image_samples

2018-02-12 Thread Timothy Arceri
Fixes cts test:
KHR-GL45.shader_texture_image_samples_tests.image_functional_test
---
 src/amd/common/ac_nir_to_llvm.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3af3dbace2..8d1eed241f 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3785,6 +3785,26 @@ static LLVMValueRef visit_image_atomic(struct 
ac_nir_context *ctx,
return ac_build_intrinsic(>ac, intrinsic_name, ctx->ac.i32, 
params, param_count, 0);
 }
 
+static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
+   const nir_intrinsic_instr *instr)
+{
+   const nir_variable *var = instr->variables[0]->var;
+   const struct glsl_type *type = glsl_without_array(var->type);
+   bool da = glsl_sampler_type_is_array(type) ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D;
+
+   struct ac_image_args args = { 0 };
+   args.da = da;
+   args.dmask = 0xf;
+   args.resource = get_sampler_desc(ctx, instr->variables[0],
+AC_DESC_IMAGE, NULL, true, false);
+   args.opcode = ac_image_get_resinfo;
+   args.addr = ctx->ac.i32_0;
+
+   return ac_build_image_opcode(>ac, );
+}
+
 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 const nir_intrinsic_instr *instr)
 {
@@ -4483,6 +4503,9 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
case nir_intrinsic_store_shared:
visit_store_shared(ctx, instr);
break;
+   case nir_intrinsic_image_samples:
+   result = visit_image_samples(ctx, instr);
+   break;
case nir_intrinsic_image_load:
result = visit_image_load(ctx, instr);
break;
-- 
2.14.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


<    1   2