Re: [Mesa-dev] [PATCH] radv: using tls to store llvm related info and speed up compiles (v8)

2018-07-04 Thread Dieter Nützel

Tested-by: Dieter Nützel 

on RX580
with F1 2017

and 'smoketest':

cpupower frequency-set -g ondemand

/home/dieter> smoketest
5172 presents in 5.00043 seconds (FPS: 1034.31)
5426 presents in 5.00098 seconds (FPS: 1084.99)
5304 presents in 5.00002 seconds (FPS: 1060.8)
5338 presents in 5.00085 seconds (FPS: 1067.42)
5304 presents in 5.00034 seconds (FPS: 1060.73)
5357 presents in 5.00073 seconds (FPS: 1071.24)
frames:35390, elapsedms:33388

cpupower frequency-set -g performance

/home/dieter> smoketest
5907 presents in 5.00033 seconds (FPS: 1181.32)
6093 presents in 5.00007 seconds (FPS: 1218.58)
6070 presents in 5.00094 seconds (FPS: 1213.77)
6096 presents in 5.00006 seconds (FPS: 1219.19)
6107 presents in 5.00063 seconds (FPS: 1221.25)
frames:33423, elapsedms:27686

Dieter

Am 05.07.2018 02:03, schrieb Dave Airlie:

From: Dave Airlie 

This uses the common compiler passes abstraction to help radv
avoid fixed cost compiler overheads. This uses a linked list per
thread stored in thread local storage, with an entry in the list
for each target machine.

This should remove all the fixed overheads setup costs of creating
the pass manager each time.

This takes a demo app time to compile the radv meta shaders on nocache
and exit from 1.7s to 1s. It also has been reported to take the startup
time of uncached shaders on RoTR from 12m24s to 11m35s (Alex)

v2: fix llvm6 build, inline emit function, handle multiple targets
in one thread
v3: rebase and port onto new structure
v4: rename some vars (Bas)
v5: drag all code into radv for now, we can refactor it out later
for radeonsi if we make it shareable
v6: use a bit more C++ in the wrapper
v7: logic bugs fixed so it actually runs again.
v8: rebase on top of radeonsi changes.
---
 src/amd/vulkan/Makefile.sources |   2 +
 src/amd/vulkan/meson.build  |   2 +
 src/amd/vulkan/radv_debug.h |   1 +
 src/amd/vulkan/radv_device.c|   1 +
 src/amd/vulkan/radv_llvm_helper.cpp | 148 
 src/amd/vulkan/radv_nir_to_llvm.c   |  27 +
 src/amd/vulkan/radv_shader.c|  10 +-
 src/amd/vulkan/radv_shader_helper.h |  44 +
 8 files changed, 207 insertions(+), 28 deletions(-)
 create mode 100644 src/amd/vulkan/radv_llvm_helper.cpp
 create mode 100644 src/amd/vulkan/radv_shader_helper.h

diff --git a/src/amd/vulkan/Makefile.sources 
b/src/amd/vulkan/Makefile.sources

index 70d56e88cb3..152fdd7cb71 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -54,6 +54,7 @@ VULKAN_FILES := \
radv_meta_resolve_cs.c \
radv_meta_resolve_fs.c \
radv_nir_to_llvm.c \
+   radv_llvm_helper.cpp \
radv_pass.c \
radv_pipeline.c \
radv_pipeline_cache.c \
@@ -62,6 +63,7 @@ VULKAN_FILES := \
radv_shader.c \
radv_shader_info.c \
radv_shader.h \
+   radv_shader_helper.h \
radv_query.c \
radv_util.c \
radv_util.h \
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 22857926fa1..9f2842182e7 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -67,6 +67,7 @@ libradv_files = files(
   'radv_descriptor_set.h',
   'radv_formats.c',
   'radv_image.c',
+  'radv_llvm_helper.cpp',
   'radv_meta.c',
   'radv_meta.h',
   'radv_meta_blit.c',
@@ -88,6 +89,7 @@ libradv_files = files(
   'radv_radeon_winsys.h',
   'radv_shader.c',
   'radv_shader.h',
+  'radv_shader_helper.h',
   'radv_shader_info.c',
   'radv_query.c',
   'radv_util.c',
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index f1b0dc26a63..9fe4c3b7404 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -49,6 +49,7 @@ enum {
RADV_DEBUG_ERRORS= 0x8,
RADV_DEBUG_STARTUP   = 0x10,
RADV_DEBUG_CHECKIR   = 0x20,
+   RADV_DEBUG_NOTHREADLLVM  = 0x40,
 };

 enum {
diff --git a/src/amd/vulkan/radv_device.c 
b/src/amd/vulkan/radv_device.c

index ad3465f594e..73c48cef1f0 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -436,6 +436,7 @@ static const struct debug_control 
radv_debug_options[] = {

{"errors", RADV_DEBUG_ERRORS},
{"startup", RADV_DEBUG_STARTUP},
{"checkir", RADV_DEBUG_CHECKIR},
+   {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
{NULL, 0}
 };

diff --git a/src/amd/vulkan/radv_llvm_helper.cpp
b/src/amd/vulkan/radv_llvm_helper.cpp
new file mode 100644
index 000..dad881f6b1a
--- /dev/null
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2018 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person 
obtaining a
+ * copy of this software and associated documentation files (the 
"Software"),
+ * to deal in the Software without restriction, including without 
limitation
+ * the rights to use, copy, modify, merge, publish, distribute, 
sublicense,
+ * and/or sell copies of the 

Re: [Mesa-dev] [PATCH 6/6] radv/winsys: remove the max IBs per submit limit for the sysmem path

2018-07-04 Thread Dieter Nützel

For the series:

Tested-by: Dieter Nützel 

on RX580
with F1 2017

Dieter

Am 04.07.2018 15:06, schrieb Samuel Pitoiset:

This path will be eventually improved later but as it's only
used on SI (or with RADV_DEBUG=noibs), I'm not sure if that
matters much.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 46 ---
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index f20b9ff6d8..4e3b1d0e6b 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -299,15 +299,6 @@ static void radv_amdgpu_cs_grow(struct
radeon_cmdbuf *_cs, size_t min_size)
/* The maximum size in dwords has been reached,
 * try to allocate a new one.
 */
-   if (cs->num_old_cs_buffers + 1 >= 
AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
-   /* TODO: Allow to submit more than 4 IBs. */
-   fprintf(stderr, "amdgpu: Maximum number of IBs "
-   "per submit reached.\n");
-   cs->failed = true;
-   cs->base.cdw = 0;
-   return;
-   }
-
cs->old_cs_buffers =
realloc(cs->old_cs_buffers,
 (cs->num_old_cs_buffers + 1) * 
sizeof(*cs->old_cs_buffers));

@@ -996,30 +987,46 @@ static int
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
assert(cs_count);

for (unsigned i = 0; i < cs_count;) {
-   struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = 
{0};
-   unsigned number_of_ibs = 1;
-   struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = 
{0};
+   struct amdgpu_cs_ib_info *ibs;
+   struct radeon_winsys_bo **bos;
struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs :
initial_preamble_cs;
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+   unsigned number_of_ibs;
uint32_t *ptr;
unsigned cnt = 0;
unsigned size = 0;
unsigned pad_words = 0;

-   if (cs->num_old_cs_buffers > 0) {
+   /* Compute the number of IBs for this submit. */
+   number_of_ibs = cs->num_old_cs_buffers + 1;
+
+   ibs = malloc(number_of_ibs * sizeof(*ibs));
+   if (!ibs)
+   return -ENOMEM;
+
+   bos = malloc(number_of_ibs * sizeof(*bos));
+   if (!bos) {
+   free(ibs);
+   return -ENOMEM;
+   }
+
+   if (number_of_ibs > 1) {
/* Special path when the maximum size in dwords has
 * been reached because we need to handle more than one
 * IB per submit.
 */
-   unsigned new_cs_count = cs->num_old_cs_buffers + 1;
-   struct radeon_cmdbuf 
*new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
+   struct radeon_cmdbuf **new_cs_array;
unsigned idx = 0;

+   new_cs_array = malloc(cs->num_old_cs_buffers *
+ sizeof(*new_cs_array));
+   assert(new_cs_array);
+
for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
new_cs_array[idx++] = &cs->old_cs_buffers[j];
new_cs_array[idx++] = cs_array[i];

-   for (unsigned j = 0; j < new_cs_count; j++) {
+   for (unsigned j = 0; j < number_of_ibs; j++) {
struct radeon_cmdbuf *rcs = new_cs_array[j];
bool needs_preamble = preamble_cs && j == 0;
unsigned size = 0;
@@ -1057,8 +1064,8 @@ static int
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
ibs[j].ib_mc_address = 
radv_buffer_get_va(bos[j]);
}

-   number_of_ibs = new_cs_count;
cnt++;
+   free(new_cs_array);
} else {
if (preamble_cs)
size += preamble_cs->cdw;
@@ -1107,6 +1114,8 @@ static int
radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the sysmem submission (%d)\n", r);
+   free(ibs);
+  

Re: [Mesa-dev] [PATCH] mesa: enable ARB_direct_state_access in OpenGL 4.5 compat profile

2018-07-04 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Jul 3, 2018 at 9:09 PM, Timothy Arceri  wrote:
> It's unlikely anyone will add proper ARB_direct_state_access compat
> support before we branch 18.2. Enabling the extension in 4.5 at
> least allows users to make use of MESA_GL_VERSION_OVERRIDE=4.5COMPAT
> for games like No Mans Sky.
> ---
>  src/mapi/glapi/gen/apiexec.py| 194 +++
>  src/mesa/main/extensions_table.h |   2 +-
>  2 files changed, 98 insertions(+), 98 deletions(-)
>
> diff --git a/src/mapi/glapi/gen/apiexec.py b/src/mapi/glapi/gen/apiexec.py
> index 44552f43f29..b163d88549b 100644
> --- a/src/mapi/glapi/gen/apiexec.py
> +++ b/src/mapi/glapi/gen/apiexec.py
> @@ -152,103 +152,103 @@ functions = {
>
>  # OpenGL 4.5 / GL_ARB_direct_state_access.   Mesa can expose the 
> extension
>  # with core profile.
> -"CreateTransformFeedbacks": exec_info(core=31),
> -"TransformFeedbackBufferBase": exec_info(core=31),
> -"TransformFeedbackBufferRange": exec_info(core=31),
> -"GetTransformFeedbackiv": exec_info(core=31),
> -"GetTransformFeedbacki_v": exec_info(core=31),
> -"GetTransformFeedbacki64_v": exec_info(core=31),
> -"CreateBuffers": exec_info(core=31),
> -"NamedBufferStorage": exec_info(core=31),
> -"NamedBufferData": exec_info(core=31),
> -"NamedBufferSubData": exec_info(core=31),
> -"CopyNamedBufferSubData": exec_info(core=31),
> -"ClearNamedBufferData": exec_info(core=31),
> -"ClearNamedBufferSubData": exec_info(core=31),
> -"MapNamedBuffer": exec_info(core=31),
> -"MapNamedBufferRange": exec_info(core=31),
> -"UnmapNamedBuffer": exec_info(core=31),
> -"FlushMappedNamedBufferRange": exec_info(core=31),
> -"GetNamedBufferParameteriv": exec_info(core=31),
> -"GetNamedBufferParameteri64v": exec_info(core=31),
> -"GetNamedBufferPointerv": exec_info(core=31),
> -"GetNamedBufferSubData": exec_info(core=31),
> -"CreateFramebuffers": exec_info(core=31),
> -"NamedFramebufferRenderbuffer": exec_info(core=31),
> -"NamedFramebufferParameteri": exec_info(core=31),
> -"NamedFramebufferTexture": exec_info(core=31),
> -"NamedFramebufferTextureLayer": exec_info(core=31),
> -"NamedFramebufferDrawBuffer": exec_info(core=31),
> -"NamedFramebufferDrawBuffers": exec_info(core=31),
> -"NamedFramebufferReadBuffer": exec_info(core=31),
> -"InvalidateNamedFramebufferData": exec_info(core=31),
> -"InvalidateNamedFramebufferSubData": exec_info(core=31),
> -"ClearNamedFramebufferiv": exec_info(core=31),
> -"ClearNamedFramebufferuiv": exec_info(core=31),
> -"ClearNamedFramebufferfv": exec_info(core=31),
> -"ClearNamedFramebufferfi": exec_info(core=31),
> -"BlitNamedFramebuffer": exec_info(core=31),
> -"CheckNamedFramebufferStatus": exec_info(core=31),
> -"GetNamedFramebufferParameteriv": exec_info(core=31),
> -"GetNamedFramebufferAttachmentParameteriv": exec_info(core=31),
> -"CreateRenderbuffers": exec_info(core=31),
> -"NamedRenderbufferStorage": exec_info(core=31),
> -"NamedRenderbufferStorageMultisample": exec_info(core=31),
> -"GetNamedRenderbufferParameteriv": exec_info(core=31),
> -"CreateTextures": exec_info(core=31),
> -"TextureBuffer": exec_info(core=31),
> -"TextureBufferRange": exec_info(core=31),
> -"TextureStorage1D": exec_info(core=31),
> -"TextureStorage2D": exec_info(core=31),
> -"TextureStorage3D": exec_info(core=31),
> -"TextureStorage2DMultisample": exec_info(core=31),
> -"TextureStorage3DMultisample": exec_info(core=31),
> -"TextureSubImage1D": exec_info(core=31),
> -"TextureSubImage2D": exec_info(core=31),
> -"TextureSubImage3D": exec_info(core=31),
> -"CompressedTextureSubImage1D": exec_info(core=31),
> -"CompressedTextureSubImage2D": exec_info(core=31),
> -"CompressedTextureSubImage3D": exec_info(core=31),
> -"CopyTextureSubImage1D": exec_info(core=31),
> -"CopyTextureSubImage2D": exec_info(core=31),
> -"CopyTextureSubImage3D": exec_info(core=31),
> -"TextureParameterf": exec_info(core=31),
> -"TextureParameterfv": exec_info(core=31),
> -"TextureParameteri": exec_info(core=31),
> -"TextureParameterIiv": exec_info(core=31),
> -"TextureParameterIuiv": exec_info(core=31),
> -"TextureParameteriv": exec_info(core=31),
> -"GenerateTextureMipmap": exec_info(core=31),
> -"BindTextureUnit": exec_info(core=31),
> -"GetTextureImage": exec_info(core=31),
> -"GetCompressedTextureImage": exec_info(core=31),
> -"GetTextureLevelParameterfv": exec_info(core=31),
> -"GetTextureLevelParameteriv": exec_info(core=31),
> -"GetTextureParameterfv": exec_info(core=31),
> -"GetTextureParameterIiv": exec_info(core=31),
> -"GetTextureParameterIuiv": exec_info(core=31),
> -"GetTextureParameteriv": exec_info(core=31),
> -"CreateVertexArrays": exec_info(core=31),
> -"DisableVertexArrayAttrib": 

Re: [Mesa-dev] [PATCH] util/drirc: turn on force_glsl_extensions_warn for No Mans Sky

2018-07-04 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Tue, Jul 3, 2018 at 8:41 PM, Timothy Arceri  wrote:
> The game forgets to enable multiple extensions in its shaders, one
> of those extensions is EXT_texture_array. But enabling this config
> entry fixes at least one other rendering issue that enabling
> EXT_texture_array on its own doesn't fix.
> ---
>  src/util/drirc | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/util/drirc b/src/util/drirc
> index 7f91035ae8b..8ece875e34f 100644
> --- a/src/util/drirc
> +++ b/src/util/drirc
> @@ -181,6 +181,10 @@ TODO: document the other workarounds.
>  
>  
>
> +
> +
> +
> +
>  
>
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH rfc 2/3] gallium: add PIPE_CAP_BUFFER_AGE

2018-07-04 Thread Qiang Yu
For gallium drivers to expose EGL/GLX_EXT_buffer_age.

Signed-off-by: Qiang Yu 
---
 src/gallium/docs/source/screen.rst  | 1 +
 src/gallium/drivers/etnaviv/etnaviv_screen.c| 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c| 1 +
 src/gallium/drivers/i915/i915_screen.c  | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c| 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c  | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c  | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  | 1 +
 src/gallium/drivers/r300/r300_screen.c  | 1 +
 src/gallium/drivers/r600/r600_pipe.c| 1 +
 src/gallium/drivers/radeonsi/si_get.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c| 1 +
 src/gallium/drivers/svga/svga_screen.c  | 1 +
 src/gallium/drivers/swr/swr_screen.cpp  | 1 +
 src/gallium/drivers/vc4/vc4_screen.c| 1 +
 src/gallium/drivers/vc5/vc5_screen.c| 1 +
 src/gallium/drivers/virgl/virgl_screen.c| 2 ++
 src/gallium/include/pipe/p_defines.h| 1 +
 src/gallium/state_trackers/dri/dri_query_renderer.c | 4 +++-
 19 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 3837360fb4..427944bf70 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -420,6 +420,7 @@ The integer capabilities:
   by the driver, and the driver can throw assertion failures.
 * ``PIPE_CAP_PACKED_UNIFORMS``: True if the driver supports packed uniforms
   as opposed to padding to vec4s.
+* ``PIPE_CAP_BUFFER_AGE``: True if the driver wants to expose 
EGL/GLX_EXT_buffer_age.
 
 
 .. _pipe_capf:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index b0f8b4bebe..1b4276a36e 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -144,6 +144,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TEXCOORD:
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+   case PIPE_CAP_BUFFER_AGE:
   return 1;
case PIPE_CAP_NATIVE_FENCE_FD:
   return screen->drm_version >= ETNA_DRM_VERSION_FENCE_FD;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index f338d756df..a7c6f4453e 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -186,6 +186,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
case PIPE_CAP_TEXTURE_BARRIER:
case PIPE_CAP_INVALIDATE_BUFFER:
+   case PIPE_CAP_BUFFER_AGE:
return 1;
 
case PIPE_CAP_VERTEXID_NOBASE:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 59d2ec6628..168912946c 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -205,6 +205,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+   case PIPE_CAP_BUFFER_AGE:
   return 1;
 
/* Unsupported features (boolean caps). */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index 3f5d0327bf..495e3f96b6 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -110,6 +110,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_NPOT_TEXTURES:
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+   case PIPE_CAP_BUFFER_AGE:
   return 1;
case PIPE_CAP_SM3:
   return 1;
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 1d1fbaad60..47030210b3 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -94,6 +94,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_BUFFER_AGE:
   return 1;
/* nv35 capabilities */
case PIPE_CAP_DEPTH_BOUNDS_TEST:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 6fd2982e3c..1dca07caf9 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -199,6 +199,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case 

[Mesa-dev] [PATCH rfc 0/3] Be able to disable EGL/GLX_EXT_buffer_age

2018-07-04 Thread Qiang Yu
For GPUs like ARM Mali Utgard, EGL/GLX_EXT_buffer_age will make
performance worse. But Mesa has no way to disable it.

This patch series makes drivers able to disable it and adds a
gallium pipe cap for gallium driver usage. Since currently only
the out-of-tree lima driver needs it, and I am not sure if this is
the right way to disable it, I am sending this RFC before lima
can be upstreamed.

Qiang Yu (3):
  egl,glx: query dri integer to expose EGL/GLX_EXT_buffer_age
  gallium: add PIPE_CAP_BUFFER_AGE
  egl: fix query buffer age fail when EGL_KHR_partial_update

 include/GL/internal/dri_interface.h |  2 ++
 src/egl/drivers/dri2/egl_dri2.c |  3 +++
 src/egl/drivers/dri2/platform_android.c |  1 -
 src/egl/drivers/dri2/platform_drm.c |  4 ++--
 src/egl/drivers/dri2/platform_wayland.c |  2 --
 src/egl/drivers/dri2/platform_x11.c |  1 -
 src/egl/main/eglsurface.c   |  7 ++-
 src/gallium/docs/source/screen.rst  |  1 +
 src/gallium/drivers/etnaviv/etnaviv_screen.c|  1 +
 src/gallium/drivers/freedreno/freedreno_screen.c|  1 +
 src/gallium/drivers/i915/i915_screen.c  |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c|  1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c  |  1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c  |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c  |  1 +
 src/gallium/drivers/r300/r300_screen.c  |  1 +
 src/gallium/drivers/r600/r600_pipe.c|  1 +
 src/gallium/drivers/radeonsi/si_get.c   |  1 +
 src/gallium/drivers/softpipe/sp_screen.c|  1 +
 src/gallium/drivers/svga/svga_screen.c  |  1 +
 src/gallium/drivers/swr/swr_screen.cpp  |  1 +
 src/gallium/drivers/vc4/vc4_screen.c|  1 +
 src/gallium/drivers/vc5/vc5_screen.c|  1 +
 src/gallium/drivers/virgl/virgl_screen.c|  2 ++
 src/gallium/include/pipe/p_defines.h|  1 +
 src/gallium/state_trackers/dri/dri_query_renderer.c |  5 +
 src/glx/dri3_glx.c  | 12 +---
 src/mesa/drivers/dri/i915/intel_screen.c|  3 +++
 src/mesa/drivers/dri/i965/intel_screen.c|  3 +++
 src/mesa/drivers/dri/nouveau/nouveau_screen.c   |  3 +++
 src/mesa/drivers/dri/radeon/radeon_screen.c |  3 +++
 src/mesa/drivers/dri/swrast/swrast.c|  3 +++
 32 files changed, 61 insertions(+), 10 deletions(-)

-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH rfc 3/3] egl: fix query buffer age fail when EGL_KHR_partial_update

2018-07-04 Thread Qiang Yu
When EGL_EXT_buffer_age is not exposed but EGL_KHR_partial_update is,
querying the buffer age should not fail.

Signed-off-by: Qiang Yu 
---
 src/egl/main/eglsurface.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c
index 3bd14a8cd0..222ef4923d 100644
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -388,7 +388,12 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, 
_EGLSurface *surface,
   *value = surface->PostSubBufferSupportedNV;
   break;
case EGL_BUFFER_AGE_EXT:
-  if (!dpy->Extensions.EXT_buffer_age)
+  /* EGL_BUFFER_AGE_EXT belong to EGL_EXT_buffer_age
+   * EGL_BUFFER_AGE_KHR belong to EGL_KHR_partial_update
+   * but EGL_BUFFER_AGE_EXT == EGL_BUFFER_AGE_KHR by value
+   */
+  if (!dpy->Extensions.EXT_buffer_age &&
+  !dpy->Extensions.KHR_partial_update)
  return _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
 
   _EGLContext *ctx = _eglGetCurrentContext();
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH rfc 1/3] egl, glx: query dri integer to expose EGL/GLX_EXT_buffer_age

2018-07-04 Thread Qiang Yu
Add a __DRI2_RENDERER_EXPOSE_BUFFER_AGE DRI integer query so a
driver can decide whether it wants to expose EGL/GLX_EXT_buffer_age.

Without knowing the damage region first, some tile-buffer-based
GPUs like ARM Mali Utgard have to load the whole render target into
the tile buffer before a partial update, then write it back.

Exposing this extension will mislead applications into using it,
which will make performance worse for this kind of GPU.

EGL_KHR_partial_update is a better choice for this kind of GPU.

Signed-off-by: Qiang Yu 
---
 include/GL/internal/dri_interface.h |  2 ++
 src/egl/drivers/dri2/egl_dri2.c |  3 +++
 src/egl/drivers/dri2/platform_android.c |  1 -
 src/egl/drivers/dri2/platform_drm.c |  4 ++--
 src/egl/drivers/dri2/platform_wayland.c |  2 --
 src/egl/drivers/dri2/platform_x11.c |  1 -
 src/gallium/state_trackers/dri/dri_query_renderer.c |  3 +++
 src/glx/dri3_glx.c  | 12 +---
 src/mesa/drivers/dri/i915/intel_screen.c|  3 +++
 src/mesa/drivers/dri/i965/intel_screen.c|  3 +++
 src/mesa/drivers/dri/nouveau/nouveau_screen.c   |  3 +++
 src/mesa/drivers/dri/radeon/radeon_screen.c |  3 +++
 src/mesa/drivers/dri/swrast/swrast.c|  3 +++
 13 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index 4f4795c7ae..9dd8a90381 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1856,6 +1856,8 @@ typedef struct __DRIDriverVtableExtensionRec {
 #define   __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM (1 << 1)
 #define   __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH   (1 << 2)
 
+#define __DRI2_RENDERER_EXPOSE_BUFFER_AGE 0x000e
+
 typedef struct __DRI2rendererQueryExtensionRec __DRI2rendererQueryExtension;
 struct __DRI2rendererQueryExtensionRec {
__DRIextension base;
diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 45d0c7275c..8f91c91638 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -777,6 +777,9 @@ dri2_setup_screen(_EGLDisplay *disp)
 
if (dri2_dpy->flush_control)
   disp->Extensions.KHR_context_flush_control = EGL_TRUE;
+
+   disp->Extensions.EXT_buffer_age =
+  dri2_renderer_query_integer(dri2_dpy, __DRI2_RENDERER_EXPOSE_BUFFER_AGE);
 }
 
 void
diff --git a/src/egl/drivers/dri2/platform_android.c 
b/src/egl/drivers/dri2/platform_android.c
index 7f1a496ea2..d1ef070e6b 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -1270,7 +1270,6 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay 
*disp)
disp->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
disp->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
disp->Extensions.ANDROID_recordable = EGL_TRUE;
-   disp->Extensions.EXT_buffer_age = EGL_TRUE;
 #if ANDROID_API_LEVEL >= 23
disp->Extensions.KHR_partial_update = EGL_TRUE;
 #endif
diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index dc4efea910..e9f4a6a055 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -769,8 +769,8 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
}
 
disp->Extensions.KHR_image_pixmap = EGL_TRUE;
-   if (dri2_dpy->dri2)
-  disp->Extensions.EXT_buffer_age = EGL_TRUE;
+   if (!dri2_dpy->dri2)
+  disp->Extensions.EXT_buffer_age = EGL_FALSE;
 
 #ifdef HAVE_WAYLAND_PLATFORM
dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
diff --git a/src/egl/drivers/dri2/platform_wayland.c 
b/src/egl/drivers/dri2/platform_wayland.c
index 80853ac00b..78d9329f7e 100644
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1433,8 +1433,6 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay 
*disp)
if (!dri2_dpy->is_different_gpu)
   disp->Extensions.WL_create_wayland_buffer_from_image = EGL_TRUE;
 
-   disp->Extensions.EXT_buffer_age = EGL_TRUE;
-
disp->Extensions.EXT_swap_buffers_with_damage = EGL_TRUE;
 
/* Fill vtbl last to prevent accidentally calling virtual function during
diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index 60330b33df..3485ed1466 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1340,7 +1340,6 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay 
*disp)
   disp->Extensions.KHR_image_pixmap = EGL_TRUE;
disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE;
disp->Extensions.CHROMIUM_sync_control = EGL_TRUE;
-   disp->Extensions.EXT_buffer_age = EGL_TRUE;
 
dri2_set_WL_bind_wayland_display(drv, disp);
 
diff --git a/src/gallium/state_trackers/dri/dri_query_renderer.c 
b/src/gallium/state_trackers/dri/dri_query_renderer.c

Re: [Mesa-dev] [PATCH 2/2] radv: add the trace BO to the list when starting a new cmdbuf

2018-07-04 Thread Dieter Nützel

This series brings the anomaly back,

that 'ondemand' is _faster_ than 'performance':

cpupower frequency-set -g ondemand

/home/dieter> smoketest
5905 presents in 5.00014 seconds (FPS: 1180.97)
6101 presents in 5.00115 seconds (FPS: 1219.92)
6020 presents in 5.00038 seconds (FPS: 1203.91)
6021 presents in 5.0002 seconds (FPS: 1204.15)
6005 presents in 5.00013 seconds (FPS: 1200.97)
6023 presents in 5.00005 seconds (FPS: 1204.59)
frames:41278, elapsedms:34411

cpupower frequency-set -g performance

/home/dieter> smoketest
5326 presents in 5.00078 seconds (FPS: 1065.03)
5532 presents in 5.00023 seconds (FPS: 1106.35)
5487 presents in 5.0009 seconds (FPS: 1097.2)
5516 presents in 5.00014 seconds (FPS: 1103.17)
5516 presents in 5.00082 seconds (FPS: 1103.02)
frames:31385, elapsedms:28709

I saw this with and without Dave's TLS patch set.

Apart from that:

Tested-by: Dieter Nützel 

Dieter

Am 03.07.2018 12:43, schrieb Samuel Pitoiset:

That might reduce CPU overhead a little bit when using
RADV_TRACE_FILE.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
b/src/amd/vulkan/radv_cmd_buffer.c

index 26d9fef314..0a7a3f3fa9 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -446,7 +446,6 @@ void radv_cmd_buffer_trace_emit(struct
radv_cmd_buffer *cmd_buffer)
MAYBE_UNUSED unsigned cdw_max =
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 7);

++cmd_buffer->state.trace_id;
-   radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, 1, &cmd_buffer->state.trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
@@ -509,7 +508,6 @@ radv_save_pipeline(struct radv_cmd_buffer 
*cmd_buffer,

data[0] = (uintptr_t)pipeline;
data[1] = (uintptr_t)pipeline >> 32;

-   radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, 2, data);
 }

@@ -551,7 +549,6 @@ radv_save_descriptors(struct radv_cmd_buffer 
*cmd_buffer,

data[i * 2 + 1] = (uintptr_t)set >> 32;
}

-   radv_cs_add_buffer(device->ws, cs, device->trace_bo, 8);
radv_emit_write_data_packet(cs, va, MAX_SETS * 2, data);
 }

@@ -2300,8 +2297,14 @@ VkResult radv_BeginCommandBuffer(
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
}

-   if (unlikely(cmd_buffer->device->trace_bo))
+   if (unlikely(cmd_buffer->device->trace_bo)) {
+   struct radv_device *device = cmd_buffer->device;
+
+   radv_cs_add_buffer(device->ws, cmd_buffer->cs,
+  device->trace_bo, 8);
+
radv_cmd_buffer_trace_emit(cmd_buffer);
+   }

cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: using tls to store llvm related info and speed up compiles (v8)

2018-07-04 Thread Dave Airlie
From: Dave Airlie 

This uses the common compiler passes abstraction to help radv
avoid fixed cost compiler overheads. This uses a linked list per
thread stored in thread local storage, with an entry in the list
for each target machine.

This should remove all the fixed overheads setup costs of creating
the pass manager each time.

This takes a demo app time to compile the radv meta shaders on nocache
and exit from 1.7s to 1s. It also has been reported to take the startup
time of uncached shaders on RoTR from 12m24s to 11m35s (Alex)

v2: fix llvm6 build, inline emit function, handle multiple targets
in one thread
v3: rebase and port onto new structure
v4: rename some vars (Bas)
v5: drag all code into radv for now, we can refactor it out later
for radeonsi if we make it shareable
v6: use a bit more C++ in the wrapper
v7: logic bugs fixed so it actually runs again.
v8: rebase on top of radeonsi changes.
---
 src/amd/vulkan/Makefile.sources |   2 +
 src/amd/vulkan/meson.build  |   2 +
 src/amd/vulkan/radv_debug.h |   1 +
 src/amd/vulkan/radv_device.c|   1 +
 src/amd/vulkan/radv_llvm_helper.cpp | 148 
 src/amd/vulkan/radv_nir_to_llvm.c   |  27 +
 src/amd/vulkan/radv_shader.c|  10 +-
 src/amd/vulkan/radv_shader_helper.h |  44 +
 8 files changed, 207 insertions(+), 28 deletions(-)
 create mode 100644 src/amd/vulkan/radv_llvm_helper.cpp
 create mode 100644 src/amd/vulkan/radv_shader_helper.h

diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index 70d56e88cb3..152fdd7cb71 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -54,6 +54,7 @@ VULKAN_FILES := \
radv_meta_resolve_cs.c \
radv_meta_resolve_fs.c \
radv_nir_to_llvm.c \
+   radv_llvm_helper.cpp \
radv_pass.c \
radv_pipeline.c \
radv_pipeline_cache.c \
@@ -62,6 +63,7 @@ VULKAN_FILES := \
radv_shader.c \
radv_shader_info.c \
radv_shader.h \
+   radv_shader_helper.h \
radv_query.c \
radv_util.c \
radv_util.h \
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 22857926fa1..9f2842182e7 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -67,6 +67,7 @@ libradv_files = files(
   'radv_descriptor_set.h',
   'radv_formats.c',
   'radv_image.c',
+  'radv_llvm_helper.cpp',
   'radv_meta.c',
   'radv_meta.h',
   'radv_meta_blit.c',
@@ -88,6 +89,7 @@ libradv_files = files(
   'radv_radeon_winsys.h',
   'radv_shader.c',
   'radv_shader.h',
+  'radv_shader_helper.h',
   'radv_shader_info.c',
   'radv_query.c',
   'radv_util.c',
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index f1b0dc26a63..9fe4c3b7404 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -49,6 +49,7 @@ enum {
RADV_DEBUG_ERRORS= 0x8,
RADV_DEBUG_STARTUP   = 0x10,
RADV_DEBUG_CHECKIR   = 0x20,
+   RADV_DEBUG_NOTHREADLLVM  = 0x40,
 };
 
 enum {
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ad3465f594e..73c48cef1f0 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -436,6 +436,7 @@ static const struct debug_control radv_debug_options[] = {
{"errors", RADV_DEBUG_ERRORS},
{"startup", RADV_DEBUG_STARTUP},
{"checkir", RADV_DEBUG_CHECKIR},
+   {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
{NULL, 0}
 };
 
diff --git a/src/amd/vulkan/radv_llvm_helper.cpp 
b/src/amd/vulkan/radv_llvm_helper.cpp
new file mode 100644
index 000..dad881f6b1a
--- /dev/null
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2018 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "ac_llvm_util.h"
+#include 

[Mesa-dev] [Bug 107116] [OpenCL] [llvm-svn] [meson] build failure C++ library 'clangCodeGen' not found

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107116

Bug ID: 107116
   Summary: [OpenCL] [llvm-svn] [meson] build failure  C++ library
'clangCodeGen' not found
   Product: Mesa
   Version: git
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: lonew...@xs4all.nl
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 140466
  --> https://bugs.freedesktop.org/attachment.cgi?id=140466&action=edit
meson build log

mesa-git master rev 4695984dbc
llvm-svn r336299
meson 0.47.0

buildscript used :
https://aur.archlinux.org/cgit/aur.git/tree/PKGBUILD?h=mesa-git
building with mesa-git rev 2854c0f795 worked.


src/gallium/targets/opencl/meson.build:36:0: ERROR:  C++ library 'clangCodeGen'
not found

src/_build/meson-logs/meson-log.txt attached

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand

On July 4, 2018 15:35:15 Bas Nieuwenhuizen  wrote:


On Wed, Jul 4, 2018 at 11:00 PM, Jason Ekstrand  wrote:

On Wed, Jul 4, 2018 at 1:20 PM, Francisco Jerez 
wrote:


Jason Ekstrand  writes:


Many fragment shaders do a discard using relatively little information
but still put the discard fairly far down in the shader for no good
reason.  If the discard is moved higher up, we can possibly avoid doing
some or almost all of the work in the shader.  When this lets us skip
texturing operations, it's an especially high win.

One of the biggest offenders here is DXVK.  The D3D APIs have different
rules for discards than OpenGL and Vulkan.  One effective way (which is
what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
wait until the very end of the shader to discard.  This ends up in the
pessimal case where we always do all of the work before discarding.
This pass helps some DXVK shaders significantly.


One thing to keep in mind is that this sort of transformation is trading
off run-time of fragment shader invocations that don't call discard (or
do so non-uniformly, which means that the code the discard jump is
protecting will be executed anyway, so doing this can actually increase
the critical path of the program) in favour of invocations that call
discard uniformly (so executing discard early will effectively terminate
the program early).



It's not really a uniform vs. non-uniform thing.  Even if a shader only
discards some of the fragments, it still reduces the number of live channels
which reduces the cost of later non-uniform control-flow.



Optimizing for the latter case is an essentially
heuristic assumption that needs to be verified experimentally.  Have you
tested the effect of this pass on non-DX workloads extensively?



Yes, it is a trade-off.  No, I have not done particularly extensive testing.
We do, however, know of non-DXVK workloads that would benefit from this.  I
believe Manhattan is one such example though I have not yet benchmarked it.


Out of curiosity, what is the performance trade-off here? What extra
costs could we
get by discarding early?


It's typically not high but there may be some cost from extra stalls from 
less latency hiding.  For instance, if you put the discard right after a 
texture, you can't process any more of the shader until it returns so you 
know whether or not to discard.  Also, you could end up increasing register 
pressure if you end up moving something to the top that's used both in the 
discard and in something else.  It's the usual set of trade-offs you get 
every time you move instructions, made possibly worse by how aggressive this 
pass is in making the only instructions before the discard the ones that 
are explicitly needed.










v2 (Jason Ekstrand):
- Fix a couple of typos (Grazvydas, Ian)
- Use the new nir_instr_move helper
- Find all movable discards before moving anything so we don't
accidentally re-order anything and break dependencies
---
src/compiler/Makefile.sources  |   1 +
src/compiler/nir/meson.build   |   1 +
src/compiler/nir/nir.h |  10 +
src/compiler/nir/nir_opt_discard.c | 396 +
4 files changed, 408 insertions(+)
create mode 100644 src/compiler/nir/nir_opt_discard.c

diff --git a/src/compiler/Makefile.sources
b/src/compiler/Makefile.sources
index 9e3fbdc2612..8600ce81281 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -271,6 +271,7 @@ NIR_FILES = \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
nir/nir_opt_dead_cf.c \
+ nir/nir_opt_discard.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_if.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 28aa8de7014..e339258bb94 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -156,6 +156,7 @@ files_libnir = files(
'nir_opt_cse.c',
'nir_opt_dce.c',
'nir_opt_dead_cf.c',
+  'nir_opt_discard.c',
'nir_opt_gcm.c',
'nir_opt_global_to_local.c',
'nir_opt_if.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c40a88c8ccc..dac019c17e8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
*/
bool vs_inputs_dual_locations;

+   /**
+* Whether or not derivatives are still a safe operation after a
discard
+* has occurred.  Optimization passes may be able to be a bit more
+* agressive if this is true.
+*/
+   bool derivatives_safe_after_discard;
+


It's worth noting in the comment above that any driver that is in
position to enable this option (e.g. i965) is strictly speaking
non-compliant with GLSL and SPIR-V, whether or not this optimization
pass is used.  The reason is that derivatives being safe after a
non-uniform discard implies that any invocations involved in derivative
computations must be executed even though they aren't supposed to
according to the spec, and even though doing so might lead to undefined

Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Bas Nieuwenhuizen
On Wed, Jul 4, 2018 at 11:00 PM, Jason Ekstrand  wrote:
> On Wed, Jul 4, 2018 at 1:20 PM, Francisco Jerez 
> wrote:
>>
>> Jason Ekstrand  writes:
>>
>> > Many fragment shaders do a discard using relatively little information
>> > but still put the discard fairly far down in the shader for no good
>> > reason.  If the discard is moved higher up, we can possibly avoid doing
>> > some or almost all of the work in the shader.  When this lets us skip
>> > texturing operations, it's an especially high win.
>> >
>> > One of the biggest offenders here is DXVK.  The D3D APIs have different
>> > rules for discards than OpenGL and Vulkan.  One effective way (which is
>> > what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
>> > wait until the very end of the shader to discard.  This ends up in the
>> > pessimal case where we always do all of the work before discarding.
>> > This pass helps some DXVK shaders significantly.
>> >
>>
>> One thing to keep in mind is that this sort of transformation is trading
>> off run-time of fragment shader invocations that don't call discard (or
>> do so non-uniformly, which means that the code the discard jump is
>> protecting will be executed anyway, so doing this can actually increase
>> the critical path of the program) in favour of invocations that call
>> discard uniformly (so executing discard early will effectively terminate
>> the program early).
>
>
> It's not really a uniform vs. non-uniform thing.  Even if a shader only
> discards some of the fragments, it still reduces the number of live channels
> which reduces the cost of later non-uniform control-flow.
>
>>
>> Optimizing for the latter case is an essentially
>> heuristic assumption that needs to be verified experimentally.  Have you
>> tested the effect of this pass on non-DX workloads extensively?
>
>
> Yes, it is a trade-off.  No, I have not done particularly extensive testing.
> We do, however, know of non-DXVK workloads that would benefit from this.  I
> believe Manhattan is one such example though I have not yet benchmarked it.

Out of curiosity, what is the performance trade-off here? What extra
costs could we
get by discarding early?

>
>>
>> > v2 (Jason Ekstrand):
>> >  - Fix a couple of typos (Grazvydas, Ian)
>> >  - Use the new nir_instr_move helper
>> >  - Find all movable discards before moving anything so we don't
>> >accidentally re-order anything and break dependencies
>> > ---
>> >  src/compiler/Makefile.sources  |   1 +
>> >  src/compiler/nir/meson.build   |   1 +
>> >  src/compiler/nir/nir.h |  10 +
>> >  src/compiler/nir/nir_opt_discard.c | 396 +
>> >  4 files changed, 408 insertions(+)
>> >  create mode 100644 src/compiler/nir/nir_opt_discard.c
>> >
>> > diff --git a/src/compiler/Makefile.sources
>> > b/src/compiler/Makefile.sources
>> > index 9e3fbdc2612..8600ce81281 100644
>> > --- a/src/compiler/Makefile.sources
>> > +++ b/src/compiler/Makefile.sources
>> > @@ -271,6 +271,7 @@ NIR_FILES = \
>> >   nir/nir_opt_cse.c \
>> >   nir/nir_opt_dce.c \
>> >   nir/nir_opt_dead_cf.c \
>> > + nir/nir_opt_discard.c \
>> >   nir/nir_opt_gcm.c \
>> >   nir/nir_opt_global_to_local.c \
>> >   nir/nir_opt_if.c \
>> > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
>> > index 28aa8de7014..e339258bb94 100644
>> > --- a/src/compiler/nir/meson.build
>> > +++ b/src/compiler/nir/meson.build
>> > @@ -156,6 +156,7 @@ files_libnir = files(
>> >'nir_opt_cse.c',
>> >'nir_opt_dce.c',
>> >'nir_opt_dead_cf.c',
>> > +  'nir_opt_discard.c',
>> >'nir_opt_gcm.c',
>> >'nir_opt_global_to_local.c',
>> >'nir_opt_if.c',
>> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> > index c40a88c8ccc..dac019c17e8 100644
>> > --- a/src/compiler/nir/nir.h
>> > +++ b/src/compiler/nir/nir.h
>> > @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
>> >  */
>> > bool vs_inputs_dual_locations;
>> >
>> > +   /**
>> > +* Whether or not derivatives are still a safe operation after a
>> > discard
>> > +* has occurred.  Optimization passes may be able to be a bit more
>> > +* agressive if this is true.
>> > +*/
>> > +   bool derivatives_safe_after_discard;
>> > +
>>
>> It's worth noting in the comment above that any driver that is in
>> position to enable this option (e.g. i965) is strictly speaking
>> non-compliant with GLSL and SPIR-V, whether or not this optimization
>> pass is used.  The reason is that derivatives being safe after a
>> non-uniform discard implies that any invocations involved in derivative
>> computations must be executed even though they aren't supposed to
>> according to the spec, and even though doing so might lead to undefined
>> behaviour that wasn't present in the original program, e.g.:
>>
>> | int delta = non_uniform_computation();
>> | if (delta == 0)
>> |  discard;
>> |
>> | for (int i = 0; i < N; i += delta) {
>> |   // 

Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand
On Wed, Jul 4, 2018 at 1:20 PM, Francisco Jerez 
wrote:

> Jason Ekstrand  writes:
>
> > Many fragment shaders do a discard using relatively little information
> > but still put the discard fairly far down in the shader for no good
> > reason.  If the discard is moved higher up, we can possibly avoid doing
> > some or almost all of the work in the shader.  When this lets us skip
> > texturing operations, it's an especially high win.
> >
> > One of the biggest offenders here is DXVK.  The D3D APIs have different
> > rules for discards than OpenGL and Vulkan.  One effective way (which is
> > what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> > wait until the very end of the shader to discard.  This ends up in the
> > pessimal case where we always do all of the work before discarding.
> > This pass helps some DXVK shaders significantly.
> >
>
> One thing to keep in mind is that this sort of transformation is trading
> off run-time of fragment shader invocations that don't call discard (or
> do so non-uniformly, which means that the code the discard jump is
> protecting will be executed anyway, so doing this can actually increase
> the critical path of the program) in favour of invocations that call
> discard uniformly (so executing discard early will effectively terminate
> the program early).


It's not really a uniform vs. non-uniform thing.  Even if a shader only
discards some of the fragments, it still reduces the number of live channels
which reduces the cost of later non-uniform control-flow.


> Optimizing for the latter case is an essentially
> heuristic assumption that needs to be verified experimentally.  Have you
> tested the effect of this pass on non-DX workloads extensively?
>

Yes, it is a trade-off.  No, I have not done particularly extensive
testing.  We do, however, know of non-DXVK workloads that would benefit
from this.  I believe Manhattan is one such example though I have not yet
benchmarked it.


> > v2 (Jason Ekstrand):
> >  - Fix a couple of typos (Grazvydas, Ian)
> >  - Use the new nir_instr_move helper
> >  - Find all movable discards before moving anything so we don't
> >accidentally re-order anything and break dependencies
> > ---
> >  src/compiler/Makefile.sources  |   1 +
> >  src/compiler/nir/meson.build   |   1 +
> >  src/compiler/nir/nir.h |  10 +
> >  src/compiler/nir/nir_opt_discard.c | 396 +
> >  4 files changed, 408 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_opt_discard.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.
> sources
> > index 9e3fbdc2612..8600ce81281 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -271,6 +271,7 @@ NIR_FILES = \
> >   nir/nir_opt_cse.c \
> >   nir/nir_opt_dce.c \
> >   nir/nir_opt_dead_cf.c \
> > + nir/nir_opt_discard.c \
> >   nir/nir_opt_gcm.c \
> >   nir/nir_opt_global_to_local.c \
> >   nir/nir_opt_if.c \
> > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> > index 28aa8de7014..e339258bb94 100644
> > --- a/src/compiler/nir/meson.build
> > +++ b/src/compiler/nir/meson.build
> > @@ -156,6 +156,7 @@ files_libnir = files(
> >'nir_opt_cse.c',
> >'nir_opt_dce.c',
> >'nir_opt_dead_cf.c',
> > +  'nir_opt_discard.c',
> >'nir_opt_gcm.c',
> >'nir_opt_global_to_local.c',
> >'nir_opt_if.c',
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index c40a88c8ccc..dac019c17e8 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
> >  */
> > bool vs_inputs_dual_locations;
> >
> > +   /**
> > +* Whether or not derivatives are still a safe operation after a
> discard
> > +* has occurred.  Optimization passes may be able to be a bit more
> > +* agressive if this is true.
> > +*/
> > +   bool derivatives_safe_after_discard;
> > +
>
> It's worth noting in the comment above that any driver that is in
> position to enable this option (e.g. i965) is strictly speaking
> non-compliant with GLSL and SPIR-V, whether or not this optimization
> pass is used.  The reason is that derivatives being safe after a
> non-uniform discard implies that any invocations involved in derivative
> computations must be executed even though they aren't supposed to
> according to the spec, and even though doing so might lead to undefined
> behaviour that wasn't present in the original program, e.g.:
>
> | int delta = non_uniform_computation();
> | if (delta == 0)
> |  discard;
> |
> | for (int i = 0; i < N; i += delta) {
> |   // Will loop forever if discarded fragments are incorrectly executed
> |   // by the back-end.
> | }
>
> The above shader is specified to terminate if the semantics of discard
> are as defined by GLSL or SPIRV, but not necessarily as defined by DX.
>

That is an interesting point.  One possible 

Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Francisco Jerez
Jason Ekstrand  writes:

> Many fragment shaders do a discard using relatively little information
> but still put the discard fairly far down in the shader for no good
> reason.  If the discard is moved higher up, we can possibly avoid doing
> some or almost all of the work in the shader.  When this lets us skip
> texturing operations, it's an especially high win.
>
> One of the biggest offenders here is DXVK.  The D3D APIs have different
> rules for discards than OpenGL and Vulkan.  One effective way (which is
> what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> wait until the very end of the shader to discard.  This ends up in the
> pessimal case where we always do all of the work before discarding.
> This pass helps some DXVK shaders significantly.
>

One thing to keep in mind is that this sort of transformation is trading
off run-time of fragment shader invocations that don't call discard (or
do so non-uniformly, which means that the code the discard jump is
protecting will be executed anyway, so doing this can actually increase
the critical path of the program) in favour of invocations that call
discard uniformly (so executing discard early will effectively terminate
the program early).  Optimizing for the latter case is an essentially
heuristic assumption that needs to be verified experimentally.  Have you
tested the effect of this pass on non-DX workloads extensively?

> v2 (Jason Ekstrand):
>  - Fix a couple of typos (Grazvydas, Ian)
>  - Use the new nir_instr_move helper
>  - Find all movable discards before moving anything so we don't
>accidentally re-order anything and break dependencies
> ---
>  src/compiler/Makefile.sources  |   1 +
>  src/compiler/nir/meson.build   |   1 +
>  src/compiler/nir/nir.h |  10 +
>  src/compiler/nir/nir_opt_discard.c | 396 +
>  4 files changed, 408 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_discard.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 9e3fbdc2612..8600ce81281 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -271,6 +271,7 @@ NIR_FILES = \
>   nir/nir_opt_cse.c \
>   nir/nir_opt_dce.c \
>   nir/nir_opt_dead_cf.c \
> + nir/nir_opt_discard.c \
>   nir/nir_opt_gcm.c \
>   nir/nir_opt_global_to_local.c \
>   nir/nir_opt_if.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 28aa8de7014..e339258bb94 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -156,6 +156,7 @@ files_libnir = files(
>'nir_opt_cse.c',
>'nir_opt_dce.c',
>'nir_opt_dead_cf.c',
> +  'nir_opt_discard.c',
>'nir_opt_gcm.c',
>'nir_opt_global_to_local.c',
>'nir_opt_if.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index c40a88c8ccc..dac019c17e8 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
>  */
> bool vs_inputs_dual_locations;
>  
> +   /**
> +* Whether or not derivatives are still a safe operation after a discard
> +* has occurred.  Optimization passes may be able to be a bit more
> +* agressive if this is true.
> +*/
> +   bool derivatives_safe_after_discard;
> +

It's worth noting in the comment above that any driver that is in
position to enable this option (e.g. i965) is strictly speaking
non-compliant with GLSL and SPIR-V, whether or not this optimization
pass is used.  The reason is that derivatives being safe after a
non-uniform discard implies that any invocations involved in derivative
computations must be executed even though they aren't supposed to
according to the spec, and even though doing so might lead to undefined
behaviour that wasn't present in the original program, e.g.:

| int delta = non_uniform_computation();
| if (delta == 0)
|  discard;
| 
| for (int i = 0; i < N; i += delta) {
|   // Will loop forever if discarded fragments are incorrectly executed
|   // by the back-end.
| }

The above shader is specified to terminate if the semantics of discard
are as defined by GLSL or SPIRV, but not necessarily as defined by DX.
This makes me think that DXVK is in a privileged position to decide
where the discard jump should end up at, since it can make assumptions
about code lexically after a discard being well-defined even if the
discard condition evaluates to true.  It's unfortunate that it behaves
so suboptimally currently that you need to work around it here.

> unsigned max_unroll_iterations;
>  } nir_shader_compiler_options;
>  
> @@ -2901,6 +2908,9 @@ bool nir_opt_dce(nir_shader *shader);
>  
>  bool nir_opt_dead_cf(nir_shader *shader);
>  
> +bool nir_opt_discard_if(nir_shader *shader);
> +bool nir_opt_move_discards_to_top(nir_shader *shader);
> +
>  bool nir_opt_gcm(nir_shader *shader, bool value_number);
>  
>  bool nir_opt_if(nir_shader 

[Mesa-dev] [Bug 105255] Waiting for fences without waitAll is not implemented

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105255

thsniel...@gmail.com changed:

   What|Removed |Added

 CC||thsniel...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] New EGL extension in mesa and 'adriconf' packaging - EVoC'18

2018-07-04 Thread Veluri Mithun
Dear developers,

I'm an undergraduate student in India. I have great enthusiasm to contribute
to mesa, and I've already made a few contributions to adriconf.

Currently, adriconf isn't able to configure the mesa drivers on a machine
where the Wayland protocol is used, due to the lack of EGL extensions in mesa.
Hence, to provide *Wayland support* in adriconf, a new EGL extension needs to
be created in mesa.

Regarding the extension and packaging for adriconf, I've discussed them with
@Rob Clark, @Nicolai Hähnle, and @Jean Hertel, and drafted the proposal [1].

More info related to the extension and packaging can be found in the
proposal. If anyone has suggestions regarding this, please let me know.

[1]
https://docs.google.com/document/d/1EHpLMx7BiZNZxf6MY4fAdmDIFpE5c1Rro9qhuJhag3c/edit?usp=sharing

Thank you.

Best Regards,
Veluri.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] configure: bump libdrm for AMDGPU to 2.4.92

2018-07-04 Thread Emil Velikov
Hi Samuel,

On 4 July 2018 at 14:06, Samuel Pitoiset  wrote:
> This will allow us to submit more IBs at once because the limit
> has been removed in libdrm 2.4.92.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  configure.ac | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/configure.ac b/configure.ac
> index f135d05736..7c58a5a8f1 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
>  # in the first entry.
>  LIBDRM_REQUIRED=2.4.75
>  LIBDRM_RADEON_REQUIRED=2.4.71
> -LIBDRM_AMDGPU_REQUIRED=2.4.91
> +LIBDRM_AMDGPU_REQUIRED=2.4.92

The _drm_amdgpu_ver line in meson.build would need similar fixup. With that
Reviewed-by: Emil Velikov 

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand
On Wed, Jul 4, 2018 at 10:00 AM, Matt Turner  wrote:

> On Wed, Jul 4, 2018 at 9:59 AM Jason Ekstrand 
> wrote:
> >
> > Many fragment shaders do a discard using relatively little information
> > but still put the discard fairly far down in the shader for no good
> > reason.  If the discard is moved higher up, we can possibly avoid doing
> > some or almost all of the work in the shader.  When this lets us skip
> > texturing operations, it's an especially high win.
> >
> > One of the biggest offenders here is DXVK.  The D3D APIs have different
> > rules for discards than OpenGL and Vulkan.  One effective way (which is
> > what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> > wait until the very end of the shader to discard.  This ends up in the
> > pessimal case where we always do all of the work before discarding.
> > This pass helps some DXVK shaders significantly.
> >
> > v2 (Jason Ekstrand):
> >  - Fix a couple of typos (Grazvydas, Ian)
> >  - Use the new nir_instr_move helper
> >  - Find all movable discards before moving anything so we don't
> >accidentally re-order anything and break dependencies
> > ---
> >  src/compiler/Makefile.sources  |   1 +
> >  src/compiler/nir/meson.build   |   1 +
> >  src/compiler/nir/nir.h |  10 +
> >  src/compiler/nir/nir_opt_discard.c | 396 +
> >  4 files changed, 408 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_opt_discard.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.
> sources
> > index 9e3fbdc2612..8600ce81281 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -271,6 +271,7 @@ NIR_FILES = \
> > nir/nir_opt_cse.c \
> > nir/nir_opt_dce.c \
> > nir/nir_opt_dead_cf.c \
> > +   nir/nir_opt_discard.c \
> > nir/nir_opt_gcm.c \
> > nir/nir_opt_global_to_local.c \
> > nir/nir_opt_if.c \
> > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> > index 28aa8de7014..e339258bb94 100644
> > --- a/src/compiler/nir/meson.build
> > +++ b/src/compiler/nir/meson.build
> > @@ -156,6 +156,7 @@ files_libnir = files(
> >'nir_opt_cse.c',
> >'nir_opt_dce.c',
> >'nir_opt_dead_cf.c',
> > +  'nir_opt_discard.c',
> >'nir_opt_gcm.c',
> >'nir_opt_global_to_local.c',
> >'nir_opt_if.c',
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index c40a88c8ccc..dac019c17e8 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
> >  */
> > bool vs_inputs_dual_locations;
> >
> > +   /**
> > +* Whether or not derivatives are still a safe operation after a
> discard
> > +* has occurred.  Optimization passes may be able to be a bit more
> > +* agressive if this is true.
>
> s/agressive/aggressive/
>

fixed locally.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Matt Turner
On Wed, Jul 4, 2018 at 9:59 AM Jason Ekstrand  wrote:
>
> Many fragment shaders do a discard using relatively little information
> but still put the discard fairly far down in the shader for no good
> reason.  If the discard is moved higher up, we can possibly avoid doing
> some or almost all of the work in the shader.  When this lets us skip
> texturing operations, it's an especially high win.
>
> One of the biggest offenders here is DXVK.  The D3D APIs have different
> rules for discards than OpenGL and Vulkan.  One effective way (which is
> what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> wait until the very end of the shader to discard.  This ends up in the
> pessimal case where we always do all of the work before discarding.
> This pass helps some DXVK shaders significantly.
>
> v2 (Jason Ekstrand):
>  - Fix a couple of typos (Grazvydas, Ian)
>  - Use the new nir_instr_move helper
>  - Find all movable discards before moving anything so we don't
>accidentally re-order anything and break dependencies
> ---
>  src/compiler/Makefile.sources  |   1 +
>  src/compiler/nir/meson.build   |   1 +
>  src/compiler/nir/nir.h |  10 +
>  src/compiler/nir/nir_opt_discard.c | 396 +
>  4 files changed, 408 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_discard.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 9e3fbdc2612..8600ce81281 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -271,6 +271,7 @@ NIR_FILES = \
> nir/nir_opt_cse.c \
> nir/nir_opt_dce.c \
> nir/nir_opt_dead_cf.c \
> +   nir/nir_opt_discard.c \
> nir/nir_opt_gcm.c \
> nir/nir_opt_global_to_local.c \
> nir/nir_opt_if.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 28aa8de7014..e339258bb94 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -156,6 +156,7 @@ files_libnir = files(
>'nir_opt_cse.c',
>'nir_opt_dce.c',
>'nir_opt_dead_cf.c',
> +  'nir_opt_discard.c',
>'nir_opt_gcm.c',
>'nir_opt_global_to_local.c',
>'nir_opt_if.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index c40a88c8ccc..dac019c17e8 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
>  */
> bool vs_inputs_dual_locations;
>
> +   /**
> +* Whether or not derivatives are still a safe operation after a discard
> +* has occurred.  Optimization passes may be able to be a bit more
> +* agressive if this is true.

s/agressive/aggressive/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand
Many fragment shaders do a discard using relatively little information
but still put the discard fairly far down in the shader for no good
reason.  If the discard is moved higher up, we can possibly avoid doing
some or almost all of the work in the shader.  When this lets us skip
texturing operations, it's an especially high win.

One of the biggest offenders here is DXVK.  The D3D APIs have different
rules for discards than OpenGL and Vulkan.  One effective way (which is
what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
wait until the very end of the shader to discard.  This ends up in the
pessimal case where we always do all of the work before discarding.
This pass helps some DXVK shaders significantly.

v2 (Jason Ekstrand):
 - Fix a couple of typos (Grazvydas, Ian)
 - Use the new nir_instr_move helper
 - Find all movable discards before moving anything so we don't
   accidentally re-order anything and break dependencies
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |  10 +
 src/compiler/nir/nir_opt_discard.c | 396 +
 4 files changed, 408 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_discard.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 9e3fbdc2612..8600ce81281 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -271,6 +271,7 @@ NIR_FILES = \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
nir/nir_opt_dead_cf.c \
+   nir/nir_opt_discard.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_if.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 28aa8de7014..e339258bb94 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -156,6 +156,7 @@ files_libnir = files(
   'nir_opt_cse.c',
   'nir_opt_dce.c',
   'nir_opt_dead_cf.c',
+  'nir_opt_discard.c',
   'nir_opt_gcm.c',
   'nir_opt_global_to_local.c',
   'nir_opt_if.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c40a88c8ccc..dac019c17e8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
 */
bool vs_inputs_dual_locations;
 
+   /**
+* Whether or not derivatives are still a safe operation after a discard
+* has occurred.  Optimization passes may be able to be a bit more
+* agressive if this is true.
+*/
+   bool derivatives_safe_after_discard;
+
unsigned max_unroll_iterations;
 } nir_shader_compiler_options;
 
@@ -2901,6 +2908,9 @@ bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
 
+bool nir_opt_discard_if(nir_shader *shader);
+bool nir_opt_move_discards_to_top(nir_shader *shader);
+
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
 bool nir_opt_if(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_discard.c 
b/src/compiler/nir/nir_opt_discard.c
new file mode 100644
index 000..c61af163707
--- /dev/null
+++ b/src/compiler/nir/nir_opt_discard.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+#include "nir_worklist.h"
+
+static bool
+block_has_only_discard(nir_block *block)
+{
+   nir_instr *instr = nir_block_first_instr(block);
+   if (instr == NULL || instr != nir_block_last_instr(block))
+  return false;
+
+   if (instr->type != nir_instr_type_intrinsic)
+  return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   return intrin->intrinsic == nir_intrinsic_discard;
+}
+
+static bool
+opt_discard_if_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_builder b;
+   nir_builder_init(, impl);
+
+   

[Mesa-dev] [PATCH 1.5/3] nir: Add a nir_instr_move helper

2018-07-04 Thread Jason Ekstrand
Removes an instruction from one place and inserts it at another while
working around a weird cursor corner-case.
---
 src/compiler/nir/nir.c | 15 +++
 src/compiler/nir/nir.h |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index c515a30b626..bbb28aef890 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -845,6 +845,21 @@ nir_instr_insert(nir_cursor cursor, nir_instr *instr)
   nir_handle_add_jump(instr->block);
 }
 
+void
+nir_instr_move(nir_cursor cursor, nir_instr *instr)
+{
+   /* If the cursor happens to refer to this instruction (either before or
+* after), don't do anything.
+*/
+   if ((cursor.option == nir_cursor_before_instr ||
+cursor.option == nir_cursor_after_instr) &&
+   cursor.instr == instr)
+  return;
+
+   nir_instr_remove(instr);
+   nir_instr_insert(cursor, instr);
+}
+
 static bool
 src_is_valid(const nir_src *src)
 {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 92ab3a699cc..c40a88c8ccc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2318,6 +2318,8 @@ nir_after_cf_list(struct exec_list *cf_list)
  */
 void nir_instr_insert(nir_cursor cursor, nir_instr *instr);
 
+void nir_instr_move(nir_cursor cursor, nir_instr *instr);
+
 static inline void
 nir_instr_insert_before(nir_instr *instr, nir_instr *before)
 {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand
On Wed, Jul 4, 2018 at 2:22 AM, Grazvydas Ignotas  wrote:

> On Wed, Jul 4, 2018 at 9:13 AM, Jason Ekstrand 
> wrote:
> > Many fragment shaders do a discard using relatively little information
> > but still put the discard fairly far down in the shader for no good
> > reason.  If the discard is moved higher up, we can possibly avoid doing
> > some or almost all of the work in the shader.  When this lets us skip
> > texturing operations, it's an especially high win.
> >
> > One of the biggest offenders here is DXVK.  The D3D APIs have different
> > rules for discards than OpenGL and Vulkan.  One effective way (which is
> > what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> > wait until the very end of the shader to discard.  This ends up in the
> > pessimal case where we always do all of the work before discarding.
> > This pass helps some DXVK shaders significantly.
> > ---
> >  src/compiler/Makefile.sources  |   1 +
> >  src/compiler/nir/meson.build   |   1 +
> >  src/compiler/nir/nir.h |  10 +
> >  src/compiler/nir/nir_opt_discard.c | 404 +
> >  4 files changed, 416 insertions(+)
> >  create mode 100644 src/compiler/nir/nir_opt_discard.c
> >
> > diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.
> sources
> > index 9e3fbdc2612..8600ce81281 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -271,6 +271,7 @@ NIR_FILES = \
> > nir/nir_opt_cse.c \
> > nir/nir_opt_dce.c \
> > nir/nir_opt_dead_cf.c \
> > +   nir/nir_opt_discard.c \
> > nir/nir_opt_gcm.c \
> > nir/nir_opt_global_to_local.c \
> > nir/nir_opt_if.c \
> > diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> > index 28aa8de7014..e339258bb94 100644
> > --- a/src/compiler/nir/meson.build
> > +++ b/src/compiler/nir/meson.build
> > @@ -156,6 +156,7 @@ files_libnir = files(
> >'nir_opt_cse.c',
> >'nir_opt_dce.c',
> >'nir_opt_dead_cf.c',
> > +  'nir_opt_discard.c',
> >'nir_opt_gcm.c',
> >'nir_opt_global_to_local.c',
> >'nir_opt_if.c',
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index 92ab3a699cc..44006dac160 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
> >  */
> > bool vs_inputs_dual_locations;
> >
> > +   /**
> > +* Whether or not derivatives are still a safe operation after a
> discard
> > +* has occurred.  Optimization passes may be able to be a bit more
> > +* agressive if this is true.
> > +*/
> > +   bool derivatives_safe_after_discard;
> > +
> > unsigned max_unroll_iterations;
> >  } nir_shader_compiler_options;
> >
> > @@ -2899,6 +2906,9 @@ bool nir_opt_dce(nir_shader *shader);
> >
> >  bool nir_opt_dead_cf(nir_shader *shader);
> >
> > +bool nir_opt_discard_if(nir_shader *shader);
> > +bool nir_opt_move_discards_to_top(nir_shader *shader);
> > +
> >  bool nir_opt_gcm(nir_shader *shader, bool value_number);
> >
> >  bool nir_opt_if(nir_shader *shader);
> > diff --git a/src/compiler/nir/nir_opt_discard.c
> b/src/compiler/nir/nir_opt_discard.c
> > new file mode 100644
> > index 000..eb1a8296282
> > --- /dev/null
> > +++ b/src/compiler/nir/nir_opt_discard.c
> > @@ -0,0 +1,404 @@
> > +/*
> > + * Copyright © 2014 Intel Corporation
>
> 2018?
>

Yeah...


> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + *
> > + * Authors:
> > + *Jason Ekstrand (ja...@jlekstrand.net)
> > + *
> > + */
> > +
> > +#include "nir.h"
> > +#include "nir_builder.h"
> > +#include "nir_control_flow.h"
> > +#include "nir_worklist.h"
> > +
> > +static bool
> > +block_has_only_discard(nir_block *block)
> > +{
> > +   nir_instr *instr = 

Re: [Mesa-dev] [PATCH v2] nvc0: implement multisampled images on Maxwell+

2018-07-04 Thread Karol Herbst
it passes all the CTS tests, which is good :) Thanks!

Reviewed-by: Karol Herbst 

On Wed, Jul 4, 2018 at 11:21 AM, Rhys Perry  wrote:
> Changes in v2:
> - make loadSuInfo32() protected without making the rest protected
> - move NVC0_SU_INFO_* into nv50_ir_lowering_nvc0.h instead of duplicating
>   NVC0_SU_INFO_MS
>
> Signed-off-by: Rhys Perry 
> ---
>  .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 13 +
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 31 
> ++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h| 31 
> +-
>  src/gallium/drivers/nouveau/nv50/nv50_resource.h   |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  7 -
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c|  4 +--
>  6 files changed, 48 insertions(+), 39 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> index 209f5c67ab..c7436e2e29 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
> @@ -315,6 +315,19 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
>samples->tex.query = TXQ_TYPE;
> }
>
> +   if (suq->tex.target.isMS()) {
> +  bld.setPosition(suq, true);
> +
> +  if (mask & 0x1)
> + bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
> +   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), 
> suq->tex.bindless));
> +  if (mask & 0x2) {
> + int d = util_bitcount(mask & 0x1);
> + bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
> +   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), 
> suq->tex.bindless));
> +  }
> +   }
> +
> return true;
>  }
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index a2e9fcb612..71cabfa88d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1748,35 +1748,6 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t 
> off)
>mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), 
> ptr);
>  }
>
> -/* On nvc0, surface info is obtained via the surface binding points passed
> - * to the SULD/SUST instructions.
> - * On nve4, surface info is stored in c[] and is used by various special
> - * instructions, e.g. for clamping coordinates or generating an address.
> - * They couldn't just have added an equivalent to TIC now, couldn't they ?
> - */
> -#define NVC0_SU_INFO_ADDR   0x00
> -#define NVC0_SU_INFO_FMT0x04
> -#define NVC0_SU_INFO_DIM_X  0x08
> -#define NVC0_SU_INFO_PITCH  0x0c
> -#define NVC0_SU_INFO_DIM_Y  0x10
> -#define NVC0_SU_INFO_ARRAY  0x14
> -#define NVC0_SU_INFO_DIM_Z  0x18
> -#define NVC0_SU_INFO_UNK1C  0x1c
> -#define NVC0_SU_INFO_WIDTH  0x20
> -#define NVC0_SU_INFO_HEIGHT 0x24
> -#define NVC0_SU_INFO_DEPTH  0x28
> -#define NVC0_SU_INFO_TARGET 0x2c
> -#define NVC0_SU_INFO_BSIZE  0x30
> -#define NVC0_SU_INFO_RAW_X  0x34
> -#define NVC0_SU_INFO_MS_X   0x38
> -#define NVC0_SU_INFO_MS_Y   0x3c
> -
> -#define NVC0_SU_INFO__STRIDE 0x40
> -
> -#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
> -#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
> -#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
> -
>  inline Value *
>  NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool 
> bindless)
>  {
> @@ -2410,6 +2381,8 @@ 
> NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
>
> bld.setPosition(su, false);
>
> +   adjustCoordinatesMS(su);
> +
> // add texture handle
> switch (su->op) {
> case OP_SUSTP:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index b650bbd219..06b363e8fa 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -23,6 +23,35 @@
>  #include "codegen/nv50_ir.h"
>  #include "codegen/nv50_ir_build_util.h"
>
> +/* On nvc0, surface info is obtained via the surface binding points passed
> + * to the SULD/SUST instructions.
> + * On nve4, surface info is stored in c[] and is used by various special
> + * instructions, e.g. for clamping coordinates or generating an address.
> + * They couldn't just have added an equivalent to TIC now, couldn't they ?
> + */
> +#define NVC0_SU_INFO_ADDR   0x00
> +#define NVC0_SU_INFO_FMT0x04
> +#define NVC0_SU_INFO_DIM_X  0x08
> +#define NVC0_SU_INFO_PITCH  0x0c
> +#define NVC0_SU_INFO_DIM_Y  0x10
> +#define NVC0_SU_INFO_ARRAY  0x14
> +#define NVC0_SU_INFO_DIM_Z  0x18
> +#define NVC0_SU_INFO_UNK1C  0x1c
> +#define NVC0_SU_INFO_WIDTH  0x20
> +#define NVC0_SU_INFO_HEIGHT 0x24
> +#define NVC0_SU_INFO_DEPTH  0x28
> +#define NVC0_SU_INFO_TARGET 

[Mesa-dev] [PATCH 6/6] radv/winsys: remove the max IBs per submit limit for the sysmem path

2018-07-04 Thread Samuel Pitoiset
This path will eventually be improved, but as it's only
used on SI (or with RADV_DEBUG=noibs), I'm not sure that it
matters much.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 46 ---
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index f20b9ff6d8..4e3b1d0e6b 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -299,15 +299,6 @@ static void radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, 
size_t min_size)
/* The maximum size in dwords has been reached,
 * try to allocate a new one.
 */
-   if (cs->num_old_cs_buffers + 1 >= 
AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
-   /* TODO: Allow to submit more than 4 IBs. */
-   fprintf(stderr, "amdgpu: Maximum number of IBs "
-   "per submit reached.\n");
-   cs->failed = true;
-   cs->base.cdw = 0;
-   return;
-   }
-
cs->old_cs_buffers =
realloc(cs->old_cs_buffers,
(cs->num_old_cs_buffers + 1) * 
sizeof(*cs->old_cs_buffers));
@@ -996,30 +987,46 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct 
radeon_winsys_ctx *_ctx,
assert(cs_count);
 
for (unsigned i = 0; i < cs_count;) {
-   struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = 
{0};
-   unsigned number_of_ibs = 1;
-   struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = 
{0};
+   struct amdgpu_cs_ib_info *ibs;
+   struct radeon_winsys_bo **bos;
struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : 
initial_preamble_cs;
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+   unsigned number_of_ibs;
uint32_t *ptr;
unsigned cnt = 0;
unsigned size = 0;
unsigned pad_words = 0;
 
-   if (cs->num_old_cs_buffers > 0) {
+   /* Compute the number of IBs for this submit. */
+   number_of_ibs = cs->num_old_cs_buffers + 1;
+
+   ibs = malloc(number_of_ibs * sizeof(*ibs));
+   if (!ibs)
+   return -ENOMEM;
+
+   bos = malloc(number_of_ibs * sizeof(*bos));
+   if (!bos) {
+   free(ibs);
+   return -ENOMEM;
+   }
+
+   if (number_of_ibs > 1) {
/* Special path when the maximum size in dwords has
 * been reached because we need to handle more than one
 * IB per submit.
 */
-   unsigned new_cs_count = cs->num_old_cs_buffers + 1;
-   struct radeon_cmdbuf 
*new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
+   struct radeon_cmdbuf **new_cs_array;
unsigned idx = 0;
 
+   new_cs_array = malloc(cs->num_old_cs_buffers *
+ sizeof(*new_cs_array));
+   assert(new_cs_array);
+
for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
new_cs_array[idx++] = >old_cs_buffers[j];
new_cs_array[idx++] = cs_array[i];
 
-   for (unsigned j = 0; j < new_cs_count; j++) {
+   for (unsigned j = 0; j < number_of_ibs; j++) {
struct radeon_cmdbuf *rcs = new_cs_array[j];
bool needs_preamble = preamble_cs && j == 0;
unsigned size = 0;
@@ -1057,8 +1064,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct 
radeon_winsys_ctx *_ctx,
ibs[j].ib_mc_address = 
radv_buffer_get_va(bos[j]);
}
 
-   number_of_ibs = new_cs_count;
cnt++;
+   free(new_cs_array);
} else {
if (preamble_cs)
size += preamble_cs->cdw;
@@ -1107,6 +1114,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct 
radeon_winsys_ctx *_ctx,
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed "
"for the sysmem submission (%d)\n", r);
+   free(ibs);
+   free(bos);
return r;
}
 
@@ -1136,6 

[Mesa-dev] [PATCH 5/6] radv/winsys: fix buffer deletion in the sysmem path

2018-07-04 Thread Samuel Pitoiset
In case we failed to submit the CS correctly.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index e54d9e84fb..f20b9ff6d8 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -1134,10 +1134,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct 
radeon_winsys_ctx *_ctx,
 
for (unsigned j = 0; j < number_of_ibs; j++) {
ws->buffer_destroy(bos[j]);
-   if (r)
-   return r;
}
 
+   if (r)
+   return r;
+
i += cnt;
}
if (fence)
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] radv/winsys: remove the max IBs per submit limit for the fallback path

2018-07-04 Thread Samuel Pitoiset
The chained submission is the fastest path and it should now
be used more often than before.

This breaks some CTS tests when HiZ is enabled; flushing DB_META
at the end of cmdbufs fixes the regressions. I guess there is
a missing flush somewhere.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 ++
 1 file changed, 55 insertions(+), 48 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 848e81924f..d99fffd431 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -886,66 +886,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct 
radeon_winsys_ctx *_ctx,
struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
amdgpu_bo_list_handle bo_list;
-   struct amdgpu_cs_request request;
-   bool emit_signal_sem = sem_info->cs_emit_signal;
+   struct amdgpu_cs_request request = {};
+   struct amdgpu_cs_ib_info *ibs;
+   struct radv_amdgpu_cs *cs0;
+   unsigned number_of_ibs;
+
assert(cs_count);
+   cs0 = radv_amdgpu_cs(cs_array[0]);
 
-   for (unsigned i = 0; i < cs_count;) {
-   struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]);
-   struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
-   struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : 
initial_preamble_cs;
-   unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - 
!!preamble_cs,
-   cs_count - i);
+   /* Compute the number of IBs for this submit. */
+   number_of_ibs = cs_count + !!initial_preamble_cs;
 
-   memset(, 0, sizeof(request));
+   /* Create a buffer object list. */
+   r = radv_amdgpu_create_bo_list(cs0->ws, _array[0], cs_count, NULL, 0,
+  initial_preamble_cs, radv_bo_list,
+  _list);
+   if (r) {
+   fprintf(stderr, "amdgpu: buffer list creation failed "
+   "for the fallback submission (%d)\n", r);
+   return r;
+   }
 
-   r = radv_amdgpu_create_bo_list(cs0->ws, _array[i], cnt, 
NULL, 0,
-  preamble_cs, radv_bo_list, 
_list);
-   if (r) {
-   fprintf(stderr, "amdgpu: buffer list creation failed "
-   "for the fallback submission (%d)\n", 
r);
-   return r;
-   }
+   ibs = malloc(number_of_ibs * sizeof(*ibs));
+   if (!ibs) {
+   if (bo_list)
+   amdgpu_bo_list_destroy(bo_list);
+   return -ENOMEM;
+   }
 
-   request.ip_type = cs0->hw_ip;
-   request.ring = queue_idx;
-   request.resources = bo_list;
-   request.number_of_ibs = cnt + !!preamble_cs;
-   request.ibs = ibs;
-   request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, 
queue_idx);
+   /* Configure the CS request. */
+   if (initial_preamble_cs)
+   ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
 
-   if (preamble_cs) {
-   ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
-   }
+   for (unsigned i = 0; i < cs_count; i++) {
+   struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
 
-   for (unsigned j = 0; j < cnt; ++j) {
-   struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + 
j]);
-   ibs[j + !!preamble_cs] = cs->ib;
+   ibs[i + !!initial_preamble_cs] = cs->ib;
 
-   if (cs->is_chained) {
-   *cs->ib_size_ptr -= 4;
-   cs->is_chained = false;
-   }
+   if (cs->is_chained) {
+   *cs->ib_size_ptr -= 4;
+   cs->is_chained = false;
}
+   }
 
-   sem_info->cs_emit_signal = (i == cs_count - cnt) ? 
emit_signal_sem : false;
-   r = radv_amdgpu_cs_submit(ctx, , sem_info);
-   if (r) {
-   if (r == -ENOMEM)
-   fprintf(stderr, "amdgpu: Not enough memory for 
command submission.\n");
-   else
-   fprintf(stderr, "amdgpu: The CS has been 
rejected, "
-   "see dmesg for more 
information.\n");
-   }
+   request.ip_type = cs0->hw_ip;
+   request.ring = queue_idx;
+   request.resources = bo_list;
+   request.number_of_ibs = number_of_ibs;
+   request.ibs = ibs;
+   request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, 

[Mesa-dev] [PATCH 4/6] radv/winsys: do not assign last submission when chained path failed

2018-07-04 Thread Samuel Pitoiset
I don't think we want to wait for something that hasn't been
correctly submitted. This is similar to the fallback path.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 7112819e56..e54d9e84fb 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -870,12 +870,15 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct 
radeon_winsys_ctx *_ctx,
if (bo_list)
amdgpu_bo_list_destroy(bo_list);
 
+   if (r)
+   return r;
+
if (fence)
radv_amdgpu_request_to_fence(ctx, fence, );
 
radv_assign_last_submit(ctx, );
 
-   return r;
+   return 0;
 }
 
 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx 
*_ctx,
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radv/winsys: cleanup the chained submission path

2018-07-04 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 28 +++
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index d99fffd431..7112819e56 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -807,6 +807,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct 
radeon_winsys_ctx *_ctx,
amdgpu_bo_list_handle bo_list;
struct amdgpu_cs_request request = {0};
struct amdgpu_cs_ib_info ibs[2];
+   unsigned number_of_ibs = 1;
 
for (unsigned i = cs_count; i--;) {
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
@@ -830,28 +831,33 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct 
radeon_winsys_ctx *_ctx,
}
}
 
-   r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, 
initial_preamble_cs,
-  radv_bo_list, _list);
+   /* Create a buffer object list. */
+   r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0,
+  initial_preamble_cs, radv_bo_list,
+  _list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed for the "
"chained submission(%d)\n", r);
return r;
}
 
+   /* Configure the CS request. */
+   if (initial_preamble_cs) {
+   ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
+   ibs[1] = cs0->ib;
+   number_of_ibs++;
+   } else {
+   ibs[0] = cs0->ib;
+   }
+
request.ip_type = cs0->hw_ip;
request.ring = queue_idx;
-   request.number_of_ibs = 1;
-   request.ibs = >ib;
+   request.number_of_ibs = number_of_ibs;
+   request.ibs = ibs;
request.resources = bo_list;
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
 
-   if (initial_preamble_cs) {
-   request.ibs = ibs;
-   request.number_of_ibs = 2;
-   ibs[1] = cs0->ib;
-   ibs[0] = ((struct radv_amdgpu_cs*)initial_preamble_cs)->ib;
-   }
-
+   /* Submit the CS. */
r = radv_amdgpu_cs_submit(ctx, , sem_info);
if (r) {
if (r == -ENOMEM)
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] configure: bump libdrm for AMDGPU to 2.4.92

2018-07-04 Thread Samuel Pitoiset
This will allow us to submit more IBs at once because the limit
has been removed in libdrm 2.4.92.

Signed-off-by: Samuel Pitoiset 
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index f135d05736..7c58a5a8f1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
 # in the first entry.
 LIBDRM_REQUIRED=2.4.75
 LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.91
+LIBDRM_AMDGPU_REQUIRED=2.4.92
 LIBDRM_INTEL_REQUIRED=2.4.75
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] egl/dri: Use __DRI2_DAMAGE extension for KHR_partial_update

2018-07-04 Thread Harish Krupo
Use the DRI2 interface callback to pass the damage rects to
the driver.

Signed-off-by: Harish Krupo 
---
 src/egl/drivers/dri2/egl_dri2.c | 46 ++---
 src/egl/drivers/dri2/egl_dri2.h |  1 +
 2 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 8f1e78186b..0659976d9a 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -455,6 +455,7 @@ static const struct dri2_extension_match 
optional_core_extensions[] = {
{ __DRI2_NO_ERROR, 1, offsetof(struct dri2_egl_display, no_error) },
{ __DRI2_CONFIG_QUERY, 1, offsetof(struct dri2_egl_display, config) },
{ __DRI2_FENCE, 1, offsetof(struct dri2_egl_display, fence) },
+   { __DRI2_DAMAGE, 1, offsetof(struct dri2_egl_display, damage_extension) },
{ __DRI2_RENDERER_QUERY, 1, offsetof(struct dri2_egl_display, 
rendererQuery) },
{ __DRI2_INTEROP, 1, offsetof(struct dri2_egl_display, interop) },
{ __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
@@ -891,6 +892,9 @@ dri2_setup_extensions(_EGLDisplay *disp)
   (dri2_dpy->image && dri2_dpy->image->base.version >= 15);
 #endif
 
+   if (dri2_dpy->damage_extension)
+  disp->Extensions.KHR_partial_update = true;
+
dri2_bind_extensions(dri2_dpy, optional_core_extensions, extensions, true);
return EGL_TRUE;
 }
@@ -1659,10 +1663,20 @@ dri2_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, 
_EGLSurface *surf)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
_EGLContext *ctx = _eglGetCurrentContext();
+   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
+   int ret;
 
if (ctx && surf)
   dri2_surf_update_fence_fd(ctx, dpy, surf);
-   return dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
+   ret = dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
+
+   /* Successfully swapped the buffer.
+* This marks the end of frame boundary.
+* Set the damage rects back to full again.
+*/
+   if (ret && dri2_dpy->damage_extension)
+  dri2_dpy->damage_extension->set_damage_region(dri_ctx, 0, NULL);
+   return ret;
 }
 
 static EGLBoolean
@@ -1672,11 +1686,20 @@ dri2_swap_buffers_with_damage(_EGLDriver *drv, 
_EGLDisplay *dpy,
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
_EGLContext *ctx = _eglGetCurrentContext();
+   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
+   int ret;
 
if (ctx && surf)
   dri2_surf_update_fence_fd(ctx, dpy, surf);
-   return dri2_dpy->vtbl->swap_buffers_with_damage(drv, dpy, surf,
+   ret = dri2_dpy->vtbl->swap_buffers_with_damage(drv, dpy, surf,
rects, n_rects);
+   /* Successfully swapped the buffer.
+* This marks the end of frame boundary.
+* Set the damage rects back to full again.
+*/
+   if (ret && dri2_dpy->damage_extension)
+  dri2_dpy->damage_extension->set_damage_region(dri_ctx, 0, NULL);
+   return ret;
 }
 
 static EGLBoolean
@@ -1684,7 +1707,16 @@ dri2_swap_buffers_region(_EGLDriver *drv, _EGLDisplay 
*dpy, _EGLSurface *surf,
  EGLint numRects, const EGLint *rects)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
-   return dri2_dpy->vtbl->swap_buffers_region(drv, dpy, surf, numRects, rects);
+   _EGLContext *ctx = _eglGetCurrentContext();
+   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
+   int ret = dri2_dpy->vtbl->swap_buffers_region(drv, dpy, surf, numRects, 
rects);
+   /* Successfully swapped the buffer.
+* This marks the end of frame boundary.
+* Set the damage rects back to full again.
+*/
+   if (ret && dri2_dpy->damage_extension)
+  dri2_dpy->damage_extension->set_damage_region(dri_ctx, 0, NULL);
+   return ret;
 }
 
 static EGLBoolean
@@ -1692,7 +1724,13 @@ dri2_set_damage_region(_EGLDriver *drv, _EGLDisplay 
*dpy, _EGLSurface *surf,
EGLint *rects, EGLint n_rects)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
-   return false;
+
+   _EGLContext *ctx = _eglGetCurrentContext();
+   __DRIcontext *dri_ctx = dri2_egl_context(ctx)->dri_context;
+   if (dri2_dpy->damage_extension)
+  return dri2_dpy->damage_extension->set_damage_region(dri_ctx, n_rects, 
rects);
+   else
+  return false;
 }
 
 static EGLBoolean
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index c7ea212946..8efa33da6d 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -171,6 +171,7 @@ struct dri2_egl_display
const __DRInoErrorExtension*no_error;
const __DRI2configQueryExtension *config;
const __DRI2fenceExtension *fence;
+   const __DRI2damageExtension *damage_extension;
const __DRI2blobExtension *blob;
const __DRI2rendererQueryExtension *rendererQuery;
const __DRI2interopExtension *interop;
-- 
2.18.0

___
mesa-dev mailing list

[Mesa-dev] [PATCH 1/3] egl/android: Delete set_damage_region from egl dri vtbl

2018-07-04 Thread Harish Krupo
The intention of KHR_partial_update was not to send the damage back
to the platform but to send the damage to the driver to ensure that the
following rendering could be restricted to those regions.
This patch removes set_damage_region from the egl_dri vtbl and all
the platform_*.c files.
Upcoming patches then add a new dri2 interface for the drivers to
implement.

Signed-off-by: Harish Krupo 
---
 src/egl/drivers/dri2/egl_dri2.c |  2 +-
 src/egl/drivers/dri2/egl_dri2.h |  4 --
 src/egl/drivers/dri2/egl_dri2_fallbacks.h   |  9 -
 src/egl/drivers/dri2/platform_android.c | 45 -
 src/egl/drivers/dri2/platform_drm.c |  1 -
 src/egl/drivers/dri2/platform_surfaceless.c |  1 -
 src/egl/drivers/dri2/platform_wayland.c |  1 -
 src/egl/drivers/dri2/platform_x11.c |  2 -
 src/egl/drivers/dri2/platform_x11_dri3.c|  1 -
 9 files changed, 1 insertion(+), 65 deletions(-)

diff --git a/src/egl/drivers/dri2/egl_dri2.c b/src/egl/drivers/dri2/egl_dri2.c
index 45d0c7275c..8f1e78186b 100644
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1692,7 +1692,7 @@ dri2_set_damage_region(_EGLDriver *drv, _EGLDisplay *dpy, 
_EGLSurface *surf,
EGLint *rects, EGLint n_rects)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
-   return dri2_dpy->vtbl->set_damage_region(drv, dpy, surf, rects, n_rects);
+   return false;
 }
 
 static EGLBoolean
diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
index 5d8fbfa235..c7ea212946 100644
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -118,10 +118,6 @@ struct dri2_egl_display_vtbl {
   _EGLSurface *surface,
   const EGLint *rects, EGLint n_rects);
 
-   EGLBoolean (*set_damage_region)(_EGLDriver *drv, _EGLDisplay *dpy,
-   _EGLSurface *surface,
-   const EGLint *rects, EGLint n_rects);
-
EGLBoolean (*swap_buffers_region)(_EGLDriver *drv, _EGLDisplay *dpy,
  _EGLSurface *surf, EGLint numRects,
  const EGLint *rects);
diff --git a/src/egl/drivers/dri2/egl_dri2_fallbacks.h 
b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
index a664677572..44d7d2132d 100644
--- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h
+++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
@@ -61,7 +61,6 @@ dri2_fallback_swap_buffers_with_damage(_EGLDriver *drv, 
_EGLDisplay *dpy,
   const EGLint *rects, EGLint n_rects)
 {
struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
-   dri2_dpy->vtbl->set_damage_region(drv, dpy, surf, rects, n_rects);
return dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
 }
 
@@ -89,14 +88,6 @@ dri2_fallback_copy_buffers(_EGLDriver *drv, _EGLDisplay *dpy,
return EGL_FALSE;
 }
 
-static inline EGLBoolean
-dri2_fallback_set_damage_region(_EGLDriver *drv, _EGLDisplay *dpy,
-_EGLSurface *surf,
-const EGLint *rects, EGLint n_rects)
-{
-   return EGL_FALSE;
-}
-
 static inline EGLint
 dri2_fallback_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy,
_EGLSurface *surf)
diff --git a/src/egl/drivers/dri2/platform_android.c 
b/src/egl/drivers/dri2/platform_android.c
index cc16fd8118..704e3d311e 100644
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -666,43 +666,6 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, 
_EGLSurface *draw)
return EGL_TRUE;
 }
 
-#if ANDROID_API_LEVEL >= 23
-static EGLBoolean
-droid_set_damage_region(_EGLDriver *drv,
-_EGLDisplay *disp,
-_EGLSurface *draw, const EGLint* rects, EGLint n_rects)
-{
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
-   android_native_rect_t* droid_rects = NULL;
-   int ret;
-
-   if (n_rects == 0)
-  return EGL_TRUE;
-
-   droid_rects = malloc(n_rects * sizeof(android_native_rect_t));
-   if (droid_rects == NULL)
- return _eglError(EGL_BAD_ALLOC, "eglSetDamageRegionKHR");
-
-   for (EGLint num_drects = 0; num_drects < n_rects; num_drects++) {
-  EGLint i = num_drects * 4;
-  droid_rects[num_drects].left = rects[i];
-  droid_rects[num_drects].bottom = rects[i + 1];
-  droid_rects[num_drects].right = rects[i] + rects[i + 2];
-  droid_rects[num_drects].top = rects[i + 1] + rects[i + 3];
-   }
-
-   /*
-* XXX/TODO: Need to check for other return values
-*/
-
-   ret = native_window_set_surface_damage(dri2_surf->window, droid_rects, 
n_rects);
-   free(droid_rects);
-
-   return ret == 0 ? EGL_TRUE : EGL_FALSE;
-}
-#endif
-
 static _EGLImage *
 

[Mesa-dev] [PATCH 2/3] dri_interface: add an interface for setting damage region

2018-07-04 Thread Harish Krupo
Implementors of this interface provide ability to render only
to damaged regions on the framebuffer.

Signed-off-by: Harish Krupo 
---
 include/GL/internal/dri_interface.h | 25 +
 1 file changed, 25 insertions(+)

diff --git a/include/GL/internal/dri_interface.h 
b/include/GL/internal/dri_interface.h
index c32cdd3767..c4da3fd18a 100644
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -83,6 +83,7 @@ typedef struct __DRI2throttleExtensionRec 
__DRI2throttleExtension;
 typedef struct __DRI2fenceExtensionRec  __DRI2fenceExtension;
 typedef struct __DRI2interopExtensionRec   __DRI2interopExtension;
 typedef struct __DRI2blobExtensionRec   __DRI2blobExtension;
+typedef struct __DRI2damageExtensionRec __DRI2damageExtension;
 
 typedef struct __DRIimageLoaderExtensionRec __DRIimageLoaderExtension;
 typedef struct __DRIimageDriverExtensionRec __DRIimageDriverExtension;
@@ -460,6 +461,30 @@ struct __DRI2fenceExtensionRec {
int (*get_fence_fd)(__DRIscreen *screen, void *fence);
 };
 
+/**
+ * Extension for damage rendering
+ */
+
+#define __DRI2_DAMAGE "DRI2_Damage"
+#define __DRI2_DAMAGE_VERSION 1
+
+struct __DRI2damageExtensionRec {
+   __DRIextension base;
+
+   /**
+* Provides rectangles to the driver to restrict its rendering
+* to those rectangles.
+*
+* \param ctx the context where commands are flushed
+* \param size    total elements in the array
+* \param rects   the array of rectangles
+*
+* Calling this function with size: 0 and rects: NULL should reset the
+* damage to the full surface.
+*/
+   GLboolean (*set_damage_region)(__DRIcontext *ctx, GLuint size, GLint 
*rects);
+
+};
 
 /**
  * Extension for API interop.
-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] Rectify KHR_partial_update extension implementation

2018-07-04 Thread Harish Krupo
Based on the discussion in the thread starting at: 
https://lists.freedesktop.org/archives/mesa-dev/2018-June/198846.html
it was identified that the implementation of the KHR_partial_update
was incorrect, in that the damage was sent to the EGL backends
instead of the driver. This patch series removes that implementation
and adds an interface for drivers to implement so that the damage can
be reported to the drivers and the rendering can be restricted to those
damage rectangles.

Harish Krupo (3):
  egl/android: Delete set_damage_region from egl dri vtbl
  dri_interface: add an interface for setting damage region
  egl/dri: Use __DRI2_DAMAGE extension for KHR_partial_update

 include/GL/internal/dri_interface.h | 25 +++
 src/egl/drivers/dri2/egl_dri2.c | 46 +++--
 src/egl/drivers/dri2/egl_dri2.h |  5 +--
 src/egl/drivers/dri2/egl_dri2_fallbacks.h   |  9 
 src/egl/drivers/dri2/platform_android.c | 45 
 src/egl/drivers/dri2/platform_drm.c |  1 -
 src/egl/drivers/dri2/platform_surfaceless.c |  1 -
 src/egl/drivers/dri2/platform_wayland.c |  1 -
 src/egl/drivers/dri2/platform_x11.c |  2 -
 src/egl/drivers/dri2/platform_x11_dri3.c|  1 -
 10 files changed, 68 insertions(+), 68 deletions(-)

-- 
2.18.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106151] [amdgpu][vulkan] GPU hang (Vega 56) while running game (Rise of the Tomb Raider)

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106151

--- Comment #23 from Samuel Pitoiset  ---
Can you also try with RADV_PERFTEST=nobatchchain please?

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/4] util/queue: add a process name into a thread name

2018-07-04 Thread Eric Engestrom
On Tuesday, 2018-07-03 19:16:11 -0400, Marek Olšák wrote:
> From: Marek Olšák 
> 
> ---
>  src/util/u_queue.c | 35 +--
>  src/util/u_queue.h |  2 +-
>  2 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/src/util/u_queue.c b/src/util/u_queue.c
> index da513fd9cc5..6c92e8140a1 100644
> --- a/src/util/u_queue.c
> +++ b/src/util/u_queue.c
> @@ -24,20 +24,21 @@
>   * of the Software.
>   */
>  
>  #include "u_queue.h"
>  
>  #include 
>  
>  #include "util/os_time.h"
>  #include "util/u_string.h"
>  #include "util/u_thread.h"
> +#include "process.h"
>  
>  static void util_queue_killall_and_wait(struct util_queue *queue);
>  
>  /
>   * Wait for all queues to assert idle when exit() is called.
>   *
>   * Otherwise, C++ static variable destructors can be called while threads
>   * are using the static variables.
>   */
>  
> @@ -233,21 +234,21 @@ struct thread_input {
>  static int
>  util_queue_thread_func(void *input)
>  {
> struct util_queue *queue = ((struct thread_input*)input)->queue;
> int thread_index = ((struct thread_input*)input)->thread_index;
>  
> free(input);
>  
> if (queue->name) {
>char name[16];
> -  util_snprintf(name, sizeof(name), "%s:%i", queue->name, thread_index);
> +  util_snprintf(name, sizeof(name), "%s%i", queue->name, thread_index);
>u_thread_setname(name);
> }
>  
> while (1) {
>struct util_queue_job job;
>  
>mtx_lock(>lock);
>assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);
>  
>/* wait if the queue is empty */
> @@ -292,22 +293,52 @@ util_queue_thread_func(void *input)
>  
>  bool
>  util_queue_init(struct util_queue *queue,
>  const char *name,
>  unsigned max_jobs,
>  unsigned num_threads,
>  unsigned flags)
>  {
> unsigned i;
>  
> +   /* Form the thread name from process_name and name, limited to 13
> +* characters. Characters 14-15 are reserved for the thread number.
> +* Character 16 should be 0. Final form: "process:name12"
> +*
> +* If name is too long, it's truncated. If any space is left, the process
> +* name fills it.
> +*/
> +   const char *process_name = util_get_process_name();
> +   unsigned process_len = process_name ? strlen(process_name) : 0;
> +   unsigned name_len = strlen(name);
> +   const unsigned max_chars = 13;

Let's avoid magic numbers :)

   const unsigned max_chars = sizeof(queue->name) - 1;

> +
> +   name_len = MIN2(name_len, max_chars);
> +
> +   /* See if there is any space left for the process name, add + 1 for
> +* the colon. */
> +   if (max_chars > name_len + 1)
> +  process_len = MIN2(process_len, max_chars - name_len - 1);
> +   else
> +  process_len = 0;

I think doing the math only once is clearer:

   /* See if there is any space left for the process name; reserve 1 for
* the colon. */
   process_len = MIN2(process_len, max_chars - name_len - 1);
   if (process_len < 0)
 process_len = 0;

> +
> memset(queue, 0, sizeof(*queue));
> -   queue->name = name;
> +
> +   if (process_len) {
> +  memcpy(queue->name, process_name, process_len);
> +  queue->name[process_len] = ':';
> +  memcpy(queue->name + process_len + 1, name, name_len);
> +  queue->name[process_len + 1 + name_len] = 0;

If you truncate the process name:

   process_name[process_len] = 0;

Then this `if (process_len)` branch can be a simple:

   snprintf(queue->name, sizeof(queue->name), "%s:%s", process_name, name);

> +   } else {
> +  snprintf(queue->name, max_chars + 1, "%s", name);

nit: replace `max_chars + 1` with `sizeof(queue->name)` :)


With the magic number removed:
Reviewed-by: Eric Engestrom 

(I'll leave it up to you for the rest)

> +   }
> +
> queue->flags = flags;
> queue->num_threads = num_threads;
> queue->max_jobs = max_jobs;
>  
> queue->jobs = (struct util_queue_job*)
>   calloc(max_jobs, sizeof(struct util_queue_job));
> if (!queue->jobs)
>goto fail;
>  
> (void) mtx_init(>lock, mtx_plain);
> diff --git a/src/util/u_queue.h b/src/util/u_queue.h
> index d702c4bce8d..3c21ef3bc7b 100644
> --- a/src/util/u_queue.h
> +++ b/src/util/u_queue.h
> @@ -192,21 +192,21 @@ typedef void (*util_queue_execute_func)(void *job, int 
> thread_index);
>  
>  struct util_queue_job {
> void *job;
> struct util_queue_fence *fence;
> util_queue_execute_func execute;
> util_queue_execute_func cleanup;
>  };
>  
>  /* Put this into your context. */
>  struct util_queue {
> -   const char *name;
> +   char name[14];
> mtx_t finish_lock; /* only for util_queue_finish */
> mtx_t lock;
> cnd_t has_queued_cond;
> cnd_t has_space_cond;
> thrd_t *threads;
> unsigned flags;
> int num_queued;
> unsigned num_threads;
> int 

Re: [Mesa-dev] [PATCH 2/4] gallium/os: use util_get_process_name when possible

2018-07-04 Thread Eric Engestrom
On Tuesday, 2018-07-03 19:16:10 -0400, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Eric Engestrom 

> ---
>  src/gallium/auxiliary/os/os_process.c | 16 ++--
>  src/util/process.c|  2 ++
>  2 files changed, 4 insertions(+), 14 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/os/os_process.c 
> b/src/gallium/auxiliary/os/os_process.c
> index 035bd228e76..87072f0b046 100644
> --- a/src/gallium/auxiliary/os/os_process.c
> +++ b/src/gallium/auxiliary/os/os_process.c
> @@ -22,32 +22,27 @@
>   * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>   *
>   **/
>  
>  
>  #include "pipe/p_config.h"
>  #include "os/os_process.h"
>  #include "util/u_memory.h"
> +#include "util/process.h"
>  
>  #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
>  #  include 
> -#elif defined(__GLIBC__) || defined(__CYGWIN__)
> -#  include 
> -#elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE) || 
> defined(PIPE_OS_ANDROID)
> -#  include 
>  #elif defined(PIPE_OS_HAIKU)
>  #  include 
>  #  include 
> -#else
> -#warning unexpected platform in os_process.c
>  #endif
>  
>  #if defined(PIPE_OS_LINUX)
>  #  include 
>  #endif
>  
>  
>  /**
>   * Return the name of the current process.
>   * \param procname  returns the process name
> @@ -77,34 +72,27 @@ os_get_process_name(char *procname, size_t size)
>lpProcessName = strrchr(szProcessPath, '\\');
>lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
>  
>lpProcessExt = strrchr(lpProcessName, '.');
>if (lpProcessExt) {
>   *lpProcessExt = '\0';
>}
>  
>name = lpProcessName;
>  
> -#elif defined(__GLIBC__) || defined(__CYGWIN__)
> -  name = program_invocation_short_name;
> -#elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE) || 
> defined(PIPE_OS_ANDROID)
> -  /* *BSD and OS X */
> -  name = getprogname();
>  #elif defined(PIPE_OS_HAIKU)
>image_info info;
>get_image_info(B_CURRENT_TEAM, );
>name = info.name;
>  #else
> -#warning unexpected platform in os_process.c
> -  return FALSE;
> +  name = util_get_process_name();
>  #endif
> -
> }
>  
> assert(size > 0);
> assert(procname);
>  
> if (name && procname && size > 0) {
>strncpy(procname, name, size);
>procname[size - 1] = '\0';
>return TRUE;
> }
> diff --git a/src/util/process.c b/src/util/process.c
> index 992ba7f734a..449c7fa77c2 100644
> --- a/src/util/process.c
> +++ b/src/util/process.c
> @@ -53,20 +53,22 @@ __getProgramName()
>  #include 
>  #if (__FreeBSD_version >= 44)
>  #define GET_PROGRAM_NAME() getprogname()
>  #endif
>  #elif defined(__NetBSD__) && defined(__NetBSD_Version__) && 
> (__NetBSD_Version__ >= 106000100)
>  #define GET_PROGRAM_NAME() getprogname()
>  #elif defined(__DragonFly__)
>  #define GET_PROGRAM_NAME() getprogname()
>  #elif defined(__APPLE__)
>  #define GET_PROGRAM_NAME() getprogname()
> +#elif defined(ANDROID)
> +#define GET_PROGRAM_NAME() getprogname()
>  #elif defined(__sun)
>  /* Solaris has getexecname() which returns the full path - return just
> the basename to match BSD getprogname() */
>  #include 
>  
>  static const char *
>  __getProgramName()
>  {
>  static const char *progname;
>  
> -- 
> 2.17.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] util: extract get_process_name from xmlconfig.c

2018-07-04 Thread Eric Engestrom
On Tuesday, 2018-07-03 19:16:09 -0400, Marek Olšák wrote:
> From: Marek Olšák 
> 
> ---
>  src/util/Makefile.sources |   2 +
>  src/util/process.c| 116 ++
>  src/util/process.h|  34 +++
>  src/util/xmlconfig.c  |  86 +---
>  4 files changed, 154 insertions(+), 84 deletions(-)
>  create mode 100644 src/util/process.c
>  create mode 100644 src/util/process.h
> 
> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
> index 534520ce763..e8933666726 100644
> --- a/src/util/Makefile.sources
> +++ b/src/util/Makefile.sources
> @@ -17,20 +17,22 @@ MESA_UTIL_FILES := \
>   half_float.c \
>   half_float.h \
>   hash_table.c \
>   hash_table.h \
>   list.h \
>   macros.h \
>   mesa-sha1.c \
>   mesa-sha1.h \
>   os_time.c \
>   os_time.h \
> + process.c \
> + process.h \
>   sha1/sha1.c \
>   sha1/sha1.h \
>   ralloc.c \
>   ralloc.h \
>   rand_xor.c \
>   rand_xor.h \
>   register_allocate.c \
>   register_allocate.h \
>   rgtc.c \
>   rgtc.h \

8<
diff --git a/src/util/meson.build b/src/util/meson.build
index c777984e28d939f66934..1713864b4f7bffa07685 100644
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -48,6 +48,8 @@ files_mesa_util = files(
   'mesa-sha1.h',
   'os_time.c',
   'os_time.h',
+  'process.c',
+  'process.h',
   'sha1/sha1.c',
   'sha1/sha1.h',
   'ralloc.c',
>8

With that:
Reviewed-by: Eric Engestrom 

> diff --git a/src/util/process.c b/src/util/process.c
> new file mode 100644
> index 000..992ba7f734a
> --- /dev/null
> +++ b/src/util/process.c
> @@ -0,0 +1,116 @@
> +/*
> + * Copyright © 2003 Felix Kuehling
> + * Copyright © 2018 Advanced Micro Devices, Inc.
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
> + * a copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
> + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
> + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + */
> +
> +#include "process.h"
> +#include 
> +#include 
> +#include 
> +
> +#undef GET_PROGRAM_NAME
> +
> +#if (defined(__GNU_LIBRARY__) || defined(__GLIBC__)) && !defined(__UCLIBC__)
> +#if !defined(__GLIBC__) || (__GLIBC__ < 2)
> +/* These aren't declared in any libc5 header */
> +extern char *program_invocation_name, *program_invocation_short_name;
> +#endif
> +static const char *
> +__getProgramName()
> +{
> +char * arg = strrchr(program_invocation_name, '/');
> +if (arg)
> +return arg+1;
> +else
> +return program_invocation_name;
> +}
> +#define GET_PROGRAM_NAME() __getProgramName()
> +#elif defined(__CYGWIN__)
> +#define GET_PROGRAM_NAME() program_invocation_short_name
> +#elif defined(__FreeBSD__) && (__FreeBSD__ >= 2)
> +#include 
> +#if (__FreeBSD_version >= 44)
> +#define GET_PROGRAM_NAME() getprogname()
> +#endif
> +#elif defined(__NetBSD__) && defined(__NetBSD_Version__) && 
> (__NetBSD_Version__ >= 106000100)
> +#define GET_PROGRAM_NAME() getprogname()
> +#elif defined(__DragonFly__)
> +#define GET_PROGRAM_NAME() getprogname()
> +#elif defined(__APPLE__)
> +#define GET_PROGRAM_NAME() getprogname()
> +#elif defined(__sun)
> +/* Solaris has getexecname() which returns the full path - return just
> +   the basename to match BSD getprogname() */
> +#include 
> +
> +static const char *
> +__getProgramName()
> +{
> +static const char *progname;
> +
> +if (progname == NULL) {
> +const char *e = getexecname();
> +if (e != NULL) {
> +/* Have to make a copy since getexecname can return a readonly
> +   string, but basename expects to be able to modify its arg. */
> +char *n = strdup(e);
> +if (n != NULL) {
> +progname = basename(n);
> +}
> +}
> +}
> +return progname;
> +}
> +
> +#define GET_PROGRAM_NAME() 

Re: [Mesa-dev] [PATCH 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Grazvydas Ignotas
On Wed, Jul 4, 2018 at 9:13 AM, Jason Ekstrand  wrote:
> Many fragment shaders do a discard using relatively little information
> but still put the discard fairly far down in the shader for no good
> reason.  If the discard is moved higher up, we can possibly avoid doing
> some or almost all of the work in the shader.  When this lets us skip
> texturing operations, it's an especially high win.
>
> One of the biggest offenders here is DXVK.  The D3D APIs have different
> rules for discards than OpenGL and Vulkan.  One effective way (which is
> what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
> wait until the very end of the shader to discard.  This ends up in the
> pessimal case where we always do all of the work before discarding.
> This pass helps some DXVK shaders significantly.
> ---
>  src/compiler/Makefile.sources  |   1 +
>  src/compiler/nir/meson.build   |   1 +
>  src/compiler/nir/nir.h |  10 +
>  src/compiler/nir/nir_opt_discard.c | 404 +
>  4 files changed, 416 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_discard.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 9e3fbdc2612..8600ce81281 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -271,6 +271,7 @@ NIR_FILES = \
> nir/nir_opt_cse.c \
> nir/nir_opt_dce.c \
> nir/nir_opt_dead_cf.c \
> +   nir/nir_opt_discard.c \
> nir/nir_opt_gcm.c \
> nir/nir_opt_global_to_local.c \
> nir/nir_opt_if.c \
> diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
> index 28aa8de7014..e339258bb94 100644
> --- a/src/compiler/nir/meson.build
> +++ b/src/compiler/nir/meson.build
> @@ -156,6 +156,7 @@ files_libnir = files(
>'nir_opt_cse.c',
>'nir_opt_dce.c',
>'nir_opt_dead_cf.c',
> +  'nir_opt_discard.c',
>'nir_opt_gcm.c',
>'nir_opt_global_to_local.c',
>'nir_opt_if.c',
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 92ab3a699cc..44006dac160 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
>  */
> bool vs_inputs_dual_locations;
>
> +   /**
> +* Whether or not derivatives are still a safe operation after a discard
> +* has occurred.  Optimization passes may be able to be a bit more
> +* aggressive if this is true.
> +*/
> +   bool derivatives_safe_after_discard;
> +
> unsigned max_unroll_iterations;
>  } nir_shader_compiler_options;
>
> @@ -2899,6 +2906,9 @@ bool nir_opt_dce(nir_shader *shader);
>
>  bool nir_opt_dead_cf(nir_shader *shader);
>
> +bool nir_opt_discard_if(nir_shader *shader);
> +bool nir_opt_move_discards_to_top(nir_shader *shader);
> +
>  bool nir_opt_gcm(nir_shader *shader, bool value_number);
>
>  bool nir_opt_if(nir_shader *shader);
> diff --git a/src/compiler/nir/nir_opt_discard.c 
> b/src/compiler/nir/nir_opt_discard.c
> new file mode 100644
> index 000..eb1a8296282
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_discard.c
> @@ -0,0 +1,404 @@
> +/*
> + * Copyright © 2014 Intel Corporation

2018?

> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *Jason Ekstrand (ja...@jlekstrand.net)
> + *
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +#include "nir_control_flow.h"
> +#include "nir_worklist.h"
> +
> +static bool
> +block_has_only_discard(nir_block *block)
> +{
> +   nir_instr *instr = nir_block_first_instr(block);
> +   if (instr == NULL || instr != nir_block_last_instr(block))
> +  return false;
> +
> +   if (instr->type != nir_instr_type_intrinsic)
> +  return false;
> +
> +   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
> +   return intrin->intrinsic == nir_intrinsic_discard;
> +}
> +

[Mesa-dev] [PATCH v2] nvc0: implement multisampled images on Maxwell+

2018-07-04 Thread Rhys Perry
Changes in v2:
- make loadSuInfo32() protected without making the rest protected
- move NVC0_SU_INFO_* into nv50_ir_lowering_nvc0.h instead of duplicating
  NVC0_SU_INFO_MS

Signed-off-by: Rhys Perry 
---
 .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 13 +
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 31 ++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h| 31 +-
 src/gallium/drivers/nouveau/nv50/nv50_resource.h   |  1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  7 -
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c|  4 +--
 6 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 209f5c67ab..c7436e2e29 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -315,6 +315,19 @@ GM107LoweringPass::handleSUQ(TexInstruction *suq)
   samples->tex.query = TXQ_TYPE;
}
 
+   if (suq->tex.target.isMS()) {
+  bld.setPosition(suq, true);
+
+  if (mask & 0x1)
+ bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(0), suq->getDef(0),
+   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(0), 
suq->tex.bindless));
+  if (mask & 0x2) {
+ int d = util_bitcount(mask & 0x1);
+ bld.mkOp2(OP_SHR, TYPE_U32, suq->getDef(d), suq->getDef(d),
+   loadSuInfo32(ind, slot, NVC0_SU_INFO_MS(1), 
suq->tex.bindless));
+  }
+   }
+
return true;
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index a2e9fcb612..71cabfa88d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1748,35 +1748,6 @@ NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off)
   mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), 
ptr);
 }
 
-/* On nvc0, surface info is obtained via the surface binding points passed
- * to the SULD/SUST instructions.
- * On nve4, surface info is stored in c[] and is used by various special
- * instructions, e.g. for clamping coordinates or generating an address.
- * They couldn't just have added an equivalent to TIC now, couldn't they ?
- */
-#define NVC0_SU_INFO_ADDR   0x00
-#define NVC0_SU_INFO_FMT0x04
-#define NVC0_SU_INFO_DIM_X  0x08
-#define NVC0_SU_INFO_PITCH  0x0c
-#define NVC0_SU_INFO_DIM_Y  0x10
-#define NVC0_SU_INFO_ARRAY  0x14
-#define NVC0_SU_INFO_DIM_Z  0x18
-#define NVC0_SU_INFO_UNK1C  0x1c
-#define NVC0_SU_INFO_WIDTH  0x20
-#define NVC0_SU_INFO_HEIGHT 0x24
-#define NVC0_SU_INFO_DEPTH  0x28
-#define NVC0_SU_INFO_TARGET 0x2c
-#define NVC0_SU_INFO_BSIZE  0x30
-#define NVC0_SU_INFO_RAW_X  0x34
-#define NVC0_SU_INFO_MS_X   0x38
-#define NVC0_SU_INFO_MS_Y   0x3c
-
-#define NVC0_SU_INFO__STRIDE 0x40
-
-#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
-#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
-#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
-
 inline Value *
 NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool 
bindless)
 {
@@ -2410,6 +2381,8 @@ 
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
 
bld.setPosition(su, false);
 
+   adjustCoordinatesMS(su);
+
// add texture handle
switch (su->op) {
case OP_SUSTP:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index b650bbd219..06b363e8fa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -23,6 +23,35 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_build_util.h"
 
+/* On nvc0, surface info is obtained via the surface binding points passed
+ * to the SULD/SUST instructions.
+ * On nve4, surface info is stored in c[] and is used by various special
+ * instructions, e.g. for clamping coordinates or generating an address.
+ * They couldn't just have added an equivalent to TIC now, couldn't they ?
+ */
+#define NVC0_SU_INFO_ADDR   0x00
+#define NVC0_SU_INFO_FMT0x04
+#define NVC0_SU_INFO_DIM_X  0x08
+#define NVC0_SU_INFO_PITCH  0x0c
+#define NVC0_SU_INFO_DIM_Y  0x10
+#define NVC0_SU_INFO_ARRAY  0x14
+#define NVC0_SU_INFO_DIM_Z  0x18
+#define NVC0_SU_INFO_UNK1C  0x1c
+#define NVC0_SU_INFO_WIDTH  0x20
+#define NVC0_SU_INFO_HEIGHT 0x24
+#define NVC0_SU_INFO_DEPTH  0x28
+#define NVC0_SU_INFO_TARGET 0x2c
+#define NVC0_SU_INFO_BSIZE  0x30
+#define NVC0_SU_INFO_RAW_X  0x34
+#define NVC0_SU_INFO_MS_X   0x38
+#define NVC0_SU_INFO_MS_Y   0x3c
+
+#define NVC0_SU_INFO__STRIDE 0x40
+
+#define NVC0_SU_INFO_DIM(i)  (0x08 + (i) * 8)
+#define NVC0_SU_INFO_SIZE(i) (0x20 + (i) * 4)
+#define NVC0_SU_INFO_MS(i)   (0x38 + (i) * 4)
+
 namespace nv50_ir {
 
 class NVC0LegalizeSSA : 

[Mesa-dev] [Bug 105139] Assertion in Dying Light

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105139

--- Comment #8 from Mike Lothian  ---
Correct, I've only ever used debug builds when debugging issues

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 107098] Segfault after munmap(kms_sw_dt->ro_mapped)

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107098

--- Comment #1 from Nubosch  ---
I can confirm this issue. Using tinydrm display with
weston 4.0.90
linux 4.17.2

KMS-DEBUG: unmapped buffer 2 (was 0x69eaa000)
KMS-DEBUG: unmapped buffer 2 (was (nil))
KMS-DEBUG: destroyed buffer 2

The process shows really weird behavior after this, like missing memory
regions.

>* adding condition on above munmap() to call it only if ro_mapped is not NULL
This was the solution I went with too, but added it for both pointers ;)

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105139] Assertion in Dying Light

2018-07-04 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105139

--- Comment #7 from Grazvydas Ignotas  ---
It seems the patches have landed, and while GL version overrides are no longer
needed, the assert still triggers for me. Perhaps you've built a non-debug
version when you tested?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv/icl: Don't set float blend optimization bit in CACHE_MODE_SS

2018-07-04 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 03/07/18 21:24, Anuj Phogat wrote:

Bump
On Fri, Jun 1, 2018 at 2:40 PM Anuj Phogat  wrote:

CACHE_MODE_SS is not listed in gfxspecs table for user mode
non-privileged registers. So, making any changes from Mesa
will do nothing. Kernel is already setting this bit in
CACHE_MODE_SS register which is saved/restored to/from
the HW context image.

Signed-off-by: Anuj Phogat 
Cc: Lionel Landwerlin 
---
  src/intel/vulkan/genX_state.c | 12 
  1 file changed, 12 deletions(-)

diff --git a/src/intel/vulkan/genX_state.c b/src/intel/vulkan/genX_state.c
index c6e54046910..06dc2d345e2 100644
--- a/src/intel/vulkan/genX_state.c
+++ b/src/intel/vulkan/genX_state.c
@@ -121,18 +121,6 @@ genX(init_device_state)(struct anv_device *device)
 }
  #endif

-#if GEN_GEN == 10 || GEN_GEN == 11
-   uint32_t cache_mode_ss;
-   anv_pack_struct(&cache_mode_ss, GENX(CACHE_MODE_SS),
-   .FloatBlendOptimizationEnable = true,
-   .FloatBlendOptimizationEnableMask = true);
-
-   anv_batch_emit(, GENX(MI_LOAD_REGISTER_IMM), lri) {
-  lri.RegisterOffset = GENX(CACHE_MODE_SS_num);
-  lri.DataDWord  = cache_mode_ss;
-   }
-#endif
-
 anv_batch_emit(, GENX(3DSTATE_AA_LINE_PARAMETERS), aa);

 anv_batch_emit(, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
--
2.17.0



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] anv/pipeline: honor the pipeline_cache_enabled run-time flag

2018-07-04 Thread Iago Toral Quiroga
---
 src/intel/vulkan/anv_pipeline_cache.c | 37 +++
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/intel/vulkan/anv_pipeline_cache.c 
b/src/intel/vulkan/anv_pipeline_cache.c
index d4c7262dc0..5825bf9f01 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -570,23 +570,26 @@ anv_device_search_for_kernel(struct anv_device *device,
}
 
 #ifdef ENABLE_SHADER_CACHE
-   struct disk_cache *disk_cache = device->instance->physicalDevice.disk_cache;
-   if (disk_cache) {
-  cache_key cache_key;
-  disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
-
-  size_t buffer_size;
-  uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
-  if (buffer) {
- struct blob_reader blob;
- blob_reader_init(&blob, buffer, buffer_size);
- bin = anv_shader_bin_create_from_blob(device, &blob);
- free(buffer);
-
- if (bin) {
-if (cache)
-   anv_pipeline_cache_add_shader_bin(cache, bin);
-return bin;
+   if (device->instance->pipeline_cache_enabled) {
+  struct disk_cache *disk_cache =
+ device->instance->physicalDevice.disk_cache;
+  if (disk_cache) {
+ cache_key cache_key;
+ disk_cache_compute_key(disk_cache, key_data, key_size, cache_key);
+
+ size_t buffer_size;
+ uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
+ if (buffer) {
+struct blob_reader blob;
+blob_reader_init(&blob, buffer, buffer_size);
+bin = anv_shader_bin_create_from_blob(device, &blob);
+free(buffer);
+
+if (bin) {
+   if (cache)
+  anv_pipeline_cache_add_shader_bin(cache, bin);
+   return bin;
+}
  }
   }
}
-- 
2.14.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] spirv: implement support for OpAtomicLoad for image pointers

2018-07-04 Thread Samuel Iglesias Gonsálvez
Signed-off-by: Samuel Iglesias Gonsálvez 
---
 src/compiler/spirv/spirv_to_nir.c | 33 +++
 1 file changed, 33 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index e8ab48012f5..dc00d853c39 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2480,6 +2480,39 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
   return;
}
 
+   if (opcode == SpvOpAtomicLoad) {
+  struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
+  struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
+
+  unsigned dest_components = nir_intrinsic_dest_components(intrin);
+  if (intrin->intrinsic == nir_intrinsic_image_deref_size) {
+ dest_components = intrin->num_components =
+glsl_get_vector_elements(type->type);
+  }
+
+  nir_ssa_dest_init(>instr, >dest,
+dest_components, 32, NULL);
+
+  nir_builder_instr_insert(>nb, >instr);
+
+  /* Copy the vec4 value and get first component */
+  nir_ssa_dest_init(>instr, >dest,
+dest_components, 32, NULL);
+
+  nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_imov);
+  nir_ssa_dest_init(>instr, >dest.dest, 1,
+32, NULL);
+  mov->dest.write_mask = (1 << 1) - 1;
+  mov->src[0].src = nir_src_for_ssa(>dest.ssa);
+  mov->src[0].swizzle[0] = 0;
+  nir_builder_instr_insert(>nb, >instr);
+
+  val->ssa = vtn_create_ssa_value(b, type->type);
+  val->ssa->def = >dest.dest.ssa;
+
+  return;
+   }
+
if (opcode != SpvOpImageWrite) {
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] spirv: implement support for OpAtomicStore for image pointers

2018-07-04 Thread Samuel Iglesias Gonsálvez
Signed-off-by: Samuel Iglesias Gonsálvez 
---
 src/compiler/spirv/spirv_to_nir.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index fb4211193fb..e8ab48012f5 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -2462,6 +2462,24 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
   vtn_fail("Invalid image opcode");
}
 
+   if (opcode == SpvOpAtomicStore) {
+  /* The image value to store is always 4 components but we may not have 
that
+   * many.  Swizzle to compensate.
+   */
+  nir_ssa_def *val = vtn_ssa_value(b, w[4])->def;
+  const struct glsl_type *val_type = vtn_ssa_value(b, w[4])->type;
+  struct vtn_type *mem_type = vtn_value(b, w[1], 
vtn_value_type_image_pointer)->type->image;
+  unsigned swiz[4] = {0, 0, 0, 0};
+
+  assert(mem_type->type == val_type);
+
+  /* For nir_intrinsic_image_var_store, the value to write is a vec4 */
+  intrin->src[3] = nir_src_for_ssa(nir_swizzle(>nb, val,
+   swiz, 4, false));
+  nir_builder_instr_insert(>nb, >instr);
+  return;
+   }
+
if (opcode != SpvOpImageWrite) {
   struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
   struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] intel/compiler: add an optimization pass for booleans

2018-07-04 Thread Iago Toral
On Tue, 2018-07-03 at 18:45 -0700, Caio Marcelo de Oliveira Filho
wrote:
> Hi,
> 
> 
> > +   /* Look for any follow-up instructions that sources from the
> > boolean
> > +* result of the producer instruction and rewrite them to use
> > the correct
> > +* bit-size.
> > +*/
> > +   foreach_inst_in_block_starting_from(fs_inst, fixup_inst, inst)
> > {
> > +  if (!inst_supports_boolean(fixup_inst))
> > + continue;
> > +
> > +  /* For MOV instructions we can always rewrite the boolean
> > source
> > +   * if the instruction reads the same region we produced in
> > the
> > +   * 32-bit conversion.
> > +   */
> > +  if (fixup_inst->opcode == BRW_OPCODE_MOV &&
> > +  region_match(inst->dst, inst->size_written,
> > +   fixup_inst->src[0], fixup_inst-
> > >size_read(0))) {
> > + if (propagate_from_source) {
> > +fixup_inst->src[0].file = inst->src[0].file;
> > +fixup_inst->src[0].nr = inst->src[0].nr;
> > + }
> > + fixup_inst->src[0] =
> > +fix_bool_reg_bit_size(fixup_inst->src[0], bit_size);
> > + progress = true;
> > + continue;
> > +  }
> 
> It seems the rest of the code assumes that instruction is not MOV, so
> you would need to ensure continue is called regardless of the region
> match.

Right, although if the region doesn't match the rest of the code won't
do anything anyway.

> Idea: it seems we could just remove this section above (handling
> MOV),
> and slightly change the section below so that MOV can be dealt with
> it
> too.
> 
> - Drop the section above;
> - Rename progress_logical to local_progress;
> - Add a "fixup_inst->opcode == BRW_OPCODE_MOV" to the

The recursive call executes for logical instructions, not for MOV, so
this should be !=.

>   condition that controls the recursive call;
> - Update comments accordingly.

Sounds like a good idea, thanks for the feedback.

Iago

> 
> > +
> > +  /* For logical instructions we have the same restriction as
> > for MOVs,
> > +   * and we also need to:
> > +   *
> > +   * 1. Propagate the bit-size to the boolean destination of
> > the
> > +   *instruction.
> > +   * 2. Rewrite any instruction that reads the destination to
> > use
> > +   *the new bit-size.
> > +   *
> > +   * However, we can only do these if we can rewrite all the
> > operands
> > +   * to use the same bit-size.
> > +   */
> > +  bool progress_logical = false;
> > +  bool same_bit_size = true;
> > +  for (unsigned i = 0; i < fixup_inst->sources; i++) {
> > + if (region_match(inst->dst, inst->size_written,
> > +  fixup_inst->src[i], fixup_inst-
> > >size_read(i))) {
> > +if (propagate_from_source) {
> > +   fixup_inst->src[i].file = inst->src[0].file;
> > +   fixup_inst->src[i].nr = inst->src[0].nr;
> > +}
> > +fixup_inst->src[i] =
> > +   fix_bool_reg_bit_size(fixup_inst->src[i],
> > bit_size);
> > +progress_logical = true;
> > +progress = true;
> > + }
> > +
> > + if (i > 0 &&
> > + type_sz(fixup_inst->src[i].type) !=
> > + type_sz(fixup_inst->src[i - 1].type)) {
> > +same_bit_size = false;
> > + }
> > +  }
> > +
> > +  /* If we have successfully rewritten a logical instruction
> > operand
> > +   * to use a smaller bit-size boolean and all the operands in
> > the
> > +   * instruction have the same small bit-size, then propagate
> > the
> > +   * new bit-size to the destination boolean and do the same
> > for all
> > +   * follow-up instructions that read from it.
> > +   */
> > +  if (progress_logical && same_bit_size) {
> > + fixup_inst->dst = retype(fixup_inst->dst, fixup_inst-
> > >src[0].type);
> > + propagate_bool_bit_size(fixup_inst, false);
> > +  }
> > +   }
> > +
> > +   return progress;
> > +}
> 
> 
> 
> 
> Thanks,
> Caio
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/sb: fix crash in fold_alu_op3

2018-07-04 Thread Dave Airlie
On 4 July 2018 at 12:44,   wrote:
> From: Roland Scheidegger 
>
> fold_assoc() called from fold_alu_op3() can lower the number of src to 2,
> which then leads to an invalid access to n.src[2]->gvalue().
> This didn't seem to have caused much harm in the past, but on Fedora 28
> it will crash (presumably because -D_GLIBCXX_ASSERTIONS is used, although
> with libstdc++ 4.8.5 this didn't do anything, -D_GLIBCXX_DEBUG was
> needed to show the issue).
>
> An alternative fix would be to instead call fold_alu_op2() from within
> fold_assoc() when the number of src is reduced and return always TRUE
> from fold_assoc() in this case, with the only actual difference being
> the return value from fold_alu_op3() then. I'm not sure what the return
> value actually should be in this case (or whether it even can make a
> difference).
>
> https://bugs.freedesktop.org/show_bug.cgi?id=106928
> Cc: mesa-sta...@lists.freedesktop.org

Reviewed-by: Dave Airlie 

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] ac: add reusable helpers for direct LLVM compilation

2018-07-04 Thread Dave Airlie
On 4 July 2018 at 16:02, Marek Olšák  wrote:
> From: Marek Olšák 
>
> This is basically LLVMTargetMachineEmitToMemoryBuffer inlined and reworked.
>
> struct ac_compiler_passes (opaque type) contains the main pass manager.
>
> ac_create_llvm_passes -- the result can go to thread local storage
> ac_destroy_llvm_passes -- can be called by a destructor in TLS
> ac_compile_module_to_binary -- from LLVMModuleRef to ac_shader_binary
>
> The motivation is to do the expensive call addPassesToEmitFile once
> per context or thread.

Nice, I'll go rebase myself on this when you land it, this looks
pretty good to me.

For the series:
Reviewed-by: Dave Airlie 


> ---
>  src/amd/common/ac_binary.h|  8 
>  src/amd/common/ac_llvm_helper.cpp | 63 +--
>  src/amd/common/ac_llvm_util.h |  9 +
>  3 files changed, 76 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h
> index 4bd86b939f1..735e3932055 100644
> --- a/src/amd/common/ac_binary.h
> +++ b/src/amd/common/ac_binary.h
> @@ -20,20 +20,24 @@
>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
> THE
>   * SOFTWARE.
>   */
>
>  #ifndef AC_BINARY_H
>  #define AC_BINARY_H
>
>  #include 
>  #include 
>
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
>  struct ac_shader_reloc {
> char name[32];
> uint64_t offset;
>  };
>
>  struct ac_shader_binary {
> unsigned code_size;
> unsigned config_size;
> /** The number of bytes of config information for each global symbol.
>  */
> @@ -91,11 +95,15 @@ bool ac_elf_read(const char *elf_data, unsigned elf_size,
>  const unsigned char *ac_shader_binary_config_start(
> const struct ac_shader_binary *binary,
> uint64_t symbol_offset);
>
>  void ac_shader_binary_read_config(struct ac_shader_binary *binary,
>   struct ac_shader_config *conf,
>   unsigned symbol_offset,
>   bool supports_spill);
>  void ac_shader_binary_clean(struct ac_shader_binary *b);
>
> +#ifdef __cplusplus
> +}
> +#endif
> +
>  #endif /* AC_BINARY_H */
> diff --git a/src/amd/common/ac_llvm_helper.cpp 
> b/src/amd/common/ac_llvm_helper.cpp
> index d4eaaffa124..a1358755a58 100644
> --- a/src/amd/common/ac_llvm_helper.cpp
> +++ b/src/amd/common/ac_llvm_helper.cpp
> @@ -22,29 +22,33 @@
>   * of the Software.
>   *
>   */
>
>  /* based on Marek's patch to lp_bld_misc.cpp */
>
>  // Workaround http://llvm.org/PR23628
>  #pragma push_macro("DEBUG")
>  #undef DEBUG
>
> +#include "ac_binary.h"
>  #include "ac_llvm_util.h"
> +
>  #include 
> -#include 
> -#include 
> -#include 
> -#include 
> +#include 
>  #include 
>  #include 
>
> +#include 
> +#if HAVE_LLVM < 0x0700
> +#include "llvm/Support/raw_ostream.h"
> +#endif
> +
>  void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
>  {
> llvm::Argument *A = llvm::unwrap(val);
> A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), 
> bytes));
>  }
>
>  bool ac_is_sgpr_param(LLVMValueRef arg)
>  {
> llvm::Argument *A = llvm::unwrap(arg);
> llvm::AttributeList AS = A->getParent()->getAttributes();
> @@ -99,10 +103,61 @@ LLVMTargetLibraryInfoRef
>  ac_create_target_library_info(const char *triple)
>  {
> return reinterpret_cast(new 
> llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
>  }
>
>  void
>  ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
>  {
> delete reinterpret_cast(library_info);
>  }
> +
> +/* The LLVM compiler is represented as a pass manager containing passes for
> + * optimizations, instruction selection, and code generation.
> + */
> +struct ac_compiler_passes {
> +   ac_compiler_passes(): ostream(code_string) {}
> +
> +   llvm::SmallString<0> code_string;  /* ELF shader binary */
> +   llvm::raw_svector_ostream ostream; /* stream for appending data to 
> the binary */
> +   llvm::legacy::PassManager passmgr; /* list of passes */
> +};
> +
> +struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
> +{
> +   struct ac_compiler_passes *p = new ac_compiler_passes();
> +   if (!p)
> +   return NULL;
> +
> +   llvm::TargetMachine *TM = reinterpret_cast(tm);
> +
> +   if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
> +#if HAVE_LLVM >= 0x0700
> +   nullptr,
> +#endif
> +   llvm::TargetMachine::CGFT_ObjectFile)) {
> +   fprintf(stderr, "amd: TargetMachine can't emit a file of this 
> type!\n");
> +   delete p;
> +   return NULL;
> +   }
> +   return p;
> +}
> +
> +void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
> +{
> +   delete p;
> +}
> +
> +/* This returns false on failure. */
> +bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef 
> 

[Mesa-dev] radv variable pointers (nir/spirv/radv)

2018-07-04 Thread Dave Airlie
This is a bit hacky, (esp not sure about the nir bits).

When we enable variable pointers on radv we don't want to do the
offset lowering stuff, so this fixes up all the fallout from enabling
things.

The big issue is a select followed by a load, since the select
can't have a variable pointer in its return, the next load_deref
falls over in lots of places due to lack of var, so I fixed up
a bunch of nir to avoid crashing, and fixed spirv to handle
the non lowering offset case.

This passes ./deqp-vk --deqp-case=dEQP-VK.*variable_pointers*
for me on an rx550.

Dave.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radv: enable variablePointers.

2018-07-04 Thread Dave Airlie
From: Dave Airlie 

---
 src/amd/common/ac_llvm_build.c  |  3 +++
 src/amd/common/ac_nir_to_llvm.c | 33 +++--
 src/amd/vulkan/radv_device.c|  2 +-
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 93ae273f900..c42c907e936 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -210,6 +210,9 @@ static LLVMTypeRef to_integer_type_scalar(struct 
ac_llvm_context *ctx, LLVMTypeR
 LLVMTypeRef
 ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
 {
+   if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
+   return t;
+   }
if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
LLVMTypeRef elem_type = LLVMGetElementType(t);
return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 83d8b9a4425..49d9e79fe9c 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1739,23 +1739,33 @@ static LLVMValueRef visit_load_var(struct 
ac_nir_context *ctx,
nir_variable *var = 
nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
 
LLVMValueRef values[8];
-   int idx = var->data.driver_location;
+   int idx = 0;
int ve = instr->dest.ssa.num_components;
-   unsigned comp = var->data.location_frac;
+   unsigned comp = 0;
LLVMValueRef indir_index;
LLVMValueRef ret;
unsigned const_index;
-   unsigned stride = var->data.compact ? 1 : 4;
-   bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
-var->data.mode == nir_var_shader_in;
-
-   get_deref_offset(ctx, 
nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL,
-_index, _index);
-
+   unsigned stride = 4;
+   bool vs_in = false;
+   int mode = nir_var_shared;
+
+   /* if we don't have a var we are getting a deref into shared memory */
+   if (var) {
+   if (var->data.compact)
+   stride = 1;
+   comp = var->data.location_frac;
+   idx = var->data.driver_location;
+   mode = var->data.mode;
+   vs_in = ctx->stage == MESA_SHADER_VERTEX &&
+   var->data.mode == nir_var_shader_in;
+
+   get_deref_offset(ctx, 
nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL,
+_index, _index);
+   }
if (instr->dest.ssa.bit_size == 64)
ve *= 2;
 
-   switch (var->data.mode) {
+   switch (mode) {
case nir_var_shader_in:
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
ctx->stage == MESA_SHADER_TESS_EVAL) {
@@ -3592,6 +3602,9 @@ static void visit_deref(struct ac_nir_context *ctx,
result = ac_build_gep0(>ac, get_src(ctx, instr->parent),
   get_src(ctx, instr->arr.index));
break;
+   case nir_deref_type_cast:
+   result = get_src(ctx, instr->parent);
+   break;
default:
unreachable("Unhandled deref_instr deref type");
}
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 73c48cef1f0..7e10f1ba21e 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -745,7 +745,7 @@ void radv_GetPhysicalDeviceFeatures2(
case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
VkPhysicalDeviceVariablePointerFeaturesKHR *features = 
(void *)ext;
features->variablePointersStorageBuffer = true;
-   features->variablePointers = false;
+   features->variablePointers = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR: {
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] nir: handle no variable in derefs in some places

2018-07-04 Thread Dave Airlie
From: Dave Airlie 

---
 src/compiler/nir/nir_gather_info.c   | 2 ++
 src/compiler/nir/nir_lower_indirect_derefs.c | 4 
 src/compiler/nir/nir_lower_vars_to_ssa.c | 4 
 3 files changed, 10 insertions(+)

diff --git a/src/compiler/nir/nir_gather_info.c 
b/src/compiler/nir/nir_gather_info.c
index 2b431e343e9..4bbdd967c2b 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -233,6 +233,8 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, 
nir_shader *shader,
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
 
+  if (!var)
+ break;
   if (var->data.mode == nir_var_shader_in ||
   var->data.mode == nir_var_shader_out) {
  bool is_output_read = false;
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c 
b/src/compiler/nir/nir_lower_indirect_derefs.c
index d85c1704222..be39e1098ed 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -131,6 +131,8 @@ lower_indirect_derefs_block(nir_block *block, nir_builder 
*b,
  continue;
 
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+  if (!deref)
+ continue;
 
   /* Walk the deref chain back to the base and look for indirects */
   bool has_indirect = false;
@@ -141,6 +143,8 @@ lower_indirect_derefs_block(nir_block *block, nir_builder 
*b,
 has_indirect = true;
 
  base = nir_deref_instr_parent(base);
+ if (!base)
+break;
   }
 
   if (!has_indirect)
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c 
b/src/compiler/nir/nir_lower_vars_to_ssa.c
index 3f37acaed33..dcef9b8e221 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -142,6 +142,8 @@ static struct deref_node *
 get_deref_node_recur(nir_deref_instr *deref,
  struct lower_variables_state *state)
 {
+   if (!deref)
+  return NULL;
if (deref->deref_type == nir_deref_type_var)
   return get_deref_node_for_var(deref->var, state);
 
@@ -198,6 +200,8 @@ get_deref_node_recur(nir_deref_instr *deref,
 
   return parent->wildcard;
 
+   case nir_deref_type_cast:
+  return NULL;
default:
   unreachable("Invalid deref type");
}
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] spirv/vtn: handle variable pointers without offset lowering

2018-07-04 Thread Dave Airlie
From: Dave Airlie 

---
 src/compiler/spirv/spirv_to_nir.c  | 15 ---
 src/compiler/spirv/vtn_variables.c |  3 ++-
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index fb4211193fb..f8c11aaf929 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1189,17 +1189,18 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
  val->type->type = glsl_uint_type();
   }
 
-  if (storage_class == SpvStorageClassWorkgroup &&
-  b->options->lower_workgroup_access_to_offsets) {
- uint32_t size, align;
- val->type->deref = vtn_type_layout_std430(b, val->type->deref,
-   , );
- val->type->length = size;
- val->type->align = align;
+  if (storage_class == SpvStorageClassWorkgroup) {
  /* These can actually be stored to nir_variables and used as SSA
   * values so they need a real glsl_type.
   */
  val->type->type = glsl_uint_type();
+ if (b->options->lower_workgroup_access_to_offsets) {
+uint32_t size, align;
+val->type->deref = vtn_type_layout_std430(b, val->type->deref,
+  , );
+val->type->length = size;
+val->type->align = align;
+ }
   }
   break;
}
diff --git a/src/compiler/spirv/vtn_variables.c 
b/src/compiler/spirv/vtn_variables.c
index b7c9e6f2f70..cfefe804897 100644
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1581,7 +1581,8 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def 
*ssa,
   vtn_assert(ssa->num_components == 2);
   ptr->block_index = nir_channel(>nb, ssa, 0);
   ptr->offset = nir_channel(>nb, ssa, 1);
-   } else if (ptr->mode == vtn_variable_mode_workgroup ||
+   } else if ((ptr->mode == vtn_variable_mode_workgroup &&
+  b->options->lower_workgroup_access_to_offsets) ||
   ptr->mode == vtn_variable_mode_push_constant) {
   /* This pointer type needs to have actual storage */
   vtn_assert(ptr_type->type);
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] intel/nir: Enable nir_opt_move_discards_to_top

2018-07-04 Thread Jason Ekstrand
According to RenderDoc, this shaves about 15% off of one of the render
passes in the Skyrim SE frame I've been analyzing on my KBL laptop and
about 8% off the whole frame.  It's a bit noisy and hard to tell with
just the DXVK HUD but there appears to be a small FPS bump in-game as
well.
---
 src/intel/compiler/brw_compiler.c | 3 ++-
 src/intel/compiler/brw_nir.c  | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index 6480dbefbf6..86bd4c6bac6 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -46,7 +46,8 @@
.native_integers = true,   \
.use_interpolated_input_intrinsics = true, \
.vertex_id_zero_based = true,  \
-   .lower_base_vertex = true
+   .lower_base_vertex = true, \
+   .derivatives_safe_after_discard = true
 
 #define COMMON_SCALAR_OPTIONS \
.lower_pack_half_2x16 = true,  \
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index f4aee3d41ee..decf7e3f57b 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -664,6 +664,9 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
nir = brw_nir_optimize(nir, compiler, is_scalar);
 
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+  OPT(nir_opt_move_discards_to_top);
+
/* This needs to be run after the first optimization pass but before we
 * lower indirect derefs away
 */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nir: Add a discard optimization pass

2018-07-04 Thread Jason Ekstrand
Many fragment shaders do a discard using relatively little information
but still put the discard fairly far down in the shader for no good
reason.  If the discard is moved higher up, we can possibly avoid doing
some or almost all of the work in the shader.  When this lets us skip
texturing operations, it's an especially high win.

One of the biggest offenders here is DXVK.  The D3D APIs have different
rules for discards than OpenGL and Vulkan.  One effective way (which is
what DXVK uses) to implement DX behavior on top of GL or Vulkan is to
wait until the very end of the shader to discard.  This ends up in the
pessimal case where we always do all of the work before discarding.
This pass helps some DXVK shaders significantly.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/meson.build   |   1 +
 src/compiler/nir/nir.h |  10 +
 src/compiler/nir/nir_opt_discard.c | 404 +
 4 files changed, 416 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_discard.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 9e3fbdc2612..8600ce81281 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -271,6 +271,7 @@ NIR_FILES = \
nir/nir_opt_cse.c \
nir/nir_opt_dce.c \
nir/nir_opt_dead_cf.c \
+   nir/nir_opt_discard.c \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_if.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 28aa8de7014..e339258bb94 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -156,6 +156,7 @@ files_libnir = files(
   'nir_opt_cse.c',
   'nir_opt_dce.c',
   'nir_opt_dead_cf.c',
+  'nir_opt_discard.c',
   'nir_opt_gcm.c',
   'nir_opt_global_to_local.c',
   'nir_opt_if.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 92ab3a699cc..44006dac160 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2022,6 +2022,13 @@ typedef struct nir_shader_compiler_options {
 */
bool vs_inputs_dual_locations;
 
+   /**
+* Whether or not derivatives are still a safe operation after a discard
+* has occurred.  Optimization passes may be able to be a bit more
+* aggressive if this is true.
+*/
+   bool derivatives_safe_after_discard;
+
unsigned max_unroll_iterations;
 } nir_shader_compiler_options;
 
@@ -2899,6 +2906,9 @@ bool nir_opt_dce(nir_shader *shader);
 
 bool nir_opt_dead_cf(nir_shader *shader);
 
+bool nir_opt_discard_if(nir_shader *shader);
+bool nir_opt_move_discards_to_top(nir_shader *shader);
+
 bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
 bool nir_opt_if(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_discard.c 
b/src/compiler/nir/nir_opt_discard.c
new file mode 100644
index 00000000000..eb1a8296282
--- /dev/null
+++ b/src/compiler/nir/nir_opt_discard.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *Jason Ekstrand (ja...@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_control_flow.h"
+#include "nir_worklist.h"
+
+static bool
+block_has_only_discard(nir_block *block)
+{
+   nir_instr *instr = nir_block_first_instr(block);
+   if (instr == NULL || instr != nir_block_last_instr(block))
+  return false;
+
+   if (instr->type != nir_instr_type_intrinsic)
+  return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   return intrin->intrinsic == nir_intrinsic_discard;
+}
+
+static bool
+opt_discard_if_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+  nir_if *nif = nir_block_get_following_if(block);
+  if (!nif)
+ continue;
+
+  bool discard_in_then;

[Mesa-dev] [PATCH 1/3] nir/worklist: Rework the foreach macro

2018-07-04 Thread Jason Ekstrand
This makes the arguments match the (thing, container) pattern used in
other nir_foreach macros and also renames it to make that a bit more
clear.
---
 src/compiler/nir/nir_opt_dce.c  | 3 +--
 src/compiler/nir/nir_worklist.h | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c
index c9b338862e6..70532be33d7 100644
--- a/src/compiler/nir/nir_opt_dce.c
+++ b/src/compiler/nir/nir_opt_dce.c
@@ -129,8 +129,7 @@ nir_opt_dce_impl(nir_function_impl *impl)
   init_block(block, worklist);
}
 
-   nir_instr *instr = NULL;
-   nir_instr_worklist_foreach(worklist, instr)
+   nir_foreach_instr_in_worklist(instr, worklist)
   nir_foreach_src(instr, mark_live_cb, worklist);
 
nir_instr_worklist_destroy(worklist);
diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
index 3fb391fceff..05aa757eb79 100644
--- a/src/compiler/nir/nir_worklist.h
+++ b/src/compiler/nir/nir_worklist.h
@@ -154,8 +154,8 @@ nir_instr_worklist_pop_head(nir_instr_worklist *wl)
return *vec_instr;
 }
 
-#define nir_instr_worklist_foreach(wl, instr)\
-   while ((instr = nir_instr_worklist_pop_head(wl)))
+#define nir_foreach_instr_in_worklist(instr, wl) \
+   for (nir_instr *instr; (instr = nir_instr_worklist_pop_head(wl));)
 
 #ifdef __cplusplus
 } /* extern "C" */
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] radeonsi: use ac_compile_module_to_binary to reduce compile times

2018-07-04 Thread Marek Olšák
From: Marek Olšák 

Compile times of simple shaders are reduced by ~20%.
Compile times of prologs and epilogs are reduced by up to 40%.
---
 src/gallium/drivers/radeonsi/si_pipe.c|  2 ++
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 33 ++-
 2 files changed, 4 insertions(+), 31 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 740533fd7a0..ac4f77a8964 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -109,24 +109,26 @@ static void si_init_compiler(struct si_screen *sscreen,
 {
enum ac_target_machine_options tm_options =
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
(!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0);
 
ac_init_llvm_once();
ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
+   compiler->passes = ac_create_llvm_passes(compiler->tm);
 }
 
 static void si_destroy_compiler(struct ac_llvm_compiler *compiler)
 {
+   ac_destroy_llvm_passes(compiler->passes);
ac_destroy_llvm_compiler(compiler);
 }
 
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
struct si_context *sctx = (struct si_context *)context;
int i;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 566d3a8eb6e..6c1e18ff812 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -78,63 +78,34 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, 
void *context)
 /**
  * Compile an LLVM module to machine code.
  *
  * @returns 0 for success, 1 for failure
  */
 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 struct ac_llvm_compiler *compiler,
 struct pipe_debug_callback *debug)
 {
struct si_llvm_diagnostics diag;
-   char *err;
LLVMContextRef llvm_ctx;
-   LLVMMemoryBufferRef out_buffer;
-   unsigned buffer_size;
-   const char *buffer_data;
-   LLVMBool mem_err;
 
diag.debug = debug;
diag.retval = 0;
 
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
 
LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 
-   /* Compile IR*/
-   mem_err = LLVMTargetMachineEmitToMemoryBuffer(compiler->tm, M,
- LLVMObjectFile, &err,
- &out_buffer);
-
-   /* Process Errors/Warnings */
-   if (mem_err) {
-   fprintf(stderr, "%s: %s", __FUNCTION__, err);
-   pipe_debug_message(debug, SHADER_INFO,
-  "LLVM emit error: %s", err);
-   FREE(err);
+   /* Compile IR. */
+   if (!ac_compile_module_to_binary(compiler->passes, M, binary))
diag.retval = 1;
-   goto out;
-   }
-
-   /* Extract Shader Code*/
-   buffer_size = LLVMGetBufferSize(out_buffer);
-   buffer_data = LLVMGetBufferStart(out_buffer);
-
-   if (!ac_elf_read(buffer_data, buffer_size, binary)) {
-   fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
-   diag.retval = 1;
-   }
-
-   /* Clean up */
-   LLVMDisposeMemoryBuffer(out_buffer);
 
-out:
if (diag.retval != 0)
pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
return diag.retval;
 }
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
  enum tgsi_opcode_type type)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] radeonsi: reorder code in si_llvm_context_init

2018-07-04 Thread Marek Olšák
From: Marek Olšák 

---
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 26 +--
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 6c1e18ff812..b8cfd15a67f 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -949,35 +949,35 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 
/* Initialize the gallivm object:
 * We are only using the module, context, and builder fields of this 
struct.
 * This should be enough for us to be able to pass our gallivm struct 
to the
 * helper functions in the gallivm module.
 */
memset(ctx, 0, sizeof(*ctx));
ctx->screen = sscreen;
ctx->compiler = compiler;
 
-   ctx->gallivm.context = LLVMContextCreate();
-   ctx->gallivm.module = ac_create_module(compiler->tm, 
ctx->gallivm.context);
-
-   bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
-   enum ac_float_mode float_mode =
-   unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
-   AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
+   ctx->ac.context = LLVMContextCreate();
+   ac_llvm_context_init(&ctx->ac, ctx->ac.context,
+sscreen->info.chip_class, sscreen->info.family);
 
-   ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
-float_mode);
+   ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);
 
-   ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
-sscreen->info.chip_class, sscreen->info.family);
-   ctx->ac.module = ctx->gallivm.module;
-   ctx->ac.builder = ctx->gallivm.builder;
+   enum ac_float_mode float_mode =
+   sscreen->debug_flags & DBG(UNSAFE_MATH) ?
+   AC_FLOAT_MODE_UNSAFE_FP_MATH :
+   AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
+   ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode);
+
+   ctx->gallivm.context = ctx->ac.context;
+   ctx->gallivm.module = ctx->ac.module;
+   ctx->gallivm.builder = ctx->ac.builder;
 
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 
type.floating = true;
type.fixed = false;
type.sign = true;
type.norm = false;
type.width = 32;
type.length = 1;
 
-- 
2.17.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] ac: fold LLVMContext creation into ac_llvm_context_init

2018-07-04 Thread Marek Olšák
From: Marek Olšák 

---
 src/amd/common/ac_llvm_build.c  |  6 +++---
 src/amd/common/ac_llvm_build.h  |  2 +-
 src/amd/vulkan/radv_nir_to_llvm.c   | 10 --
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c |  5 +
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index d2ada00255a..878c965ceb8 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -50,29 +50,29 @@ struct ac_llvm_flow {
/* Loop exit or next part of if/else/endif. */
LLVMBasicBlockRef next_block;
LLVMBasicBlockRef loop_entry_block;
 };
 
 /* Initialize module-independent parts of the context.
  *
  * The caller is responsible for initializing ctx::module and ctx::builder.
  */
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
 enum chip_class chip_class, enum radeon_family family)
 {
LLVMValueRef args[1];
 
+   ctx->context = LLVMContextCreate();
+
ctx->chip_class = chip_class;
ctx->family = family;
-
-   ctx->context = context;
ctx->module = NULL;
ctx->builder = NULL;
 
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 026955a5556..4e7cbcd5fa0 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -90,21 +90,21 @@ struct ac_llvm_context {
LLVMValueRef fpmath_md_2p5_ulp;
LLVMValueRef empty_md;
 
enum chip_class chip_class;
enum radeon_family family;
 
LLVMValueRef lds;
 };
 
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
 enum chip_class chip_class, enum radeon_family family);
 
 void
 ac_llvm_context_dispose(struct ac_llvm_context *ctx);
 
 int
 ac_get_llvm_num_components(LLVMValueRef value);
 
 int
 ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c 
b/src/amd/vulkan/radv_nir_to_llvm.c
index 45ac0854c17..15c10493022 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3130,24 +3130,23 @@ static
 LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *const *shaders,
int shader_count,
struct radv_shader_variant_info 
*shader_info,
const struct radv_nir_compiler_options 
*options)
 {
struct radv_shader_context ctx = {0};
unsigned i;
ctx.options = options;
ctx.shader_info = shader_info;
-   ctx.context = LLVMContextCreate();
 
-   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
-options->family);
+   ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+   ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
 
enum ac_float_mode float_mode =
options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
   AC_FLOAT_MODE_DEFAULT;
 
ctx.ac.builder = ac_create_builder(ctx.context, float_mode);
 
memset(shader_info, 0, sizeof(*shader_info));
 
@@ -3569,26 +3568,25 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
 
 void
 radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader,
struct ac_shader_binary *binary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info,
const struct radv_nir_compiler_options *options)
 {
struct radv_shader_context ctx = {0};
-   ctx.context = LLVMContextCreate();
ctx.options = options;
ctx.shader_info = shader_info;
 
-   ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
-options->family);
+   ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+   ctx.context = ctx.ac.context;
ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
 
ctx.is_gs_copy_shader = true;
 
enum ac_float_mode float_mode =
options->unsafe_math ? 

[Mesa-dev] [PATCH 1/4] ac: add reusable helpers for direct LLVM compilation

2018-07-04 Thread Marek Olšák
From: Marek Olšák 

This is basically LLVMTargetMachineEmitToMemoryBuffer inlined and reworked.

struct ac_compiler_passes (opaque type) contains the main pass manager.

ac_create_llvm_passes -- the result can go to thread local storage
ac_destroy_llvm_passes -- can be called by a destructor in TLS
ac_compile_module_to_binary -- from LLVMModuleRef to ac_shader_binary

The motivation is to do the expensive call addPassesToEmitFile once
per context or thread.
---
 src/amd/common/ac_binary.h|  8 
 src/amd/common/ac_llvm_helper.cpp | 63 +--
 src/amd/common/ac_llvm_util.h |  9 +
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h
index 4bd86b939f1..735e3932055 100644
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@@ -20,20 +20,24 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
  * SOFTWARE.
  */
 
 #ifndef AC_BINARY_H
 #define AC_BINARY_H
 
 #include 
 #include 
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 struct ac_shader_reloc {
char name[32];
uint64_t offset;
 };
 
 struct ac_shader_binary {
unsigned code_size;
unsigned config_size;
/** The number of bytes of config information for each global symbol.
 */
@@ -91,11 +95,15 @@ bool ac_elf_read(const char *elf_data, unsigned elf_size,
 const unsigned char *ac_shader_binary_config_start(
const struct ac_shader_binary *binary,
uint64_t symbol_offset);
 
 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
  struct ac_shader_config *conf,
  unsigned symbol_offset,
  bool supports_spill);
 void ac_shader_binary_clean(struct ac_shader_binary *b);
 
+#ifdef __cplusplus
+}
+#endif
+
 #endif /* AC_BINARY_H */
diff --git a/src/amd/common/ac_llvm_helper.cpp 
b/src/amd/common/ac_llvm_helper.cpp
index d4eaaffa124..a1358755a58 100644
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -22,29 +22,33 @@
  * of the Software.
  *
  */
 
 /* based on Marek's patch to lp_bld_misc.cpp */
 
 // Workaround http://llvm.org/PR23628
 #pragma push_macro("DEBUG")
 #undef DEBUG
 
+#include "ac_binary.h"
 #include "ac_llvm_util.h"
+
 #include 
-#include 
-#include 
-#include 
-#include 
+#include 
 #include 
 #include 
 
+#include 
+#if HAVE_LLVM < 0x0700
+#include "llvm/Support/raw_ostream.h"
+#endif
+
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), 
bytes));
 }
 
 bool ac_is_sgpr_param(LLVMValueRef arg)
 {
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
llvm::AttributeList AS = A->getParent()->getAttributes();
@@ -99,10 +103,61 @@ LLVMTargetLibraryInfoRef
 ac_create_target_library_info(const char *triple)
 {
return reinterpret_cast<LLVMTargetLibraryInfoRef>(new 
llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
 }
 
 void
 ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
 {
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
 }
+
+/* The LLVM compiler is represented as a pass manager containing passes for
+ * optimizations, instruction selection, and code generation.
+ */
+struct ac_compiler_passes {
+   ac_compiler_passes(): ostream(code_string) {}
+
+   llvm::SmallString<0> code_string;  /* ELF shader binary */
+   llvm::raw_svector_ostream ostream; /* stream for appending data to the 
binary */
+   llvm::legacy::PassManager passmgr; /* list of passes */
+};
+
+struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
+{
+   struct ac_compiler_passes *p = new ac_compiler_passes();
+   if (!p)
+   return NULL;
+
+   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
+
+   if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
+#if HAVE_LLVM >= 0x0700
+   nullptr,
+#endif
+   llvm::TargetMachine::CGFT_ObjectFile)) {
+   fprintf(stderr, "amd: TargetMachine can't emit a file of this 
type!\n");
+   delete p;
+   return NULL;
+   }
+   return p;
+}
+
+void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
+{
+   delete p;
+}
+
+/* This returns false on failure. */
+bool ac_compile_module_to_binary(struct ac_compiler_passes *p, LLVMModuleRef 
module,
+struct ac_shader_binary *binary)
+{
+   p->passmgr.run(*llvm::unwrap(module));
+
+   llvm::StringRef data = p->ostream.str();
+   bool success = ac_elf_read(data.data(), data.size(), binary);
+   p->code_string = ""; /* release the ELF shader binary */
+
+   if (!success)
+   fprintf(stderr, "amd: cannot read an ELF shader binary\n");
+   return success;
+}
diff --git