[Mesa-dev] [PATCH] nv50/ir: delete MINMAX instruction that is no longer in the BB

2018-11-10 Thread Ilia Mirkin
We removed the op from the BB, but it was still listed in its sources'
uses. This could trip up some logic down the line which analyzes all the
uses of an l-value, e.g. spilling.

Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 8767e5efb99..04d26dcbf53 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1898,7 +1898,7 @@ AlgebraicOpt::handleMINMAX(Instruction *minmax)
if (minmax->src(0).mod == minmax->src(1).mod) {
   if (minmax->def(0).mayReplace(minmax->src(0))) {
  minmax->def(0).replace(minmax->src(0), false);
- minmax->bb->remove(minmax);
+ delete_Instruction(prog, minmax);
   } else {
  minmax->op = OP_CVT;
  minmax->setSrc(1, NULL);
-- 
2.18.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] mapi: avoid text relocation in x86 tsd stubs

2018-11-10 Thread Jonathan Gray
Make changes similar to those in libglvnd to avoid a text relocation in
the x86 tsd stubs, fixing the build with lld.

v2:
  - store the address of the GOT in ebx, as required before calling a PLT stub
  - change .balign values to match X86_ENTRY_SIZE

Signed-off-by: Jonathan Gray 
Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108541
---
 src/mapi/entry_x86_tsd.h | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/mapi/entry_x86_tsd.h b/src/mapi/entry_x86_tsd.h
index 0c28c8ff068..1dec3ed86c4 100644
--- a/src/mapi/entry_x86_tsd.h
+++ b/src/mapi/entry_x86_tsd.h
@@ -31,25 +31,31 @@
 #define HIDDEN
 #endif
 
-#define X86_ENTRY_SIZE 32
+#define X86_ENTRY_SIZE 64
 
 __asm__(".text\n"
-".balign 32\n"
+".balign " U_STRINGIFY(X86_ENTRY_SIZE) "\n"
 "x86_entry_start:");
 
 #define STUB_ASM_ENTRY(func)\
".globl " func "\n"  \
".type " func ", @function\n"\
-   ".balign 32\n"   \
+   ".balign " U_STRINGIFY(X86_ENTRY_SIZE) "\n" \
func ":"
 
 #define STUB_ASM_CODE(slot) \
-   "movl " ENTRY_CURRENT_TABLE ", %eax\n\t" \
+   "push %ebx\n\t"  \
+   "call 1f\n"  \
+   "1:\n\t" \
+   "popl %ebx\n\t"  \
+   "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx\n\t" \
+   "movl " ENTRY_CURRENT_TABLE "@GOT(%ebx), %eax\n\t" \
+   "mov (%eax), %eax\n\t"   \
"testl %eax, %eax\n\t"   \
-   "je 1f\n\t"  \
-   "jmp *(4 * " slot ")(%eax)\n"\
+   "jne 1f\n\t" \
+   "call " ENTRY_CURRENT_TABLE_GET "@PLT\n" \
"1:\n\t" \
-   "call " ENTRY_CURRENT_TABLE_GET "\n\t" \
+   "pop %ebx\n\t"   \
"jmp *(4 * " slot ")(%eax)"
 
 #define MAPI_TMP_STUB_ASM_GCC
@@ -57,7 +63,7 @@ __asm__(".text\n"
 
 #ifndef MAPI_MODE_BRIDGE
 
-__asm__(".balign 32\n"
+__asm__(".balign " U_STRINGIFY(X86_ENTRY_SIZE) "\n"
 "x86_entry_end:");
 
 #include 
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] nir: Use nir_shader_get_entrypoint in nir_lower_clip_vs().

2018-11-10 Thread Jason Ekstrand

Rb

On November 9, 2018 20:13:44 Kenneth Graunke  wrote:


---
src/compiler/nir/nir_lower_clip.c | 6 ++
1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_lower_clip.c 
b/src/compiler/nir/nir_lower_clip.c

index 7081295a500..013645101f2 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -199,6 +199,7 @@ lower_clip_vs(nir_function_impl *impl, unsigned 
ucp_enables,

bool
nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   int clipvertex = -1;
   int position = -1;
   int maxloc = -1;
@@ -250,10 +251,7 @@ nir_lower_clip_vs(nir_shader *shader, unsigned 
ucp_enables)

  out[1] =
 create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);

-   nir_foreach_function(function, shader) {
-  if (!strcmp(function->name, "main"))
- lower_clip_vs(function->impl, ucp_enables, cv, out);
-   }
+   lower_clip_vs(impl, ucp_enables, cv, out);

   return true;
}
--
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108705] GLX with DRI2 doesn't work properly when xcb is owning the event queue

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108705

--- Comment #1 from Julien Isorce  ---
Maybe related https://bugs.freedesktop.org/show_bug.cgi?id=105132

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 105132] glxSwapBuffers has no effect

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=105132

Julien Isorce  changed:

   What|Removed |Added

 Depends on||108705


Referenced Bugs:

https://bugs.freedesktop.org/show_bug.cgi?id=108705
[Bug 108705] GLX with DRI2 doesn't work properly when xcb is owning the event
queue
-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108705] GLX with DRI2 doesn't work properly when xcb is owning the event queue

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108705

Julien Isorce  changed:

   What|Removed |Added

 Blocks||105132


Referenced Bugs:

https://bugs.freedesktop.org/show_bug.cgi?id=105132
[Bug 105132] glxSwapBuffers has no effect
-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108705] GLX with DRI2 doesn't work properly when xcb is owning the event queue

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108705

Bug ID: 108705
   Summary: GLX with DRI2 doesn't work properly when xcb is owning
the event queue
   Product: Mesa
   Version: 18.2
  Hardware: All
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: GLX
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: yshu...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

There was an email thread on this topic before [1]. To summarize, dri2 glx
relies on XESetWireToEvent to register DRI2WireToEvent as a callback, so it can
get the buffer invalidation events. If xcb is owning the queue, those callbacks
won't be called, thus things can't work.

Recently, to avoid an Xlib event handling bug, compton[2] switched to xcb event
processing, and that caused some problems for users who don't have DRI3 (for
example, nouveau doesn't have DRI3 by default). 

It would be really nice if GLX on DRI2 can work with xcb event processing.

[1]: https://lists.freedesktop.org/archives/xcb/2011-November/007337.html
[2]: https://github.com/yshui/compton

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 108581] llvmpipe crashes using kms_swrast_dri.so

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=108581

--- Comment #5 from Stephan Hilb  ---
Created attachment 142425
  --> https://bugs.freedesktop.org/attachment.cgi?id=142425=edit
helgrind log

Sadly, running with valgrind doesn't trigger the crash; `--tool=helgrind`,
however, gives some useful info on potential race conditions.
The warnings for `lp_rast.c` seem interesting.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/2] Update on thread_submit

2018-11-10 Thread Axel Davy
I just wanted to write a cover letter for this patchset to describe
a feature not many users may know about.

On d3d9, frames are supposed to be displayed in the order
they were produced, with no frame skipped.

Thus when vsync is off, the newly produced frames replace the last frame during
refresh, thus creating screen tearing (unless your compositor composites
fullscreen apps, but that's another story).

Mesa ogl backend currently should tear as well when vsync is off.


While this is not in the spec, nine allows enabling a feature
often named 'triple buffering'.
It is named that way because intuitively the idea is to have one buffer
to render to, one buffer on screen, and one buffer more recent than the buffer
on screen that is ready to go on screen at next refresh.

When the refresh is done, we replace the buffer on screen with the more
up-to-date buffer.
When a new frame is rendered, the freshly rendered buffer and the buffer that
is not on screen but could go on screen are swapped.

(Of course, you don't switch the buffer on screen if you don't have any
newer frame).

This makes it possible to render at any framerate (even above screen refresh)
without tearing.

This feature on nine is enabled with the env var (also drirc conf option)
tearfree_discard=true.


One issue though is that currently buffers are sent to the server while
rendering is not finished.

This behaviour, done by mesa ogl as well (both X and Wayland),
is probably done because:
. This helps reduce compositor lag.
  The compositing operation is scheduled ahead in the gpu pipeline.
. As soon as the application updates its window, it can fetch the content via
  some API, or rely on some window message communication to occur.
  You need thus to send the updated buffer when ogl swapbuffer() is called.

When fullscreen and not composited, however, we run into the following issue:
It is possible the buffer gets planned for a pageflip (replace current buffer
on screen), but it is not finished rendering when the pageflip occurs.
Thus the pageflip fails and the previous buffer stays on screen.

This can make a game's fps feel lower on screen than what it is rendering at,
and adds lag.


Fortunately for nine, we can reasonably assume the application won't read the
window content just after presenting it with d3d (in game screenshots are
implemented by looking at the rendering buffer, not the window content,
and beside that, which app would want to access that content ?).

In order to support DRI_PRIME systems without artefacts, before the kernel
supported dma-buf synchronization, nine added support for thread_submit drirc
env var, which basically uses a thread to delay sending the last rendered
buffer to the X server until it is finished rendering.
This feature can be used without DRI_PRIME as well.

This patchset makes it possible to use thread_submit=true with tearfree_discard=true,
thus enabling 'triple buffering' without the mentioned issue.


Another solution of course is to use vsync, but some games' dynamics
can work better without (because cpu time gets eaten waiting vsync).


With vsync, some users apparently have issues with
pageflip getting missed, and in that case thread_submit=true can be used
to increase smoothness.




We thus recommend the following configurations (assuming fullscreen and
not composited):

Game with vsync:
Use thread_submit=true if your graphic card rendering rate is close or
slightly above the screen refresh rate (you are more likely to have
missed pageflips).
You can also enable it always; it shouldn't hurt.

Game without vsync:
If you don't mind tearing, nothing particular.
If you mind tearing, thread_submit=true tearfree_discard=true


tearfree_discard=true doesn't impact vsync, and both env vars
don't affect mesa ogl, thus you can just set
export tearfree_discard=true
export thread_submit=true
 in your
~/.bashrc to have them always on.


Note: if you don't see tearing with vsync off and without
these options, it means you get composited.
Getting composited means a small perf impact
and possibly a small lag.
Either disable composition manually (alt+shift+f12 on kwin
for example) when needed, or use a wine patch like this one:
https://github.com/ValveSoftware/wine/commit/141ba5cf73029029a5a0bd2cdcfd5f9f9ab7ee7b


Axel Davy (2):
  st/nine: Allow 'triple buffering' with thread_submit
  st/nine: Remove thread_submit warning

 src/gallium/state_trackers/nine/swapchain9.c | 66 +++-
 src/gallium/state_trackers/nine/swapchain9.h |  1 +
 src/gallium/targets/d3dadapter9/drm.c|  3 -
 3 files changed, 50 insertions(+), 20 deletions(-)

-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/nine: Remove thread_submit warning

2018-11-10 Thread Axel Davy
thread_submit can be useful even without DRI_PRIME,
as it can help avoid missed pageflips.

Signed-off-by: Axel Davy 
---
 src/gallium/targets/d3dadapter9/drm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/targets/d3dadapter9/drm.c 
b/src/gallium/targets/d3dadapter9/drm.c
index 85b3e10633e..6fb8caf5c2f 100644
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -279,9 +279,6 @@ drm_create_adapter( int fd,
 DBG("You have set a non standard throttling value in combination with 
thread_submit."
 "We advise to use a throttling value of -2/0");
 }
-if (ctx->base.thread_submit && !different_device)
-DBG("You have set thread_submit but do not use a different device than 
the server."
-"You should not expect any benefit.");
 
 if (driCheckOption(, "override_vendorid", DRI_INT)) {
 override_vendorid = driQueryOptioni(, 
"override_vendorid");
-- 
2.19.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] st/nine: Allow 'triple buffering' with thread_submit

2018-11-10 Thread Axel Davy
The path allowing triple buffering behaviour wasn't implemented
yet for thread_submit

Signed-off-by: Axel Davy 
---
 src/gallium/state_trackers/nine/swapchain9.c | 66 +++-
 src/gallium/state_trackers/nine/swapchain9.h |  1 +
 2 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/src/gallium/state_trackers/nine/swapchain9.c 
b/src/gallium/state_trackers/nine/swapchain9.c
index 85ee51a0ae7..f86ab81ab97 100644
--- a/src/gallium/state_trackers/nine/swapchain9.c
+++ b/src/gallium/state_trackers/nine/swapchain9.c
@@ -28,6 +28,7 @@
 #include "nine_pipe.h"
 #include "nine_dump.h"
 
+#include "util/u_atomic.h"
 #include "util/u_inlines.h"
 #include "util/u_surface.h"
 #include "hud/hud_context.h"
@@ -50,6 +51,7 @@ NineSwapChain9_ctor( struct NineSwapChain9 *This,
  D3DDISPLAYMODEEX *mode )
 {
 HRESULT hr;
+int i;
 
 DBG("This=%p pDevice=%p pPresent=%p pCTX=%p hFocusWindow=%p\n",
 This, pParams->device, pPresent, pCTX, hFocusWindow);
@@ -65,8 +67,7 @@ NineSwapChain9_ctor( struct NineSwapChain9 *This,
 This->mode = NULL;
 
 ID3DPresent_AddRef(pPresent);
-if (!This->actx->thread_submit &&
-This->base.device->minor_version_num > 2) {
+if (This->base.device->minor_version_num > 2) {
 D3DPRESENT_PARAMETERS2 params2;
 
 memset(, 0, sizeof(D3DPRESENT_PARAMETERS2));
@@ -80,6 +81,11 @@ NineSwapChain9_ctor( struct NineSwapChain9 *This,
 
 This->rendering_done = FALSE;
 This->pool = NULL;
+for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) {
+This->pending_presentation[i] = calloc(1, sizeof(BOOL));
+if (!This->pending_presentation[i])
+return E_OUTOFMEMORY;
+}
 return NineSwapChain9_Resize(This, pPresentationParameters, mode);
 }
 
@@ -508,6 +514,11 @@ NineSwapChain9_dtor( struct NineSwapChain9 *This )
 if (This->pool)
 _mesa_threadpool_destroy(This, This->pool);
 
+for (i = 0; i < D3DPRESENT_BACK_BUFFERS_MAX_EX + 1; i++) {
+if (This->pending_presentation[i])
+FREE(This->pending_presentation[i]);
+}
+
 for (i = 0; i < This->num_back_buffers; i++) {
 if (This->buffers[i])
 NineUnknown_Detach(NineUnknown(This->buffers[i]));
@@ -619,6 +630,7 @@ struct end_present_struct {
 struct pipe_fence_handle *fence_to_wait;
 ID3DPresent *present;
 D3DWindowBuffer *present_handle;
+BOOL *pending_presentation;
 HWND hDestWindowOverride;
 };
 
@@ -630,6 +642,7 @@ static void work_present(void *data)
 work->screen->fence_reference(work->screen, &(work->fence_to_wait), 
NULL);
 }
 ID3DPresent_PresentBuffer(work->present, work->present_handle, 
work->hDestWindowOverride, NULL, NULL, NULL, 0);
+p_atomic_set(work->pending_presentation, FALSE);
 free(work);
 }
 
@@ -643,6 +656,8 @@ static void pend_present(struct NineSwapChain9 *This,
 work->present = This->present;
 work->present_handle = This->present_handles[0];
 work->hDestWindowOverride = hDestWindowOverride;
+work->pending_presentation = This->pending_presentation[0];
+p_atomic_set(work->pending_presentation, TRUE);
 This->tasks[0] = _mesa_threadpool_queue_task(This->pool, work_present, 
work);
 
 return;
@@ -853,6 +868,7 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
 struct pipe_resource *res = NULL;
 D3DWindowBuffer *handle_temp;
 struct threadpool_task *task_temp;
+BOOL *pending_presentation_temp;
 int i;
 HRESULT hr;
 
@@ -886,14 +902,14 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
 
 if (This->base.device->minor_version_num > 2 &&
 This->params.SwapEffect == D3DSWAPEFFECT_DISCARD &&
-This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE &&
-!This->actx->thread_submit) {
+This->params.PresentationInterval == D3DPRESENT_INTERVAL_IMMEDIATE) {
 int next_buffer = -1;
 
 while (next_buffer == -1) {
 /* Find a free backbuffer */
 for (i = 1; i < This->num_back_buffers; i++) {
-if (ID3DPresent_IsBufferReleased(This->present, 
This->present_handles[i])) {
+if (!p_atomic_read(This->pending_presentation[i]) &&
+ID3DPresent_IsBufferReleased(This->present, 
This->present_handles[i])) {
 DBG("Found buffer released: %d\n", i);
 next_buffer = i;
 break;
@@ -904,6 +920,17 @@ NineSwapChain9_Present( struct NineSwapChain9 *This,
 ID3DPresent_WaitBufferReleaseEvent(This->present);
 }
 }
+
+/* Free the task (we already checked it is finished) */
+if (This->tasks[next_buffer])
+_mesa_threadpool_wait_for_task(This->pool, 
&(This->tasks[next_buffer]));
+assert(!*This->pending_presentation[next_buffer] && 
!This->tasks[next_buffer]);
+This->tasks[next_buffer] = This->tasks[0];
+This->tasks[0] 

Re: [Mesa-dev] [PATCH 2/2] i965: Do NIR shader cloning in the caller.

2018-11-10 Thread Alejandro Piñeiro
I was tempted to suggest adding a comment somewhere mentioning this
policy change, but there are so many functions that I'm not sure what
that somewhere would be. Ramblings apart:

Reviewed-by: Alejandro Piñeiro 


On 10/11/18 09:17, Kenneth Graunke wrote:
> This moves nir_shader_clone() to the driver-specific compile function,
> rather than the shared src/intel/compiler code.  This allows i965 to do
> key-specific passes before calling brw_compile_*.  Vulkan should not
> need this cloning as it doesn't compile multiple variants.
>
> We do need to continue cloning in the compute shader code because we
> lower various things in NIR based on the SIMD width.
> ---
>  src/intel/compiler/brw_compiler.h  | 10 +-
>  src/intel/compiler/brw_fs.cpp  |  3 +--
>  src/intel/compiler/brw_shader.cpp  |  3 +--
>  src/intel/compiler/brw_vec4.cpp|  3 +--
>  src/intel/compiler/brw_vec4_gs_visitor.cpp |  3 +--
>  src/intel/compiler/brw_vec4_tcs.cpp|  3 +--
>  src/mesa/drivers/dri/i965/brw_cs.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_gs.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_tcs.c|  2 +-
>  src/mesa/drivers/dri/i965/brw_tes.c|  3 ++-
>  src/mesa/drivers/dri/i965/brw_vs.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_wm.c |  2 +-
>  12 files changed, 17 insertions(+), 21 deletions(-)
>
> diff --git a/src/intel/compiler/brw_compiler.h 
> b/src/intel/compiler/brw_compiler.h
> index d8c9499065f..1daf935d7fe 100644
> --- a/src/intel/compiler/brw_compiler.h
> +++ b/src/intel/compiler/brw_compiler.h
> @@ -1238,7 +1238,7 @@ brw_compile_vs(const struct brw_compiler *compiler, 
> void *log_data,
> void *mem_ctx,
> const struct brw_vs_prog_key *key,
> struct brw_vs_prog_data *prog_data,
> -   const struct nir_shader *shader,
> +   struct nir_shader *shader,
> int shader_time_index,
> char **error_str);
>  
> @@ -1253,7 +1253,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
>  void *mem_ctx,
>  const struct brw_tcs_prog_key *key,
>  struct brw_tcs_prog_data *prog_data,
> -const struct nir_shader *nir,
> +struct nir_shader *nir,
>  int shader_time_index,
>  char **error_str);
>  
> @@ -1268,7 +1268,7 @@ brw_compile_tes(const struct brw_compiler *compiler, 
> void *log_data,
>  const struct brw_tes_prog_key *key,
>  const struct brw_vue_map *input_vue_map,
>  struct brw_tes_prog_data *prog_data,
> -const struct nir_shader *shader,
> +struct nir_shader *shader,
>  struct gl_program *prog,
>  int shader_time_index,
>  char **error_str);
> @@ -1283,7 +1283,7 @@ brw_compile_gs(const struct brw_compiler *compiler, 
> void *log_data,
> void *mem_ctx,
> const struct brw_gs_prog_key *key,
> struct brw_gs_prog_data *prog_data,
> -   const struct nir_shader *shader,
> +   struct nir_shader *shader,
> struct gl_program *prog,
> int shader_time_index,
> char **error_str);
> @@ -1330,7 +1330,7 @@ brw_compile_fs(const struct brw_compiler *compiler, 
> void *log_data,
> void *mem_ctx,
> const struct brw_wm_prog_key *key,
> struct brw_wm_prog_data *prog_data,
> -   const struct nir_shader *shader,
> +   struct nir_shader *shader,
> struct gl_program *prog,
> int shader_time_index8,
> int shader_time_index16,
> diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
> index 3e083723471..aa29c8a4deb 100644
> --- a/src/intel/compiler/brw_fs.cpp
> +++ b/src/intel/compiler/brw_fs.cpp
> @@ -7119,7 +7119,7 @@ brw_compile_fs(const struct brw_compiler *compiler, 
> void *log_data,
> void *mem_ctx,
> const struct brw_wm_prog_key *key,
> struct brw_wm_prog_data *prog_data,
> -   const nir_shader *src_shader,
> +   nir_shader *shader,
> struct gl_program *prog,
> int shader_time_index8, int shader_time_index16,
> int shader_time_index32, bool allow_spilling,
> @@ -7128,7 +7128,6 @@ brw_compile_fs(const struct brw_compiler *compiler, 
> void *log_data,
>  {
> const struct gen_device_info *devinfo = compiler->devinfo;
>  
> -   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> shader = brw_nir_apply_sampler_key(shader, compiler, >tex, true);
> brw_nir_lower_fs_inputs(shader, devinfo, key);
> brw_nir_lower_fs_outputs(shader);
> diff --git a/src/intel/compiler/brw_shader.cpp 
> b/src/intel/compiler/brw_shader.cpp
> index 

Re: [Mesa-dev] [PATCH 1/2] i965: Use a 'nir' temporary rather than poking at brw_program

2018-11-10 Thread Alejandro Piñeiro
Reviewed-by: Alejandro Piñeiro 

On 10/11/18 09:17, Kenneth Graunke wrote:
> It's shorter and will also be useful when I adjust cloning soon.
> ---
>  src/mesa/drivers/dri/i965/brw_cs.c  |  6 +++---
>  src/mesa/drivers/dri/i965/brw_gs.c  | 11 ++-
>  src/mesa/drivers/dri/i965/brw_tcs.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_tes.c |  2 +-
>  src/mesa/drivers/dri/i965/brw_vs.c  | 15 ---
>  src/mesa/drivers/dri/i965/brw_wm.c  | 11 ++-
>  6 files changed, 25 insertions(+), 22 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
> b/src/mesa/drivers/dri/i965/brw_cs.c
> index 498c80d46a5..3ae54830f78 100644
> --- a/src/mesa/drivers/dri/i965/brw_cs.c
> +++ b/src/mesa/drivers/dri/i965/brw_cs.c
> @@ -58,6 +58,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
> struct brw_cs_prog_data prog_data;
> bool start_busy = false;
> double start_time = 0;
> +   nir_shader *nir = cp->program.nir;
>  
> memset(_data, 0, sizeof(prog_data));
>  
> @@ -76,7 +77,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
>  
> assign_cs_binding_table_offsets(devinfo, >program, _data);
>  
> -   brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir,
> +   brw_nir_setup_glsl_uniforms(mem_ctx, nir,
> >program, _data.base, true);
>  
> if (unlikely(brw->perf_debug)) {
> @@ -91,8 +92,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
>  
> char *error_str;
> program = brw_compile_cs(brw->screen->compiler, brw, mem_ctx, key,
> -_data, cp->program.nir, st_index,
> -_str);
> +_data, nir, st_index, _str);
> if (program == NULL) {
>cp->program.sh.data->LinkStatus = LINKING_FAILURE;
>ralloc_strcat(>program.sh.data->InfoLog, error_str);
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index 7263f6351e9..55c2923bded 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -89,15 +89,17 @@ brw_codegen_gs_prog(struct brw_context *brw,
>  
> void *mem_ctx = ralloc_context(NULL);
>  
> +   nir_shader *nir = gp->program.nir;
> +
> assign_gs_binding_table_offsets(devinfo, >program, _data);
>  
> -   brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, >program,
> +   brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
> _data.base.base,
> compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
> -   brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, NULL,
> +   brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
>prog_data.base.base.ubo_ranges);
>  
> -   uint64_t outputs_written = gp->program.nir->info.outputs_written;
> +   uint64_t outputs_written = nir->info.outputs_written;
>  
> brw_compute_vue_map(devinfo,
> _data.base.vue_map, outputs_written,
> @@ -115,8 +117,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
> char *error_str;
> const unsigned *program =
>brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key,
> - _data, gp->program.nir, >program,
> - st_index, _str);
> + _data, nir, >program, st_index, _str);
> if (program == NULL) {
>ralloc_strcat(>program.sh.data->InfoLog, error_str);
>_mesa_problem(NULL, "Failed to compile geometry shader: %s\n", 
> error_str);
> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c 
> b/src/mesa/drivers/dri/i965/brw_tcs.c
> index 17f4130c095..6e60a44fc10 100644
> --- a/src/mesa/drivers/dri/i965/brw_tcs.c
> +++ b/src/mesa/drivers/dri/i965/brw_tcs.c
> @@ -100,7 +100,7 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct 
> brw_program *tcp,
>brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
>_data.base.base,
>
> compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
> -  brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, NULL,
> +  brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
>   prog_data.base.base.ubo_ranges);

> } else {
>/* Upload the Patch URB Header as the first two uniforms.
> diff --git a/src/mesa/drivers/dri/i965/brw_tes.c 
> b/src/mesa/drivers/dri/i965/brw_tes.c
> index b3220a94741..8f8f68530b7 100644
> --- a/src/mesa/drivers/dri/i965/brw_tes.c
> +++ b/src/mesa/drivers/dri/i965/brw_tes.c
> @@ -85,7 +85,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
> brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
> _data.base.base,
> 
> compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
> -   brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, NULL,
> +   brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
>prog_data.base.base.ubo_ranges);
>  
> int st_index = -1;
> diff --git 

Re: [Mesa-dev] [PATCH 6/7] RFC: nir/xfb_info: arrays of basic types adds just one varying

2018-11-10 Thread Alejandro Piñeiro
On 09/11/18 16:58, Jason Ekstrand wrote:
> On November 9, 2018 06:39:25 Alejandro Piñeiro 
> wrote:
>> On 08/11/18 23:14, Jason Ekstrand wrote:
>>> On Thu, Nov 8, 2018 at 7:22 AM Alejandro Piñeiro
>>> mailto:apinhe...@igalia.com>> wrote:
>>>
>>> On OpenGL, an array of a simple type adds just one varying. So
>>> gl_transform_feedback_varying_info struct defined at mtypes.h
>>> includes
>>> the parameters Type (base_type) and Size (number of elements).
>>>
>>> This commit checks this when the recursive add_var_xfb_outputs call
>>> handles arrays, to ensure that just one is added.
>>>
>>> RFC: Until this point, all changes were reasonable, but this
>>> change is
>>> (imho) ugly. My idea was to introduce as few changes as possible to
>>> the code, especially to its logic/flow. But this commit is almost a
>>> hack. The ideal solution would be to change the focus of the
>>> recursive
>>> function, focusing on varyings, and at each varying, recursively add
>>> outputs. But that seems like an overkill for a pass that was
>>> originally intended for consumers only caring about the outputs. So
>>> perhaps ARB_gl_spirv should keep its own gathering pass, with
>>> varyings and outputs, and leave this one untouched for those that only
>>> care about outputs.
>>> ---
>>>  src/compiler/nir/nir_gather_xfb_info.c | 52
>>> --
>>>  1 file changed, 43 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/src/compiler/nir/nir_gather_xfb_info.c
>>> b/src/compiler/nir/nir_gather_xfb_info.c
>>> index 948b802a815..cb0e2724cab 100644
>>> --- a/src/compiler/nir/nir_gather_xfb_info.c
>>> +++ b/src/compiler/nir/nir_gather_xfb_info.c
>>> @@ -36,23 +36,59 @@ nir_gather_xfb_info_create(void *mem_ctx,
>>> uint16_t output_count, uint16_t varyin
>>>     return xfb;
>>>  }
>>>
>>> +static bool
>>> +glsl_type_is_leaf(const struct glsl_type *type)
>>> +{
>>> +   if (glsl_type_is_struct(type) ||
>>> +       (glsl_type_is_array(type) &&
>>> +        (glsl_type_is_array(glsl_get_array_element(type)) ||
>>> +         glsl_type_is_struct(glsl_get_array_element(type) {
>>>
>>>
>>> I'm trying to understand exactly what this means.  From what you
>>> wrote here it looks like the following are all one varying:
>>>
>>> float var[3];
>>> vec2 var[3];
>>> mat4 var[3];
>>
>> Yes, GLSL returns one varying per each one (Size 3).
>
> Just to be clear, a matrix or array of matrices is one varying?

Yep, and being more clear, for this shader:
#version 150
#extension GL_ARB_enhanced_layouts: require

layout(xfb_offset = 0) out mat4 var[3];

void main() {
  mat4 m4;

  gl_Position = vec4(0.0);

  var[0] = m4;
}

We get the following when we dump gl_program::LinkedTransformFeedback,
that is a struct gl_transform_feedback_info defined at mtypes.h:

[gl_transform_feedback_info]
    NumOuputs = 12, (OutputRegister, OutputBuffer, NumComponents,
StreamId, DstOffset, ComponentOffset)
            0:(31,  0,  4,  0,  0,  0)
            1:(32,  0,  4,  0,  4,  0)
            2:(33,  0,  4,  0,  8,  0)
            3:(34,  0,  4,  0, 12,  0)
            4:(35,  0,  4,  0, 16,  0)
            5:(36,  0,  4,  0, 20,  0)
            6:(37,  0,  4,  0, 24,  0)
            7:(38,  0,  4,  0, 28,  0)
            8:(39,  0,  4,  0, 32,  0)
            9:(40,  0,  4,  0, 36,  0)
            10:(41,  0,  4,  0, 40,  0)
            11:(42,  0,  4,  0, 44,  0)
    NumVarying=1, (Offset, Type, BufferIndex, Size, Name)
            0:( 0,   GL_FLOAT_MAT4,  0,  3, var)
    ActiveBuffers=1, (Binding, NumVaryings, Stride, Stream):
            0:( 0,  1, 192,  0)

FWIW, in some cases we are also getting a slightly different number of
Outputs. But I'm personally not really worried about that as long as it
keeps working. The number of varyings is somewhat different as it is
exposed through the program interface queries, so (I assume) it should
be consistent.

>
>>
>>>
>>> but the following are not
>>>
>>> struct S {
>>>    float f;
>>>    vec4 v;
>>> };
>>>
>>> S var[3];
>>
>>> float var[3][5];
>>
>> I guess that you are asking about those two cases because this code is
>> not handling it properly. You are right. For the array of structs,
>> our code is crashing. For the array of arrays, it is enumerating four
>> varyings. One with three GL_FLOAT components, and three with five
>> GL_FLOAT components, instead of just three varyings with five
>> components. In my defense, I already mentioned that it was wip code,
>> but preferred to agree on the way to go before keep working on it.
>>
>> For the GLSL case, the array of struct returns 6 varyings. And funny
>> thing, for the array of arrays, GLSL is handling the situation even
>> worse. It returns the following link error: "Failed to link: error:
>> Transform feedback varying var[0] undeclared." Just a quick skim on
>> the spec, I didn't see anything preventing using aoa 

[Mesa-dev] [Bug 107822] Just Cause 3 Flickering Textures with AMD RADV

2018-11-10 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=107822

--- Comment #5 from fin4...@hotmail.com ---
Lower the graphics details in the game.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] mapi: avoid text relocation in x86 tsd stubs

2018-11-10 Thread Jonathan Gray
On Fri, Nov 09, 2018 at 10:30:42PM +1100, Jonathan Gray wrote:
> On Thu, Nov 08, 2018 at 03:54:20PM +, Emil Velikov wrote:
> > On Fri, 2 Nov 2018 at 00:02, Jonathan Gray  wrote:
> > >
> > > On Thu, Nov 01, 2018 at 12:26:34PM -0700, Ian Romanick wrote:
> > > > On 10/31/2018 09:08 PM, Jonathan Gray wrote:
> > > > > > Make similar changes to libglvnd to avoid a text relocation in
> > > > > x86 tsd stubs fixing the build with lld.
> > > > >
> > > > > Signed-off-by: Jonathan Gray 
> > > > > Cc: mesa-sta...@lists.freedesktop.org
> > > > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108541
> > > > > ---
> > > > >  src/mapi/entry_x86_tsd.h | 14 +-
> > > > >  1 file changed, 9 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/src/mapi/entry_x86_tsd.h b/src/mapi/entry_x86_tsd.h
> > > > > index 0c28c8ff068..e08a02f3db2 100644
> > > > > --- a/src/mapi/entry_x86_tsd.h
> > > > > +++ b/src/mapi/entry_x86_tsd.h
> > > > > @@ -31,7 +31,7 @@
> > > > >  #define HIDDEN
> > > > >  #endif
> > > > >
> > > > > -#define X86_ENTRY_SIZE 32
> > > > > +#define X86_ENTRY_SIZE 64
> > > > >
> > > > >  __asm__(".text\n"
> > > > >  ".balign 32\n"
> > > > > @@ -44,12 +44,16 @@ __asm__(".text\n"
> > > > > func ":"
> > > > >
> > > > >  #define STUB_ASM_CODE(slot) \
> > > > > -   "movl " ENTRY_CURRENT_TABLE ", %eax\n\t" \
> > > > > +   "call 1f\n\t"\
> > > > > +   "1:\n\t" \
> > > > > +   "popl %eax\n\t"  \
> > > > > +   "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %eax\n\t" \
> > > > > +   "movl " ENTRY_CURRENT_TABLE "@GOT(%eax), %eax\n\t" \
> > > > > +   "mov (%eax), %eax\n\t"   \
> > > > > "testl %eax, %eax\n\t"   \
> > > > > -   "je 1f\n\t"  \
> > > > > -   "jmp *(4 * " slot ")(%eax)\n"\
> > > > > +   "jne 1f\n\t" \
> > > > > +   "call " ENTRY_CURRENT_TABLE_GET "@PLT\n\t" \
> > > > > "1:\n\t" \
> > > > > -   "call " ENTRY_CURRENT_TABLE_GET "\n\t" \
> > > > > "jmp *(4 * " slot ")(%eax)"
> > > >
> > > > After this change, the code is:
> > > >
> > > > #define STUB_ASM_CODE(slot) \
> > > >"call 1f\n\t"\
> > > >"1:\n\t" \
> > > >"popl %eax\n\t"  \
> > > >"addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %eax\n\t" \
> > > >"movl " ENTRY_CURRENT_TABLE "@GOT(%eax), %eax\n\t" \
> > > >"mov (%eax), %eax\n\t"   \
> > > >"testl %eax, %eax\n\t"   \
> > > >"jne 1f\n\t" \
> > > >"call " ENTRY_CURRENT_TABLE_GET "@PLT\n\t" \
> > > >"1:\n\t" \
> > > >"jmp *(4 * " slot ")(%eax)"
> > > >
> > > > So there's going to be two labels "1:".  Does that even assemble?
> > >
> > > Yes, the call/jmp is always forward as it is '1f'.
> > > This also runs glxinfo, glxgears etc on a pentium m running OpenBSD/i386.
> > >
> > > https://github.com/NVIDIA/libglvnd/blob/master/src/GLdispatch/vnd-glapi/entry_x86_tsd.c#L58
> > >
> > > libglvnd has two labels like this as well, the ebx use there isn't needed.
> > 
> > Hi all, did this get stuck or it's superseded/obsolete?
> 
> This is still the latest version of the patch.

The original patch should be ignored: while it worked for dynamically
linked libGL users like glxinfo and glxgears, a program using SDL2, which
dlopens libGL, segfaulted.  The ebx portion is required.

Index: entry_x86_tsd.h
===
RCS file: /cvs/xenocara/lib/mesa/src/mapi/entry_x86_tsd.h,v
retrieving revision 1.3
diff -u -p -r1.3 entry_x86_tsd.h
--- entry_x86_tsd.h 10 Nov 2018 08:11:16 -  1.3
+++ entry_x86_tsd.h 10 Nov 2018 08:12:09 -
@@ -31,7 +31,7 @@
 #define HIDDEN
 #endif
 
-#define X86_ENTRY_SIZE 32
+#define X86_ENTRY_SIZE 64
 
 __asm__(".text\n"
 ".balign 32\n"
@@ -44,12 +44,18 @@ __asm__(".text\n"
func ":"
 
 #define STUB_ASM_CODE(slot) \
-   "movl " ENTRY_CURRENT_TABLE ", %eax\n\t" \
+   "push %ebx\n\t"  \
+   "call 1f\n\t"\
+   "1:\n\t" \
+   "popl %ebx\n\t"  \
+   "addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx\n\t" \
+   "movl " ENTRY_CURRENT_TABLE "@GOT(%ebx), %eax\n\t" \
+   "mov (%eax), %eax\n\t"   \
"testl %eax, %eax\n\t"   \
-   "je 1f\n\t"  \
-   "jmp *(4 * " slot ")(%eax)\n"\
+   "jne 1f\n\t" \
+   "call " ENTRY_CURRENT_TABLE_GET "@PLT\n\t" \
"1:\n\t" \
-   "call " ENTRY_CURRENT_TABLE_GET "\n\t" \
+   "pop %ebx\n\t"   \
"jmp *(4 * " slot ")(%eax)"
 
 #define MAPI_TMP_STUB_ASM_GCC
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Use a 'nir' temporary rather than poking at brw_program

2018-11-10 Thread Kenneth Graunke
It's shorter and will also be useful when I adjust cloning soon.
---
 src/mesa/drivers/dri/i965/brw_cs.c  |  6 +++---
 src/mesa/drivers/dri/i965/brw_gs.c  | 11 ++-
 src/mesa/drivers/dri/i965/brw_tcs.c |  2 +-
 src/mesa/drivers/dri/i965/brw_tes.c |  2 +-
 src/mesa/drivers/dri/i965/brw_vs.c  | 15 ---
 src/mesa/drivers/dri/i965/brw_wm.c  | 11 ++-
 6 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cs.c 
b/src/mesa/drivers/dri/i965/brw_cs.c
index 498c80d46a5..3ae54830f78 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -58,6 +58,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
struct brw_cs_prog_data prog_data;
bool start_busy = false;
double start_time = 0;
+   nir_shader *nir = cp->program.nir;
 
memset(_data, 0, sizeof(prog_data));
 
@@ -76,7 +77,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
assign_cs_binding_table_offsets(devinfo, >program, _data);
 
-   brw_nir_setup_glsl_uniforms(mem_ctx, cp->program.nir,
+   brw_nir_setup_glsl_uniforms(mem_ctx, nir,
>program, _data.base, true);
 
if (unlikely(brw->perf_debug)) {
@@ -91,8 +92,7 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
char *error_str;
program = brw_compile_cs(brw->screen->compiler, brw, mem_ctx, key,
-_data, cp->program.nir, st_index,
-_str);
+_data, nir, st_index, _str);
if (program == NULL) {
   cp->program.sh.data->LinkStatus = LINKING_FAILURE;
   ralloc_strcat(>program.sh.data->InfoLog, error_str);
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index 7263f6351e9..55c2923bded 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -89,15 +89,17 @@ brw_codegen_gs_prog(struct brw_context *brw,
 
void *mem_ctx = ralloc_context(NULL);
 
+   nir_shader *nir = gp->program.nir;
+
assign_gs_binding_table_offsets(devinfo, >program, _data);
 
-   brw_nir_setup_glsl_uniforms(mem_ctx, gp->program.nir, >program,
+   brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
_data.base.base,
compiler->scalar_stage[MESA_SHADER_GEOMETRY]);
-   brw_nir_analyze_ubo_ranges(compiler, gp->program.nir, NULL,
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
   prog_data.base.base.ubo_ranges);
 
-   uint64_t outputs_written = gp->program.nir->info.outputs_written;
+   uint64_t outputs_written = nir->info.outputs_written;
 
brw_compute_vue_map(devinfo,
_data.base.vue_map, outputs_written,
@@ -115,8 +117,7 @@ brw_codegen_gs_prog(struct brw_context *brw,
char *error_str;
const unsigned *program =
   brw_compile_gs(brw->screen->compiler, brw, mem_ctx, key,
- _data, gp->program.nir, >program,
- st_index, _str);
+ _data, nir, >program, st_index, _str);
if (program == NULL) {
   ralloc_strcat(>program.sh.data->InfoLog, error_str);
   _mesa_problem(NULL, "Failed to compile geometry shader: %s\n", 
error_str);
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c 
b/src/mesa/drivers/dri/i965/brw_tcs.c
index 17f4130c095..6e60a44fc10 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -100,7 +100,7 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct 
brw_program *tcp,
   brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
   _data.base.base,
   
compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
-  brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir, NULL,
+  brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
  prog_data.base.base.ubo_ranges);
} else {
   /* Upload the Patch URB Header as the first two uniforms.
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c 
b/src/mesa/drivers/dri/i965/brw_tes.c
index b3220a94741..8f8f68530b7 100644
--- a/src/mesa/drivers/dri/i965/brw_tes.c
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -85,7 +85,7 @@ brw_codegen_tes_prog(struct brw_context *brw,
brw_nir_setup_glsl_uniforms(mem_ctx, nir, >program,
_data.base.base,
compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
-   brw_nir_analyze_ubo_ranges(compiler, tep->program.nir, NULL,
+   brw_nir_analyze_ubo_ranges(compiler, nir, NULL,
   prog_data.base.base.ubo_ranges);
 
int st_index = -1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index 69c0046bbb9..d557e82f9fc 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -174,26 +174,28 @@ brw_codegen_vs_prog(struct brw_context *brw,
 
mem_ctx = ralloc_context(NULL);
 

[Mesa-dev] [PATCH 2/2] i965: Do NIR shader cloning in the caller.

2018-11-10 Thread Kenneth Graunke
This moves nir_shader_clone() to the driver-specific compile function,
rather than the shared src/intel/compiler code.  This allows i965 to do
key-specific passes before calling brw_compile_*.  Vulkan should not
need this cloning as it doesn't compile multiple variants.

We do need to continue cloning in the compute shader code because we
lower various things in NIR based on the SIMD width.
---
 src/intel/compiler/brw_compiler.h  | 10 +-
 src/intel/compiler/brw_fs.cpp  |  3 +--
 src/intel/compiler/brw_shader.cpp  |  3 +--
 src/intel/compiler/brw_vec4.cpp|  3 +--
 src/intel/compiler/brw_vec4_gs_visitor.cpp |  3 +--
 src/intel/compiler/brw_vec4_tcs.cpp|  3 +--
 src/mesa/drivers/dri/i965/brw_cs.c |  2 +-
 src/mesa/drivers/dri/i965/brw_gs.c |  2 +-
 src/mesa/drivers/dri/i965/brw_tcs.c|  2 +-
 src/mesa/drivers/dri/i965/brw_tes.c|  3 ++-
 src/mesa/drivers/dri/i965/brw_vs.c |  2 +-
 src/mesa/drivers/dri/i965/brw_wm.c |  2 +-
 12 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index d8c9499065f..1daf935d7fe 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -1238,7 +1238,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void 
*log_data,
void *mem_ctx,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *prog_data,
-   const struct nir_shader *shader,
+   struct nir_shader *shader,
int shader_time_index,
char **error_str);
 
@@ -1253,7 +1253,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
 void *mem_ctx,
 const struct brw_tcs_prog_key *key,
 struct brw_tcs_prog_data *prog_data,
-const struct nir_shader *nir,
+struct nir_shader *nir,
 int shader_time_index,
 char **error_str);
 
@@ -1268,7 +1268,7 @@ brw_compile_tes(const struct brw_compiler *compiler, void 
*log_data,
 const struct brw_tes_prog_key *key,
 const struct brw_vue_map *input_vue_map,
 struct brw_tes_prog_data *prog_data,
-const struct nir_shader *shader,
+struct nir_shader *shader,
 struct gl_program *prog,
 int shader_time_index,
 char **error_str);
@@ -1283,7 +1283,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void 
*log_data,
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
-   const struct nir_shader *shader,
+   struct nir_shader *shader,
struct gl_program *prog,
int shader_time_index,
char **error_str);
@@ -1330,7 +1330,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
-   const struct nir_shader *shader,
+   struct nir_shader *shader,
struct gl_program *prog,
int shader_time_index8,
int shader_time_index16,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 3e083723471..aa29c8a4deb 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -7119,7 +7119,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
-   const nir_shader *src_shader,
+   nir_shader *shader,
struct gl_program *prog,
int shader_time_index8, int shader_time_index16,
int shader_time_index32, bool allow_spilling,
@@ -7128,7 +7128,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
 {
const struct gen_device_info *devinfo = compiler->devinfo;
 
-   nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, >tex, true);
brw_nir_lower_fs_inputs(shader, devinfo, key);
brw_nir_lower_fs_outputs(shader);
diff --git a/src/intel/compiler/brw_shader.cpp 
b/src/intel/compiler/brw_shader.cpp
index 798c799a828..b77bd798d17 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -1189,7 +1189,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
 const struct brw_tes_prog_key *key,
 const struct brw_vue_map *input_vue_map,
 struct brw_tes_prog_data *prog_data,
-const nir_shader *src_shader,
+nir_shader *nir,
 struct