[Mesa-dev] [RFC 3/3] mesa: call DrawBuffer(s) driver hook in update_framebuffer for windows-system FB

2018-04-13 Thread Timothy Arceri
From: Boyan Ding 

When draw buffers are changed on a bound framebuffer, DrawBuffer(s) hook
should be called. However, it is missing in update_framebuffer with
window-system framebuffer, in which FB's draw buffer state should match
context state, potentially resulting in a change.

Note: This seems to be needed because gallium delays creating the front
buffer. i965 works fine without this change.

V2 (Timothy Arceri):
 - Rebased on merged/simplified DrawBuffer driver function
 - Move DrawBuffer call outside fb->ColorDrawBuffer[0] !=
   ctx->Color.DrawBuffer[0] check to make piglit pass.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99116
---
 src/mesa/main/framebuffer.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 249e775f8cb..81d3b370e73 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -623,6 +623,12 @@ update_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
  _mesa_drawbuffers(ctx, fb, ctx->Const.MaxDrawBuffers,
ctx->Color.DrawBuffer, NULL);
   }
+
+  /* Call device driver function if fb is the bound draw buffer. */
+  if (fb == ctx->DrawBuffer) {
+ if (ctx->Driver.DrawBuffer)
+ctx->Driver.DrawBuffer(ctx);
+  }
}
else {
   /* This is a user-created framebuffer.
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa: drop the buffer mode param from the DrawBuffer driver function

2018-04-13 Thread Timothy Arceri
No drivers used it.
---
 src/mesa/drivers/common/driverfuncs.c| 2 +-
 src/mesa/drivers/dri/i915/intel_buffers.c| 2 +-
 src/mesa/drivers/dri/i965/intel_buffers.c| 2 +-
 src/mesa/drivers/dri/nouveau/nouveau_state.c | 2 +-
 src/mesa/main/buffers.c  | 7 ++-
 src/mesa/main/dd.h   | 2 +-
 src/mesa/state_tracker/st_cb_fbo.c   | 4 +---
 7 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c 
b/src/mesa/drivers/common/driverfuncs.c
index b7ac2b5b434..e783262773e 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -306,5 +306,5 @@ _mesa_init_driver_state(struct gl_context *ctx)
  ctx->Stencil.ZPassFunc[1]);
 
 
-   ctx->Driver.DrawBuffer(ctx, ctx->Color.DrawBuffer[0]);
+   ctx->Driver.DrawBuffer(ctx);
 }
diff --git a/src/mesa/drivers/dri/i915/intel_buffers.c 
b/src/mesa/drivers/dri/i915/intel_buffers.c
index 386e032443a..26ba3df7d7a 100644
--- a/src/mesa/drivers/dri/i915/intel_buffers.c
+++ b/src/mesa/drivers/dri/i915/intel_buffers.c
@@ -53,7 +53,7 @@ intel_check_front_buffer_rendering(struct intel_context 
*intel)
 }
 
 static void
-intelDrawBuffer(struct gl_context * ctx, GLenum mode)
+intelDrawBuffer(struct gl_context *ctx)
 {
if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
   struct intel_context *const intel = intel_context(ctx);
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c 
b/src/mesa/drivers/dri/i965/intel_buffers.c
index fd522cc4f4d..dae56e3362c 100644
--- a/src/mesa/drivers/dri/i965/intel_buffers.c
+++ b/src/mesa/drivers/dri/i965/intel_buffers.c
@@ -33,7 +33,7 @@
 #include "main/renderbuffer.h"
 
 static void
-intelDrawBuffer(struct gl_context * ctx, GLenum mode)
+intelDrawBuffer(struct gl_context *ctx)
 {
if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer)) {
   struct brw_context *const brw = brw_context(ctx);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c 
b/src/mesa/drivers/dri/nouveau/nouveau_state.c
index 91ca95b5907..a05c8be854a 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_state.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c
@@ -115,7 +115,7 @@ nouveau_read_buffer(struct gl_context *ctx, GLenum buffer)
 }
 
 static void
-nouveau_draw_buffer(struct gl_context *ctx, GLenum buffers)
+nouveau_draw_buffer(struct gl_context *ctx)
 {
nouveau_validate_framebuffer(ctx);
context_dirty(ctx, FRAMEBUFFER);
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 7bb5725d085..53dae410253 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -305,7 +305,7 @@ draw_buffer(struct gl_context *ctx, struct gl_framebuffer 
*fb,
/* Call device driver function only if fb is the bound draw buffer */
if (fb == ctx->DrawBuffer) {
   if (ctx->Driver.DrawBuffer)
- ctx->Driver.DrawBuffer(ctx, buffer);
+ ctx->Driver.DrawBuffer(ctx);
}
 }
 
@@ -580,13 +580,10 @@ draw_buffers(struct gl_context *ctx, struct 
gl_framebuffer *fb, GLsizei n,
 
/*
 * Call device driver function if fb is the bound draw buffer.
-* Note that n can be equal to 0,
-* in which case we don't want to reference buffers[0], which
-* may not be valid.
 */
if (fb == ctx->DrawBuffer) {
   if (ctx->Driver.DrawBuffer)
- ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE);
+ ctx->Driver.DrawBuffer(ctx);
}
 }
 
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index d85d89ef50c..a110e928cda 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -611,7 +611,7 @@ struct dd_function_table {
/** Specify mapping of depth values from NDC to window coordinates */
void (*DepthRange)(struct gl_context *ctx);
/** Specify the current buffer for writing */
-   void (*DrawBuffer)( struct gl_context *ctx, GLenum buffer );
+   void (*DrawBuffer)( struct gl_context *ctx);
/** Enable or disable server-side gl capabilities */
void (*Enable)(struct gl_context *ctx, GLenum cap, GLboolean state);
/** Specify fog parameters */
diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index 696a08fd65b..5691097ae82 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -714,13 +714,11 @@ st_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
  * created FBOs.
  */
 static void
-st_DrawBuffer(struct gl_context *ctx, GLenum buffer)
+st_DrawBuffer(struct gl_context *ctx)
 {
struct st_context *st = st_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
-   (void) buffer;
-
if (_mesa_is_winsys_fbo(fb)) {
   GLuint i;
   /* add the renderbuffers on demand */
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] mesa: merge the driver functions DrawBuffers and DrawBuffer

2018-04-13 Thread Timothy Arceri
The extra params were unused by the drivers that used DrawBuffers.
---
 src/mesa/drivers/common/driverfuncs.c| 1 -
 src/mesa/drivers/dri/nouveau/nouveau_state.c | 4 ++--
 src/mesa/main/buffers.c  | 8 ++--
 src/mesa/main/dd.h   | 2 --
 src/mesa/state_tracker/st_cb_fbo.c   | 9 -
 5 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c 
b/src/mesa/drivers/common/driverfuncs.c
index 11134b69e94..b7ac2b5b434 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -134,7 +134,6 @@ _mesa_init_driver_functions(struct dd_function_table 
*driver)
driver->ColorMaterial = NULL;
driver->CullFace = NULL;
driver->DrawBuffer = NULL;
-   driver->DrawBuffers = NULL;
driver->FrontFace = NULL;
driver->DepthFunc = NULL;
driver->DepthMask = NULL;
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c 
b/src/mesa/drivers/dri/nouveau/nouveau_state.c
index e2b01043675..91ca95b5907 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_state.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c
@@ -115,7 +115,7 @@ nouveau_read_buffer(struct gl_context *ctx, GLenum buffer)
 }
 
 static void
-nouveau_draw_buffers(struct gl_context *ctx, GLsizei n, const GLenum *buffers)
+nouveau_draw_buffer(struct gl_context *ctx, GLenum buffers)
 {
nouveau_validate_framebuffer(ctx);
context_dirty(ctx, FRAMEBUFFER);
@@ -519,7 +519,7 @@ nouveau_state_init(struct gl_context *ctx)
ctx->Driver.DepthFunc = nouveau_depth_func;
ctx->Driver.DepthMask = nouveau_depth_mask;
ctx->Driver.ReadBuffer = nouveau_read_buffer;
-   ctx->Driver.DrawBuffers = nouveau_draw_buffers;
+   ctx->Driver.DrawBuffer = nouveau_draw_buffer;
ctx->Driver.Enable = nouveau_enable;
ctx->Driver.Fogfv = nouveau_fog;
ctx->Driver.Lightfv = nouveau_light;
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 5492227de08..7bb5725d085 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -304,9 +304,7 @@ draw_buffer(struct gl_context *ctx, struct gl_framebuffer 
*fb,
 
/* Call device driver function only if fb is the bound draw buffer */
if (fb == ctx->DrawBuffer) {
-  if (ctx->Driver.DrawBuffers)
- ctx->Driver.DrawBuffers(ctx, 1, &buffer);
-  else if (ctx->Driver.DrawBuffer)
+  if (ctx->Driver.DrawBuffer)
  ctx->Driver.DrawBuffer(ctx, buffer);
}
 }
@@ -587,9 +585,7 @@ draw_buffers(struct gl_context *ctx, struct gl_framebuffer 
*fb, GLsizei n,
 * may not be valid.
 */
if (fb == ctx->DrawBuffer) {
-  if (ctx->Driver.DrawBuffers)
- ctx->Driver.DrawBuffers(ctx, n, buffers);
-  else if (ctx->Driver.DrawBuffer)
+  if (ctx->Driver.DrawBuffer)
  ctx->Driver.DrawBuffer(ctx, n > 0 ? buffers[0] : GL_NONE);
}
 }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 64ddd818835..d85d89ef50c 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -612,8 +612,6 @@ struct dd_function_table {
void (*DepthRange)(struct gl_context *ctx);
/** Specify the current buffer for writing */
void (*DrawBuffer)( struct gl_context *ctx, GLenum buffer );
-   /** Specify the buffers for writing for fragment programs*/
-   void (*DrawBuffers)(struct gl_context *ctx, GLsizei n, const GLenum 
*buffers);
/** Enable or disable server-side gl capabilities */
void (*Enable)(struct gl_context *ctx, GLenum cap, GLboolean state);
/** Specify fog parameters */
diff --git a/src/mesa/state_tracker/st_cb_fbo.c 
b/src/mesa/state_tracker/st_cb_fbo.c
index f859133e399..696a08fd65b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -714,13 +714,12 @@ st_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
  * created FBOs.
  */
 static void
-st_DrawBuffers(struct gl_context *ctx, GLsizei count, const GLenum *buffers)
+st_DrawBuffer(struct gl_context *ctx, GLenum buffer)
 {
struct st_context *st = st_context(ctx);
struct gl_framebuffer *fb = ctx->DrawBuffer;
 
-   (void) count;
-   (void) buffers;
+   (void) buffer;
 
if (_mesa_is_winsys_fbo(fb)) {
   GLuint i;
@@ -737,7 +736,7 @@ st_DrawBuffers(struct gl_context *ctx, GLsizei count, const 
GLenum *buffers)
 
 
 /**
- * Called via glReadBuffer.  As with st_DrawBuffers, we use this function
+ * Called via glReadBuffer.  As with st_DrawBuffer, we use this function
  * to check if we need to allocate a renderbuffer on demand.
  */
 static void
@@ -869,7 +868,7 @@ st_init_fbo_functions(struct dd_function_table *functions)
functions->FinishRenderTexture = st_finish_render_texture;
functions->ValidateFramebuffer = st_validate_framebuffer;
 
-   functions->DrawBuffers = st_DrawBuffers;
+   functions->DrawBuffer = st_DrawBuffer;
functions->ReadBuffer = st_ReadBuffer;
 
functions->MapRenderbuffer = st_MapRenderbuffer;
-- 

Re: [Mesa-dev] [RFC 3/3] mesa: call DrawBuffer(s) driver hook in update_framebuffer for windows-system FB

2018-04-13 Thread Timothy Arceri



On 14/04/18 14:45, Timothy Arceri wrote:

From: Boyan Ding 

When draw buffers are changed on a bound framebuffer, DrawBuffer(s) hook
should be called. However, it is missing in update_framebuffer with
window-system framebuffer, in which FB's draw buffer state should match
context state, potentially resulting in a change.

Note: This seems to be needed because gallium delays creating the front
buffer. i965 works fine without this change.


Meant to add here:

Fixes a number of Wine apps and the following piglit test on gallium:

./bin/glx-multi-context-front




V2 (Timothy Arceri):
  - Rebased on merged/simplified DrawBuffer driver function
  - Move DrawBuffer call outside fb->ColorDrawBuffer[0] !=
ctx->Color.DrawBuffer[0] check to make piglit pass.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99116
---
  src/mesa/main/framebuffer.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c
index 249e775f8cb..81d3b370e73 100644
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -623,6 +623,12 @@ update_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
   _mesa_drawbuffers(ctx, fb, ctx->Const.MaxDrawBuffers,
 ctx->Color.DrawBuffer, NULL);
}
+
+  /* Call device driver function if fb is the bound draw buffer. */
+  if (fb == ctx->DrawBuffer) {
+ if (ctx->Driver.DrawBuffer)
+ctx->Driver.DrawBuffer(ctx);
+  }
 }
 else {
/* This is a user-created framebuffer.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-13 Thread Dylan Baker
While I can't reproduce this on my machine (running Arch), I can reproduce it on
a machine running Ubuntu 16.04. I'll look into it more.

Dylan

Quoting Marek Olšák (2018-04-10 12:47:32)
> cmake .. -G Ninja -DCMAKE_INSTALL_PREFIX=/usr/llvm/x86_64-linux-gnu
> -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU" -DLLVM_ENABLE_ASSERTIONS=ON \
>   -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLLVM_BUILD_LLVM_DYLIB=ON
> -DLLVM_LINK_LLVM_DYLIB=ON \
>   -DLLVM_APPEND_VC_REV=OFF
> -DCMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO="-fuse-ld=gold" \
>   -DCMAKE_C_FLAGS_RELWITHDEBINFO="-O2 -g
> -fno-omit-frame-pointer" \
>   -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O2 -g
> -fno-omit-frame-pointer"
> 
> Marek
> 
> On Tue, Apr 10, 2018 at 3:11 PM, Dylan Baker  wrote:
> 
> Quoting Marek Olšák (2018-04-10 11:03:59)
> > On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:
> >
> >     Are you building LLVM yourself, or is that a build that comes with
> your
> >     distro?
> >     Also, what is your distro?
> >
> >
> > Ubuntu 16.04. LLVM is in /usr/llvm/ bin is not in PATH by default,
> include
> > is not in the include path by default, but lib is in the ld path.
> >
> > I build LLVM with shared libs myself.
> 
> with -DBUILD_SHARED_LIBS=1?
>
> Dylan
> 
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread Kenneth Graunke
On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:
> On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:
> > From: "Xiong, James" 
> > 
> > On non-LLC platforms, we malloc shadow batch/state buffers
> > of the same sizes as our batch/state buffers' GEM allocations.
> > However the buffer allocator reuses similar-sized gem objects,
> > it returns a buffer larger than we asked for in some cases
> > and we end up with smaller shadow buffers. If we utilize the
> > full-size of the over-allocated batch/state buffers, we may wind
> > up accessing beyond the bounds of the shadow buffers and cause
> > segmentation fault and/or memory corruption.
> 
> Oh, good catch!  We do indeed malloc too little if the bufmgr rounds up
> the BO size.  Thanks for finding this!
> 
> > A few examples:
> >              case   batch                    state
> >                     request  bo    shadow    request  bo    shadow
> > init         0      20K      20K   20K       16K      16K   16K
> > grow_buffer  1      30K      32K   30K       24K      24K   24K
> > grow_buffer  2      48K      48K   48K       36K      40K   36K
> > grow_buffer  3      72K      80K   72K       60K      64K   60K
> > grow_buffer  4      120K     128K  120K      -        -     -
> > 
> > batch #1, #3, #4; state #2 and #3 are problematic. We can change
> > the order to allocate the bo first, then allocate the shadow
> > buffer using the bo's size so that the shadow buffer have at
> > least an equivalent size of the gem allocation.
> > 
> > Another problem: even though the state/batch buffer could grow,
> > when checking if it runs out space, we always compare with the
> > initial batch/state sizes. To utilize the entire buffers, change
> > to compare with the actual sizes.
> 
> This is actually intentional.  Our goal is to flush batches when the
> amount of commands or state reaches those thresholds.  Occasionally,
> we'll be in the middle of emitting a draw, and unable to stop.  In that
> case, we grow the batch and keep going.  But after that, we're beyond
> our original target, so we flush next time.  We don't want to grow
> without bounds...it's meant more for emergencies, or if we've badly
> estimated the size of the draw call.
> 
> I've sent a simpler patch which I think should hopefully fix your bug:
> https://patchwork.freedesktop.org/patch/217107/

Lionel noticed that I botched that patch.  Here's an actual one:

https://patchwork.freedesktop.org/patch/217108/


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] i965: Fix shadow batches to be the same size as the real BO.

2018-04-13 Thread Kenneth Graunke
brw_bo_alloc may round up our allocation size to the next bucket size.
In this case, we would malloc a shadow buffer that was the original
intended size, but use bo->size (the larger size) for all of our checks.

This could cause us to run off the end of the shadow buffer.

v2: Actually use the new BO size (caught by Lionel)

Reported-by: James Xiong 
Fixes: c7dcee58b5fe183e1653c13bff6a212f0d157b29 (i965: Avoid problems from 
referencing orphaned BOs after growing.)
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 55889be7327..a29159e41ba 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -360,8 +360,11 @@ grow_buffer(struct brw_context *brw,
   /* We can't safely use realloc, as it may move the existing buffer,
* breaking existing pointers the caller may still be using.  Just
* malloc a new copy and memcpy it like the normal BO path.
+   *
+   * Use bo->size rather than new_size because the bufmgr may have
+   * rounded up the size, and we want the shadow size to match.
*/
-  grow->map = malloc(new_size);
+  grow->map = malloc(new_bo->size);
} else {
   grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
}
-- 
2.16.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread James Xiong
On Fri, 13 Apr 2018 14:33:09 -0700
Kenneth Graunke  wrote:

> On Friday, April 13, 2018 2:08:40 PM PDT James Xiong wrote:
> > On Fri, 13 Apr 2018 13:51:02 -0700
> > Kenneth Graunke  wrote:
> >   
> > > On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:  
> > > > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:
> > > > > From: "Xiong, James" 
> > > > > 
> > > > > On non-LLC platforms, we malloc shadow batch/state buffers
> > > > > of the same sizes as our batch/state buffers' GEM allocations.
> > > > > However the buffer allocator reuses similar-sized gem objects,
> > > > > it returns a buffer larger than we asked for in some cases
> > > > > and we end up with smaller shadow buffers. If we utilize the
> > > > > full-size of the over-allocated batch/state buffers, we may
> > > > > wind up accessing beyond the bounds of the shadow buffers and
> > > > > cause segmentation fault and/or memory corruption.
> > > > 
> > > > Oh, good catch!  We do indeed malloc too little if the bufmgr  
> > Thank you for taking time to review, Kenneth.  
> > > > 
> > > > > A few examples:
> > > > >              case   batch                    state
> > > > >                     request  bo    shadow    request  bo    shadow
> > > > > init         0      20K      20K   20K       16K      16K   16K
> > > > > grow_buffer  1      30K      32K   30K       24K      24K   24K
> > > > > grow_buffer  2      48K      48K   48K       36K      40K   36K
> > > > > grow_buffer  3      72K      80K   72K       60K      64K   60K
> > > > > grow_buffer  4      120K     128K  120K      -        -     -
> > > > > 
> > > > > batch #1, #3, #4; state #2 and #3 are problematic. We can
> > > > > change the order to allocate the bo first, then allocate the
> > > > > shadow buffer using the bo's size so that the shadow buffer
> > > > > have at least an equivalent size of the gem allocation.
> > > > > 
> > > > > Another problem: even though the state/batch buffer could
> > > > > grow, when checking if it runs out space, we always compare
> > > > > with the initial batch/state sizes. To utilize the entire
> > > > > buffers, change to compare with the actual sizes.
> > > > 
> > > > This is actually intentional.  Our goal is to flush batches
> > > > when the amount of commands or state reaches those thresholds.
> > > > Occasionally, we'll be in the middle of emitting a draw, and
> > > > unable to stop.  In that case, we grow the batch and keep
> > > > going.  But after that, we're beyond our original target, so we
> > > > flush next time.  We don't want to grow without bounds...it's
> > > > meant more for emergencies, or if we've badly estimated the
> > > > size of the draw call.  
> > I am not sure I get it. Let me give an example: the state buffer
> > gets grown once from 16K to 24K in brw_state_batch(), and the used_size
> > becomes 20K. Then brw_require_statebuffer_space(1024) gets called to
> > ask for 1K of space. With the original logic, it compares the used
> > size with 16K and flushes the batch even though the state buffer
> > still has 4K of space available?
> 
> Yes, the idea is to flush at around 16K of state.  If we happen to be
> in the middle of a draw and run out of space, we'll grow to 24K.
> Once it's over 16K, we flush as soon as we can.
> 
> We'd like to be fairly consistent on our batch size.  Running larger
> batches can lead to differences in performance, and large batches can
> lead to degradation in the interactivity of the system (especially on
> GPUs without preemption).
> 
> The hope is to grow once, at most.  If we check against the BO size,
> we might grow repeatedly, which would lead to really large batches and
> things would get out of hand.
I see, thanks for the explanation, Kenneth.
> 
> > > > 
> > > > I've sent a simpler patch which I think should hopefully fix
> > > > your bug: https://patchwork.freedesktop.org/patch/217107/
> > > 
> > > Lionel noticed that I botched that patch.  Here's an actual one:
> > > 
> > > https://patchwork.freedesktop.org/patch/217108/  
> > Yes it will fix the existing bug. However the assumption here is
> > that the init allocation size will NOT be rounded up as it happens
> > to be the bucket size.
> > I am working on an optimization to improve memory usage (that's how
> > I found this bug), and with it this assumption is no longer true.
> > Essentially the bufmgr could return a buffer with the same or
> > larger size whether it is same as the bucket's or not. Anyway I
> > guess I can send the fix later along with the optimization
> > patches.  
> 
> Ah, that's a good point.  Your patch also tries to use the BO size
> for the initial malloc as well, which is a good idea...
So what do you want? Do you want me to change this patch and send it for
review, or take yours for now?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread Kenneth Graunke
On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:
> From: "Xiong, James" 
> 
> On non-LLC platforms, we malloc shadow batch/state buffers
> of the same sizes as our batch/state buffers' GEM allocations.
> However the buffer allocator reuses similar-sized gem objects,
> it returns a buffer larger than we asked for in some cases
> and we end up with smaller shadow buffers. If we utilize the
> full-size of the over-allocated batch/state buffers, we may wind
> up accessing beyond the bounds of the shadow buffers and cause
> segmentation fault and/or memory corruption.

Oh, good catch!  We do indeed malloc too little if the bufmgr rounds up
the BO size.  Thanks for finding this!

> A few examples:
>              case   batch                    state
>                     request  bo    shadow    request  bo    shadow
> init         0      20K      20K   20K       16K      16K   16K
> grow_buffer  1      30K      32K   30K       24K      24K   24K
> grow_buffer  2      48K      48K   48K       36K      40K   36K
> grow_buffer  3      72K      80K   72K       60K      64K   60K
> grow_buffer  4      120K     128K  120K      -        -     -
> 
> batch #1, #3, #4; state #2 and #3 are problematic. We can change
> the order to allocate the bo first, then allocate the shadow
> buffer using the bo's size so that the shadow buffer have at
> least an equivalent size of the gem allocation.
> 
> Another problem: even though the state/batch buffer could grow,
> when checking if it runs out space, we always compare with the
> initial batch/state sizes. To utilize the entire buffers, change
> to compare with the actual sizes.

This is actually intentional.  Our goal is to flush batches when the
amount of commands or state reaches those thresholds.  Occasionally,
we'll be in the middle of emitting a draw, and unable to stop.  In that
case, we grow the batch and keep going.  But after that, we're beyond
our original target, so we flush next time.  We don't want to grow
without bounds...it's meant more for emergencies, or if we've badly
estimated the size of the draw call.

I've sent a simpler patch which I think should hopefully fix your bug:
https://patchwork.freedesktop.org/patch/217107/

> Cc: mesa-sta...@lists.freedesktop.org
> Signed-off-by: Xiong, James 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 49 
> +--
>  2 files changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index f049d08..39aae08 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -477,6 +477,7 @@ struct brw_growing_bo {
> struct brw_bo *partial_bo;
> uint32_t *partial_bo_map;
> unsigned partial_bytes;
> +   unsigned shadow_size;
>  };
>  
>  struct intel_batchbuffer {
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index 7286140..facbbf8 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -107,12 +107,6 @@ intel_batchbuffer_init(struct brw_context *brw)
>  
> batch->use_shadow_copy = !devinfo->has_llc;
>  
> -   if (batch->use_shadow_copy) {
> -  batch->batch.map = malloc(BATCH_SZ);
> -  batch->map_next = batch->batch.map;
> -  batch->state.map = malloc(STATE_SZ);
> -   }
> -
> init_reloc_list(&batch->batch_relocs, 250);
> init_reloc_list(&batch->state_relocs, 250);
>  
> @@ -212,10 +206,25 @@ intel_batchbuffer_reset(struct brw_context *brw)
> batch->last_bo = batch->batch.bo;
>  
> recreate_growing_buffer(brw, &batch->batch, "batchbuffer", BATCH_SZ);
> -   batch->map_next = batch->batch.map;
>  
> recreate_growing_buffer(brw, &batch->state, "statebuffer", STATE_SZ);
>  
> +   if (batch->use_shadow_copy) {
> +  if (batch->batch.shadow_size < batch->batch.bo->size) {
> + free(batch->batch.map);
> + batch->batch.map = malloc(batch->batch.bo->size);
> + batch->batch.shadow_size = batch->batch.bo->size;
> +  }
> +
> +  if (batch->state.shadow_size < batch->state.bo->size) {
> + free(batch->state.map);
> + batch->state.map = malloc(batch->state.bo->size);
> + batch->state.shadow_size = batch->state.bo->size;
> +  }
> +   }
> +
> +   batch->map_next = batch->batch.map;
> +
> /* Avoid making 0 a valid state offset - otherwise the decoder will try
>  * and decode data when we use offset 0 as a null pointer.
>  */
> @@ -361,7 +370,8 @@ grow_buffer(struct brw_context *brw,
> * breaking existing pointers the caller may still be using.  Just
> * malloc a new copy and memcpy it like the normal BO path.
> */
> -  grow->map = malloc(new_size);
> +  grow->map = malloc(new_bo->size);
> +  grow->shadow_size = new_bo->size;
> } else {
>grow->map = 

Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-13 Thread Tom Stellard
On 04/09/2018 02:27 PM, Marek Olšák wrote:
> See:
> https://cgit.freedesktop.org/mesa/mesa/commit/?id=f55d1f806e6b6c33af559de166d08ec8fa3ebe90
> 

This happens when mesa is built with rtti enabled and llvm is not.

-Tom

> Marek
> 
> On Mon, Apr 9, 2018 at 5:08 PM, Dylan Baker wrote:
> 
> Quoting Marek Olšák (2018-04-09 13:44:27)
> > meson fails to link LLVM on my setup, so I can't use it, therefore all 
> my meson
> > changes are untested.
> >
> > Even if meson worked, I have to use make, because that's what users use.
> >
> > This change simplifies the meson build too.
> >
> > Marek
> >
> 
> What happens with LLVM on your system?
> 
> Dylan
> 
> 
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-13 Thread Dylan Baker
Okay, I've figured it out. On my system RTTI is enabled by default, on ubuntu
16.04 RTTI is disabled. meson doesn't account for this in its llvm module,
because you would need to compile all C++ code with -fno-rtti to make it link
reliably.

short answer:
add `-DLLVM_ENABLE_RTTI=1` to the cmake invocation for LLVM
or 
`meson build --cpp_args="-fno-rtti" ...` for mesa

I can probably fix this in mesa, but it will require bumping the meson version.

Dylan

Quoting Marek Olšák (2018-04-10 12:47:32)
> cmake .. -G Ninja -DCMAKE_INSTALL_PREFIX=/usr/llvm/x86_64-linux-gnu
> -DLLVM_TARGETS_TO_BUILD="X86;AMDGPU" -DLLVM_ENABLE_ASSERTIONS=ON \
>   -DCMAKE_BUILD_TYPE=RelWithDebInfo -DLLVM_BUILD_LLVM_DYLIB=ON
> -DLLVM_LINK_LLVM_DYLIB=ON \
>   -DLLVM_APPEND_VC_REV=OFF
> -DCMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO="-fuse-ld=gold" \
>   -DCMAKE_C_FLAGS_RELWITHDEBINFO="-O2 -g
> -fno-omit-frame-pointer" \
>   -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O2 -g
> -fno-omit-frame-pointer"
> 
> Marek
> 
> On Tue, Apr 10, 2018 at 3:11 PM, Dylan Baker  wrote:
> 
> Quoting Marek Olšák (2018-04-10 11:03:59)
> > On Mon, Apr 9, 2018 at 5:37 PM, Dylan Baker  wrote:
> >
> >     Are you building LLVM yourself, or is that a build that comes with
> your
> >     distro?
> >     Also, what is your distro?
> >
> >
> > Ubuntu 16.04. LLVM is in /usr/llvm/ bin is not in PATH by default,
> include
> > is not in the include path by default, but lib is in the ld path.
> >
> > I build LLVM with shared libs myself.
> 
> with -DBUILD_SHARED_LIBS=1?
>
> Dylan
> 
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] i965: Fix shadow batches to be the same size as the real BO.

2018-04-13 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 13/04/18 13:49, Kenneth Graunke wrote:

brw_bo_alloc may round up our allocation size to the next bucket size.
In this case, we would malloc a shadow buffer that was the original
intended size, but use bo->size (the larger size) for all of our checks.

This could cause us to run off the end of the shadow buffer.

v2: Actually use the new BO size (caught by Lionel)

Reported-by: James Xiong 
Fixes: c7dcee58b5fe183e1653c13bff6a212f0d157b29 (i965: Avoid problems from 
referencing orphaned BOs after growing.)
---
  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 55889be7327..a29159e41ba 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -360,8 +360,11 @@ grow_buffer(struct brw_context *brw,
/* We can't safely use realloc, as it may move the existing buffer,
 * breaking existing pointers the caller may still be using.  Just
 * malloc a new copy and memcpy it like the normal BO path.
+   *
+   * Use bo->size rather than new_size because the bufmgr may have
+   * rounded up the size, and we want the shadow size to match.
 */
-  grow->map = malloc(new_size);
+  grow->map = malloc(new_bo->size);
 } else {
grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [AppVeyor] mesa master #7453 failed

2018-04-13 Thread AppVeyor



Build mesa 7453 failed


Commit 918b798668 by Marek Olšák on 4/5/2018 9:54 PM:

radeonsi: make sure CP DMA is idle at the end of IBs


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 40/45] swr/rast: minimize codegen redundant work

2018-04-13 Thread George Kyriazis
Move filtering of redundant codegen operations into gen scripts themselves
---
 .../drivers/swr/rasterizer/codegen/gen_archrast.py | 111 +
 .../drivers/swr/rasterizer/codegen/gen_backends.py |  97 +--
 .../drivers/swr/rasterizer/codegen/gen_common.py   | 131 +++--
 .../drivers/swr/rasterizer/codegen/gen_knobs.py|  53 ++---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  42 +--
 .../swr/rasterizer/codegen/gen_llvm_types.py   |  29 -
 6 files changed, 335 insertions(+), 128 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
index aa09f22..c5842aa 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py
@@ -24,7 +24,7 @@ from __future__ import print_function
 import os
 import sys
 import re
-from gen_common import ArgumentParser, MakoTemplateWriter
+from gen_common import *
 
 def parse_event_fields(lines, idx, event_dict):
 field_names = []
@@ -144,6 +144,10 @@ def main():
 print('Error: Could not find private proto file %s' % 
proto_private_filename, file=sys.stderr)
 return 1
 
+final_output_dir = output_dir
+MakeDir(final_output_dir)
+output_dir = MakeTmpDir('_codegen')
+
 protos = {}
 protos['events'] = {}   # event dictionary containing events with 
their fields
 protos['event_names'] = []  # needed to keep events in order parsed. dict 
is not ordered.
@@ -153,53 +157,64 @@ def main():
 parse_protos(protos, proto_filename)
 parse_protos(protos, proto_private_filename)
 
-# Generate event header
-if args.gen_event_hpp:
-curdir = os.path.dirname(os.path.abspath(__file__))
-template_file = os.sep.join([curdir, 'templates', 'gen_ar_event.hpp'])
-output_fullpath = os.sep.join([output_dir, output_filename])
-
-MakoTemplateWriter.to_file(template_file, output_fullpath,
-cmdline=sys.argv,
-filename=output_filename,
-protos=protos)
-
-# Generate event implementation
-if args.gen_event_cpp:
-curdir = os.path.dirname(os.path.abspath(__file__))
-template_file = os.sep.join([curdir, 'templates', 'gen_ar_event.cpp'])
-output_fullpath = os.sep.join([output_dir, output_filename])
-
-MakoTemplateWriter.to_file(template_file, output_fullpath,
-cmdline=sys.argv,
-filename=output_filename,
-protos=protos)
-
-# Generate event handler header
-if args.gen_eventhandler_hpp:
-curdir = os.path.dirname(os.path.abspath(__file__))
-template_file = os.sep.join([curdir, 'templates', 
'gen_ar_eventhandler.hpp'])
-output_fullpath = os.sep.join([output_dir, output_filename])
-
-MakoTemplateWriter.to_file(template_file, output_fullpath,
-cmdline=sys.argv,
-filename=output_filename,
-event_header='gen_ar_event.hpp',
-protos=protos)
-
-# Generate event handler header
-if args.gen_eventhandlerfile_hpp:
-curdir = os.path.dirname(os.path.abspath(__file__))
-template_file = os.sep.join([curdir, 'templates', 
'gen_ar_eventhandlerfile.hpp'])
-output_fullpath = os.sep.join([output_dir, output_filename])
-
-MakoTemplateWriter.to_file(template_file, output_fullpath,
-cmdline=sys.argv,
-filename=output_filename,
-event_header='gen_ar_eventhandler.hpp',
-protos=protos)
-
-return 0
+rval = 0
+
+try:
+# Generate event header
+if args.gen_event_hpp:
+curdir = os.path.dirname(os.path.abspath(__file__))
+template_file = os.sep.join([curdir, 'templates', 
'gen_ar_event.hpp'])
+output_fullpath = os.sep.join([output_dir, output_filename])
+
+MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
+filename=output_filename,
+protos=protos)
+
+# Generate event implementation
+if args.gen_event_cpp:
+curdir = os.path.dirname(os.path.abspath(__file__))
+template_file = os.sep.join([curdir, 'templates', 
'gen_ar_event.cpp'])
+output_fullpath = os.sep.join([output_dir, output_filename])
+
+MakoTemplateWriter.to_file(template_file, output_fullpath,
+cmdline=sys.argv,
+filename=output_filename,
+protos=protos)
+
+# Generate event handler header
+if args.gen_eventhandler_hpp:
+curdir = os.path.dirname(os.path.abspath(__file__))
+template_file = os.sep.join([curdir, 'templates', 
'gen_ar_eventhandler.hpp'])
+output_fullpath = os.sep.join([output_dir, 

[Mesa-dev] [PATCH 36/45] swr/rast: Type-check TemplateArgUnroller

2018-04-13 Thread George Kyriazis
Allows direct use of enum values in conversion to template args.
---
 src/gallium/drivers/swr/rasterizer/core/utils.h | 39 +
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/utils.h 
b/src/gallium/drivers/swr/rasterizer/core/utils.h
index c926f6a..d6cbf24 100644
--- a/src/gallium/drivers/swr/rasterizer/core/utils.h
+++ b/src/gallium/drivers/swr/rasterizer/core/utils.h
@@ -268,12 +268,15 @@ public:
 };
 
 // Ranged integer argument for TemplateArgUnroller
-template 
-struct IntArg
+template 
+struct RangedArg
 {
-uint32_t val;
+T val;
 };
 
+template 
+using IntArg = RangedArg;
+
 // Recursive template used to auto-nest conditionals.  Converts dynamic 
boolean function
 // arguments to static template arguments.
 template 
@@ -307,49 +310,49 @@ struct TemplateArgUnroller
 }
 
 //-
-// Integer value (within specified range)
+// Ranged value (within specified range)
 //-
 
 // Last Arg Terminator
-template 
-static typename TermT::FuncType GetFunc(IntArg iArg)
+template 
+static typename TermT::FuncType GetFunc(RangedArg iArg)
 {
 if (iArg.val == TMax)
 {
-return TermT::template GetFunc>();
+return TermT::template GetFunc>();
 }
 if (TMax > TMin)
 {
-return TemplateArgUnroller::GetFunc(IntArg{iArg.val});
+return TemplateArgUnroller::GetFunc(RangedArg{iArg.val});
 }
 SWR_ASSUME(false); return nullptr;
 }
-template 
-static typename TermT::FuncType GetFunc(IntArg iArg)
+template 
+static typename TermT::FuncType GetFunc(RangedArg iArg)
 {
 SWR_ASSERT(iArg.val == TVal);
-return TermT::template GetFunc>();
+return TermT::template GetFunc>();
 }
 
 // Recursively parse args
-template 
-static typename TermT::FuncType GetFunc(IntArg iArg, TArgsT... 
remainingArgs)
+template 
+static typename TermT::FuncType GetFunc(RangedArg iArg, 
TArgsT... remainingArgs)
 {
 if (iArg.val == TMax)
 {
-return TemplateArgUnroller>::GetFunc(remainingArgs...);
+return TemplateArgUnroller>::GetFunc(remainingArgs...);
 }
 if (TMax > TMin)
 {
-return TemplateArgUnroller::GetFunc(IntArg{iArg.val}, remainingArgs...);
+return TemplateArgUnroller::GetFunc(RangedArg{iArg.val}, remainingArgs...);
 }
 SWR_ASSUME(false); return nullptr;
 }
-template 
-static typename TermT::FuncType GetFunc(IntArg iArg, TArgsT... 
remainingArgs)
+template 
+static typename TermT::FuncType GetFunc(RangedArg iArg, 
TArgsT... remainingArgs)
 {
 SWR_ASSERT(iArg.val == TVal);
-return TemplateArgUnroller>::GetFunc(remainingArgs...);
+return TemplateArgUnroller>::GetFunc(remainingArgs...);
 }
 };
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/45] swr/rast: Add some archrast counters

2018-04-13 Thread George Kyriazis
Hook up archrast counters for shader stats: instructions executed.
---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   |  4 +--
 .../drivers/swr/rasterizer/archrast/events.proto   | 30 ++
 .../drivers/swr/rasterizer/core/backend.cpp|  1 +
 .../drivers/swr/rasterizer/core/backend_impl.h |  4 +++
 .../drivers/swr/rasterizer/core/backend_sample.cpp |  5 +++-
 .../swr/rasterizer/core/backend_singlesample.cpp   |  5 +++-
 .../drivers/swr/rasterizer/core/frontend.cpp   |  8 ++
 7 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp 
b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 12dfc0e..2184673 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -61,7 +61,7 @@ namespace ArchRast
 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If 
holds, its fine.
 };
 
-struct GSStats
+struct GSInfo
 {
 uint32_t inputPrimCount;
 uint32_t primGeneratedCount;
@@ -369,7 +369,7 @@ namespace ArchRast
 DepthStencilStats mDSOmZ = {};
 CStats mClipper = {};
 TEStats mTS = {};
-GSStats mGS = {};
+GSInfo mGS = {};
 RastStats rastStats = {};
 CullStats mCullStats = {};
 AlphaStats mAlphaStats = {};
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto 
b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index deb0373..f924b57 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -115,6 +115,36 @@ event FrontendStatsEvent
 uint64_t SoNumPrimsWritten3;
 };
 
+event VSStats
+{
+uint32_t numInstExecuted;
+};
+
+event HSStats
+{
+uint32_t numInstExecuted;
+};
+
+event DSStats
+{
+uint32_t numInstExecuted;
+};
+
+event GSStats
+{
+uint32_t numInstExecuted;
+};
+
+event PSStats
+{
+uint32_t numInstExecuted;
+};
+
+event CSStats
+{
+uint32_t numInstExecuted;
+};
+
 event BackendStatsEvent
 {
 uint32_t drawId;
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
index ccc7150..1e0769a 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend.cpp
@@ -81,6 +81,7 @@ void ProcessComputeBE(DRAW_CONTEXT* pDC, uint32_t workerId, 
uint32_t threadGroup
 state.pfnCsFunc(GetPrivateState(pDC), );
 
 UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
+AR_EVENT(CSStats(csContext.stats.numInstExecuted));
 
 RDTSC_END(BEDispatch, 1);
 }
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h 
b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index dd349a1..20b2ec5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -968,6 +968,10 @@ void BackendPixelRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_t
 UPDATE_STAT_BE(PsInvocations, 
_mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
 RDTSC_END(BEPixelShader, 0);
 
+// update stats
+UPDATE_STAT_BE(PsInvocations, 
_mm_popcnt_u32(_simd_movemask_ps(activeLanes)));
+AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+
 // update active lanes to remove any discarded or oMask'd pixels
 activeLanes = _simd_castsi_ps(_simd_and_si(psContext.activeMask, 
_simd_cmpgt_epi32(psContext.oMask, _simd_setzero_si(;
 if(!_simd_movemask_ps(activeLanes)) { goto Endtile; };
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp 
b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
index 4982025..c7c6c533 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_sample.cpp
@@ -163,10 +163,13 @@ void BackendSampleRate(DRAW_CONTEXT *pDC, uint32_t 
workerId, uint32_t x, uint32_
 
 // execute pixel shader
 RDTSC_BEGIN(BEPixelShader, pDC->drawId);
-UPDATE_STAT_BE(PsInvocations, 
_mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
 state.psState.pfnPixelShader(GetPrivateState(pDC), 
);
 RDTSC_END(BEPixelShader, 0);
 
+// update stats
+UPDATE_STAT_BE(PsInvocations, 
_mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
+AR_EVENT(PSStats(psContext.stats.numInstExecuted));
+
 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
 
 // late-Z
diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp 
b/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp
index 452fba1..26d5a75 100644
--- 

[Mesa-dev] [PATCH 25/45] swr/rast: Enable generalized fetch jit

2018-04-13 Thread George Kyriazis
Enable generalized fetch jit with 8 or 16 wide SIMD target. Still some
work needed to remove some simd8 double pumping for 16-wide target.

Also removed unused non-gather load vertices path.
---
 .../drivers/swr/rasterizer/jitter/builder.cpp  |   26 +-
 .../drivers/swr/rasterizer/jitter/builder.h|4 +-
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |   69 +-
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 1197 +++-
 .../drivers/swr/rasterizer/jitter/fetch_jit.h  |6 +-
 5 files changed, 169 insertions(+), 1133 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
index 625f132..53947c3 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -66,16 +66,7 @@ namespace SwrJit
 mSimd4FP64Ty = VectorType::get(mDoubleTy, 4);
 
 // Built in types: target simd
-
-mSimdInt1Ty = VectorType::get(mInt1Ty,  mVWidth);
-mSimdInt16Ty= VectorType::get(mInt16Ty, mVWidth);
-mSimdInt32Ty= VectorType::get(mInt32Ty, mVWidth);
-mSimdInt64Ty= VectorType::get(mInt64Ty, mVWidth);
-mSimdFP16Ty = VectorType::get(mFP16Ty,  mVWidth);
-mSimdFP32Ty = VectorType::get(mFP32Ty,  mVWidth);
-mSimdVectorTy   = ArrayType::get(mSimdFP32Ty, 4);
-mSimdVectorIntTy= ArrayType::get(mSimdInt32Ty, 4);
-mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
+SetTargetWidth(pJitMgr->mVWidth);
 
 // Built in types: simd16
 
@@ -105,4 +96,19 @@ namespace SwrJit
 mSimd16IntPtrTy = mSimd16Int64Ty;
 }
 }
+
+void Builder::SetTargetWidth(uint32_t width)
+{
+mVWidth = width;
+
+mSimdInt1Ty = VectorType::get(mInt1Ty, mVWidth);
+mSimdInt16Ty = VectorType::get(mInt16Ty, mVWidth);
+mSimdInt32Ty = VectorType::get(mInt32Ty, mVWidth);
+mSimdInt64Ty = VectorType::get(mInt64Ty, mVWidth);
+mSimdFP16Ty = VectorType::get(mFP16Ty, mVWidth);
+mSimdFP32Ty = VectorType::get(mFP32Ty, mVWidth);
+mSimdVectorTy = ArrayType::get(mSimdFP32Ty, 4);
+mSimdVectorIntTy = ArrayType::get(mSimdInt32Ty, 4);
+mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
+}
 }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 6b2c9f0..4c79bab 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -46,7 +46,7 @@ namespace SwrJit
 JitManager *mpJitMgr;
 IRBuilder<> *mpIRBuilder;
 
-uint32_t mVWidth;   // vector width simd8
+uint32_t mVWidth;   // vector width target simd
 uint32_t mVWidth16; // vector width simd16
 
 // Built in types: scalar
@@ -95,6 +95,8 @@ namespace SwrJit
 
 Type*mSimd32Int8Ty;
 
+void SetTargetWidth(uint32_t width);
+
 #include "gen_builder.hpp"
 #include "gen_builder_meta.hpp"
 #include "gen_builder_intrin.hpp"
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 0550493..4840fef 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -38,6 +38,7 @@ namespace SwrJit
 {
 void Builder::AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage)
 {
+SWR_ASSERT(ptr->getType() != mInt64Ty, "Address appears to be GFX 
access.  Requires translation through BuilderGfxMem.");
 }
 
 Value *Builder::GEP(Value* ptr, const std::initializer_list 
)
@@ -175,78 +176,14 @@ namespace SwrJit
 {
 AssertRastyMemoryParams(pBase, usage);
 
-Value* vGather;
-
-// use avx2 gather instruction if available
-if (JM()->mArch.AVX2())
-{
-vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale));
-}
-else
-{
-Value* pStack = STACKSAVE();
-
-// store vSrc on the stack.  this way we can select between a 
valid load address and the vSrc address
-Value* vSrcPtr = ALLOCA(vSrc->getType());
-STORE(vSrc, vSrcPtr);
-
-vGather = VUNDEF_I();
-Value *vScaleVec = VIMMED1((uint32_t)scale);
-Value *vOffsets = MUL(vIndices, vScaleVec);
-for (uint32_t i = 0; i < mVWidth; ++i)
-{
-// single component byte index
-Value *offset = VEXTRACT(vOffsets, C(i));
-// byte pointer to component
-Value *loadAddress = GEP(pBase, offset);
-loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 
0));
-// pointer to the value to load if we're masking off a 

[Mesa-dev] [PATCH 32/45] swr/rast: Fix alloca usage in jitter

2018-04-13 Thread George Kyriazis
Fix issue where temporary allocas were getting hoisted to function entry
unnecessarily. We now explicitly mark temporary allocas and skip hoisting
during the hoist pass. Should reduce stack usage.
---
 src/gallium/drivers/swr/rasterizer/jitter/builder.cpp   | 17 +
 src/gallium/drivers/swr/rasterizer/jitter/builder.h |  2 ++
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp   |  1 +
 3 files changed, 20 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
index 53947c3..bd81560 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -111,4 +111,21 @@ namespace SwrJit
 mSimdVectorIntTy = ArrayType::get(mSimdInt32Ty, 4);
 mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
 }
+
+/// @brief Mark this alloca as temporary to avoid hoisting later on
+void Builder::SetTempAlloca(Value* inst)
+{
+AllocaInst* pAlloca = dyn_cast(inst);
+SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
+MDNode* N = MDNode::get(JM()->mContext, MDString::get(JM()->mContext, 
"is_temp_alloca"));
+pAlloca->setMetadata("is_temp_alloca", N);
+}
+
+bool Builder::IsTempAlloca(Value* inst)
+{
+AllocaInst* pAlloca = dyn_cast(inst);
+SWR_ASSERT(pAlloca, "Unexpected non-alloca instruction");
+
+return (pAlloca->getMetadata("is_temp_alloca") != nullptr);
+}
 }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 4c79bab..27a32bc 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -96,6 +96,8 @@ namespace SwrJit
 Type*mSimd32Int8Ty;
 
 void SetTargetWidth(uint32_t width);
+void SetTempAlloca(Value* inst);
+bool IsTempAlloca(Value* inst);
 
 #include "gen_builder.hpp"
 #include "gen_builder_meta.hpp"
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index cd9806a..5d8637e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -229,6 +229,7 @@ namespace SwrJit
 
 // store vSrc on the stack.  this way we can select between a 
valid load address and the vSrc address
 Value* vSrcPtr = ALLOCA(vSrc->getType());
+SetTempAlloca(vSrcPtr);
 STORE(vSrc, vSrcPtr);
 
 vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/45] swr/rast: WIP builder rewrite.

2018-04-13 Thread George Kyriazis
Start removing avx2 macros for functionality that exists in llvm.
---
 src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 5 -
 src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h | 9 -
 2 files changed, 14 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 113c616..3e1fbfe 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -53,12 +53,7 @@ intrinsics = [
 ['VMINPS', 'x86_avx_min_ps_256', ['a', 'b']],
 ['VMAXPS', 'x86_avx_max_ps_256', ['a', 'b']],
 ['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding']],
-['VCMPPS', 'x86_avx_cmp_ps_256', ['a', 'b', 'cmpop']],
-['VBLENDVPS', 'x86_avx_blendv_ps_256', ['a', 'b', 'mask']],
 ['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control']],
-['VMASKLOADD', 'x86_avx2_maskload_d_256', ['src', 'mask']],
-['VMASKMOVPS', 'x86_avx_maskload_ps_256', ['src', 'mask']],
-['VMASKSTOREPS', 'x86_avx_maskstore_ps_256', ['src', 'mask', 'val']],
 ['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b']],
 ['VPERMD', 'x86_avx2_permd', ['a', 'idx']],
 ['VPERMPS', 'x86_avx2_permps', ['idx', 'a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 9660bc6..549f328 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -96,15 +96,6 @@ CallInst *CALL(Value *Callee, Value* arg);
 CallInst *CALL2(Value *Callee, Value* arg1, Value* arg2);
 CallInst *CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3);
 
-Value *VCMPPS_EQ(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_EQ_OQ)); }
-Value *VCMPPS_LT(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_LT_OQ)); }
-Value *VCMPPS_LE(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_LE_OQ)); }
-Value *VCMPPS_ISNAN(Value* a, Value* b) { return VCMPPS(a, b, 
C((uint8_t)_CMP_UNORD_Q)); }
-Value *VCMPPS_NEQ(Value* a, Value* b)   { return VCMPPS(a, b, 
C((uint8_t)_CMP_NEQ_OQ)); }
-Value *VCMPPS_GE(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_GE_OQ)); }
-Value *VCMPPS_GT(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_GT_OQ)); }
-Value *VCMPPS_NOTNAN(Value* a, Value* b){ return VCMPPS(a, b, 
C((uint8_t)_CMP_ORD_Q)); }
-
 Value *MASK(Value *vmask);
 Value *MASK_16(Value *vmask);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/45] swr/rast: Add "Num Instructions Executed" stats intrinsic.

2018-04-13 Thread George Kyriazis
Added a SWR_SHADER_STATS structure which is passed to each shader. The
stats pass will instrument the shader to populate this.
---
 src/gallium/drivers/swr/rasterizer/core/state.h | 28 ++---
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index 22acbe0..47ffacf 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -214,6 +214,15 @@ struct SIMDVERTEX_T
 };
 
 //
+/// SWR_SHADER_STATS
+/// @brief Structure passed to shader for stats collection.
+/
+struct SWR_SHADER_STATS
+{
+uint32_t numInstExecuted; // This is roughly the API instructions executed 
and not x86.
+};
+
+//
 /// SWR_VS_CONTEXT
 /// @brief Input to vertex shader
 /
@@ -232,6 +241,7 @@ struct SWR_VS_CONTEXT
 simd16scalari VertexID16;   // IN: Vertex ID (16-wide)
 #endif
 #endif
+SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
 };
 
 /
@@ -281,6 +291,7 @@ struct SWR_HS_CONTEXT
 simdscalari mask;   // IN: Active mask for shader
 ScalarPatch* pCPout;// OUT: Output control point patch
 // SIMD-sized-array of SCALAR patches
+SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
 };
 
 //
@@ -298,6 +309,7 @@ struct SWR_DS_CONTEXT
 simdscalar* pDomainV;   // IN: (SIMD) Domain Point V coords
 simdscalari mask;   // IN: Active mask for shader
 simdscalar* pOutputData;// OUT: (SIMD) Vertex Attributes (2D array 
of vectors, one row per attribute-component)
+SWR_SHADER_STATS stats; // OUT: shader statistics used for 
archrast.
 };
 
 //
@@ -312,6 +324,7 @@ struct SWR_GS_CONTEXT
 uint32_t InstanceID;// IN: input instance ID
 simdscalari mask;   // IN: Active mask for shader
 uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains 
vertices for all output streams)
+SWR_SHADER_STATS stats; // OUT: shader statistics used for 
archrast.
 };
 
 struct PixelPositions
@@ -358,6 +371,8 @@ struct SWR_PS_CONTEXT
 uint32_t rasterizerSampleCount; // IN: sample count used by the 
rasterizer
 
 uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render 
target hottiles
+
+SWR_SHADER_STATS stats; // OUT: shader statistics used for 
archrast.
 };
 
 //
@@ -391,14 +406,13 @@ struct SWR_CS_CONTEXT
 // Dispatch dimensions used by shader to compute system values from the 
tile counter.
 uint32_t dispatchDims[3];
 
-uint8_t* pTGSM;  // Thread Group Shared Memory pointer.
-
-uint8_t* pSpillFillBuffer;  // Spill/fill buffer for barrier support
-
-uint8_t* pScratchSpace; // Pointer to scratch space buffer used by the 
shader, shader is responsible
-// for subdividing scratch space per 
instance/simd
-
+uint8_t* pTGSM;   // Thread Group Shared Memory pointer.
+uint8_t* pSpillFillBuffer;// Spill/fill buffer for barrier support
+uint8_t* pScratchSpace;   // Pointer to scratch space buffer used by 
the shader, shader is responsible
+  // for subdividing scratch space per 
instance/simd
 uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
+
+SWR_SHADER_STATS stats;   // OUT: shader statistics used for archrast.
 };
 
 // enums
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 43/45] swr/rast: Optimize late/bindless JIT of samplers

2018-04-13 Thread George Kyriazis
Add per-worker thread private data to all shader calls
Add per-worker sampler cache and jit context
Add late LoadTexel JIT support
Add per-worker-thread Sampler / LoadTexel JIT
---
 src/gallium/drivers/swr/rasterizer/core/api.cpp|  7 ++-
 src/gallium/drivers/swr/rasterizer/core/api.h  | 47 +++
 .../drivers/swr/rasterizer/core/backend.cpp|  9 +--
 src/gallium/drivers/swr/rasterizer/core/backend.h  |  4 +-
 .../drivers/swr/rasterizer/core/backend_clear.cpp  | 19 +++---
 .../drivers/swr/rasterizer/core/backend_impl.h |  7 ++-
 .../drivers/swr/rasterizer/core/backend_sample.cpp |  5 +-
 .../swr/rasterizer/core/backend_singlesample.cpp   |  6 +-
 src/gallium/drivers/swr/rasterizer/core/binner.cpp |  2 +-
 src/gallium/drivers/swr/rasterizer/core/context.h  |  3 +-
 .../drivers/swr/rasterizer/core/frontend.cpp   | 29 +
 .../drivers/swr/rasterizer/core/rasterizer.cpp |  4 +-
 .../drivers/swr/rasterizer/core/rasterizer_impl.h  | 15 ++---
 src/gallium/drivers/swr/rasterizer/core/state.h| 18 +++---
 .../drivers/swr/rasterizer/core/threads.cpp| 68 +-
 src/gallium/drivers/swr/rasterizer/core/threads.h  |  5 +-
 .../drivers/swr/rasterizer/core/tilemgr.cpp| 21 +++
 src/gallium/drivers/swr/rasterizer/core/tilemgr.h  |  4 +-
 .../drivers/swr/rasterizer/jitter/JitManager.cpp   | 16 ++---
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|  8 ++-
 .../drivers/swr/rasterizer/memory/ClearTile.cpp|  1 +
 .../drivers/swr/rasterizer/memory/LoadTile.cpp |  1 +
 .../drivers/swr/rasterizer/memory/StoreTile.cpp|  1 +
 src/gallium/drivers/swr/swr_memory.h   |  9 ++-
 src/gallium/drivers/swr/swr_shader.cpp |  9 +++
 25 files changed, 213 insertions(+), 105 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp 
b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 3141db6..e37e2e4 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1,5 +1,5 @@
 /
-* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -122,6 +122,11 @@ HANDLE SwrCreateContext(
 pContext->apiThreadInfo.numAPIThreadsPerCore= 1;
 }
 
+if (pCreateInfo->pWorkerPrivateState)
+{
+pContext->workerPrivateState = *pCreateInfo->pWorkerPrivateState;
+}
+
 memset(>WaitLock, 0, sizeof(pContext->WaitLock));
 memset(>FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty));
 new (>WaitLock) std::mutex();
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.h 
b/src/gallium/drivers/swr/rasterizer/core/api.h
index 7247fa4..b171188 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.h
+++ b/src/gallium/drivers/swr/rasterizer/core/api.h
@@ -1,5 +1,5 @@
 /
-* Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -115,7 +115,8 @@ struct SWR_RECT
 /// @param x - destination x coordinate
 /// @param y - destination y coordinate
 /// @param pDstHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, SWR_FORMAT 
dstFormat,
+typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE hPrivateContext, HANDLE 
hWorkerPrivateData,
+SWR_FORMAT dstFormat,
 SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
 uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t 
*pDstHotTile);
 
@@ -127,7 +128,8 @@ typedef void(SWR_API *PFN_LOAD_TILE)(HANDLE 
hPrivateContext, SWR_FORMAT dstForma
 /// @param x - destination x coordinate
 /// @param y - destination y coordinate
 /// @param pSrcHotTile - pointer to the hot tile surface
-typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, SWR_FORMAT 
srcFormat,
+typedef void(SWR_API *PFN_STORE_TILE)(HANDLE hPrivateContext, HANDLE 
hWorkerPrivateData,
+SWR_FORMAT srcFormat,
 SWR_RENDERTARGET_ATTACHMENT renderTargetIndex,
 uint32_t x, uint32_t y, uint32_t renderTargetArrayIndex, uint8_t 
*pSrcHotTile);
 
@@ -139,7 +141,7 @@ typedef void(SWR_API *PFN_STORE_TILE)(HANDLE 
hPrivateContext, SWR_FORMAT srcForm
 /// @param y - destination y coordinate
 /// @param renderTargetArrayIndex - render target array offset from arrayIndex
 /// @param pClearColor - pointer to the hot tile's clear value
-typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext,
+typedef void(SWR_API *PFN_CLEAR_TILE)(HANDLE hPrivateContext, HANDLE 

[Mesa-dev] [PATCH 34/45] swr/rast: fix comment

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 8d659d0..cdfddf3 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -970,7 +970,7 @@ extern "C" void GetSimdValid16bitIndicesGfx(gfxptr_t 
indices, gfxptr_t lastIndex
 
 //
 /// @brief Loads a simd of valid indices. OOB indices are set to 0
-/// *Note* have to do 16bit index checking in scalar until we have AVX-512
+/// *Note* have to do 8bit index checking in scalar until we have AVX-512
 /// support
 /// @param pIndices - pointer to 8 bit indices
 /// @param pLastIndex - pointer to last valid index
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 44/45] swr/rast: Replace x86 VMOVMSK with llvm-only implementation

2018-04-13 Thread George Kyriazis
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  1 -
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |  2 +-
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 25 --
 .../drivers/swr/rasterizer/jitter/builder_misc.h   |  2 ++
 .../rasterizer/jitter/functionpasses/lower_x86.cpp |  1 -
 5 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 9c1e9e0..bced657 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -58,7 +58,6 @@ intrinsics = [
 ['VPTESTC', ['a', 'b'], 'mInt32Ty'],
 ['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
 ['VFMADDPS',['a', 'b', 'c'], 'a'],
-['VMOVMSKPS',   ['a'], 'mInt32Ty'],
 ['VPHADDD', ['a', 'b'], 'a'],
 ['PDEP32',  ['a', 'b'], 'a'],
 ['RDTSC',   [], 'mInt64Ty'],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 3c3c157..f9f3e92 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -608,7 +608,7 @@ namespace SwrJit
 pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
 pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, 
PointerType::get(mInt32Ty, 0));
 
-Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));
+Value* pMask = VMOVMSK(vMask);
 
 // Setup loop basic block
 BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, 
"Scatter_Loop", pFunc);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index aa9e2dd..f893693 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -525,6 +525,28 @@ namespace SwrJit
 return S_EXT(mask, mSimd16Int32Ty);
 }
 
+/// @brief Convert  llvm mask to integer
+Value *Builder::VMOVMSK(Value* mask)
+{
+SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
+uint32_t numLanes = mask->getType()->getVectorNumElements();
+Value* i32Result;
+if (numLanes == 8)
+{
+i32Result = BITCAST(mask, mInt8Ty);
+}
+else if (numLanes == 16)
+{
+i32Result = BITCAST(mask, mInt16Ty);
+}
+else
+{
+SWR_ASSERT("Unsupported vector width");
+i32Result = BITCAST(mask, mInt8Ty);
+}
+return Z_EXT(i32Result, mInt32Ty);
+}
+
 //
 /// @brief Generate a VPSHUFB operation in LLVM IR.  If not  
 /// supported on the underlying platform, emulate it
@@ -768,8 +790,7 @@ namespace SwrJit
 /// @brief pop count on vector mask (e.g. <8 x i1>)
 Value* Builder::VPOPCNT(Value* a)
 {
-Value* b = BITCAST(VMASK(a), mSimdFP32Ty);
-return POPCNT(VMOVMSKPS(b));
+return POPCNT(VMOVMSK(a));
 }
 
 //
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 7308821..bd4be9f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -102,6 +102,8 @@ Value *MASK_16(Value *vmask);
 Value *VMASK(Value *mask);
 Value *VMASK_16(Value *mask);
 
+Value *VMOVMSK(Value *mask);
+
 //
 /// @brief functions that build IR to call x86 intrinsics directly, or
 /// emulate them with other instructions if not available on the host
diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 7cfa772..856d67d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -79,7 +79,6 @@ namespace SwrJit
 {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
 {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
 {"meta.intrinsic.VFMADDPS",Intrinsic::x86_fma_vfmadd_ps_256},
-{"meta.intrinsic.VMOVMSKPS",   Intrinsic::x86_avx_movmsk_ps_256},
 {"meta.intrinsic.VPHADDD", Intrinsic::x86_avx2_phadd_d},
 {"meta.intrinsic.PDEP32",  Intrinsic::x86_bmi_pdep_32},
 {"meta.intrinsic.RDTSC",   Intrinsic::x86_rdtsc},
-- 
2.7.4

___
mesa-dev mailing list

[Mesa-dev] [PATCH 20/45] swr/rast: Start refactoring of builder/packetizer.

2018-04-13 Thread George Kyriazis
Move x86 intrinsic lowering to a separate pass. Builder now instantiates
generic intrinsics for features not supported by llvm. The separate x86
lowering pass is responsible for lowering to valid x86 for the target
SIMD architecture. Currently it's a port of existing code to get it
up and running quickly. Will eventually support optimized x86 for AVX,
AVX2 and AVX512.
---
 src/gallium/drivers/swr/Makefile.am|   6 +-
 src/gallium/drivers/swr/Makefile.sources   |   3 +-
 src/gallium/drivers/swr/SConscript |   4 +-
 src/gallium/drivers/swr/meson.build|   3 +-
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  58 +--
 .../drivers/swr/rasterizer/codegen/meson.build |   2 +-
 .../rasterizer/codegen/templates/gen_builder.hpp   |  11 +-
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp|   3 +
 .../drivers/swr/rasterizer/jitter/builder.cpp  |   4 +
 .../drivers/swr/rasterizer/jitter/builder.h|   6 +-
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |   5 +-
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|   3 +
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 455 +
 .../swr/rasterizer/jitter/functionpasses/passes.h  |  37 ++
 .../drivers/swr/rasterizer/jitter/meson.build  |   8 +-
 .../swr/rasterizer/jitter/streamout_jit.cpp|   3 +
 16 files changed, 565 insertions(+), 46 deletions(-)
 create mode 100644 
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
 create mode 100644 
src/gallium/drivers/swr/rasterizer/jitter/functionpasses/passes.h

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 32dd9e5..c22f09e 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -80,7 +80,7 @@ BUILT_SOURCES = \
rasterizer/codegen/gen_knobs.h \
rasterizer/jitter/gen_state_llvm.h \
rasterizer/jitter/gen_builder.hpp \
-   rasterizer/jitter/gen_builder_x86.hpp \
+   rasterizer/jitter/gen_builder_meta.hpp \
rasterizer/jitter/gen_builder_intrin.hpp \
rasterizer/archrast/gen_ar_event.hpp \
rasterizer/archrast/gen_ar_event.cpp \
@@ -134,12 +134,12 @@ rasterizer/jitter/gen_builder.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py rast
--output rasterizer/jitter \
--gen_h
 
-rasterizer/jitter/gen_builder_x86.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
+rasterizer/jitter/gen_builder_meta.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
$(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \
--output rasterizer/jitter \
-   --gen_x86_h
+   --gen_meta_h
 
 rasterizer/jitter/gen_builder_intrin.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
diff --git a/src/gallium/drivers/swr/Makefile.sources 
b/src/gallium/drivers/swr/Makefile.sources
index 4924da1..a7fcba8 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -152,7 +152,8 @@ JITTER_CXX_SOURCES := \
rasterizer/jitter/JitManager.h \
rasterizer/jitter/streamout_jit.cpp \
rasterizer/jitter/streamout_jit.h \
-   rasterizer/jitter/shader_lib/DebugOutput.cpp
+   rasterizer/jitter/shader_lib/DebugOutput.cpp \
+   rasterizer/jitter/functionpasses/lower_x86.cpp
 
 MEMORY_CXX_SOURCES := \
rasterizer/memory/ClearTile.cpp \
diff --git a/src/gallium/drivers/swr/SConscript 
b/src/gallium/drivers/swr/SConscript
index 5097be6..528cfac 100644
--- a/src/gallium/drivers/swr/SConscript
+++ b/src/gallium/drivers/swr/SConscript
@@ -76,10 +76,10 @@ Depends('rasterizer/jitter/gen_builder.hpp',
 swrroot + 'rasterizer/codegen/templates/gen_builder.hpp')
 
 env.CodeGenerate(
-target = 'rasterizer/jitter/gen_builder_x86.hpp',
+target = 'rasterizer/jitter/gen_builder_meta.hpp',
 script = swrroot + 'rasterizer/codegen/gen_llvm_ir_macros.py',
 source = '',
-command = python_cmd + ' $SCRIPT --output ' + bldroot + 
'/rasterizer/jitter --gen_x86_h'
+command = python_cmd + ' $SCRIPT --output ' + bldroot + 
'/rasterizer/jitter --gen_meta_h'
 )
 Depends('rasterizer/jitter/gen_builder.hpp',
 swrroot + 'rasterizer/codegen/templates/gen_builder.hpp')
diff --git a/src/gallium/drivers/swr/meson.build 
b/src/gallium/drivers/swr/meson.build
index 3848232..949f582 100644
--- a/src/gallium/drivers/swr/meson.build
+++ b/src/gallium/drivers/swr/meson.build
@@ -80,6 +80,7 @@ files_swr_mesa = files(
   'rasterizer/jitter/streamout_jit.cpp',
   'rasterizer/jitter/streamout_jit.h',
   'rasterizer/jitter/shader_lib/DebugOutput.cpp',
+  

[Mesa-dev] [PATCH 24/45] swr/rast: Add builder_gfx_mem.{h|cpp}

2018-04-13 Thread George Kyriazis
Needed to support full translation.  Builder_gfx_mem will convert gfxptr_t
from 64 bit int to regular pointer types for use by builder_mem.
---
 src/gallium/drivers/swr/Makefile.sources   |   2 +
 src/gallium/drivers/swr/meson.build|   2 +
 .../swr/rasterizer/jitter/builder_gfx_mem.cpp  | 136 +
 .../swr/rasterizer/jitter/builder_gfx_mem.h|  67 ++
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|   7 +-
 5 files changed, 210 insertions(+), 4 deletions(-)
 create mode 100644 
src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
 create mode 100644 src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h

diff --git a/src/gallium/drivers/swr/Makefile.sources 
b/src/gallium/drivers/swr/Makefile.sources
index a7fcba8..dd815dc 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -142,6 +142,8 @@ JITTER_CXX_SOURCES := \
rasterizer/jitter/builder_math.h \
rasterizer/jitter/builder_mem.cpp \
rasterizer/jitter/builder_mem.h \
+   rasterizer/jitter/builder_gfx_mem.cpp \
+   rasterizer/jitter/builder_gfx_mem.h \
rasterizer/jitter/builder_misc.cpp \
rasterizer/jitter/builder_misc.h \
rasterizer/jitter/fetch_jit.cpp \
diff --git a/src/gallium/drivers/swr/meson.build 
b/src/gallium/drivers/swr/meson.build
index 949f582..1cb40f8 100644
--- a/src/gallium/drivers/swr/meson.build
+++ b/src/gallium/drivers/swr/meson.build
@@ -70,6 +70,8 @@ files_swr_mesa = files(
   'rasterizer/jitter/builder_math.h',
   'rasterizer/jitter/builder_mem.cpp',
   'rasterizer/jitter/builder_mem.h',
+  'rasterizer/jitter/builder_gfx_mem.cpp',
+  'rasterizer/jitter/builder_gfx_mem.h',
   'rasterizer/jitter/builder_misc.cpp',
   'rasterizer/jitter/builder_misc.h',
   'rasterizer/jitter/fetch_jit.cpp',
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
new file mode 100644
index 000..bfb3057
--- /dev/null
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -0,0 +1,136 @@
+/
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file builder_gfx_mem.cpp
+*
+* @brief Definition of the gfx mem builder
+*
+* Notes:
+*
+**/
+#include "jit_pch.hpp"
+#include "builder.h"
+#include "common/rdtsc_buckets.h"
+#include "builder_gfx_mem.h"
+
+
+namespace SwrJit
+{
+using namespace llvm;
+
+BuilderGfxMem::BuilderGfxMem(JitManager* pJitMgr) :
+Builder(pJitMgr)
+{
+mpfnTranslateGfxAddress = nullptr;
+mpParamSimDC = nullptr;
+}
+
+void BuilderGfxMem::NotifyPrivateContextSet()
+{
+}
+
+void BuilderGfxMem::AssertGFXMemoryParams(Value* ptr, 
Builder::JIT_MEM_CLIENT usage)
+{
+SWR_ASSERT(ptr->getType() == mInt64Ty, "GFX addresses must be gfxptr_t 
and not converted to system pointers.");
+SWR_ASSERT(usage != MEM_CLIENT_RASTY, "Rasty memory should not go 
through the translation path and should not be gfxptr_t.");
+}
+
+//
+/// @brief Generate a masked gather operation in LLVM IR.  If not  
+/// supported on the underlying platform, emulate it with loads
+/// @param vSrc - SIMD wide value that will be loaded if mask is invalid
+/// @param pBase - Int8* base VB address pointer value
+/// @param vIndices - SIMD wide value of VB byte offsets
+/// @param vMask - SIMD wide mask that controls whether to access memory 
or the src values
+/// @param scale - value to scale indices by
+Value *BuilderGfxMem::GATHERPS(Value *vSrc, Value *pBase, 

[Mesa-dev] [PATCH 42/45] swr/rast: Implement VROUND intrinsic in x86 lowering pass

2018-04-13 Thread George Kyriazis
---
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 38 +-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 983b227..7cfa772 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -72,7 +72,6 @@ namespace SwrJit
 // Map of intrinsics that haven't been moved to the new mechanism yet. If 
used, these get the previous behavior of
 // mapping directly to avx/avx2 intrinsics.
 static std::map<std::string, Intrinsic::ID> intrinsicMap = {
-{"meta.intrinsic.VROUND",  Intrinsic::x86_avx_round_ps_256},
 {"meta.intrinsic.BEXTR_32",Intrinsic::x86_bmi_bextr_32},
 {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
 {"meta.intrinsic.VCVTPS2PH",   Intrinsic::x86_vcvtps2ph_256},
@@ -90,6 +89,8 @@ namespace SwrJit
 Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, 
CallInst* pCallInst);
 Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
 Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
+Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
+
 Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst, Intrinsic::ID intrin);
 
 static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
@@ -105,6 +106,7 @@ namespace SwrJit
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256, 
  Intrinsic::not_intrinsic},  NO_EMU}},
+{"meta.intrinsic.VROUND",  {{Intrinsic::x86_avx_round_ps_256,  
  DOUBLE},NO_EMU}},
 },
 {   // AVX2
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  DOUBLE},NO_EMU}},
@@ -115,6 +117,7 @@ namespace SwrJit
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256, 
  Intrinsic::not_intrinsic},  NO_EMU}},
+{"meta.intrinsic.VROUND",  {{Intrinsic::x86_avx_round_ps_256,  
  DOUBLE},NO_EMU}},
 },
 {   // AVX512
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx512_rcp14_ps_256,   
  Intrinsic::x86_avx512_rcp14_ps_512},NO_EMU}},
@@ -125,6 +128,7 @@ namespace SwrJit
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   
{{Intrinsic::x86_avx512_mask_cvtpd2ps_256,
Intrinsic::x86_avx512_mask_cvtpd2ps_512 },  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   
{{Intrinsic::x86_avx512_mask_vcvtph2ps_256,   
Intrinsic::x86_avx512_mask_vcvtph2ps_512 }, NO_EMU}},
+{"meta.intrinsic.VROUND",  {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic }, VROUND_EMU}},
 }
 };
 
@@ -499,6 +503,38 @@ namespace SwrJit
 return cast<Instruction>(v32Gather);
 }
 
+// No support for vroundps in avx512 (it is available in kncni), so 
emulate with avx instructions
+Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst)
+{
+SWR_ASSERT(arch == AVX512);
+
+auto B = pThis->B;
+auto vf32Src = pCallInst->getOperand(0);
+auto i8Round = pCallInst->getOperand(1);
+auto pfnFunc = Intrinsic::getDeclaration(B->JM()->mpCurrentModule, 
Intrinsic::x86_avx_round_ps_256);
+
+if (width == W256)
+{
+return cast<Instruction>(B->CALL2(pfnFunc, vf32Src, i8Round));
+}
+else if (width == W512)
+{
+auto v8f32SrcLo = B->EXTRACT_16(vf32Src, 0);
+auto v8f32SrcHi = B->EXTRACT_16(vf32Src, 1);
+
+auto v8f32ResLo = B->CALL2(pfnFunc, v8f32SrcLo, i8Round);
+auto v8f32ResHi = B->CALL2(pfnFunc, v8f32SrcHi, i8Round);
+
+return cast<Instruction>(B->JOIN_16(v8f32ResLo, v8f32ResHi));
+}
+

[Mesa-dev] [PATCH 22/45] swr/rast: Cleanup of JitManager convenience types

2018-04-13 Thread George Kyriazis
Small cleanup. Remove convenience types from JitManager and standardize
on the Builder's convenience types.
---
 .../drivers/swr/rasterizer/jitter/JitManager.cpp | 19 ---
 .../drivers/swr/rasterizer/jitter/JitManager.h   | 20 
 .../drivers/swr/rasterizer/jitter/builder.cpp|  7 +++
 src/gallium/drivers/swr/rasterizer/jitter/builder.h  |  3 ++-
 4 files changed, 5 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index bfb1d2e..9080964 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -110,11 +110,6 @@ JitManager::JitManager(uint32_t simdWidth, const char 
*arch, const char* core)
 mpExec->RegisterJITEventListener(vTune);
 #endif
 
-mFP32Ty = Type::getFloatTy(mContext);   // float type
-mInt8Ty = Type::getInt8Ty(mContext);
-mInt32Ty = Type::getInt32Ty(mContext);   // int type
-mInt64Ty = Type::getInt64Ty(mContext);   // int type
-
 // fetch function signature
 #if USE_SIMD16_SHADERS
 // typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, 
simd16vertex& out);
@@ -135,20 +130,6 @@ JitManager::JitManager(uint32_t simdWidth, const char 
*arch, const char* core)
 
 mFetchShaderTy = FunctionType::get(Type::getVoidTy(mContext), fsArgs, 
false);
 
-mSimtFP32Ty = VectorType::get(mFP32Ty, mVWidth);
-mSimtInt32Ty = VectorType::get(mInt32Ty, mVWidth);
-
-mSimdVectorTy = ArrayType::get(mSimtFP32Ty, 4);
-mSimdVectorInt32Ty = ArrayType::get(mSimtInt32Ty, 4);
-
-#if USE_SIMD16_SHADERS
-mSimd16FP32Ty = ArrayType::get(mSimtFP32Ty, 2);
-mSimd16Int32Ty = ArrayType::get(mSimtInt32Ty, 2);
-
-mSimd16VectorFP32Ty = ArrayType::get(mSimd16FP32Ty, 4);
-mSimd16VectorInt32Ty = ArrayType::get(mSimd16Int32Ty, 4);
-
-#endif
 #if defined(_WIN32)
 // explicitly instantiate used symbols from potentially staticly linked 
libs
 sys::DynamicLibrary::AddSymbol("exp2f", &exp2f);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h 
b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
index 3660249..86e6758 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.h
@@ -143,26 +143,6 @@ struct JitManager
 uint32_tmVWidth;
 
 
-// Built in types.
-llvm::Type* mInt8Ty;
-llvm::Type* mInt32Ty;
-llvm::Type* mInt64Ty;
-llvm::Type* mFP32Ty;
-
-llvm::Type* mSimtFP32Ty;
-llvm::Type* mSimtInt32Ty;
-
-llvm::Type* mSimdVectorInt32Ty;
-llvm::Type* mSimdVectorTy;
-
-#if USE_SIMD16_SHADERS
-llvm::Type* mSimd16FP32Ty;
-llvm::Type* mSimd16Int32Ty;
-
-llvm::Type* mSimd16VectorFP32Ty;
-llvm::Type* mSimd16VectorInt32Ty;
-
-#endif
 // fetch shader types
 llvm::FunctionType* mFetchShaderTy;
 
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
index 260daab..625f132 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.cpp
@@ -42,10 +42,8 @@ namespace SwrJit
 : mpJitMgr(pJitMgr),
   mpPrivateContext(nullptr)
 {
-SWR_ASSERT(pJitMgr->mVWidth == 8);
-
 mVWidth = pJitMgr->mVWidth;
-mVWidth16 = pJitMgr->mVWidth * 2;
+mVWidth16 = 16;
 
 mpIRBuilder = &pJitMgr->mBuilder;
 
@@ -67,7 +65,7 @@ namespace SwrJit
 
 mSimd4FP64Ty = VectorType::get(mDoubleTy, 4);
 
-// Built in types: simd8
+// Built in types: target simd
 
 mSimdInt1Ty = VectorType::get(mInt1Ty,  mVWidth);
 mSimdInt16Ty= VectorType::get(mInt16Ty, mVWidth);
@@ -76,6 +74,7 @@ namespace SwrJit
 mSimdFP16Ty = VectorType::get(mFP16Ty,  mVWidth);
 mSimdFP32Ty = VectorType::get(mFP32Ty,  mVWidth);
 mSimdVectorTy   = ArrayType::get(mSimdFP32Ty, 4);
+mSimdVectorIntTy= ArrayType::get(mSimdInt32Ty, 4);
 mSimdVectorTRTy = ArrayType::get(mSimdFP32Ty, 5);
 
 // Built in types: simd16
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
index 0b57fbf..6b2c9f0 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder.h
@@ -68,7 +68,7 @@ namespace SwrJit
 
 Type*mSimd4FP64Ty;
 
-// Built in types: simd8
+// Built in types: target SIMD
 
 Type*mSimdFP16Ty;
 Type*mSimdFP32Ty;
@@ -79,6 +79,7 @@ namespace SwrJit
 Type*mSimdIntPtrTy;
 Type*

[Mesa-dev] [PATCH 30/45] swr/rast: Fix byte offset for non-indexed draws

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/core/frontend.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 25d1073..2076859 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1729,13 +1729,14 @@ void ProcessDraw(
 uint32_t offset;
 offset = std::min(endVertex-i, (uint32_t) 
KNOB_SIMD16_WIDTH);
 #if USE_SIMD16_SHADERS
+offset *= 4; // convert from index to address
 fetchInfo_lo.pLastIndex += offset;
 #else
-fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) 
KNOB_SIMD_WIDTH);
+fetchInfo_lo.pLastIndex += std::min(offset, (uint32_t) 
KNOB_SIMD_WIDTH) * 4; // * 4 for converting index to address
 uint32_t offset2 = std::min(offset, (uint32_t) 
KNOB_SIMD16_WIDTH)-KNOB_SIMD_WIDTH;
 assert(offset >= 0);
 fetchInfo_hi.pLastIndex = fetchInfo_hi.pIndices;
-fetchInfo_hi.pLastIndex += offset2;
+fetchInfo_hi.pLastIndex += offset2 * 4; // * 4 for 
converting index to address
 #endif
 }
 // 1. Execute FS/VS for a single SIMD.
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/45] swr/rast: Changes to allow jitter to compile with LLVM5

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp 
b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
index 031bced..b1d6076 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/jit_pch.hpp
@@ -1,5 +1,5 @@
 /
-* Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2017-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -112,6 +112,22 @@ using PassManager = llvm::legacy::PassManager;
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #endif
 
+#if LLVM_VERSION_MAJOR >= 5
+static const auto Sync_CrossThread = llvm::SyncScope::System;
+static const auto Attrib_FunctionIndex = llvm::AttributeList::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, 
const llvm::AttrBuilder &b)
+{
+return llvm::AttributeSet::get(ctx, b);
+}
+#else
+static const auto Sync_CrossThread = llvm::SynchronizationScope::CrossThread;
+static const auto Attrib_FunctionIndex = llvm::AttributeSet::FunctionIndex;
+static inline llvm::AttributeSet GetFuncAttribSet(llvm::LLVMContext& ctx, 
const llvm::AttrBuilder &b)
+{
+return llvm::AttributeSet::get(ctx, Attrib_FunctionIndex, b);
+}
+#endif
+
 #pragma pop_macro("DEBUG")
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/45] swr/rast: LLVM 6 fix

2018-04-13 Thread George Kyriazis
for getting masked gather intrinsic (also compatible with LLVM 4)
---
 src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index ac01223..7c223d1 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -390,7 +390,7 @@ namespace SwrJit
 /// @param pVecPassthru - SIMD wide vector of values to load when lane is 
inactive
 Value* Builder::GATHER_PTR(Value* pVecSrcPtr, Value* pVecMask, Value* 
pVecPassthru)
 {
-Function* pMaskedGather = 
llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, 
Intrinsic::masked_gather, { pVecPassthru->getType() });
+Function* pMaskedGather = 
llvm::Intrinsic::getDeclaration(JM()->mpCurrentModule, 
Intrinsic::masked_gather, { pVecPassthru->getType(), pVecSrcPtr->getType() });
 
 return CALL(pMaskedGather, { pVecSrcPtr, C(0), pVecMask, pVecPassthru 
});
 }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 39/45] swr/rast: double-pump in x86 lowering pass

2018-04-13 Thread George Kyriazis
Add support for double-pumping a smaller SIMD width intrinsic.
---
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 30 ++
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 9423b28..983b227 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -90,11 +90,14 @@ namespace SwrJit
 Instruction* NO_EMU(LowerX86* pThis, TargetArch arch, TargetWidth width, 
CallInst* pCallInst);
 Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
 Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
+Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst, Intrinsic::ID intrin);
+
+static Intrinsic::ID DOUBLE = (Intrinsic::ID)-1;
 
 static std::map intrinsicMap2[] = {
 //  256 wide   
 512 wide
 {   // AVX
-{"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
+{"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  DOUBLE},NO_EMU}},
 {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VPERMD",  {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
@@ -104,7 +107,7 @@ namespace SwrJit
 {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256, 
  Intrinsic::not_intrinsic},  NO_EMU}},
 },
 {   // AVX2
-{"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
+{"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  DOUBLE},NO_EMU}},
 {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps,   
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VPERMD",  {{Intrinsic::x86_avx2_permd,
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
@@ -226,7 +229,15 @@ namespace SwrJit
 
 // Check if there is a native intrinsic for this instruction
 Intrinsic::ID id = intrinsic.intrin[vecWidth];
-if (id != Intrinsic::not_intrinsic)
+if (id == DOUBLE)
+{
+// Double pump the next smaller SIMD intrinsic
+SWR_ASSERT(vecWidth != 0, "Cannot double pump smallest SIMD 
width.");
+Intrinsic::ID id2 = intrinsic.intrin[vecWidth - 1];
+SWR_ASSERT(id2 != Intrinsic::not_intrinsic, "Cannot find 
intrinsic to double pump.");
+return DOUBLE_EMU(this, mTarget, vecWidth, pCallInst, id2);
+}
+else if (id != Intrinsic::not_intrinsic)
 {
 Function* pIntrin = 
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, id);
 SmallVector args;
@@ -488,28 +499,25 @@ namespace SwrJit
 return cast(v32Gather);
 }
 
-#if 0
 // Double pump input using Intrin template arg. This blindly extracts 
lower and upper 256 from each vector argument and
 // calls the 256 wide intrinsic, then merges the results to 512 wide
-template
-Value* EMU_512(LowerX86* pThis, TargetArch arch, TargetWidth width, 
CallInst* pCallInst)
+Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst, Intrinsic::ID intrin)
 {
 auto B = pThis->B;
 SWR_ASSERT(width == W512);
 Value* result[2];
-Function* pX86IntrinFunc = 
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, Intrin);
+Function* pX86IntrinFunc = 
Intrinsic::getDeclaration(B->JM()->mpCurrentModule, intrin);
 for (uint32_t i = 0; i < 2; ++i)
 {
 SmallVector args;
 for (auto& arg : pCallInst->arg_operands())
 {
-args.push_back(arg.get()->getType()->isVectorTy ? 
B->EXTRACT_16(arg.get(), i) : arg.get());
+args.push_back(arg.get()->getType()->isVectorTy() ? 

Re: [Mesa-dev] [PATCH i-g-t] [RFC] CONTRIBUTING: commit rights docs

2018-04-13 Thread Harry Wentland
On 2018-04-13 06:00 AM, Daniel Vetter wrote:
> This tries to align with the X.org community's long-standing
> tradition of trying to be an inclusive community and handing out
> commit rights fairly freely.
> 
> We also tend to not revoke commit rights for people no longer
> regularly active in a given project, as long as they're still part of
> the larger community.
> 
> Finally make sure that commit rights, like anything happening on fd.o
> infrastructure, is subject to the fd.o's Code of Conduct.
> 
> v2: Point at MAINTAINERS for contact info (Daniel S.)
> 
> v3:
> - Make it clear that commit rights are voluntary and that committers
>   need to acknowledge positively when they're nominated by someone
>   else (Keith).
> - Encourage committers to drop their commit rights when they're no
>   longer active, and make it clear they'll get readded (Keith).
> - Add a line that maintainers and committers should actively nominate
>   new committers (me).
> 
> v4: Typo (Petri).
> 
> v5: Typo (Sean).
> 
> v6: Wording clarifications and spelling (Jani).
> 
> v7: Require an explicit commitment to the documented merge criteria
> and rules, instead of just the implied one through the Code of Conduct
> threat (Jani).
> 
> Acked-by: Alex Deucher 
> Acked-by: Arkadiusz Hiler 
> Acked-by: Daniel Stone 
> Acked-by: Eric Anholt 
> Acked-by: Gustavo Padovan 
> Acked-by: Petri Latvala 

Acked-by: Harry Wentland 

Harry

> Cc: Alex Deucher 
> Cc: Arkadiusz Hiler 
> Cc: Ben Widawsky 
> Cc: Daniel Stone 
> Cc: Dave Airlie 
> Cc: Eric Anholt 
> Cc: Gustavo Padovan 
> Cc: Jani Nikula 
> Cc: Joonas Lahtinen 
> Cc: Keith Packard 
> Cc: Kenneth Graunke 
> Cc: Kristian H. Kristensen 
> Cc: Maarten Lankhorst 
> Cc: Petri Latvala 
> Cc: Rodrigo Vivi 
> Cc: Sean Paul 
> Reviewed-by: Keith Packard 
> Signed-off-by: Daniel Vetter 
> ---
> If you wonder about the wide distribution list for an igt patch: I'd
> like to start a discussion about x.org community norms around commit
> rights at large, at least for all the shared repos. I plan to propose
> the same text for drm-misc and libdrm too, and hopefully others like
> mesa/xserver/wayland would follow.
> 
> fd.o admins also plan to discuss this (and a pile of other topics and
> hosting and code of conduct) with all projects, ideally this here
> would end up as the starting point for establishing some community
> norms.
> -Daniel
> ---
>  CONTRIBUTING | 48 
>  1 file changed, 48 insertions(+)
> 
> diff --git a/CONTRIBUTING b/CONTRIBUTING
> index 0180641be3aa..8a118134275c 100644
> --- a/CONTRIBUTING
> +++ b/CONTRIBUTING
> @@ -51,4 +51,52 @@ A short list of contribution guidelines:
>  - Changes to the testcases are automatically tested. Take the results into
>account before merging.
>  
> +Commit rights
> +-
> +
> +Commit rights will be granted to anyone who requests them and fulfills the
> +below criteria:
> +
> +- Submitted a few (5-10 as a rule of thumb) non-trivial (not just simple
> +  spelling fixes and whitespace adjustment) patches that have been merged
> +  already.
> +
> +- Are actively participating on discussions about their work (on the mailing
> +  list or IRC). This should not be interpreted as a requirement to review 
> other
> +  peoples patches but just make sure that patch submission isn't one-way
> +  communication. Cross-review is still highly encouraged.
> +
> +- Will be regularly contributing further patches. This includes regular
> +  contributors to other parts of the open source graphics stack who only
> +  do the oddball rare patch within igt itself.
> +
> +- Agrees to use their commit rights in accordance with the documented merge
> +  criteria, tools, and processes.
> +
> +Apply for an account (and any other account change requests) through
> +
> +https://www.freedesktop.org/wiki/AccountRequests/
> +
> +and please ping the maintainers if your request is stuck.
> +
> +Committers are encouraged to request their commit rights get removed when 
> they
> +no longer contribute to the project. Commit rights will be reinstated when 
> they
> +come back to the project.
> +
> +Maintainers and committers should encourage contributors to request commit
> +rights, especially junior contributors tend to underestimate their skills.
> +
> +Code of Conduct
> +---
> +
> +Please be aware the fd.o Code of Conduct also applies to 

Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread James Xiong
On Fri, 13 Apr 2018 13:51:02 -0700
Kenneth Graunke  wrote:

> On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:
> > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:  
> > > From: "Xiong, James" 
> > > 
> > > On non-LLC platforms, we malloc shadow batch/state buffers
> > > of the same sizes as our batch/state buffers' GEM allocations.
> > > However the buffer allocator reuses similar-sized gem objects,
> > > it returns a buffer larger than we asked for in some cases
> > > and we end up with smaller shadow buffers. If we utilize the
> > > full-size of the over-allocated batch/state buffers, we may wind
> > > up accessing beyond the bounds of the shadow buffers and cause
> > > segmentation fault and/or memory corruption.  
> > 
> > Oh, good catch!  We do indeed malloc too little if the bufmgr
Thank you for taking time to review, Kenneth.
> >   
> > > A few examples:
> > >  casebatch  state
> > >  request bo   shadow request bo  shadow
> > > init020K 20K  20K16K 16K 16K
> > > grow_buffer 130K 32K  30K24K 24K 24K
> > > grow_buffer 248K 48K  48K36K 40K 36K
> > > grow_buffer 372K 80K  72K60K 64K 60K
> > > grow_buffer 4120K128K 120K   -   -   -
> > > 
> > > batch #1, #3, #4; state #2 and #3 are problematic. We can change
> > > the order to allocate the bo first, then allocate the shadow
> > > buffer using the bo's size so that the shadow buffer have at
> > > least an equivalent size of the gem allocation.
> > > 
> > > Another problem: even though the state/batch buffer could grow,
> > > when checking if it runs out space, we always compare with the
> > > initial batch/state sizes. To utilize the entire buffers, change
> > > to compare with the actual sizes.  
> > 
> > This is actually intentional.  Our goal is to flush batches when the
> > amount of commands or state reaches those thresholds.  Occasionally,
> > we'll be in the middle of emitting a draw, and unable to stop.  In
> > that case, we grow the batch and keep going.  But after that, we're
> > beyond our original target, so we flush next time.  We don't want
> > to grow without bounds...it's meant more for emergencies, or if
> > we've badly estimated the size of the draw call.
I am not sure I get it. Let me give an example: the state buffer
gets grown once from 16K to 24K in brw_state_batch(), and the used_size
becomes 20K. Then brw_require_statebuffer_space(1024) gets called to
ask for 1K of space. With the original logic, it compares the used size
with 16K and flushes the batch even though the state buffer still has 4K
of space available?
> > 
> > I've sent a simpler patch which I think should hopefully fix your
> > bug: https://patchwork.freedesktop.org/patch/217107/  
> 
> Lionel noticed that I botched that patch.  Here's an actual one:
> 
> https://patchwork.freedesktop.org/patch/217108/
Yes, it will fix the existing bug. However, the assumption here is
that the initial allocation size will NOT be rounded up, as it happens to
be the bucket size.
I am working on an optimization to improve memory usage (that's how
I found this bug), and with it this assumption is no longer true.
Essentially the bufmgr could return a buffer with the same or a larger
size, whether the requested size is the same as the bucket's or not.
Anyway, I guess I can send the fix later along with the optimization
patches.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] ddebug: add PIPE_OS_UNIX/LINUX checks to fix MSVC build

2018-04-13 Thread Brian Paul
Don't include Unix headers or use Unix functions when building with MSVC.
---
 src/gallium/auxiliary/driver_ddebug/dd_draw.c | 5 +
 src/gallium/auxiliary/driver_ddebug/dd_util.h | 9 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/driver_ddebug/dd_draw.c 
b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
index c404ea0..cb5db8a 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_draw.c
+++ b/src/gallium/auxiliary/driver_ddebug/dd_draw.c
@@ -37,6 +37,7 @@
 #include "tgsi/tgsi_scan.h"
 #include "util/os_time.h"
 #include 
+#include "pipe/p_config.h"
 
 
 static void
@@ -69,6 +70,7 @@ dd_get_file_stream(struct dd_screen *dscreen, unsigned 
apitrace_call_number)
 static void
 dd_dump_dmesg(FILE *f)
 {
+#ifdef PIPE_OS_LINUX
char line[2000];
FILE *p = popen("dmesg | tail -n60", "r");
 
@@ -80,6 +82,7 @@ dd_dump_dmesg(FILE *f)
   fputs(line, f);
 
pclose(p);
+#endif
 }
 
 static unsigned
@@ -611,7 +614,9 @@ dd_dump_call(FILE *f, struct dd_draw_state *state, struct 
dd_call *call)
 static void
 dd_kill_process(void)
 {
+#ifdef PIPE_OS_UNIX
sync();
+#endif
fprintf(stderr, "dd: Aborting the process...\n");
fflush(stdout);
fflush(stderr);
diff --git a/src/gallium/auxiliary/driver_ddebug/dd_util.h 
b/src/gallium/auxiliary/driver_ddebug/dd_util.h
index bdfb7cc..8953e34 100644
--- a/src/gallium/auxiliary/driver_ddebug/dd_util.h
+++ b/src/gallium/auxiliary/driver_ddebug/dd_util.h
@@ -30,14 +30,19 @@
 
 #include 
 #include 
-#include 
-#include 
 
 #include "c99_alloca.h"
 #include "os/os_process.h"
 #include "util/u_atomic.h"
 #include "util/u_debug.h"
 
+#include "pipe/p_config.h"
+#ifdef PIPE_OS_UNIX
+#include 
+#include 
+#endif
+
+
 /* name of the directory in home */
 #define DD_DIR "ddebug_dumps"
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] gallium/util: put (void) in a few function signatures

2018-04-13 Thread Brian Paul
To match the header file.
---
 src/gallium/auxiliary/util/u_network.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_network.c 
b/src/gallium/auxiliary/util/u_network.c
index e74293b..89395f5 100644
--- a/src/gallium/auxiliary/util/u_network.c
+++ b/src/gallium/auxiliary/util/u_network.c
@@ -20,7 +20,7 @@
 #endif
 
 boolean
-u_socket_init()
+u_socket_init(void)
 {
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
WORD wVersionRequested;
@@ -44,7 +44,7 @@ u_socket_init()
 }
 
 void
-u_socket_stop()
+u_socket_stop(void)
 {
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
WSACleanup();
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] gallium/osmesa: link with winsock2 library on Windows

2018-04-13 Thread Brian Paul
To fix the MSVC build.  The build broke because we started to compile
the ddebug code on Windows after the mtypes.h changes.  Building ddebug
caused us to also use the u_network.c code for the first time.
---
 src/gallium/targets/osmesa/SConscript | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/targets/osmesa/SConscript 
b/src/gallium/targets/osmesa/SConscript
index f49f1fe..3df5c50 100644
--- a/src/gallium/targets/osmesa/SConscript
+++ b/src/gallium/targets/osmesa/SConscript
@@ -39,6 +39,9 @@ if env['platform'] == 'windows':
 sources += ['osmesa.mingw.def']
 else:
 sources += ['osmesa.def']
+# Link with winsock2 library
+env.Append(LIBS = ['ws2_32'])
+
 
 gallium_osmesa = env.SharedLibrary(
 target ='osmesa',
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] mesa: protect #include of unistd.h with _MSC_VER check

2018-04-13 Thread Brian Paul
unistd.h is unix only.
---
 src/mesa/program/program_lexer.l | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/program/program_lexer.l b/src/mesa/program/program_lexer.l
index 13eb902..c4973fd 100644
--- a/src/mesa/program/program_lexer.l
+++ b/src/mesa/program/program_lexer.l
@@ -21,7 +21,11 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+#ifndef _MSC_VER
 #include <unistd.h>
+#endif
+
 #include "main/glheader.h"
 #include "main/imports.h"
 #include "program/prog_instruction.h"
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] mesa: remove unused 'i' in dimensions_error_check()

2018-04-13 Thread Brian Paul
---
 src/mesa/main/texgetimage.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c
index 69521c5..0ab9ed4 100644
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -913,7 +913,6 @@ dimensions_error_check(struct gl_context *ctx,
const char *caller)
 {
const struct gl_texture_image *texImage;
-   int i;
 
if (xoffset < 0) {
   _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Fix shadow batches to be the same size as the real BO.

2018-04-13 Thread Kenneth Graunke
brw_bo_alloc may round up our allocation size to the next bucket size.
In this case, we would malloc a shadow buffer that was the original
intended size, but use bo->size (the larger size) for all of our checks.

This could cause us to run off the end of the shadow buffer.

Reported-by: James Xiong 
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 55889be7327..1c5574cbfb0 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -360,8 +360,11 @@ grow_buffer(struct brw_context *brw,
   /* We can't safely use realloc, as it may move the existing buffer,
* breaking existing pointers the caller may still be using.  Just
* malloc a new copy and memcpy it like the normal BO path.
+   *
+   * Use bo->size rather than new_size because the bufmgr may have
+   * rounded up the size, and we want the shadow size to match.
*/
-  grow->map = malloc(new_size);
+  grow->map = malloc(bo->size);
} else {
   grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
}
-- 
2.16.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix radv_layout_dcc_compressed() when image doesn't have DCC

2018-04-13 Thread Mark Janes
Hi Samuel,

When this patch is applied directly to the 18.0 branch, I get a mesa
compile error:

radv_image.c:1175:9: error: implicit declaration of function
‘radv_image_has_dcc’; did you mean ‘radv_image_alloc_dcc’?

Is there another patch that this is dependent on?

-Mark

Samuel Pitoiset  writes:

> num_dcc_levels means that DCC is supported, but this doesn't
> mean that it's enabled by the driver. Instead, we should rely
> on radv_image_has_dcc().
>
> This fixes some multisample regressions since 0babc8e5d66
> ("radv: fix picking the method for resolve subpass") on Vega.
> This is because the resolve method changed from HW to FS, but
> those failures are totally unexpected, so there might be some
> differences between Polaris and Vega here.
>
> Fixes: 44fcf587445 ("radv: Disable DCC for GENERAL layout and compute 
> transfer dest.")
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_image.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
> index acb569203d4..a14e7c18b29 100644
> --- a/src/amd/vulkan/radv_image.c
> +++ b/src/amd/vulkan/radv_image.c
> @@ -1241,7 +1241,7 @@ bool radv_layout_dcc_compressed(const struct radv_image 
> *image,
>   (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
>   return false;
>  
> - return image->surface.num_dcc_levels > 0 && layout != 
> VK_IMAGE_LAYOUT_GENERAL;
> + return radv_image_has_dcc(image) && layout != VK_IMAGE_LAYOUT_GENERAL;
>  }
>  
>  
> -- 
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106039] Undefined version strings in pc files with meson build

2018-04-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106039

Bug ID: 106039
   Summary: Undefined version strings in pc files with meson build
   Product: Mesa
   Version: 18.0
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Other
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: re...@rezso.net
QA Contact: mesa-dev@lists.freedesktop.org

If I compile Mesa 18.0.0 with meson, the Version field is empty in all .pc
files.
Currently I need a patch / sed to fix this.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] i965: Make sure the shadow buffers have enough space

2018-04-13 Thread Kenneth Graunke
On Friday, April 13, 2018 2:08:40 PM PDT James Xiong wrote:
> On Fri, 13 Apr 2018 13:51:02 -0700
> Kenneth Graunke  wrote:
> 
> > On Friday, April 13, 2018 1:35:45 PM PDT Kenneth Graunke wrote:
> > > On Monday, April 9, 2018 4:06:16 PM PDT James Xiong wrote:  
> > > > From: "Xiong, James" 
> > > > 
> > > > On non-LLC platforms, we malloc shadow batch/state buffers
> > > > of the same sizes as our batch/state buffers' GEM allocations.
> > > > However the buffer allocator reuses similar-sized gem objects,
> > > > it returns a buffer larger than we asked for in some cases
> > > > and we end up with smaller shadow buffers. If we utilize the
> > > > full-size of the over-allocated batch/state buffers, we may wind
> > > > up accessing beyond the bounds of the shadow buffers and cause
> > > > segmentation fault and/or memory corruption.  
> > > 
> > > Oh, good catch!  We do indeed malloc too little if the bufmgr
> Thank you for taking time to review, Kenneth.
> > >   
> > > > A few examples:
> > > >  casebatch  state
> > > >  request bo   shadow request bo  shadow
> > > > init020K 20K  20K16K 16K 16K
> > > > grow_buffer 130K 32K  30K24K 24K 24K
> > > > grow_buffer 248K 48K  48K36K 40K 36K
> > > > grow_buffer 372K 80K  72K60K 64K 60K
> > > > grow_buffer 4120K128K 120K   -   -   -
> > > > 
> > > > batch #1, #3, #4; state #2 and #3 are problematic. We can change
> > > > the order to allocate the bo first, then allocate the shadow
> > > > buffer using the bo's size so that the shadow buffer have at
> > > > least an equivalent size of the gem allocation.
> > > > 
> > > > Another problem: even though the state/batch buffer could grow,
> > > > when checking if it runs out space, we always compare with the
> > > > initial batch/state sizes. To utilize the entire buffers, change
> > > > to compare with the actual sizes.  
> > > 
> > > This is actually intentional.  Our goal is to flush batches when the
> > > amount of commands or state reaches those thresholds.  Occasionally,
> > > we'll be in the middle of emitting a draw, and unable to stop.  In
> > > that case, we grow the batch and keep going.  But after that, we're
> > > beyond our original target, so we flush next time.  We don't want
> > > to grow without bounds...it's meant more for emergencies, or if
> > > we've badly estimated the size of the draw call.
> I am not sure I get it. Let me give an example: the state buffer
> gets grown once from 16K to 24K in brw_state_batch(), the used_size
> becomes 20K, then brw_require_statebuffer_space(1024) gets called to
> ask for 1K space, with the original logical, it compares the used size
> with 16K and flush the batch even though the state buffer still has 4K
> space available?

Yes, the idea is to flush at around 16K of state.  If we happen to be in
the middle of a draw and run out of space, we'll grow to 24K.  Once it's
over 16K, we flush as soon as we can.

We'd like to be fairly consistent on our batch size.  Running larger
batches can lead to differences in performance, and large batches can
lead to degradation in the interactivity of the system (especially on
GPUs without preemption).

The hope is to grow once, at most.  If we check against the BO size, we
might grow repeatedly, which would lead to really large batches and
things would get out of hand.

> > > 
> > > I've sent a simpler patch which I think should hopefully fix your
> > > bug: https://patchwork.freedesktop.org/patch/217107/  
> > 
> > Lionel noticed that I botched that patch.  Here's an actual one:
> > 
> > https://patchwork.freedesktop.org/patch/217108/
> Yes it will fix the existing bug. However the assumption here is
> that the init allocation size will NOT be rounded up as it happens to
> be the bucket size.
> I am working on an optimization to improve memory usage(that's how
> I find out this bug), this assumption is no longer true. Essentially the
> bufmgr could return a buffer with the same or larger size whether it is
> same as the bucket's or not. Anyway I guess I can send the fix
> later along with the optimization patches.

Ah, that's a good point.  Your patch also tries to use the BO size
for the initial malloc as well, which is a good idea...


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/17] radeonsi: skip DCC render feedback checking if color writes are disabled

2018-04-13 Thread Marek Olšák
The crashes should be fixed in mesa/master.

Marek

On Thu, Apr 12, 2018 at 9:17 PM, Timothy Arceri 
wrote:

> On 13/04/18 10:45, Timothy Arceri wrote:
>
>> This change causes around 20+ piglit crashes on my Polaris.
>>
>> e.g tests/spec/arb_compute_shader/execution/atomic-counter.shader_test
>>
>> Thread 1 "shader_runner" received signal SIGSEGV, Segmentation fault.
>> 0x71009ccc in si_get_total_colormask (sctx=0x64b140) at
>> si_pipe.h:945
>> 945if (sctx->queued.named.rasterizer->rasterizer_discard)
>>
>>
>> It also seems to cause hundreds of test failures e.g
>>
>> ./bin/copyteximage CUBE -auto
>>
>>
>> Unfortunately it doesn't revert cleanly either.
>>
>
> Actually ignore this second problem I'm seeing a lot of intermittent test
> failures. These are being caused by something else, however the
> crash above is caused by this commit.
>
>
>
>> On 04/04/18 11:59, Marek Olšák wrote:
>>
>>> From: Marek Olšák 
>>>
>>> The previous patch is required for this.
>>> ---
>>>   src/gallium/drivers/radeonsi/si_blit.c  |  5 +
>>>   src/gallium/drivers/radeonsi/si_pipe.h  | 17 +
>>>   src/gallium/drivers/radeonsi/si_state_shaders.c |  6 +-
>>>   3 files changed, 23 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>>> b/src/gallium/drivers/radeonsi/si_blit.c
>>> index 45770b0d9bf..8dd8bc2a4dd 100644
>>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>>> @@ -706,20 +706,25 @@ static void 
>>> si_check_render_feedback_resident_images(struct
>>> si_context *sctx)
>>>   si_check_render_feedback_texture(sctx, tex,
>>>view->u.tex.level,
>>>view->u.tex.level,
>>>view->u.tex.first_layer,
>>>view->u.tex.last_layer);
>>>   }
>>>   }
>>>   static void si_check_render_feedback(struct si_context *sctx)
>>>   {
>>> +/* There is no render feedback if color writes are disabled.
>>> + * (e.g. a pixel shader with image stores)
>>> + */
>>> +if (!si_get_total_colormask(sctx))
>>> +return;
>>>   if (!sctx->need_check_render_feedback)
>>>   return;
>>>   for (int i = 0; i < SI_NUM_SHADERS; ++i) {
>>>   si_check_render_feedback_images(sctx, &sctx->images[i]);
>>>   si_check_render_feedback_textures(sctx, &sctx->samplers[i]);
>>>   }
>>>   si_check_render_feedback_resident_images(sctx);
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>>> b/src/gallium/drivers/radeonsi/si_pipe.h
>>> index e3d45ef6c3b..e65c946d186 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>>> @@ -933,11 +933,28 @@ vi_tc_compat_htile_enabled(struct r600_texture
>>> *tex, unsigned level)
>>>   }
>>>   static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)
>>>   {
>>>   if (sctx->ps_uses_fbfetch)
>>>   return sctx->framebuffer.nr_samples;
>>>   return sctx->ps_iter_samples;
>>>   }
>>> +static inline unsigned si_get_total_colormask(struct si_context *sctx)
>>> +{
>>> +if (sctx->queued.named.rasterizer->rasterizer_discard)
>>> +return 0;
>>> +
>>> +struct si_shader_selector *ps = sctx->ps_shader.cso;
>>> +unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit &
>>> + sctx->queued.named.blend->cb_target_mask;
>>> +
>>> +if (!ps->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
>>> +colormask &= ps->colors_written_4bit;
>>> +else if (!ps->colors_written_4bit)
>>> +colormask = 0; /* color0 writes all cbufs, but it's not written
>>> */
>>> +
>>> +return colormask;
>>> +}
>>> +
>>>   #endif
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> index d7742eafb04..f2d29e40744 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>>> @@ -1208,25 +1208,21 @@ static void si_shader_selector_key_hw_vs(struct
>>> si_context *sctx,
>>>   bool ps_disabled = true;
>>>   if (ps) {
>>>   const struct si_state_blend *blend = sctx->queued.named.blend;
>>>   bool alpha_to_coverage = blend && blend->alpha_to_coverage;
>>>   bool ps_modifies_zs = ps->info.uses_kill ||
>>> ps->info.writes_z ||
>>> ps->info.writes_stencil ||
>>> ps->info.writes_samplemask ||
>>> alpha_to_coverage ||
>>> si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS;
>>> -
>>> -unsigned ps_colormask = sctx->framebuffer.colorbuf_enabled_4bit
>>> &
>>> -sctx->queued.named.blend->cb_target_mask;
>>> -if (!ps->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBU
>>> FS])
>>> -  

[Mesa-dev] [PATCH 00/10] radv: various changes in the DCC/FMASK/CMASK codepath

2018-04-13 Thread Samuel Pitoiset
Hi,

This series mostly cleans up the DCC/FMASK/CMASK codepath. This
also adds some little improvements that shouldn't impact performance
because reducing the number of fast-clear eliminate passes doesn't
matter much.

DCC for MSAA textures is more complicated than expected because it
seems like there are plenty of corner cases. It should be easier to
implement with a cleaner codebase.

No CTS changes on Vega.

Please review!

Samuel Pitoiset (10):
  radv: disable prediction only if it has been enabled
  radv: handle DCC image transitions before CMASK/FMASK transitions
  radv: add radv_handle_color_image_transition() helper
  radv: clean up radv_handle_image_transition() a bit
  radv: make radv_initialise_cmask() static
  radv: add radv_init_color_image_metadata() helper
  radv: merge radv_handle_{dcc,cmask}_image_transition() functions
  radv: handle CMASK/FMASK transitions only if DCC is disabled
  radv: don't fast-clear eliminate after resolving a subpass with
compute
  radv: clean up radv_decompress_resolve_subpass_src()

 src/amd/vulkan/radv_cmd_buffer.c  | 144 --
 src/amd/vulkan/radv_meta_fast_clear.c |   2 +-
 src/amd/vulkan/radv_meta_resolve.c|  17 ++-
 src/amd/vulkan/radv_meta_resolve_cs.c |  14 ---
 src/amd/vulkan/radv_private.h |   2 -
 5 files changed, 95 insertions(+), 84 deletions(-)

-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] radv: handle DCC image transitions before CMASK/FMASK transitions

2018-04-13 Thread Samuel Pitoiset
Mostly because DCC implies a fast-clear eliminate and we
should be able to skip some DCC decompressions by setting
a predicate like for CMASK and FMASK.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f73526b5fc..92c00f5394 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3854,15 +3854,15 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
   dst_queue_mask, range,
   pending_clears);
 
-   if (radv_image_has_cmask(image) || radv_image_has_fmask(image))
-   radv_handle_cmask_image_transition(cmd_buffer, image, 
src_layout,
-  dst_layout, src_queue_mask,
-  dst_queue_mask, range);
-
if (radv_image_has_dcc(image))
radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
 dst_layout, src_queue_mask,
 dst_queue_mask, range);
+
+   if (radv_image_has_cmask(image) || radv_image_has_fmask(image))
+   radv_handle_cmask_image_transition(cmd_buffer, image, 
src_layout,
+  dst_layout, src_queue_mask,
+  dst_queue_mask, range);
 }
 
 void radv_CmdPipelineBarrier(
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/10] radv: disable prediction only if it has been enabled

2018-04-13 Thread Samuel Pitoiset
When decompressing DCC we don't enable it, so it's useless
to disable it. This reduces the number of prediction packets
sent to the GPU when performing color decompression passes.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_fast_clear.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_meta_fast_clear.c 
b/src/amd/vulkan/radv_meta_fast_clear.c
index 327c1ae440..d5af7a1b0c 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -667,7 +667,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer 
*cmd_buffer,
&cmd_buffer->pool->alloc);
 
}
-   if (radv_image_has_dcc(image)) {
+   if (!decompress_dcc && radv_image_has_dcc(image)) {
cmd_buffer->state.predicating = false;
radv_emit_set_predication_state_from_image(cmd_buffer, image, 
false);
}
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] glsl: #undef THIS macro to fix MSVC build

2018-04-13 Thread Neha Bhende
For the series,


Reviewed-by: Neha Bhende


Regards,

Neha


From: Brian Paul 
Sent: Friday, April 13, 2018 9:03:41 AM
To: mesa-dev@lists.freedesktop.org
Cc: Charmaine Lee; Neha Bhende
Subject: [PATCH 3/3] glsl: #undef THIS macro to fix MSVC build

THIS is a macro in one of the MSVC header files.  It's also a token
in the GLSL lexer.  This causes a compilation failure with MSVC.
This issue seems to be newly exposed after the recent mtypes.h removal
patches.
---
 src/compiler/glsl/glsl_parser_extras.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index 66bd1a3..5b9b6cc 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -33,6 +33,11 @@
 #include 
 #include "glsl_symbol_table.h"

+/* THIS is a macro defined somewhere deep in the Windows MSVC header files.
+ * Undefine it here to avoid collision with the lexer's THIS token.
+ */
+#undef THIS
+
 struct gl_context;

 struct glsl_switch_state {
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] radv: Support allocating variable size descriptor sets.

2018-04-13 Thread Samuel Pitoiset

Patches 3-7 are:

Reviewed-by: Samuel Pitoiset 

On 04/12/2018 01:44 AM, Bas Nieuwenhuizen wrote:

---
  src/amd/vulkan/radv_descriptor_set.c | 21 +
  1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_descriptor_set.c 
b/src/amd/vulkan/radv_descriptor_set.c
index 9b35451c497..55b4aaa388c 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -392,6 +392,7 @@ static VkResult
  radv_descriptor_set_create(struct radv_device *device,
   struct radv_descriptor_pool *pool,
   const struct radv_descriptor_set_layout *layout,
+  const uint32_t *variable_count,
   struct radv_descriptor_set **out_set)
  {
struct radv_descriptor_set *set;
@@ -420,9 +421,9 @@ radv_descriptor_set_create(struct radv_device *device,
}
  
  	set->layout = layout;

-   if (layout->size) {
-   uint32_t layout_size = align_u32(layout->size, 32);
-   set->size = layout->size;
+   uint32_t layout_size = align_u32(layout->size, 32);
+   if (layout_size) {
+   set->size = layout_size;
  
  		if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {

vk_free2(>alloc, NULL, set);
@@ -648,14 +649,26 @@ VkResult radv_AllocateDescriptorSets(
uint32_t i;
struct radv_descriptor_set *set = NULL;
  
+	const VkDescriptorSetVariableDescriptorCountAllocateInfoEXT *variable_counts =

+   vk_find_struct_const(pAllocateInfo->pNext, 
DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT);
+   const uint32_t zero = 0;
+
/* allocate a set of buffers for each shader to contain descriptors */
for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
RADV_FROM_HANDLE(radv_descriptor_set_layout, layout,
 pAllocateInfo->pSetLayouts[i]);
  
+		const uint32_t *variable_count = NULL;

+   if (variable_counts) {
+   if (i < variable_counts->descriptorSetCount)
+   variable_count = 
variable_counts->pDescriptorCounts + i;
+   else
+   variable_count = 
+   }
+
assert(!(layout->flags & 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
  
-		result = radv_descriptor_set_create(device, pool, layout, );

+   result = radv_descriptor_set_create(device, pool, layout, 
variable_count, );
if (result != VK_SUCCESS)
break;
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/10] radv: Keep a global BO list for VkMemory.

2018-04-13 Thread Samuel Pitoiset

Reviewed-by: Samuel Pitoiset 

We should remove some radv_cs_add_buffer() calls and re-enable local BOs 
to reduce overhead, but this can be done later.


Make sure to check the system submission path.

On 04/12/2018 01:44 AM, Bas Nieuwenhuizen wrote:

With update after bind we can't attach BOs to the command buffer
from the descriptor set anymore, so we have to have a global BO
list.

I am somewhat surprised this works really well even though we have
implicit synchronization in the WSI based on the bo list associations
and with the new behavior every command buffer is associated with
every swapchain image. But I could not find slowdowns in games because
of it.
---
  src/amd/vulkan/radv_device.c  | 125 +-
  src/amd/vulkan/radv_private.h |   8 ++
  src/amd/vulkan/radv_radeon_winsys.h   |   6 +
  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  46 ++-
  4 files changed, 146 insertions(+), 39 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 22e8f1e7a78..c81b69fef5c 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1208,6 +1208,55 @@ radv_queue_finish(struct radv_queue *queue)
queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
  }
  
+static void

+radv_bo_list_init(struct radv_bo_list *bo_list)
+{
+   pthread_mutex_init(_list->mutex, NULL);
+   bo_list->list.count = bo_list->capacity = 0;
+   bo_list->list.bos = NULL;
+}
+
+static void
+radv_bo_list_finish(struct radv_bo_list *bo_list)
+{
+   free(bo_list->list.bos);
+   pthread_mutex_destroy(_list->mutex);
+}
+
+static VkResult radv_bo_list_add(struct radv_bo_list *bo_list, struct 
radeon_winsys_bo *bo)
+{
+   pthread_mutex_lock(_list->mutex);
+   if (bo_list->list.count == bo_list->capacity) {
+   unsigned capacity = MAX2(4, bo_list->capacity * 2);
+   void *data = realloc(bo_list->list.bos, capacity * 
sizeof(struct radeon_winsys_bo*));
+
+   if (!data) {
+   pthread_mutex_unlock(_list->mutex);
+   return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   bo_list->list.bos = (struct radeon_winsys_bo**)data;
+   bo_list->capacity = capacity;
+   }
+
+   bo_list->list.bos[bo_list->list.count++] = bo;
+   pthread_mutex_unlock(_list->mutex);
+   return VK_SUCCESS;
+}
+
+static void radv_bo_list_remove(struct radv_bo_list *bo_list, struct 
radeon_winsys_bo *bo)
+{
+   pthread_mutex_lock(_list->mutex);
+   for(unsigned i = 0; i < bo_list->list.count; ++i) {
+   if (bo_list->list.bos[i] == bo) {
+   bo_list->list.bos[i] = 
bo_list->list.bos[bo_list->list.count - 1];
+   --bo_list->list.count;
+   break;
+   }
+   }
+   pthread_mutex_unlock(_list->mutex);
+}
+
  static void
  radv_device_init_gs_info(struct radv_device *device)
  {
@@ -1308,6 +1357,8 @@ VkResult radv_CreateDevice(
mtx_init(>shader_slab_mutex, mtx_plain);
list_inithead(>shader_slabs);
  
+	radv_bo_list_init(>bo_list);

+
for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
const VkDeviceQueueCreateInfo *queue_create = 
>pQueueCreateInfos[i];
uint32_t qfi = queue_create->queueFamilyIndex;
@@ -1440,6 +1491,8 @@ VkResult radv_CreateDevice(
  fail_meta:
radv_device_finish_meta(device);
  fail:
+   radv_bo_list_finish(>bo_list);
+
if (device->trace_bo)
device->ws->buffer_destroy(device->trace_bo);
  
@@ -1487,6 +1540,7 @@ void radv_DestroyDevice(
  
  	radv_destroy_shader_slabs(device);
  
+	radv_bo_list_finish(>bo_list);

vk_free(>alloc, device);
  }
  
@@ -2257,7 +2311,7 @@ static VkResult radv_signal_fence(struct radv_queue *queue,
  
  	ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,

   
>device->empty_cs[queue->queue_family_index],
-  1, NULL, NULL, _info,
+  1, NULL, NULL, _info, NULL,
   false, fence->fence);
radv_free_sem_info(_info);
  
@@ -2334,7 +2388,7 @@ VkResult radv_QueueSubmit(

ret = queue->device->ws->cs_submit(ctx, 
queue->queue_idx,
   
>device->empty_cs[queue->queue_family_index],
   1, NULL, 
NULL,
-  _info,
+  _info, 
NULL,
   false, 
base_fence);
if (ret) {

[Mesa-dev] [PATCH 07/10] radv: merge radv_handle_{dcc, cmask}_image_transition() functions

2018-04-13 Thread Samuel Pitoiset
Into radv_handle_color_image_transition().

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 58 ++--
 1 file changed, 17 insertions(+), 41 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 48877bde4a..afe953d90c 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3766,20 +3766,6 @@ static void radv_initialise_cmask(struct radv_cmd_buffer 
*cmd_buffer,
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
 }
 
-static void radv_handle_cmask_image_transition(struct radv_cmd_buffer 
*cmd_buffer,
-  struct radv_image *image,
-  VkImageLayout src_layout,
-  VkImageLayout dst_layout,
-  unsigned src_queue_mask,
-  unsigned dst_queue_mask,
-  const VkImageSubresourceRange 
*range)
-{
-   if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
-  !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
-   radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
-   }
-}
-
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 struct radv_image *image, uint32_t value)
 {
@@ -3794,25 +3780,6 @@ void radv_initialize_dcc(struct radv_cmd_buffer 
*cmd_buffer,
 RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
 }
 
-static void radv_handle_dcc_image_transition(struct radv_cmd_buffer 
*cmd_buffer,
-struct radv_image *image,
-VkImageLayout src_layout,
-VkImageLayout dst_layout,
-unsigned src_queue_mask,
-unsigned dst_queue_mask,
-const VkImageSubresourceRange 
*range)
-{
-   if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
-   radv_initialize_dcc(cmd_buffer, image, 0xu);
-   } else if (radv_layout_dcc_compressed(image, src_layout, 
src_queue_mask) &&
-  !radv_layout_dcc_compressed(image, dst_layout, 
dst_queue_mask)) {
-   radv_decompress_dcc(cmd_buffer, image, range);
-   } else if (radv_layout_can_fast_clear(image, src_layout, 
src_queue_mask) &&
-  !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
-   radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
-   }
-}
-
 /**
  * Initialize DCC/FMASK/CMASK metadata for a color image.
  */
@@ -3864,15 +3831,24 @@ static void radv_handle_color_image_transition(struct 
radv_cmd_buffer *cmd_buffe
return;
}
 
-   if (radv_image_has_dcc(image))
-   radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
-dst_layout, src_queue_mask,
-dst_queue_mask, range);
+   if (radv_image_has_dcc(image)) {
+   if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
+   radv_initialize_dcc(cmd_buffer, image, 0xu);
+   } else if (radv_layout_dcc_compressed(image, src_layout, 
src_queue_mask) &&
+  !radv_layout_dcc_compressed(image, dst_layout, 
dst_queue_mask)) {
+   radv_decompress_dcc(cmd_buffer, image, range);
+   } else if (radv_layout_can_fast_clear(image, src_layout, 
src_queue_mask) &&
+  !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
+   radv_fast_clear_flush_image_inplace(cmd_buffer, image, 
range);
+   }
+   }
 
-   if (radv_image_has_cmask(image) || radv_image_has_fmask(image))
-   radv_handle_cmask_image_transition(cmd_buffer, image, 
src_layout,
-  dst_layout, src_queue_mask,
-  dst_queue_mask, range);
+   if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
+   if (radv_layout_can_fast_clear(image, src_layout, 
src_queue_mask) &&
+   !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
+   radv_fast_clear_flush_image_inplace(cmd_buffer, image, 
range);
+   }
+   }
 }
 
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/10] radv: clean up radv_handle_image_transition() a bit

2018-04-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 270dcd5a9e..a74bad1981 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3718,6 +3718,9 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
   const VkImageSubresourceRange 
*range,
   VkImageAspectFlags 
pending_clears)
 {
+   if (!radv_image_has_htile(image))
+   return;
+
if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
(pending_clears & vk_format_aspects(image->vk_format)) == 
vk_format_aspects(image->vk_format) &&
cmd_buffer->state.render_area.offset.x == 0 && 
cmd_buffer->state.render_area.offset.y == 0 &&
@@ -3867,18 +3870,24 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
return;
}
 
-   unsigned src_queue_mask = radv_image_queue_family_mask(image, 
src_family, cmd_buffer->queue_family_index);
-   unsigned dst_queue_mask = radv_image_queue_family_mask(image, 
dst_family, cmd_buffer->queue_family_index);
+   unsigned src_queue_mask =
+   radv_image_queue_family_mask(image, src_family,
+cmd_buffer->queue_family_index);
+   unsigned dst_queue_mask =
+   radv_image_queue_family_mask(image, dst_family,
+cmd_buffer->queue_family_index);
 
-   if (radv_image_has_htile(image))
-   radv_handle_depth_image_transition(cmd_buffer, image, 
src_layout,
-  dst_layout, src_queue_mask,
-  dst_queue_mask, range,
-  pending_clears);
-
-   radv_handle_color_image_transition(cmd_buffer, image, src_layout,
-  dst_layout, src_queue_mask,
-  dst_queue_mask, range);
+   if (vk_format_is_depth(image->vk_format)) {
+   radv_handle_depth_image_transition(cmd_buffer, image,
+  src_layout, dst_layout,
+  src_queue_mask, 
dst_queue_mask,
+  range, pending_clears);
+   } else {
+   radv_handle_color_image_transition(cmd_buffer, image,
+  src_layout, dst_layout,
+  src_queue_mask, 
dst_queue_mask,
+  range);
+   }
 }
 
 void radv_CmdPipelineBarrier(
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] radv: make radv_initialise_cmask() static

2018-04-13 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 4 ++--
 src/amd/vulkan/radv_private.h| 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index a74bad1981..8fe96b2e50 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3753,8 +3753,8 @@ static void radv_handle_depth_image_transition(struct 
radv_cmd_buffer *cmd_buffe
}
 }
 
-void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
-  struct radv_image *image, uint32_t value)
+static void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_image *image, uint32_t value)
 {
struct radv_cmd_state *state = _buffer->state;
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index df8fe891dc..1869604e9e 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1728,8 +1728,6 @@ void radv_meta_push_descriptor_set(struct radv_cmd_buffer 
*cmd_buffer,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet 
*pDescriptorWrites);
 
-void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
-  struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 struct radv_image *image, uint32_t value);
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/10] radv: handle CMASK/FMASK transitions only if DCC is disabled

2018-04-13 Thread Samuel Pitoiset
DCC implies a fast-clear eliminate, so I think this sounds
reasonable.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index afe953d90c..72fb6d6357 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3841,9 +3841,7 @@ static void radv_handle_color_image_transition(struct 
radv_cmd_buffer *cmd_buffe
   !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, 
range);
}
-   }
-
-   if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
+   } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
if (radv_layout_can_fast_clear(image, src_layout, 
src_queue_mask) &&
!radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, 
range);
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/10] radv: clean up radv_decompress_resolve_subpass_src()

2018-04-13 Thread Samuel Pitoiset
To handle the source color image transitions in the same place.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_resolve.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_resolve.c 
b/src/amd/vulkan/radv_meta_resolve.c
index 1828eb37f4..d66f1c9f93 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -689,18 +689,15 @@ radv_decompress_resolve_subpass_src(struct 
radv_cmd_buffer *cmd_buffer)
dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
 
-   struct radv_image_view *src_iview =
-   fb->attachments[src_att.attachment].attachment;
+   struct radv_image *src_image =
+   fb->attachments[src_att.attachment].attachment->image;
 
-   VkImageSubresourceRange range;
-   range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-   range.baseMipLevel = 0;
-   range.levelCount = 1;
-   range.baseArrayLayer = 0;
-   range.layerCount = 1;
+   VkImageResolve region = {};
+   region.srcSubresource.baseArrayLayer = 0;
+   region.srcSubresource.mipLevel = 0;
+   region.srcSubresource.layerCount = 1;
 
-   radv_fast_clear_flush_image_inplace(cmd_buffer,
-   src_iview->image, );
+   radv_decompress_resolve_src(cmd_buffer, src_image, 1, );
}
 }
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/10] radv: add radv_init_color_image_metadata() helper

2018-04-13 Thread Samuel Pitoiset
In order to separate initialization from decompression. In the
future, that will allow us to init DCC/FMASK/CMASK in one shot.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 51 +---
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8fe96b2e50..48877bde4a 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3774,12 +3774,7 @@ static void radv_handle_cmask_image_transition(struct 
radv_cmd_buffer *cmd_buffe
   unsigned dst_queue_mask,
   const VkImageSubresourceRange 
*range)
 {
-   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-   if (radv_image_has_fmask(image))
-   radv_initialise_cmask(cmd_buffer, image, 0xu);
-   else
-   radv_initialise_cmask(cmd_buffer, image, 0xu);
-   } else if (radv_layout_can_fast_clear(image, src_layout, 
src_queue_mask) &&
+   if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
   !radv_layout_can_fast_clear(image, dst_layout, 
dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
}
@@ -3809,10 +3804,6 @@ static void radv_handle_dcc_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
 {
if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
radv_initialize_dcc(cmd_buffer, image, 0xu);
-   } else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-   radv_initialize_dcc(cmd_buffer, image,
-   radv_layout_dcc_compressed(image, 
dst_layout, dst_queue_mask) ?
-0x20202020u : 0xu);
} else if (radv_layout_dcc_compressed(image, src_layout, 
src_queue_mask) &&
   !radv_layout_dcc_compressed(image, dst_layout, 
dst_queue_mask)) {
radv_decompress_dcc(cmd_buffer, image, range);
@@ -3822,6 +3813,39 @@ static void radv_handle_dcc_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
}
 }
 
+/**
+ * Initialize DCC/FMASK/CMASK metadata for a color image.
+ */
+static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,
+  struct radv_image *image,
+  VkImageLayout src_layout,
+  VkImageLayout dst_layout,
+  unsigned src_queue_mask,
+  unsigned dst_queue_mask)
+{
+   if (radv_image_has_cmask(image)) {
+   uint32_t value = 0xu; /* Fully expanded mode. */
+
+   /*  TODO: clarify this. */
+   if (radv_image_has_fmask(image)) {
+   value = 0xu;
+   }
+
+   radv_initialise_cmask(cmd_buffer, image, value);
+   }
+
+   if (radv_image_has_dcc(image)) {
+   uint32_t value = 0xu; /* Fully expanded mode. */
+
+   if (radv_layout_dcc_compressed(image, dst_layout,
+  dst_queue_mask)) {
+   value = 0x20202020u;
+   }
+
+   radv_initialize_dcc(cmd_buffer, image, value);
+   }
+}
+
 /**
  * Handle color image transitions for DCC/FMASK/CMASK.
  */
@@ -3833,6 +3857,13 @@ static void radv_handle_color_image_transition(struct 
radv_cmd_buffer *cmd_buffe
   unsigned dst_queue_mask,
   const VkImageSubresourceRange 
*range)
 {
+   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+   radv_init_color_image_metadata(cmd_buffer, image,
+  src_layout, dst_layout,
+  src_queue_mask, dst_queue_mask);
+   return;
+   }
+
if (radv_image_has_dcc(image))
radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
 dst_layout, src_queue_mask,
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/10] radv: add radv_handle_color_image_transition() helper

2018-04-13 Thread Samuel Pitoiset
To handle CMASK, FMASK and DCC transitions in the same place.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_cmd_buffer.c | 34 +++-
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 92c00f5394..270dcd5a9e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3819,6 +3819,28 @@ static void radv_handle_dcc_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
}
 }
 
+/**
+ * Handle color image transitions for DCC/FMASK/CMASK.
+ */
+static void radv_handle_color_image_transition(struct radv_cmd_buffer 
*cmd_buffer,
+  struct radv_image *image,
+  VkImageLayout src_layout,
+  VkImageLayout dst_layout,
+  unsigned src_queue_mask,
+  unsigned dst_queue_mask,
+  const VkImageSubresourceRange 
*range)
+{
+   if (radv_image_has_dcc(image))
+   radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
+dst_layout, src_queue_mask,
+dst_queue_mask, range);
+
+   if (radv_image_has_cmask(image) || radv_image_has_fmask(image))
+   radv_handle_cmask_image_transition(cmd_buffer, image, 
src_layout,
+  dst_layout, src_queue_mask,
+  dst_queue_mask, range);
+}
+
 static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 struct radv_image *image,
 VkImageLayout src_layout,
@@ -3854,15 +3876,9 @@ static void radv_handle_image_transition(struct 
radv_cmd_buffer *cmd_buffer,
   dst_queue_mask, range,
   pending_clears);
 
-   if (radv_image_has_dcc(image))
-   radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
-dst_layout, src_queue_mask,
-dst_queue_mask, range);
-
-   if (radv_image_has_cmask(image) || radv_image_has_fmask(image))
-   radv_handle_cmask_image_transition(cmd_buffer, image, 
src_layout,
-  dst_layout, src_queue_mask,
-  dst_queue_mask, range);
+   radv_handle_color_image_transition(cmd_buffer, image, src_layout,
+  dst_layout, src_queue_mask,
+  dst_queue_mask, range);
 }
 
 void radv_CmdPipelineBarrier(
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/10] radv: don't fast-clear eliminate after resolving a subpass with compute

2018-04-13 Thread Samuel Pitoiset
That looks useless, and I think radv_handle_image_transition()
will do a fast-clear eliminate because it's called after the
resolve.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_meta_resolve_cs.c | 14 --
 1 file changed, 14 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c 
b/src/amd/vulkan/radv_meta_resolve_cs.c
index 6d605aba01..628208d635 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -517,18 +517,4 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer 
*cmd_buffer)
}
 
radv_meta_restore(_state, cmd_buffer);
-
-   for (uint32_t i = 0; i < subpass->color_count; ++i) {
-   VkAttachmentReference dest_att = 
subpass->resolve_attachments[i];
-   struct radv_image *dst_img = 
cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
-   if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
-   continue;
-   VkImageSubresourceRange range;
-   range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
-   range.baseMipLevel = 0;
-   range.levelCount = 1;
-   range.baseArrayLayer = 0;
-   range.layerCount = 1;
-   radv_fast_clear_flush_image_inplace(cmd_buffer, dst_img, 
);
-   }
 }
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] spirv: Add support for VK_EXT_descriptor_indexing uniform indexing caps.

2018-04-13 Thread Samuel Pitoiset



On 04/12/2018 01:44 AM, Bas Nieuwenhuizen wrote:

---
  src/compiler/shader_info.h| 1 +
  src/compiler/spirv/spirv_to_nir.c | 6 ++
  2 files changed, 7 insertions(+)

diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
index ababe520b2d..c8128fea01b 100644
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -53,6 +53,7 @@ struct spirv_supported_capabilities {
 bool subgroup_vote;
 bool gcn_shader;
 bool trinary_minmax;
+   bool full_uniform_desciptor_indexing;


How about "descriptor_array_dynamic_indexing"? Other than that, there is 
a typo: descriptor



  };
  
  typedef struct shader_info {

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 78c1e9ff597..04d26841188 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -3382,6 +3382,12 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, 
SpvOp opcode,
   spv_check_supported(shader_viewport_index_layer, cap);
   break;
  
+  case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:

+  case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
+  case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
+ spv_check_supported(full_uniform_desciptor_indexing, cap);
+ break;
+
default:
   vtn_fail("Unhandled capability");
}


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] spirv: Add support for VK_EXT_descriptor_indexing uniform indexing caps.

2018-04-13 Thread Bas Nieuwenhuizen
On Fri, Apr 13, 2018 at 8:06 PM, Samuel Pitoiset
 wrote:
>
>
> On 04/12/2018 01:44 AM, Bas Nieuwenhuizen wrote:
>>
>> ---
>>   src/compiler/shader_info.h| 1 +
>>   src/compiler/spirv/spirv_to_nir.c | 6 ++
>>   2 files changed, 7 insertions(+)
>>
>> diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h
>> index ababe520b2d..c8128fea01b 100644
>> --- a/src/compiler/shader_info.h
>> +++ b/src/compiler/shader_info.h
>> @@ -53,6 +53,7 @@ struct spirv_supported_capabilities {
>>  bool subgroup_vote;
>>  bool gcn_shader;
>>  bool trinary_minmax;
>> +   bool full_uniform_desciptor_indexing;
>
>
> How about "descriptor_array_dynamic_indexing"? Other than that, there is a
> typo: descriptor

What this allows is dynamically uniform indexing for some descriptor
types that did not allow it previously; many descriptor types already
supported it. Hence the "full".

>
>
>>   };
>> typedef struct shader_info {
>> diff --git a/src/compiler/spirv/spirv_to_nir.c
>> b/src/compiler/spirv/spirv_to_nir.c
>> index 78c1e9ff597..04d26841188 100644
>> --- a/src/compiler/spirv/spirv_to_nir.c
>> +++ b/src/compiler/spirv/spirv_to_nir.c
>> @@ -3382,6 +3382,12 @@ vtn_handle_preamble_instruction(struct vtn_builder
>> *b, SpvOp opcode,
>>spv_check_supported(shader_viewport_index_layer, cap);
>>break;
>>   +  case SpvCapabilityInputAttachmentArrayDynamicIndexingEXT:
>> +  case SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT:
>> +  case SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT:
>> + spv_check_supported(full_uniform_desciptor_indexing, cap);
>> + break;
>> +
>> default:
>>vtn_fail("Unhandled capability");
>> }
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Include unistd.h in program_lexer

2018-04-13 Thread Lionel Landwerlin

Reviewed-by: Lionel Landwerlin 

On 13/04/18 10:18, Dylan Baker wrote:

Which was previously provided implicitly by mtypes.h

CC: Marek Olšák 
CC: Mark Janes 
Fixes: 43d66c8c2d4d3d4dee1309856b6ce6c5393682e5
("mesa: include mtypes.h less")
Signed-off-by: Dylan Baker 
---
  src/mesa/program/program_lexer.l | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/mesa/program/program_lexer.l b/src/mesa/program/program_lexer.l
index 2e168b83bdb..13eb9025148 100644
--- a/src/mesa/program/program_lexer.l
+++ b/src/mesa/program/program_lexer.l
@@ -21,6 +21,7 @@
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   * DEALINGS IN THE SOFTWARE.
   */
+#include 
  #include "main/glheader.h"
  #include "main/imports.h"
  #include "program/prog_instruction.h"



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/45] swr/rast: Use blend context struct to pass params

2018-04-13 Thread George Kyriazis
Stuff parameters into a blend context struct before passing down through
the PFN_BLEND_JIT_FUNC function pointer. Needed for stat changes.
---
 .../drivers/swr/rasterizer/core/backend_impl.h | 44 ++-
 src/gallium/drivers/swr/rasterizer/core/state.h| 17 ++--
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 50 +++---
 3 files changed, 62 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h 
b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
index 2cfd52e..8c539e3 100644
--- a/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
+++ b/src/gallium/drivers/swr/rasterizer/core/backend_impl.h
@@ -724,24 +724,26 @@ INLINE void OutputMerger4x2(SWR_PS_CONTEXT , 
uint8_t* ()[SW
 
 const SWR_RENDER_TARGET_BLEND_STATE *pRTBlend = 
>renderTarget[rt];
 
+SWR_BLEND_CONTEXT blendContext = { 0 };
 {
 // pfnBlendFunc may not update all channels.  Initialize with PS 
output.
 /// TODO: move this into the blend JIT.
 blendOut = psContext.shaded[rt];
 
+blendContext.pBlendState = pBlendState;
+blendContext.src = [rt];
+blendContext.src1 = [1];
+blendContext.src0alpha = reinterpret_cast([0].w);
+blendContext.sampleNum = sample;
+blendContext.pDst = (simdvector *) 
+blendContext.result = 
+blendContext.oMask = 
+blendContext.pMask = reinterpret_cast();
+
 // Blend outputs and update coverage mask for alpha test
 if(pfnBlendFunc[rt] != nullptr)
 {
-pfnBlendFunc[rt](
-pBlendState,
-psContext.shaded[rt],
-psContext.shaded[1],
-psContext.shaded[0].w,
-sample,
-pColorSample,
-blendOut,
-,
-(simdscalari*));
+pfnBlendFunc[rt]();
 }
 }
 
@@ -811,24 +813,26 @@ INLINE void OutputMerger8x2(SWR_PS_CONTEXT , 
uint8_t* ()[SW
 pColorSample = nullptr;
 }
 
+SWR_BLEND_CONTEXT blendContext = { 0 };
 {
 // pfnBlendFunc may not update all channels.  Initialize with PS 
output.
 /// TODO: move this into the blend JIT.
 blendOut = psContext.shaded[rt];
 
+blendContext.pBlendState= pBlendState;
+blendContext.src= [rt];
+blendContext.src1   = [1];
+blendContext.src0alpha  = reinterpret_cast([0].w);
+blendContext.sampleNum  = sample;
+blendContext.pDst   = 
+blendContext.result = 
+blendContext.oMask  = 
+blendContext.pMask  = reinterpret_cast();
+
 // Blend outputs and update coverage mask for alpha test
 if(pfnBlendFunc[rt] != nullptr)
 {
-pfnBlendFunc[rt](
-pBlendState,
-psContext.shaded[rt],
-psContext.shaded[1],
-psContext.shaded[0].w,
-sample,
-reinterpret_cast(),
-blendOut,
-,
-reinterpret_cast());
+pfnBlendFunc[rt]();
 }
 }
 
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index 6b108d9..8c26ec6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -876,6 +876,19 @@ struct SWR_BLEND_STATE
 };
 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
 
+struct SWR_BLEND_CONTEXT
+{
+const SWR_BLEND_STATE*  pBlendState;
+simdvector* src;
+simdvector* src1;
+simdvector* src0alpha;
+uint32_tsampleNum;
+simdvector* pDst;
+simdvector* result;
+simdscalari*oMask;
+simdscalari*pMask;
+};
+
 //
 /// FUNCTION POINTERS FOR SHADERS
 
@@ -892,9 +905,7 @@ typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, 
SWR_CS_CONTEXT* pCsConte
 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT 
*pContext);
 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT 
*pContext);
-typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
-simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t 
sample,
-uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* 
vCoverageMask);
+typedef void(__cdecl 

[Mesa-dev] [PATCH 00/45] OpenSWR driver misc changes

2018-04-13 Thread George Kyriazis
Lots of SWR-specific changes, including:
- work for 16-wide simd operation across all avx flavors
- separate avx intrinsics into a separate x86 lowering pass
- stats work
- misc other cleanup

George Kyriazis (45):
  swr/rast: Add some instructions to jitter
  swr/rast: Introduce JIT_MEM_CLIENT
  swr/rast: Use blend context struct to pass params
  swr/rast: Add debug type info for i128
  swr/rast: Silence some unused variable warnings
  swr/rast: Add some archrast stats
  swr/rast: Changes to allow jitter to compile with LLVM5
  swr/rast: LLVM 6 fix
  swr/rast: WIP builder rewrite.
  swr/rast: Add autogen of helper llvm intrinsics.
  swr/rast: WIP builder rewrite (2)
  swr/rast: Permute work for simd16
  swr/rast: Add MEM_ADD helper function to Builder.
  swr/rast: Add "Num Instructions Executed" stats intrinsic.
  swr/rast: Code cleanup
  swr/rast: Add some archrast counters
  swr/rast: Fix name mangling for LLVM pow intrinsic
  swr/rast: Move CallPrint() to a separate file
  swr/rast: Simplify #define usage in gen source file
  swr/rast: Start refactoring of builder/packetizer.
  swr/rast: Lower PERMD and PERMPS to x86.
  swr/rast: Cleanup of JitManager convenience types
  swr/rast: Lower VGATHERPS and VGATHERPS_16 to x86.
  swr/rast: Add builder_gfx_mem.{h|cpp}
  swr/rast: Enable generalized fetch jit
  swr: add x86 lowering pass to fragment shader
  swr/rast: Fix codegen for typedef types
  swr/rast: Adding translate call to builder_gfx_mem.
  swr/rast: Add support for setting optimization level
  swr/rast: Fix byte offset for non-indexed draws
  swr/rast: Change gfx pointers to gfxptr_t
  swr/rast: Fix alloca usage in jitter
  swr/rast: add cvt instructions in x86 lowering pass
  swr/rast: fix comment
  swr/rast: Add vgather to x86 lowering pass.
  swr/rast: Type-check TemplateArgUnroller
  swr/rast: Add shader stats infrastructure (WIP)
  swr/rast: Fix 64bit float loads in x86 lowering pass
  swr/rast: double-pump in x86 lowering pass
  swr/rast: minimize codegen redundant work
  swr/rast: Refactor to improve code sharing.
  swr/rast: Implement VROUND intrinsic in x86 lowering pass
  swr/rast: Optimize late/bindless JIT of samplers
  swr/rast: Replace x86 VMOVMSK with llvm-only implementation
  swr/rast: Fix VGATHERPD lowering

 src/gallium/drivers/swr/Makefile.am|   14 +-
 src/gallium/drivers/swr/Makefile.sources   |6 +-
 src/gallium/drivers/swr/SConscript |   13 +-
 src/gallium/drivers/swr/meson.build|6 +-
 .../drivers/swr/rasterizer/archrast/archrast.cpp   |   97 +-
 .../drivers/swr/rasterizer/archrast/events.proto   |   54 +
 .../swr/rasterizer/archrast/events_private.proto   |   45 +
 .../drivers/swr/rasterizer/codegen/gen_archrast.py |  111 +-
 .../drivers/swr/rasterizer/codegen/gen_backends.py |   97 +-
 .../drivers/swr/rasterizer/codegen/gen_common.py   |  131 +-
 .../drivers/swr/rasterizer/codegen/gen_knobs.py|   53 +-
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  180 ++-
 .../swr/rasterizer/codegen/gen_llvm_types.py   |   30 +-
 .../drivers/swr/rasterizer/codegen/knob_defs.py|   35 +
 .../drivers/swr/rasterizer/codegen/meson.build |2 +-
 .../rasterizer/codegen/templates/gen_builder.hpp   |   29 +-
 .../drivers/swr/rasterizer/common/simd16intrin.h   |1 +
 .../drivers/swr/rasterizer/common/simdintrin.h |1 +
 .../swr/rasterizer/common/simdlib_256_avx.inl  |6 +
 .../swr/rasterizer/common/simdlib_256_avx2.inl |7 +
 .../swr/rasterizer/common/simdlib_512_avx512.inl   |6 +
 .../swr/rasterizer/common/simdlib_512_emu.inl  |   16 +-
 src/gallium/drivers/swr/rasterizer/core/api.cpp|   15 +-
 src/gallium/drivers/swr/rasterizer/core/api.h  |   47 +-
 .../drivers/swr/rasterizer/core/backend.cpp|   10 +-
 src/gallium/drivers/swr/rasterizer/core/backend.h  |4 +-
 .../drivers/swr/rasterizer/core/backend_clear.cpp  |   19 +-
 .../drivers/swr/rasterizer/core/backend_impl.h |   73 +-
 .../drivers/swr/rasterizer/core/backend_sample.cpp |   14 +-
 .../swr/rasterizer/core/backend_singlesample.cpp   |   15 +-
 src/gallium/drivers/swr/rasterizer/core/binner.cpp |4 +-
 src/gallium/drivers/swr/rasterizer/core/context.h  |5 +-
 .../drivers/swr/rasterizer/core/frontend.cpp   |   78 +-
 src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp |   42 +-
 .../drivers/swr/rasterizer/core/rasterizer.cpp |4 +-
 .../drivers/swr/rasterizer/core/rasterizer_impl.h  |   15 +-
 src/gallium/drivers/swr/rasterizer/core/state.h|   91 +-
 .../drivers/swr/rasterizer/core/threads.cpp|   68 +-
 src/gallium/drivers/swr/rasterizer/core/threads.h  |5 +-
 .../drivers/swr/rasterizer/core/tilemgr.cpp|   21 +-
 src/gallium/drivers/swr/rasterizer/core/tilemgr.h  |4 +-
 src/gallium/drivers/swr/rasterizer/core/utils.h|   39 +-
 .../drivers/swr/rasterizer/jitter/JitManager.cpp   |   44 +-
 .../drivers/swr/rasterizer/jitter/JitManager.h |   21 -
 

[Mesa-dev] [PATCH 01/45] swr/rast: Add some instructions to jitter

2018-04-13 Thread George Kyriazis
VPHADDD, PMAXUD, PMINUD
---
 .../drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py |  1 +
 src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp   | 12 
 src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h |  2 ++
 3 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index aab499b..113c616 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -71,6 +71,7 @@ intrinsics = [
 ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c']],
 ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a']],
 ['INTERRUPT', 'x86_int', ['a']],
+['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b']],
 ]
 
 this_dir = os.path.dirname(os.path.abspath(__file__))
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 0148d8e..704b0f2 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -756,6 +756,18 @@ namespace SwrJit
 return SELECT(cmp, a, b);
 }
 
+Value *Builder::PMAXUD(Value* a, Value* b)
+{
+Value* cmp = ICMP_UGT(a, b);
+return SELECT(cmp, a, b);
+}
+
+Value *Builder::PMINUD(Value* a, Value* b)
+{
+Value* cmp = ICMP_ULT(a, b);
+return SELECT(cmp, a, b);
+}
+
 // Helper function to create alloca in entry block of function
 Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
 {
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 5195678..9660bc6 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -128,6 +128,8 @@ Value *CVTPH2PS(Value* a, const llvm::Twine& name = "");
 Value *CVTPS2PH(Value* a, Value* rounding);
 Value *PMAXSD(Value* a, Value* b);
 Value *PMINSD(Value* a, Value* b);
+Value *PMAXUD(Value* a, Value* b);
+Value *PMINUD(Value* a, Value* b);
 Value *VABSPS(Value* a);
 Value *FMADDPS(Value* a, Value* b, Value* c);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/45] swr/rast: Introduce JIT_MEM_CLIENT

2018-04-13 Thread George Kyriazis
Help assist with usage tracking of memory accesses
---
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 58 ++
 .../drivers/swr/rasterizer/jitter/builder_mem.h| 47 --
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|  6 +--
 3 files changed, 71 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index 6fa60a1..ac01223 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -36,6 +36,9 @@
 
 namespace SwrJit
 {
+void Builder::AssertRastyMemoryParams(Value* ptr, JIT_MEM_CLIENT usage)
+{
+}
 
 Value *Builder::GEP(Value* ptr, const std::initializer_list 
)
 {
@@ -69,28 +72,33 @@ namespace SwrJit
 return IN_BOUNDS_GEP(ptr, indices);
 }
 
-LoadInst* Builder::LOAD(Value *Ptr, const char *Name)
+LoadInst* Builder::LOAD(Value *Ptr, const char *Name, JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(Ptr, usage);
 return IRB()->CreateLoad(Ptr, Name);
 }
 
-LoadInst* Builder::LOAD(Value *Ptr, const Twine )
+LoadInst* Builder::LOAD(Value *Ptr, const Twine , JIT_MEM_CLIENT 
usage)
 {
+AssertRastyMemoryParams(Ptr, usage);
 return IRB()->CreateLoad(Ptr, Name);
 }
 
-LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine )
+LoadInst* Builder::LOAD(Type *Ty, Value *Ptr, const Twine , 
JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(Ptr, usage);
 return IRB()->CreateLoad(Ty, Ptr, Name);
 }
 
-LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine )
+LoadInst* Builder::LOAD(Value *Ptr, bool isVolatile, const Twine , 
JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(Ptr, usage);
 return IRB()->CreateLoad(Ptr, isVolatile, Name);
 }
 
-LoadInst *Builder::LOAD(Value *basePtr, const 
std::initializer_list , const llvm::Twine& name)
+LoadInst *Builder::LOAD(Value *basePtr, const 
std::initializer_list , const llvm::Twine& name, 
JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(basePtr, usage);
 std::vector valIndices;
 for (auto i : indices)
 valIndices.push_back(C(i));
@@ -158,8 +166,10 @@ namespace SwrJit
 /// @param vIndices - SIMD wide value of VB byte offsets
 /// @param vMask - SIMD wide mask that controls whether to access memory 
or the src values
 /// @param scale - value to scale indices by
-Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value 
*vMask, uint8_t scale)
+Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value 
*vMask, uint8_t scale, JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(pBase, usage);
+
 Value *vGather;
 Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
 
@@ -204,8 +214,10 @@ namespace SwrJit
 return vGather;
 }
 
-Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, 
Value *vMask, uint8_t scale)
+Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, 
Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(pBase, usage);
+
 Value *vGather = VUNDEF_F_16();
 
 // use AVX512F gather instruction if available
@@ -244,8 +256,10 @@ namespace SwrJit
 /// @param vIndices - SIMD wide value of VB byte offsets
 /// @param vMask - SIMD wide mask that controls whether to access memory 
or the src values
 /// @param scale - value to scale indices by
-Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, 
Value* vMask, uint8_t scale)
+Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, 
Value* vMask, uint8_t scale, JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(pBase, usage);
+
 Value* vGather;
 
 // use avx2 gather instruction if available
@@ -286,8 +300,10 @@ namespace SwrJit
 return vGather;
 }
 
-Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, 
Value *vMask, uint8_t scale)
+Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, 
Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
 {
+AssertRastyMemoryParams(pBase, usage);
+
 Value *vGather = VUNDEF_I_16();
 
 // use AVX512F gather instruction if available
@@ -380,21 +396,21 @@ namespace SwrJit
 }
 
 void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* 
byteOffsets,
-Value* mask, Value* vGatherComponents[], bool bPackedOutput)
+Value* mask, Value* vGatherComponents[], bool bPackedOutput, 
JIT_MEM_CLIENT usage)
 {
 const SWR_FORMAT_INFO  = GetFormatInfo(format);
 if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
 {
-

[Mesa-dev] [PATCH 04/45] swr/rast: Add debug type info for i128

2018-04-13 Thread George Kyriazis
Help support debug info in 16 wide shaders.
---
 src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 0cefa43..bfb1d2e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -288,6 +288,7 @@ DIType* JitManager::GetDebugIntegerType(Type* pTy)
 case 16: return builder.createBasicType("int16", 16, 
dwarf::DW_ATE_signed); break;
 case 32: return builder.createBasicType("int", 32, dwarf::DW_ATE_signed); 
break;
 case 64: return builder.createBasicType("int64", 64, 
dwarf::DW_ATE_signed); break;
+case 128: return builder.createBasicType("int128", 128, 
dwarf::DW_ATE_signed); break;
 default: SWR_ASSERT(false, "Unimplemented integer bit width");
 }
 return nullptr;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/45] swr/rast: Add some archrast stats

2018-04-13 Thread George Kyriazis
Add stats for degenerate and backfacing primitive counts

Wire archrast stats for alpha blend and alpha test.
Pass a value to the jitter; upon return, have an archrast event increment a value.
---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   | 35 +-
 .../drivers/swr/rasterizer/archrast/events.proto   | 19 
 .../swr/rasterizer/archrast/events_private.proto   | 15 ++
 .../drivers/swr/rasterizer/core/backend_impl.h | 18 +++
 .../drivers/swr/rasterizer/core/backend_sample.cpp |  4 +--
 .../swr/rasterizer/core/backend_singlesample.cpp   |  4 +--
 src/gallium/drivers/swr/rasterizer/core/binner.cpp |  2 ++
 src/gallium/drivers/swr/rasterizer/core/state.h|  2 ++
 .../drivers/swr/rasterizer/jitter/blend_jit.cpp| 17 +++
 9 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp 
b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 1f87dba..12dfc0e 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -73,6 +73,18 @@ namespace ArchRast
 uint32_t rasterTiles = 0;
 };
 
+struct CullStats
+{
+uint32_t degeneratePrimCount = 0;
+uint32_t backfacePrimCount = 0;
+};
+
+struct AlphaStats
+{
+uint32_t alphaTestCount = 0;
+uint32_t alphaBlendCount = 0;
+};
+
 //
 /// @brief Event handler that handles API thread events. This is shared
 ///between the API and its caller (e.g. driver shim) but typically
@@ -280,7 +292,12 @@ namespace ArchRast
 // Rasterized Subspans
 EventHandlerFile::Handle(RasterTiles(drawId, 
rastStats.rasterTiles));
 
-//Reset Internal Counters
+// Alpha Subspans
+EventHandlerFile::Handle(AlphaEvent(drawId, 
mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
+
+// Primitive Culling
+EventHandlerFile::Handle(CullEvent(drawId, 
mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
+
 mDSSingleSample = {};
 mDSSampleRate = {};
 mDSCombined = {};
@@ -288,6 +305,8 @@ namespace ArchRast
 mDSNullPS = {};
 
 rastStats = {};
+mCullStats = {};
+mAlphaStats = {};
 mNeedFlush = false;
 }
 
@@ -327,6 +346,18 @@ namespace ArchRast
 rastStats.rasterTiles += event.data.rasterTiles;
 }
 
+virtual void Handle(const CullInfoEvent& event)
+{
+mCullStats.degeneratePrimCount += 
_mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & 
~event.data.degeneratePrimMask));
+mCullStats.backfacePrimCount   += 
_mm_popcnt_u32(event.data.validMask ^ (event.data.validMask & 
~event.data.backfacePrimMask));
+}
+
+virtual void Handle(const AlphaInfoEvent& event)
+{
+mAlphaStats.alphaTestCount  += event.data.alphaTestEnable;
+mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
+}
+
 protected:
 bool mNeedFlush;
 // Per draw stats
@@ -340,6 +371,8 @@ namespace ArchRast
 TEStats mTS = {};
 GSStats mGS = {};
 RastStats rastStats = {};
+CullStats mCullStats = {};
+AlphaStats mAlphaStats = {};
 
 };
 
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto 
b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
index 7d9a68d..deb0373 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto
+++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto
@@ -180,6 +180,7 @@ event LateStencilSampleRate
 uint64_t failCount;
 };
 
+// Total Early-Z counts, SingleSample and SampleRate
 event EarlyZ
 {
 uint32_t drawId;
@@ -187,6 +188,7 @@ event EarlyZ
 uint64_t failCount;
 }; 
 
+// Total LateZ counts, SingleSample and SampleRate
 event LateZ
 {
 uint32_t drawId;
@@ -194,6 +196,7 @@ event LateZ
 uint64_t failCount;
 };
 
+// Total EarlyStencil counts, SingleSample and SampleRate
 event EarlyStencil
 {
 uint32_t drawId; 
@@ -201,6 +204,7 @@ event EarlyStencil
 uint64_t failCount;
 };
 
+// Total LateStencil counts, SingleSample and SampleRate
 event LateStencil
 {
 uint32_t drawId; 
@@ -302,3 +306,18 @@ event ClipperEvent
 uint32_t trivialAcceptCount;
 uint32_t mustClipCount;
 };
+
+event CullEvent
+{
+uint32_t drawId;
+uint64_t backfacePrimCount;
+uint64_t degeneratePrimCount;
+};
+
+event AlphaEvent
+{
+uint32_t drawId;
+uint32_t alphaTestCount;
+uint32_t alphaBlendCount;
+};
+
diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto 
b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto
index f0a9310..37593be 100644
--- 

[Mesa-dev] [PATCH 19/45] swr/rast: Simplify #define usage in gen source file

2018-04-13 Thread George Kyriazis
Removed preprocessor defines from structures passed to LLVM jitted code.

The python scripts do not understand the preprocessor defines and ignore
them. So for fields that are compiled out due to a preprocessor define
the LLVM script accounts for them anyway because it doesn't know what
the defines are set to. The sanitize defines for open source are fine
in that they're safely used.
---
 src/gallium/drivers/swr/rasterizer/core/state.h | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index 47ffacf..084ca54 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -234,13 +234,12 @@ struct SWR_VS_CONTEXT
 uint32_t InstanceID;// IN: Instance ID, constant across all verts 
of the SIMD
 simdscalari VertexID;   // IN: Vertex ID
 simdscalari mask;   // IN: Active mask for shader
-#if USE_SIMD16_FRONTEND
+
+// SIMD16 Frontend fields.
 uint32_t AlternateOffset;   // IN: amount to offset for interleaving 
even/odd simd8 in simd16vertex output
-#if USE_SIMD16_VS
 simd16scalari mask16;   // IN: Active mask for shader (16-wide)
 simd16scalari VertexID16;   // IN: Vertex ID (16-wide)
-#endif
-#endif
+
 SWR_SHADER_STATS stats; // OUT: shader statistics used for archrast.
 };
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 29/45] swr/rast: Add support for setting optimization level

2018-04-13 Thread George Kyriazis
for JIT compilation
---
 .../drivers/swr/rasterizer/codegen/knob_defs.py| 35 ++
 .../swr/rasterizer/common/simdlib_512_emu.inl  |  2 +-
 src/gallium/drivers/swr/rasterizer/core/state.h| 13 
 .../drivers/swr/rasterizer/jitter/JitManager.cpp   | 10 +--
 .../drivers/swr/rasterizer/jitter/JitManager.h |  1 -
 .../swr/rasterizer/jitter/builder_gfx_mem.cpp  |  4 +--
 .../swr/rasterizer/jitter/builder_gfx_mem.h|  3 +-
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |  1 -
 .../drivers/swr/rasterizer/jitter/builder_mem.h|  4 ---
 9 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py 
b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
index d4bf193..c9d1f5d 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py
@@ -193,6 +193,41 @@ KNOBS = [
 'category'  : 'debug_adv',
 }],
 
+['JIT_OPTIMIZATION_LEVEL', {
+'type'  : 'int',
+'default'   : '-1',
+'desc'  : ['JIT compile optimization level:',],
+'category'  : 'debug',
+'control'   : 'dropdown',
+'choices' : [
+{
+'name'  : 'Automatic',
+'desc'  : 'Automatic based on other KNOB and build settings',
+'value' : -1,
+},
+{
+'name'  : 'Debug',
+'desc'  : 'No optimization: -O0',
+'value' : 0,
+},
+{
+'name'  : 'Less',
+'desc'  : 'Some optimization: -O1',
+'value' : 1,
+},
+{
+'name'  : 'Optimize',
+'desc'  : 'Default Clang / LLVM optimizations: -O2',
+'value' : 2,
+},
+{
+'name'  : 'Aggressive',
+'desc'  : 'Maximum optimization: -O3',
+'value' : 3,
+},
+],
+}],
+
 ['JIT_CACHE_DIR', {
 'type'  : 'std::string',
 'default'   : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else 
'${HOME}/.swr/jitcache',
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
index 5d5120a..55981dc 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_emu.inl
@@ -426,7 +426,7 @@ static SIMDINLINE bool SIMDCALL testz_ps(Float const , 
Float const )  // ret
   SIMD256T::testz_ps(a.v8[1], b.v8[1]));
 }
 
-static SIMDINLINE int SIMDCALL testz_si(Integer const , Integer const )  
// return all_lanes_zero(a & b) ? 1 : 0 (int)
+static SIMDINLINE bool SIMDCALL testz_si(Integer const , Integer const )  
// return all_lanes_zero(a & b) ? 1 : 0 (int)
 {
 return  0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) &
   SIMD256T::testz_si(a.v8[1], b.v8[1]));
diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h 
b/src/gallium/drivers/swr/rasterizer/core/state.h
index 084ca54..9233446 100644
--- a/src/gallium/drivers/swr/rasterizer/core/state.h
+++ b/src/gallium/drivers/swr/rasterizer/core/state.h
@@ -1,5 +1,5 @@
 /
-* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+* Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -526,6 +526,11 @@ enum SWR_AUX_MODE
 AUX_MODE_DEPTH,
 };
 
+struct SWR_LOD_OFFSETS
+{
+uint32_t offsets[2][15];
+};
+
 //
 /// SWR_SURFACE_STATE
 //
@@ -866,11 +871,9 @@ enum SWR_MULTISAMPLE_COUNT
 SWR_MULTISAMPLE_TYPE_COUNT
 };
 
-INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // 
@llvm_func_start
+static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int 
sampleCountEnum) // @llvm_func_start
 {
-static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 
8, 16};
-assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
-return sampleCountLUT[sampleCount];
+return uint32_t(1) << sampleCountEnum;
 } // @llvm_func_end
 
 struct SWR_BLEND_STATE
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
index 9080964..7f9c9dd 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/JitManager.cpp
@@ -66,6 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, 
const char* core)
 

[Mesa-dev] [PATCH 05/45] swr/rast: Silence some unused variable warnings

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 5feb5fa..1ee6691 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -253,7 +253,13 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& 
fetchState)
: vIndices2 = 
GetSimdValid32bitIndices(indices2, pLastIndex);
 #endif
 break; // incoming type is already 32bit int
-default: SWR_INVALID("Unsupported index type"); vIndices = nullptr; 
break;
+default:
+SWR_INVALID("Unsupported index type");
+vIndices = nullptr;
+#if USE_SIMD16_SHADERS
+vIndices2 = nullptr;
+#endif
+break;
 }
 
 if(fetchState.bForceSequentialAccessEnable)
@@ -434,6 +440,10 @@ void FetchJit::JitLoadVertices(const FETCH_COMPILE_STATE 
, Value* str
 }
 else if (ied.InstanceStrideEnable)
 {
+// silence unused variable warnings
+startOffset = C(0);
+vCurIndices = vIndices;
+
 SWR_ASSERT((0), "TODO: Fill out more once driver sends this 
down.");
 }
 else
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Include unistd.h in program_lexer

2018-04-13 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Apr 13, 2018 at 1:18 PM, Dylan Baker  wrote:

> Which was previously provided implicitly by mtypes.h
>
> CC: Marek Olšák 
> CC: Mark Janes 
> Fixes: 43d66c8c2d4d3d4dee1309856b6ce6c5393682e5
>("mesa: include mtypes.h less")
> Signed-off-by: Dylan Baker 
> ---
>  src/mesa/program/program_lexer.l | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/mesa/program/program_lexer.l b/src/mesa/program/program_
> lexer.l
> index 2e168b83bdb..13eb9025148 100644
> --- a/src/mesa/program/program_lexer.l
> +++ b/src/mesa/program/program_lexer.l
> @@ -21,6 +21,7 @@
>   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
>   * DEALINGS IN THE SOFTWARE.
>   */
> +#include 
>  #include "main/glheader.h"
>  #include "main/imports.h"
>  #include "program/prog_instruction.h"
> --
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] radv: Enable VK_EXT_descriptor_indexing.

2018-04-13 Thread Samuel Pitoiset

Patches 9-10 are:

Reviewed-by: Samuel Pitoiset 

On 04/12/2018 01:44 AM, Bas Nieuwenhuizen wrote:

This adds everything except non-uniform indexing, which needs a bit
more work and testing.
---
  src/amd/vulkan/radv_device.c  | 39 +++
  src/amd/vulkan/radv_extensions.py |  1 +
  src/amd/vulkan/radv_shader.c  |  2 ++
  3 files changed, 42 insertions(+)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c81b69fef5c..bdbbfc162a2 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -735,6 +735,31 @@ void radv_GetPhysicalDeviceFeatures2(
features->samplerYcbcrConversion = false;
break;
}
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
+   VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features 
=
+   
(VkPhysicalDeviceDescriptorIndexingFeaturesEXT*)features;
+   features->shaderInputAttachmentArrayDynamicIndexing = 
true;
+   features->shaderUniformTexelBufferArrayDynamicIndexing 
= true;
+   features->shaderStorageTexelBufferArrayDynamicIndexing 
= true;
+   features->shaderUniformBufferArrayNonUniformIndexing = 
false;
+   features->shaderSampledImageArrayNonUniformIndexing = 
false;
+   features->shaderStorageBufferArrayNonUniformIndexing = 
false;
+   features->shaderStorageImageArrayNonUniformIndexing = 
false;
+   features->shaderInputAttachmentArrayNonUniformIndexing 
= false;
+   
features->shaderUniformTexelBufferArrayNonUniformIndexing = false;
+   
features->shaderStorageTexelBufferArrayNonUniformIndexing = false;
+   features->descriptorBindingUniformBufferUpdateAfterBind 
= true;
+   features->descriptorBindingSampledImageUpdateAfterBind 
= true;
+   features->descriptorBindingStorageImageUpdateAfterBind 
= true;
+   features->descriptorBindingStorageBufferUpdateAfterBind 
= true;
+   
features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+   
features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+   features->descriptorBindingUpdateUnusedWhilePending = 
true;
+   features->descriptorBindingPartiallyBound = true;
+   features->descriptorBindingVariableDescriptorCount = 
true;
+   features->runtimeDescriptorArray = true;
+   break;
+   }
default:
break;
}
@@ -1002,6 +1027,20 @@ void radv_GetPhysicalDeviceProperties2(
properties->vgprAllocationGranularity = 4;
break;
}
+   case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
+   VkPhysicalDeviceDescriptorIndexingPropertiesEXT 
*properties =
+   
(VkPhysicalDeviceDescriptorIndexingPropertiesEXT*)ext;
+   properties->maxUpdateAfterBindDescriptorsInAllPools = 
UINT32_MAX;
+   
properties->shaderUniformBufferArrayNonUniformIndexingNative = false;
+   
properties->shaderSampledImageArrayNonUniformIndexingNative = false;
+   
properties->shaderStorageBufferArrayNonUniformIndexingNative = false;
+   
properties->shaderStorageImageArrayNonUniformIndexingNative = false;
+   
properties->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+   properties->robustBufferAccessUpdateAfterBind = false;
+   properties->quadDivergentImplicitLod = false;
+   /* TODO rest */
+   break;
+   }
default:
break;
}
diff --git a/src/amd/vulkan/radv_extensions.py 
b/src/amd/vulkan/radv_extensions.py
index a680f42dec7..3131a0ad417 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -87,6 +87,7 @@ EXTENSIONS = [
  Extension('VK_KHR_multiview', 1, True),
  Extension('VK_EXT_debug_report',  9, True),
  Extension('VK_EXT_depth_range_unrestricted',  1, True),
+Extension('VK_EXT_descriptor_indexing',   2, True),
  Extension('VK_EXT_discard_rectangles',1, True),
  Extension('VK_EXT_external_memory_dma_buf',   1, True),
  Extension('VK_EXT_external_memory_host',  1, 
'device->rad_info.has_userptr'),
diff --git 

[Mesa-dev] [PATCH 12/45] swr/rast: Permute work for simd16

2018-04-13 Thread George Kyriazis
Fix slow permutes in PA tri lists under SIMD16 emulation on AVX

Added missing permute (interlane, immediate) to SIMDLIB
---
 .../drivers/swr/rasterizer/common/simd16intrin.h   |  1 +
 .../drivers/swr/rasterizer/common/simdintrin.h |  1 +
 .../swr/rasterizer/common/simdlib_256_avx.inl  |  6 
 .../swr/rasterizer/common/simdlib_256_avx2.inl |  7 
 .../swr/rasterizer/common/simdlib_512_avx512.inl   |  6 
 .../swr/rasterizer/common/simdlib_512_emu.inl  | 14 ++--
 src/gallium/drivers/swr/rasterizer/core/pa_avx.cpp | 42 +-
 7 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h 
b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 019b26d..98a8b9b 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -138,6 +138,7 @@ typedef SIMD512 SIMD16;
 #define _simd16_cmpeq_epi8  SIMD16::cmpeq_epi8
 #define _simd16_cmpgt_epi8  SIMD16::cmpgt_epi8
 
+#define _simd16_permute_ps_i(a, i)  SIMD16::permute_ps(a)
 #define _simd16_permute_ps  SIMD16::permute_ps
 #define _simd16_permute_epi32   SIMD16::permute_epi32
 #define _simd16_sllv_epi32  SIMD16::sllv_epi32
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h 
b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
index fce360d..b1471a9 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
@@ -106,6 +106,7 @@ typedef SIMD256 SIMD;
 #define _simd_cmpgt_epi16   SIMD::cmpgt_epi16
 #define _simd_cmpeq_epi16   SIMD::cmpeq_epi16
 #define _simd_movemask_epi8 SIMD::movemask_epi8
+#define _simd_permute_ps_i(a, i)SIMD::permute_ps(a)
 #define _simd_permute_psSIMD::permute_ps
 #define _simd_permute_epi32 SIMD::permute_epi32
 #define _simd_srlv_epi32SIMD::srlv_epi32
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
index 42b4552..00c094a 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx.inl
@@ -479,6 +479,12 @@ SIMD_EMU_IWRAPPER_2(packs_epi32);   // See documentation 
for _mm256_packs_epi32
 SIMD_EMU_IWRAPPER_2(packus_epi16);  // See documentation for 
_mm256_packus_epi16 and _mm512_packus_epi16
 SIMD_EMU_IWRAPPER_2(packus_epi32);  // See documentation for 
_mm256_packus_epi32 and _mm512_packus_epi32
 
+template
+static SIMDINLINE Float SIMDCALL permute_ps(Float const )
+{
+return _mm256_permute_ps(a, ImmT);
+}
+
 static SIMDINLINE Integer SIMDCALL permute_epi32(Integer const , Integer 
const ) // return a[swiz[i]] for each 32-bit lane i (int32)
 {
 Integer result;
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
index 9cd0a64..96c24ff 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx2.inl
@@ -174,6 +174,13 @@ SIMD_IWRAPPER_2(packs_epi16);   // See documentation for 
_mm256_packs_epi16 and
 SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm256_packs_epi32 
and _mm512_packs_epi32
 SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm256_packus_epi16 
and _mm512_packus_epi16
 SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm256_packus_epi32 
and _mm512_packus_epi32
+
+template
+static SIMDINLINE Float SIMDCALL permute_ps(Float const )
+{
+return _mm256_permute_ps(a, ImmT);
+}
+
 SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32);
 
 static SIMDINLINE Float SIMDCALL permute_ps(Float const , Integer const 
)// return a[swiz[i]] for each 32-bit lane i (float)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl 
b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index f3a58f9..dfe19d3 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -433,6 +433,12 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, 
SIMD256Impl::Integer b)
 // SIMD_IWRAPPER_2(packus_epi16);  // See documentation for 
_mm512_packus_epi16 and _mm512_packus_epi16
 // SIMD_IWRAPPER_2(packus_epi32);  // See documentation for 
_mm512_packus_epi32 and _mm512_packus_epi32
 
+template
+static SIMDINLINE Float SIMDCALL permute_ps(Float const )
+{
+return _mm512_permute_ps(a, ImmT);
+}
+
 static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz)
// return a[swiz[i]] for each 32-bit lane i (float)
 {
 return 

[Mesa-dev] [PATCH 10/45] swr/rast: Add autogen of helper llvm intrinsics.

2018-04-13 Thread George Kyriazis
Replace sqrt, maskload, fp min/max, cttz, ctlz with llvm equivalent.
Replace AVX maskedstore intrinsic with LLVM intrinsic. Add helper llvm
macros for stacksave, stackrestore, popcnt.
---
 src/gallium/drivers/swr/Makefile.am|   8 ++
 src/gallium/drivers/swr/SConscript |   9 ++
 src/gallium/drivers/swr/meson.build|   2 +-
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   | 100 ++---
 .../rasterizer/codegen/templates/gen_builder.hpp   |  20 -
 .../drivers/swr/rasterizer/jitter/builder.h|   1 +
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |  50 +--
 .../drivers/swr/rasterizer/jitter/builder_mem.h|   5 --
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp |  13 ---
 .../drivers/swr/rasterizer/jitter/builder_misc.h   |  11 ---
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|   8 +-
 .../drivers/swr/rasterizer/jitter/meson.build  |  11 +++
 .../swr/rasterizer/jitter/streamout_jit.cpp|  18 ++--
 13 files changed, 130 insertions(+), 126 deletions(-)

diff --git a/src/gallium/drivers/swr/Makefile.am 
b/src/gallium/drivers/swr/Makefile.am
index 5ec9213..32dd9e5 100644
--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -81,6 +81,7 @@ BUILT_SOURCES = \
rasterizer/jitter/gen_state_llvm.h \
rasterizer/jitter/gen_builder.hpp \
rasterizer/jitter/gen_builder_x86.hpp \
+   rasterizer/jitter/gen_builder_intrin.hpp \
rasterizer/archrast/gen_ar_event.hpp \
rasterizer/archrast/gen_ar_event.cpp \
rasterizer/archrast/gen_ar_eventhandler.hpp \
@@ -140,6 +141,13 @@ rasterizer/jitter/gen_builder_x86.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py
--output rasterizer/jitter \
--gen_x86_h
 
+rasterizer/jitter/gen_builder_intrin.hpp: 
rasterizer/codegen/gen_llvm_ir_macros.py 
rasterizer/codegen/templates/gen_builder.hpp rasterizer/codegen/gen_common.py
+   $(MKDIR_GEN)
+   $(PYTHON_GEN) \
+   $(srcdir)/rasterizer/codegen/gen_llvm_ir_macros.py \
+   --output rasterizer/jitter \
+   --gen_intrin_h
+
 rasterizer/archrast/gen_ar_event.hpp: rasterizer/codegen/gen_archrast.py 
rasterizer/codegen/templates/gen_ar_event.hpp rasterizer/archrast/events.proto 
rasterizer/archrast/events_private.proto rasterizer/codegen/gen_common.py
$(MKDIR_GEN)
$(PYTHON_GEN) \
diff --git a/src/gallium/drivers/swr/SConscript 
b/src/gallium/drivers/swr/SConscript
index cc4025b..5097be6 100644
--- a/src/gallium/drivers/swr/SConscript
+++ b/src/gallium/drivers/swr/SConscript
@@ -85,6 +85,15 @@ Depends('rasterizer/jitter/gen_builder.hpp',
 swrroot + 'rasterizer/codegen/templates/gen_builder.hpp')
 
 env.CodeGenerate(
+target = 'rasterizer/jitter/gen_builder_intrin.hpp',
+script = swrroot + 'rasterizer/codegen/gen_llvm_ir_macros.py',
+source = '',
+command = python_cmd + ' $SCRIPT --output ' + bldroot + 
'/rasterizer/jitter --gen_intrin_h'
+)
+Depends('rasterizer/jitter/gen_builder.hpp',
+swrroot + 'rasterizer/codegen/templates/gen_builder.hpp')
+
+env.CodeGenerate(
 target = './gen_swr_context_llvm.h',
 script = swrroot + 'rasterizer/codegen/gen_llvm_types.py',
 source = 'swr_context.h',
diff --git a/src/gallium/drivers/swr/meson.build 
b/src/gallium/drivers/swr/meson.build
index 4bcd4f4..b28abd6 100644
--- a/src/gallium/drivers/swr/meson.build
+++ b/src/gallium/drivers/swr/meson.build
@@ -296,7 +296,7 @@ endif
 libmesaswr = static_library(
   'mesaswr',
   [files_swr_mesa, files_swr_common, gen_knobs_h, gen_knobs_cpp,
-   gen_builder_hpp, gen_builder_x86_hpp],
+   gen_builder_hpp, gen_builder_x86_hpp, gen_builder_intrin_hpp],
   cpp_args : [cpp_vis_args, swr_cpp_args, swr_avx_args, swr_arch_defines],
   include_directories : [inc_common, swr_incs],
   dependencies : dep_llvm,
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 3e1fbfe..9dfc1e7 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -42,32 +42,40 @@ inst_aliases = {
 }
 
 intrinsics = [
-['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 
'mask', 'scale']],
-['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 
'mask', 'scale']],
-['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 
'indices', 'mask', 'scale']],
-['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 
'mask', 'scale']],
-['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 
'indices', 'mask', 'scale']],
-['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']],
-['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']],
-['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
-['VMINPS', 'x86_avx_min_ps_256', 

[Mesa-dev] [PATCH 41/45] swr/rast: Refactor to improve code sharing.

2018-04-13 Thread George Kyriazis
---
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 79 ++
 1 file changed, 36 insertions(+), 43 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 767866f..af97b83 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -63,6 +63,7 @@ struct FetchJit : public BuilderGfxMem
 Value* GetSimdValid32bitIndices(Value* vIndices, Value* pLastIndex);
 Value* GetSimdValid16bitIndices(Value* vIndices, Value* pLastIndex);
 Value* GetSimdValid8bitIndices(Value* vIndices, Value* pLastIndex);
+template Value* GetSimdValidIndicesHelper(Value* pIndices, 
Value* pLastIndex);
 
 // package up Shuffle*bpcGatherd args into a tuple for convenience
 typedef std::tuplegetType() == mInt64Ty && pLastIndex->getType() == 
mInt64Ty, "Function expects gfxptr_t for both input parameters.");
 
+Type* Ty = nullptr;
+
+static_assert(sizeof(T) == sizeof(uint16_t) || sizeof(T) == 
sizeof(uint8_t), "Unsupported type for use with GetSimdValidIndicesHelper");
+constexpr bool bSize = (sizeof(T) == sizeof(uint16_t));
+if (bSize)
+{
+Ty = mInt16PtrTy;
+}
+else if (sizeof(T) == sizeof(uint8_t))
+{
+Ty = mInt8PtrTy;
+}
+else
+{
+SWR_ASSERT(false, "This should never happen as per static_assert 
above.");
+}
+
 Value* vIndices = VUNDEF_I();
 
 {
 // store 0 index on stack to be used to conditionally load from if 
index address is OOB
-Value* pZeroIndex = ALLOCA(mInt8Ty);
-STORE(C((uint8_t)0), pZeroIndex);
+Value* pZeroIndex = ALLOCA(Ty);
+STORE(C((T)0), pZeroIndex);
 
 // Load a SIMD of index pointers
 for (int64_t lane = 0; lane < mVWidth; lane++)
 {
 // Calculate the address of the requested index
-Value *pIndex = GEP(pIndices, C(lane), mInt8PtrTy);
+Value *pIndex = GEP(pIndices, C(lane), Ty);
 
-pLastIndex = INT_TO_PTR(pLastIndex, mInt8PtrTy);
+pLastIndex = INT_TO_PTR(pLastIndex, Ty);
 
 // check if the address is less than the max index, 
 Value* mask = ICMP_ULT(pIndex, pLastIndex);
 
 // if valid, load the index. if not, load 0 from the stack
 Value* pValid = SELECT(mask, pIndex, pZeroIndex);
-Value *index = LOAD(pValid, "valid index", 
PointerType::get(mInt8Ty, 0), GFX_MEM_CLIENT_FETCH);
+Value *index = LOAD(pValid, "valid index", Ty, 
GFX_MEM_CLIENT_FETCH);
 
 // zero extended index to 32 bits and insert into the correct simd 
lane
 index = Z_EXT(index, mInt32Ty);
@@ -1028,43 +1040,24 @@ Value* FetchJit::GetSimdValid8bitIndices(Value* 
pIndices, Value* pLastIndex)
 
 //
 /// @brief Loads a simd of valid indices. OOB indices are set to 0
+/// *Note* have to do 8bit index checking in scalar until we have AVX-512
+/// support
+/// @param pIndices - pointer to 8 bit indices
+/// @param pLastIndex - pointer to last valid index
+Value* FetchJit::GetSimdValid8bitIndices(Value* pIndices, Value* pLastIndex)
+{
+return GetSimdValidIndicesHelper(pIndices, pLastIndex);
+}
+
+//
+/// @brief Loads a simd of valid indices. OOB indices are set to 0
 /// *Note* have to do 16bit index checking in scalar until we have AVX-512
 /// support
 /// @param pIndices - pointer to 16 bit indices
 /// @param pLastIndex - pointer to last valid index
 Value* FetchJit::GetSimdValid16bitIndices(Value* pIndices, Value* pLastIndex)
 {
-SWR_ASSERT(pIndices->getType() == mInt64Ty && pLastIndex->getType() == 
mInt64Ty, "Function expects gfxptr_t for both input parameters.");
-
-

[Mesa-dev] [PATCH 28/45] swr/rast: Adding translate call to builder_gfx_mem.

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp | 5 +
 src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
index bfb3057..090b761 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp
@@ -133,4 +133,9 @@ namespace SwrJit
 return Builder::LOAD(BasePtr, offset, name);
 }
 
+Value* BuilderGfxMem::TranlsateGfxAddress(Value* xpGfxAddress)
+{
+return INT_TO_PTR(xpGfxAddress, PointerType::get(mInt8Ty, 0));
+}
+
 }
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
index 18c25b9..8f39b9d 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h
@@ -51,6 +51,8 @@ namespace SwrJit
 
 virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, 
Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
+Value* TranlsateGfxAddress(Value* xpGfxAddress);
+
 protected:
 
 void AssertGFXMemoryParams(Value* ptr, Builder::JIT_MEM_CLIENT usage);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 37/45] swr/rast: Add shader stats infrastructure (WIP)

2018-04-13 Thread George Kyriazis
---
 .../drivers/swr/rasterizer/archrast/archrast.cpp   | 64 +++--
 .../drivers/swr/rasterizer/archrast/events.proto   | 65 --
 .../swr/rasterizer/archrast/events_private.proto   | 30 ++
 .../drivers/swr/rasterizer/jitter/builder.h| 23 
 4 files changed, 148 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp 
b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
index 2184673..871db79 100644
--- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
+++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp
@@ -61,7 +61,7 @@ namespace ArchRast
 //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If 
holds, its fine.
 };
 
-struct GSInfo
+struct GSStateInfo
 {
 uint32_t inputPrimCount;
 uint32_t primGeneratedCount;
@@ -155,7 +155,7 @@ namespace ArchRast
 mDSSampleRate.earlyStencilTestFailCount += 
_mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
 
 //earlyZ test single and multi sample
-mDSCombined.earlyZTestPassCount  += 
_mm_popcnt_u32(event.data.depthPassMask);
+mDSCombined.earlyZTestPassCount += 
_mm_popcnt_u32(event.data.depthPassMask);
 mDSCombined.earlyZTestFailCount += 
_mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
 
 //earlyStencil test single and multi sample
@@ -257,11 +257,51 @@ namespace ArchRast
 mClipper.trivialAcceptCount += _mm_popcnt_u32(event.data.validMask 
& ~event.data.clipMask);
 }
 
+struct ShaderStats
+{
+uint32_t numInstExecuted;
+};
+
+virtual void Handle(const VSStats& event)
+{
+mShaderStats[SHADER_VERTEX].numInstExecuted += 
event.data.numInstExecuted;
+}
+
+virtual void Handle(const GSStats& event)
+{
+mShaderStats[SHADER_GEOMETRY].numInstExecuted += 
event.data.numInstExecuted;
+}
+
+virtual void Handle(const DSStats& event)
+{
+mShaderStats[SHADER_DOMAIN].numInstExecuted += 
event.data.numInstExecuted;
+}
+
+virtual void Handle(const HSStats& event)
+{
+mShaderStats[SHADER_HULL].numInstExecuted += 
event.data.numInstExecuted;
+}
+
+virtual void Handle(const PSStats& event)
+{
+mShaderStats[SHADER_PIXEL].numInstExecuted += 
event.data.numInstExecuted;
+mNeedFlush = true;
+}
+
+virtual void Handle(const CSStats& event)
+{
+mShaderStats[SHADER_COMPUTE].numInstExecuted += 
event.data.numInstExecuted;
+mNeedFlush = true;
+}
+
 // Flush cached events for this draw
 virtual void FlushDraw(uint32_t drawId)
 {
 if (mNeedFlush == false) return;
 
+EventHandlerFile::Handle(PSInfo(drawId, 
mShaderStats[SHADER_PIXEL].numInstExecuted));
+EventHandlerFile::Handle(CSInfo(drawId, 
mShaderStats[SHADER_COMPUTE].numInstExecuted));
+
 //singleSample
 EventHandlerFile::Handle(EarlyZSingleSample(drawId, 
mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
 EventHandlerFile::Handle(LateZSingleSample(drawId, 
mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
@@ -297,7 +337,7 @@ namespace ArchRast
 
 // Primitive Culling
 EventHandlerFile::Handle(CullEvent(drawId, 
mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
-
+
 mDSSingleSample = {};
 mDSSampleRate = {};
 mDSCombined = {};
@@ -307,6 +347,10 @@ namespace ArchRast
 rastStats = {};
 mCullStats = {};
 mAlphaStats = {};
+
+mShaderStats[SHADER_PIXEL] = {};
+mShaderStats[SHADER_COMPUTE] = {};
+
 mNeedFlush = false;
 }
 
@@ -323,6 +367,16 @@ namespace ArchRast
 EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, 
mGS.primGeneratedCount));
 EventHandlerFile::Handle(GSVertsInput(event.data.drawId, 
mGS.vertsInput));
 
+EventHandlerFile::Handle(VSInfo(event.data.drawId, 
mShaderStats[SHADER_VERTEX].numInstExecuted));
+EventHandlerFile::Handle(HSInfo(event.data.drawId, 
mShaderStats[SHADER_HULL].numInstExecuted));
+EventHandlerFile::Handle(DSInfo(event.data.drawId, 
mShaderStats[SHADER_DOMAIN].numInstExecuted));
+EventHandlerFile::Handle(GSInfo(event.data.drawId, 
mShaderStats[SHADER_GEOMETRY].numInstExecuted));
+
+mShaderStats[SHADER_VERTEX] = {};
+mShaderStats[SHADER_HULL] = {};
+mShaderStats[SHADER_DOMAIN] = {};
+mShaderStats[SHADER_GEOMETRY] = {};
+
 //Reset Internal Counters
 mClipper = {};
 mTS = {};
@@ 

[Mesa-dev] [PATCH 27/45] swr/rast: Fix codegen for typedef types

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
index a127976..d8863c0 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_types.py
@@ -143,6 +143,7 @@ def gen_llvm_types(input_file, output_file):
 is_llvm_typedef = re.search(r'@llvm_typedef', line)
 if is_llvm_typedef is not None:
 is_llvm_typedef = True
+continue
 else:
 is_llvm_typedef = False
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/45] swr/rast: Lower PERMD and PERMPS to x86.

2018-04-13 Thread George Kyriazis
Add support for providing an emulation callback function for arch/width
combinations that don't map cleanly to an x86 intrinsic.
---
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  |  8 +--
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 70 --
 .../drivers/swr/rasterizer/jitter/builder_misc.h   |  2 -
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 20 +++
 4 files changed, 14 insertions(+), 86 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index f366864..a27f02e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -555,7 +555,7 @@ namespace SwrJit
 // 256i - 01234567
 //       
 
-Value* vi128XY = BITCAST(PERMD(vShufResult, C({ 0, 1, 4, 
5, 2, 3, 6, 7 })), v128bitTy);
+Value* vi128XY = BITCAST(VPERMD(vShufResult, C({ 0, 1, 4, 
5, 2, 3, 6, 7 })), v128bitTy);
 // after PERMD: move and pack xy components into each 128bit lane
 // 256i - 01234567
 //       
@@ -565,7 +565,7 @@ namespace SwrJit
 if (info.numComps > 2)
 {
 Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], 
v32x8Ty), vConstMask), vGatherTy);
-vi128ZW = BITCAST(PERMD(vShufResult, C({ 0, 1, 4, 5, 
2, 3, 6, 7 })), v128bitTy);
+vi128ZW = BITCAST(VPERMD(vShufResult, C({ 0, 1, 4, 5, 
2, 3, 6, 7 })), v128bitTy);
 }
 
 for (uint32_t i = 0; i < 4; i++)
@@ -644,7 +644,7 @@ namespace SwrJit
 // 256i - 01234567
 //       
 
-Value* vi128XY = BITCAST(PERMD(vShufResult, C({ 0, 4, 0, 
0, 1, 5, 0, 0 })), v128Ty);
+Value* vi128XY = BITCAST(VPERMD(vShufResult, C({ 0, 4, 0, 
0, 1, 5, 0, 0 })), v128Ty);
 // after PERMD: move and pack xy and zw components in low 64 bits 
of each 128bit lane
 // 256i - 01234567
 //  dcdc dcdc   dcdc dcdc (dc - don't care)
@@ -653,7 +653,7 @@ namespace SwrJit
 Value* vi128ZW = nullptr;
 if (info.numComps > 2)
 {
-vi128ZW = BITCAST(PERMD(vShufResult, C({ 2, 6, 0, 0, 
3, 7, 0, 0 })), v128Ty);
+vi128ZW = BITCAST(VPERMD(vShufResult, C({ 2, 6, 0, 0, 
3, 7, 0, 0 })), v128Ty);
 }
 
 // sign extend all enabled components. If we have a fill 
vVertexElements, output to current simdvertex
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 54987c7..aa9e2dd 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -602,76 +602,6 @@ namespace SwrJit
 }
 
 //
-/// @brief Generate a VPERMD operation (shuffle 32 bit integer values 
-/// across 128 bit lanes) in LLVM IR.  If not supported on the underlying 
-/// platform, emulate it
-/// @param a - 256bit SIMD lane(8x32bit) of integer values.
-/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
-Value *Builder::PERMD(Value* a, Value* idx)
-{
-Value* res;
-// use avx2 permute instruction if available
-if(JM()->mArch.AVX2())
-{
-res = VPERMD(a, idx);
-}
-else
-{
-if (isa(idx))
-{
-res = VSHUFFLE(a, a, idx);
-}
-else
-{
-res = VUNDEF_I();
-for (uint32_t l = 0; l < JM()->mVWidth; ++l)
-{
-Value* pIndex = VEXTRACT(idx, C(l));
-Value* pVal = VEXTRACT(a, pIndex);
-res = VINSERT(res, pVal, C(l));
-}
-}
-}
-return res;
-}
-
-//
-/// @brief Generate a VPERMPS operation (shuffle 32 bit float values 
-/// across 128 bit lanes) in LLVM IR.  If not supported on the underlying 
-/// platform, emulate it
-/// @param a - 256bit SIMD lane(8x32bit) of float values.
-/// @param idx - 256bit SIMD lane(8x32bit) of 3 bit lane index values
-Value *Builder::PERMPS(Value* a, Value* idx)
-{
-Value* res;
-// use avx2 permute instruction if available
-if (JM()->mArch.AVX2())
-{
-// llvm 3.6.0 swapped the order of the args to vpermd
-res = VPERMPS(idx, a);
-  

[Mesa-dev] [PATCH 45/45] swr/rast: Fix VGATHERPD lowering

2018-04-13 Thread George Kyriazis
Also implement VHSUBPS in the x86 lowering pass.
---
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 82 ++
 1 file changed, 69 insertions(+), 13 deletions(-)

diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 856d67d..baf3ab5 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -75,7 +75,6 @@ namespace SwrJit
 {"meta.intrinsic.BEXTR_32",Intrinsic::x86_bmi_bextr_32},
 {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
 {"meta.intrinsic.VCVTPS2PH",   Intrinsic::x86_vcvtps2ph_256},
-{"meta.intrinsic.VHSUBPS", Intrinsic::x86_avx_hsub_ps_256},
 {"meta.intrinsic.VPTESTC", Intrinsic::x86_avx_ptestc_256},
 {"meta.intrinsic.VPTESTZ", Intrinsic::x86_avx_ptestz_256},
 {"meta.intrinsic.VFMADDPS",Intrinsic::x86_fma_vfmadd_ps_256},
@@ -89,6 +88,7 @@ namespace SwrJit
 Instruction* VPERM_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
 Instruction* VGATHER_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
 Instruction* VROUND_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
+Instruction* VHSUB_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst);
 
 Instruction* DOUBLE_EMU(LowerX86* pThis, TargetArch arch, TargetWidth 
width, CallInst* pCallInst, Intrinsic::ID intrin);
 
@@ -106,6 +106,7 @@ namespace SwrJit
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256, 
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VROUND",  {{Intrinsic::x86_avx_round_ps_256,  
  DOUBLE},NO_EMU}},
+{"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256,   
  DOUBLE},NO_EMU}},
 },
 {   // AVX2
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  DOUBLE},NO_EMU}},
@@ -117,6 +118,7 @@ namespace SwrJit
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   {{Intrinsic::x86_vcvtph2ps_256, 
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VROUND",  {{Intrinsic::x86_avx_round_ps_256,  
  DOUBLE},NO_EMU}},
+{"meta.intrinsic.VHSUBPS", {{Intrinsic::x86_avx_hsub_ps_256,   
  DOUBLE},NO_EMU}},
 },
 {   // AVX512
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx512_rcp14_ps_256,   
  Intrinsic::x86_avx512_rcp14_ps_512},NO_EMU}},
@@ -127,7 +129,8 @@ namespace SwrJit
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   
{{Intrinsic::x86_avx512_mask_cvtpd2ps_256,
Intrinsic::x86_avx512_mask_cvtpd2ps_512 },  NO_EMU}},
 {"meta.intrinsic.VCVTPH2PS",   
{{Intrinsic::x86_avx512_mask_vcvtph2ps_256,   
Intrinsic::x86_avx512_mask_vcvtph2ps_512 }, NO_EMU}},
-{"meta.intrinsic.VROUND",  {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic }, VROUND_EMU}},
+{"meta.intrinsic.VROUND",  {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VROUND_EMU}},
+{"meta.intrinsic.VHSUBPS", {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VHSUB_EMU}},
 }
 };
 
@@ -454,21 +457,45 @@ namespace SwrJit
 }
 else if (width == W512)
 {
-// Double pump 8-wide
-auto v32Mask = B->BITCAST(pThis->VectorMask(vi1Mask), 
vSrc->getType());
-Value *src0 = B->EXTRACT_16(vSrc, 0);
-Value *src1 = B->EXTRACT_16(vSrc, 1);
+// Double pump 4-wide for 64bit elements
+if (vSrc->getType()->getVectorElementType() == B->mDoubleTy)
+{
+auto v64Mask = B->S_EXT(pThis->VectorMask(vi1Mask), 
B->mInt64Ty);
+v64Mask = B->BITCAST(v64Mask, vSrc->getType());
+
+Value* src0 = B->VSHUFFLE(vSrc, vSrc, B->C({ 0, 1, 2, 3 
}));
+Value* src1 = B->VSHUFFLE(vSrc, vSrc, 

[Mesa-dev] [PATCH 11/45] swr/rast: WIP builder rewrite (2)

2018-04-13 Thread George Kyriazis
Finish up the remaining explicit intrinsic uses. At this point all
explicit Intrinsic::getDeclaration() usage has been replaced with
auto-generated macros produced by gen_llvm_ir_macros.py. Going forward,
make sure to only use the intrinsics here, adding new ones as needed.

Next step is to remove all references to x86 intrinsics to keep the
builder target-independent. Any x86 lowering will be handled by a
separate pass.
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py| 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 9dfc1e7..0245584 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -61,8 +61,9 @@ intrinsics = [
 ['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b']],
 ['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c']],
 ['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a']],
-['INTERRUPT', 'x86_int', ['a']],
 ['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b']],
+['PDEP32', 'x86_bmi_pdep_32', ['a', 'b']],
+['RDTSC', 'x86_rdtsc', []],
 ]
 
 llvm_intrinsics = [
@@ -74,7 +75,11 @@ llvm_intrinsics = [
 ['VMINPS', 'minnum', ['a', 'b'], ['a']],
 ['VMAXPS', 'maxnum', ['a', 'b'], ['a']],
 ['DEBUGTRAP', 'debugtrap', [], []],
-['POPCNT', 'ctpop', ['a'], ['a']]
+['POPCNT', 'ctpop', ['a'], ['a']],
+['LOG2', 'log2', ['a'], ['a']],
+['FABS', 'fabs', ['a'], ['a']],
+['EXP2', 'exp2', ['a'], ['a']],
+['POW', 'pow', ['a', 'b'], ['a', 'b']]
 ]
 
 this_dir = os.path.dirname(os.path.abspath(__file__))
@@ -225,10 +230,14 @@ def generate_x86_h(output_dir):
 functions = []
 for inst in intrinsics:
 #print('Inst: %s, x86: %s numArgs: %d' % (inst[0], inst[1], 
len(inst[2])))
-declargs = 'Value* ' + ', Value* '.join(inst[2])
+if len(inst[2]) != 0:
+declargs = 'Value* ' + ', Value* '.join(inst[2])
+decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (inst[0], 
declargs)
+else:
+decl = 'Value* %s(const llvm::Twine& name = "")' % (inst[0])
 
 functions.append({
-'decl'  : 'Value* %s(%s, const llvm::Twine& name = "")' % 
(inst[0], declargs),
+'decl'  : decl,
 'intrin': inst[1],
 'args'  : inst[2],
 })
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 35/45] swr/rast: Add vgather to x86 lowering pass.

2018-04-13 Thread George Kyriazis
Add support for generic VGATHERPD intrinsic in x86 lowering pass.
---
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 85 +++---
 1 file changed, 76 insertions(+), 9 deletions(-)

diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index b27335f..9423b28 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -72,7 +72,6 @@ namespace SwrJit
 // Map of intrinsics that haven't been moved to the new mechanism yet. If 
used, these get the previous behavior of
 // mapping directly to avx/avx2 intrinsics.
 static std::map intrinsicMap = {
-{"meta.intrinsic.VGATHERPD",   
Intrinsic::x86_avx2_gather_d_pd_256},
 {"meta.intrinsic.VROUND",  Intrinsic::x86_avx_round_ps_256},
 {"meta.intrinsic.BEXTR_32",Intrinsic::x86_bmi_bextr_32},
 {"meta.intrinsic.VPSHUFB", Intrinsic::x86_avx2_pshuf_b},
@@ -98,6 +97,7 @@ namespace SwrJit
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VPERMPS", {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VPERMD",  {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VPERM_EMU}},
+{"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
@@ -107,6 +107,7 @@ namespace SwrJit
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx_rcp_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
 {"meta.intrinsic.VPERMPS", {{Intrinsic::x86_avx2_permps,   
  Intrinsic::not_intrinsic},  VPERM_EMU}},
 {"meta.intrinsic.VPERMD",  {{Intrinsic::x86_avx2_permd,
  Intrinsic::not_intrinsic},  VPERM_EMU}},
+{"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   {{Intrinsic::x86_avx_cvt_pd2_ps_256,
  Intrinsic::not_intrinsic},  NO_EMU}},
@@ -116,6 +117,7 @@ namespace SwrJit
 {"meta.intrinsic.VRCPPS",  {{Intrinsic::x86_avx512_rcp14_ps_256,   
  Intrinsic::x86_avx512_rcp14_ps_512},NO_EMU}},
 {"meta.intrinsic.VPERMPS", 
{{Intrinsic::x86_avx512_mask_permvar_sf_256,  
Intrinsic::x86_avx512_mask_permvar_sf_512}, NO_EMU}},
 {"meta.intrinsic.VPERMD",  
{{Intrinsic::x86_avx512_mask_permvar_si_256,  
Intrinsic::x86_avx512_mask_permvar_si_512}, NO_EMU}},
+{"meta.intrinsic.VGATHERPD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERPS",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VGATHERDD",   {{Intrinsic::not_intrinsic, 
  Intrinsic::not_intrinsic},  VGATHER_EMU}},
 {"meta.intrinsic.VCVTPD2PS",   
{{Intrinsic::x86_avx512_mask_cvtpd2ps_256,
Intrinsic::x86_avx512_mask_cvtpd2ps_512 },  NO_EMU}},
@@ -207,6 +209,13 @@ namespace SwrJit
 return mask;
 }
 
+// Convert <N x i1> mask to <N x i32> x86 mask
+Value* VectorMask(Value* vi1Mask)
+{
+uint32_t numElem = vi1Mask->getType()->getVectorNumElements();
+return B->S_EXT(vi1Mask, VectorType::get(B->mInt32Ty, numElem));
+}
+
 Instruction* ProcessIntrinsicAdvanced(CallInst* pCallInst)
 {
 Function* pFunc = pCallInst->getCalledFunction();
@@ -406,17 +415,33 @@ namespace SwrJit
 }
 else if (arch == AVX2 || (arch == AVX512 && width == W256))
 {
-Function* pX86IntrinFunc = srcTy == 

[Mesa-dev] [PATCH 26/45] swr: add x86 lowering pass to fragment shader

2018-04-13 Thread George Kyriazis
Needed because some FP paths (namely stipple) use gather intrinsics
that now need to be lowered to x86.
---
 src/gallium/drivers/swr/swr_shader.cpp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/swr/swr_shader.cpp 
b/src/gallium/drivers/swr/swr_shader.cpp
index 477fa7f..6ea021a 100644
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -27,11 +27,13 @@
 #include "JitManager.h"
 #include "llvm-c/Core.h"
 #include "llvm/Support/CBindingWrapping.h"
+#include "llvm/IR/LegacyPassManager.h"
 #pragma pop_macro("DEBUG")
 
 #include "state.h"
 #include "gen_state_llvm.h"
 #include "builder.h"
+#include "functionpasses/passes.h"
 
 #include "tgsi/tgsi_strings.h"
 #include "util/u_format.h"
@@ -1389,6 +1391,11 @@ BuilderSWR::CompileFS(struct swr_context *ctx, 
swr_jit_fs_key )
 
gallivm_compile_module(gallivm);
 
+   // after the gallivm passes, we have to lower the core's intrinsics
+   llvm::legacy::FunctionPassManager lowerPass(JM()->mpCurrentModule);
+   lowerPass.add(createLowerX86Pass(mpJitMgr, this));
+   lowerPass.run(*pFunction);
+
PFN_PIXEL_KERNEL kernel =
   (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction));
debug_printf("frag shader  %p\n", kernel);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/45] swr/rast: Add MEM_ADD helper function to Builder.

2018-04-13 Thread George Kyriazis
mem[offset] += value

This function will be heavily used by all stats intrinsics.
---
 src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp | 7 +++
 src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h   | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index b4f30fb..c32472f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -134,6 +134,13 @@ namespace SwrJit
 return GEP(base, offset);
 }
 
+Value* Builder::MEM_ADD(Value* i32Incr, Value* basePtr, const 
std::initializer_list , const llvm::Twine& name)
+{
+Value* i32Value = LOAD(GEP(basePtr, indices), name);
+Value* i32Result = ADD(i32Value, i32Incr);
+return STORE(i32Result, GEP(basePtr, indices));
+}
+
 //
 /// @brief Generate a masked gather operation in LLVM IR.  If not  
 /// supported on the underlying platform, emulate it with loads
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
index c694104..9adc1a5 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h
@@ -60,6 +60,8 @@ LoadInst *LOADV(Value *BasePtr, const 
std::initializer_list , con
 StoreInst *STORE(Value *Val, Value *BasePtr, const 
std::initializer_list );
 StoreInst *STOREV(Value *Val, Value *BasePtr, const 
std::initializer_list );
 
+Value* MEM_ADD(Value* i32Incr, Value* basePtr, const 
std::initializer_list , const llvm::Twine& name = "");
+
 void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
 Value* mask, Value* vGatherComponents[], bool bPackedOutput, 
JIT_MEM_CLIENT usage = MEM_CLIENT_RASTY);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/45] swr/rast: Fix name mangling for LLVM pow intrinsic

2018-04-13 Thread George Kyriazis
---
 src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 0245584..324f24a 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -79,7 +79,7 @@ llvm_intrinsics = [
 ['LOG2', 'log2', ['a'], ['a']],
 ['FABS', 'fabs', ['a'], ['a']],
 ['EXP2', 'exp2', ['a'], ['a']],
-['POW', 'pow', ['a', 'b'], ['a', 'b']]
+['POW', 'pow', ['a', 'b'], ['a']]
 ]
 
 this_dir = os.path.dirname(os.path.abspath(__file__))
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Include unistd.h in program_lexer

2018-04-13 Thread Dylan Baker
Which was previously provided implicitly by mtypes.h

CC: Marek Olšák 
CC: Mark Janes 
Fixes: 43d66c8c2d4d3d4dee1309856b6ce6c5393682e5
   ("mesa: include mtypes.h less")
Signed-off-by: Dylan Baker 
---
 src/mesa/program/program_lexer.l | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/program/program_lexer.l b/src/mesa/program/program_lexer.l
index 2e168b83bdb..13eb9025148 100644
--- a/src/mesa/program/program_lexer.l
+++ b/src/mesa/program/program_lexer.l
@@ -21,6 +21,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+#include <unistd.h>
 #include "main/glheader.h"
 #include "main/imports.h"
 #include "program/prog_instruction.h"
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100629] No mans sky renders white screen under wine in linux

2018-04-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100629

--- Comment #6 from Matteo Bruni  ---
Notice that No Man's Sky is an OpenGL game so Wine should have no effect on the
game requiring compatibility profile or not.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 33/45] swr/rast: add cvt instructions in x86 lowering pass

2018-04-13 Thread George Kyriazis
Support generic VCVTPD2PS and VCVTPH2PS in x86 lowering pass.
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   | 70 --
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 14 -
 .../drivers/swr/rasterizer/jitter/builder_mem.h|  3 -
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp|  6 +-
 .../rasterizer/jitter/functionpasses/lower_x86.cpp | 14 ++---
 5 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2636e60..4a7d2e9 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -42,28 +42,26 @@ inst_aliases = {
 }
 
 intrinsics = [
-['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 
'mask', 'scale'], 'mSimd4FP64Ty'],
-['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 
'mask', 'scale'], 'mSimdFP32Ty'],
-['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 
'mask', 'scale'], 'mSimd16FP32Ty'],
-['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 
'mask', 'scale'], 'mSimdInt32Ty'],
-['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 
'mask', 'scale'], 'mSimd16Int32Ty'],
-['VRCPPS', 'x86_avx_rcp_ps_256', ['a'], 'mSimdFP32Ty'],
-['VROUND', 'x86_avx_round_ps_256', ['a', 'rounding'], 'mSimdFP32Ty'],
-['BEXTR_32', 'x86_bmi_bextr_32', ['src', 'control'], 'mInt32Ty'],
-['VPSHUFB', 'x86_avx2_pshuf_b', ['a', 'b'], 'mSimd32Int8Ty'],
-['VPERMD', 'x86_avx2_permd', ['a', 'idx'], 'mSimdInt32Ty'],
-['VPERMPS', 'x86_avx2_permps', ['idx', 'a'], 'mSimdFP32Ty'],
-['VCVTPD2PS', 'x86_avx_cvt_pd2_ps_256', ['a'], 'mSimdFP32Ty'],
-['VCVTPH2PS', 'x86_vcvtph2ps_256', ['a'], 'mSimdFP32Ty'],
-['VCVTPS2PH', 'x86_vcvtps2ph_256', ['a', 'round'], 'mSimdFP16Ty'],
-['VHSUBPS', 'x86_avx_hsub_ps_256', ['a', 'b'], 'mSimdFP32Ty'],
-['VPTESTC', 'x86_avx_ptestc_256', ['a', 'b'], 'mInt32Ty'],
-['VPTESTZ', 'x86_avx_ptestz_256', ['a', 'b'], 'mInt32Ty'],
-['VFMADDPS', 'x86_fma_vfmadd_ps_256', ['a', 'b', 'c'], 'mSimdFP32Ty'],
-['VMOVMSKPS', 'x86_avx_movmsk_ps_256', ['a'], 'mInt32Ty'],
-['VPHADDD', 'x86_avx2_phadd_d', ['a', 'b'], 'mSimdInt32Ty'],
-['PDEP32', 'x86_bmi_pdep_32', ['a', 'b'], 'mInt32Ty'],
-['RDTSC', 'x86_rdtsc', [], 'mInt64Ty'],
+['VGATHERPD',   ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+['VGATHERPS',   ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+['VGATHERDD',   ['src', 'pBase', 'indices', 'mask', 'scale'], 'src'],
+['VRCPPS',  ['a'], 'a'],
+['VROUND',  ['a', 'rounding'], 'a'],
+['BEXTR_32',['src', 'control'], 'src'],
+['VPSHUFB', ['a', 'b'], 'a'],
+['VPERMD',  ['a', 'idx'], 'a'],
+['VPERMPS', ['idx', 'a'], 'a'],
+['VCVTPD2PS',   ['a'], 'VectorType::get(mFP32Ty, 
a->getType()->getVectorNumElements())'],
+['VCVTPH2PS',   ['a'], 'VectorType::get(mFP32Ty, 
a->getType()->getVectorNumElements())'],
+['VCVTPS2PH',   ['a', 'round'], 'mSimdFP16Ty'],
+['VHSUBPS', ['a', 'b'], 'a'],
+['VPTESTC', ['a', 'b'], 'mInt32Ty'],
+['VPTESTZ', ['a', 'b'], 'mInt32Ty'],
+['VFMADDPS',['a', 'b', 'c'], 'a'],
+['VMOVMSKPS',   ['a'], 'mInt32Ty'],
+['VPHADDD', ['a', 'b'], 'a'],
+['PDEP32',  ['a', 'b'], 'a'],
+['RDTSC',   [], 'mInt64Ty'],
 ]
 
 llvm_intrinsics = [
@@ -231,19 +229,31 @@ def generate_meta_h(output_dir):
 
 functions = []
 for inst in intrinsics:
+name = inst[0]
+args = inst[1]
+ret = inst[2]
+
 #print('Inst: %s, x86: %s numArgs: %d' % (inst[0], inst[1], 
len(inst[2])))
-if len(inst[2]) != 0:
-declargs = 'Value* ' + ', Value* '.join(inst[2])
-decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (inst[0], 
declargs)
+if len(args) != 0:
+declargs = 'Value* ' + ', Value* '.join(args)
+decl = 'Value* %s(%s, const llvm::Twine& name = "")' % (name, 
declargs)
 else:
-decl = 'Value* %s(const llvm::Twine& name = "")' % (inst[0])
+decl = 'Value* %s(const llvm::Twine& name = "")' % (name)
+
+# determine the return type of the intrinsic. It can either be:
+# - type of one of the input arguments
+# - snippet of code to set the return type
+
+if ret in args:
+returnTy = ret + '->getType()'
+else:
+returnTy = ret
 
 functions.append({
 'decl'  : decl,
-'name'  : inst[0],
-'intrin': inst[1],
-'args'  : inst[2],
-'returnType': inst[3]
+'name'  : name,
+'args'  : args,
+'returnType': returnTy
 })
 
 

[Mesa-dev] [PATCH 31/45] swr/rast: Change gfx pointers to gfxptr_t

2018-04-13 Thread George Kyriazis
Changing type to gfxptr for indices and related changes to fetch and mem
builder code.
---
 .../swr/rasterizer/codegen/gen_llvm_ir_macros.py   |  4 +-
 src/gallium/drivers/swr/rasterizer/core/api.cpp|  8 +-
 src/gallium/drivers/swr/rasterizer/core/context.h  |  2 +-
 .../drivers/swr/rasterizer/core/frontend.cpp   | 40 -
 src/gallium/drivers/swr/rasterizer/core/state.h|  6 +-
 .../swr/rasterizer/jitter/builder_gfx_mem.cpp  | 80 +++--
 .../swr/rasterizer/jitter/builder_gfx_mem.h| 24 --
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 35 ++--
 .../drivers/swr/rasterizer/jitter/builder_mem.h| 23 +++--
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 99 --
 src/gallium/drivers/swr/swr_state.cpp  |  2 +-
 11 files changed, 220 insertions(+), 103 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py 
b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index bdd785a..2636e60 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -162,7 +162,9 @@ def parse_ir_builder(input_file):
 if (func_name == 'CreateInsertNUWNSWBinOp' or
 func_name == 'CreateMaskedIntrinsic' or
 func_name == 'CreateAlignmentAssumptionHelper' or
-func_name == 'CreateLoad'):
+func_name == 'CreateGEP' or
+func_name == 'CreateLoad' or
+func_name == 'CreateMaskedLoad'):
 ignore = True
 
 # Convert CamelCase to CAMEL_CASE
diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp 
b/src/gallium/drivers/swr/rasterizer/core/api.cpp
index 53bd2d2..3141db6 100644
--- a/src/gallium/drivers/swr/rasterizer/core/api.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp
@@ -1321,8 +1321,8 @@ void DrawIndexedInstance(
 }
 
 int draw = 0;
-uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices;
-pIB += (uint64_t)indexOffset * (uint64_t)indexSize;
+gfxptr_t xpIB = pState->indexBuffer.xpIndices;
+xpIB += (uint64_t)indexOffset * (uint64_t)indexSize;
 
 pState->topology = topology;
 pState->forceFront = false;
@@ -1360,7 +1360,7 @@ void DrawIndexedInstance(
 pDC->pState->pfnProcessPrims != nullptr);
 pDC->FeWork.desc.draw.pDC = pDC;
 pDC->FeWork.desc.draw.numIndices = numIndicesForDraw;
-pDC->FeWork.desc.draw.pIB = (int*)pIB;
+pDC->FeWork.desc.draw.xpIB = xpIB;
 pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format;
 
 pDC->FeWork.desc.draw.numInstances = numInstances;
@@ -1376,7 +1376,7 @@ void DrawIndexedInstance(
 AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, 
numIndicesForDraw, indexOffset, baseVertex,
 numInstances, startInstance, pState->tsState.tsEnable, 
pState->gsState.gsEnable, pState->soState.soEnable, 
pState->gsState.outputTopology, draw));
 
-pIB += maxIndicesPerDraw * indexSize;
+xpIB += maxIndicesPerDraw * indexSize;
 remainingIndices -= numIndicesForDraw;
 draw++;
 }
diff --git a/src/gallium/drivers/swr/rasterizer/core/context.h 
b/src/gallium/drivers/swr/rasterizer/core/context.h
index 489aa78..7bc69f5 100644
--- a/src/gallium/drivers/swr/rasterizer/core/context.h
+++ b/src/gallium/drivers/swr/rasterizer/core/context.h
@@ -176,7 +176,7 @@ struct DRAW_WORK
 };
 union
 {
-const int32_t* pIB;// DrawIndexed: App supplied indices
+gfxptr_t   xpIB;  // DrawIndexed: App supplied int32 
indices 
 uint32_t   startVertex;// Draw: Starting vertex in VB to render 
from.
 };
 int32_tbaseVertex;
diff --git a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp 
b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
index 2076859..30c2e7b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/frontend.cpp
@@ -1527,28 +1527,24 @@ void ProcessDraw(
 uint32_t indexSize = 0;
 uint32_t endVertex = work.numVerts;
 
-const int32_t* pLastRequestedIndex = nullptr;
+gfxptr_t xpLastRequestedIndex = 0;
 if (IsIndexedT::value)
 {
 switch (work.type)
 {
 case R32_UINT:
 indexSize = sizeof(uint32_t);
-pLastRequestedIndex = &(work.pIB[endVertex]);
 break;
 case R16_UINT:
 indexSize = sizeof(uint16_t);
-// nasty address offset to last index
-pLastRequestedIndex = 
(int32_t*)(&(((uint16_t*)work.pIB)[endVertex]));
 break;
 case R8_UINT:
 indexSize = sizeof(uint8_t);
-// nasty address offset to last index
-pLastRequestedIndex = 

[Mesa-dev] [PATCH 15/45] swr/rast: Code cleanup

2018-04-13 Thread George Kyriazis
Removing some code that doesn't seem to do anything meaningful.
---
 src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 5c8d813..5971a52 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -156,14 +156,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& 
fetchState)
 mpFetchInfo->setName("fetchInfo");
 Value*pVtxOut = &*argitr;
 pVtxOut->setName("vtxOutput");
-// this is just shorthand to tell LLVM to get a pointer to the base 
address of simdvertex
-// index 0(just the pointer to the simdvertex structure
-// index 1(which element of the simdvertex structure to offset to(in this 
case 0)
-// so the indices being i32's doesn't matter
-// TODO: generated this GEP with a VECTOR structure type so this makes 
sense
-std::vectorvtxInputIndices(2, C(0));
-// GEP
-pVtxOut = GEP(pVtxOut, C(0));
+
 #if USE_SIMD16_SHADERS
 #if 0// USE_SIMD16_BUILDER
 pVtxOut = BITCAST(pVtxOut, PointerType::get(VectorType::get(mFP32Ty, 
mVWidth16), 0));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 38/45] swr/rast: Fix 64bit float loads in x86 lowering pass

2018-04-13 Thread George Kyriazis
---
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 39 +-
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp| 31 +
 2 files changed, 25 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index a163b8f..3c3c157 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -201,44 +201,7 @@ namespace SwrJit
 /// @param scale - value to scale indices by
 Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, 
Value* vMask, uint8_t scale)
 {
-Value* vGather;
-
-// use avx2 gather instruction if available
-if (JM()->mArch.AVX2())
-{
-vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth / 
2)), VectorType::get(mDoubleTy, mVWidth / 2));
-vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
-}
-else
-{
-Value* pStack = STACKSAVE();
-
-// store vSrc on the stack.  this way we can select between a 
valid load address and the vSrc address
-Value* vSrcPtr = ALLOCA(vSrc->getType());
-SetTempAlloca(vSrcPtr);
-STORE(vSrc, vSrcPtr);
-
-vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
-Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
-Value *vOffsets = MUL(vIndices, vScaleVec);
-for (uint32_t i = 0; i < mVWidth / 2; ++i)
-{
-// single component byte index
-Value *offset = VEXTRACT(vOffsets, C(i));
-// byte pointer to component
-Value *loadAddress = GEP(pBase, offset);
-loadAddress = BITCAST(loadAddress, PointerType::get(mDoubleTy, 
0));
-// pointer to the value to load if we're masking off a 
component
-Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
-Value *selMask = VEXTRACT(vMask, C(i));
-// switch in a safe address to load if we're trying to access 
a vertex
-Value *validAddress = SELECT(selMask, loadAddress, 
maskLoadAddress);
-Value *val = LOAD(validAddress);
-vGather = VINSERT(vGather, val, C(i));
-}
-STACKRESTORE(pStack);
-}
-return vGather;
+return VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
 }
 
 //
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index cdfddf3..767866f 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -230,7 +230,6 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& 
fetchState)
 }
 
 // Fetch attributes from memory and output to a simdvertex struct
-// since VGATHER has a perf penalty on HSW vs BDW, allow client to choose 
which fetch method to use
 JitGatherVertices(fetchState, streams, vIndices, pVtxOut);
 
 RET_VOID();
@@ -763,13 +762,31 @@ void FetchJit::JitGatherVertices(const 
FETCH_COMPILE_STATE ,
 // if we need to gather the component
 if (compCtrl[i] == StoreSrc)
 {
-Value *vMaskLo = VSHUFFLE(vGatherMask, 
VUNDEF(mInt1Ty, 8), C({0, 1, 2, 3}));
-Value *vMaskHi = VSHUFFLE(vGatherMask, 
VUNDEF(mInt1Ty, 8), C({4, 5, 6, 7}));
+Value* vShufLo;
+Value* vShufHi;
+Value* vShufAll;
 
-Value *vOffsetsLo = VEXTRACTI128(vOffsets, 
C(0));
-Value *vOffsetsHi = VEXTRACTI128(vOffsets, 
C(1));
+if (mVWidth == 8)
+{
+vShufLo = C({ 0, 1, 2, 3 });
+vShufHi = C({ 4, 5, 6, 7 });
+vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+}
+else
+{
+SWR_ASSERT(mVWidth == 16);
+vShufLo = C({ 0, 1, 2, 3, 4, 5, 6, 7 });
+vShufHi = C({ 8, 9, 10, 11, 12, 13, 14, 15 
});
+vShufAll = C({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15 });
+}
+
+Value *vMaskLo = VSHUFFLE(vGatherMask, 
vGatherMask, vShufLo);
+Value *vMaskHi = 

[Mesa-dev] [PATCH 18/45] swr/rast: Move CallPrint() to a separate file

2018-04-13 Thread George Kyriazis
Needed work for jit code debug.
---
 src/gallium/drivers/swr/Makefile.sources   |  3 +-
 src/gallium/drivers/swr/meson.build|  1 +
 .../drivers/swr/rasterizer/jitter/builder_misc.cpp | 22 +-
 .../rasterizer/jitter/shader_lib/DebugOutput.cpp   | 51 ++
 4 files changed, 56 insertions(+), 21 deletions(-)
 create mode 100644 
src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp

diff --git a/src/gallium/drivers/swr/Makefile.sources 
b/src/gallium/drivers/swr/Makefile.sources
index cbf7395..4924da1 100644
--- a/src/gallium/drivers/swr/Makefile.sources
+++ b/src/gallium/drivers/swr/Makefile.sources
@@ -151,7 +151,8 @@ JITTER_CXX_SOURCES := \
rasterizer/jitter/JitManager.cpp \
rasterizer/jitter/JitManager.h \
rasterizer/jitter/streamout_jit.cpp \
-   rasterizer/jitter/streamout_jit.h
+   rasterizer/jitter/streamout_jit.h \
+   rasterizer/jitter/shader_lib/DebugOutput.cpp
 
 MEMORY_CXX_SOURCES := \
rasterizer/memory/ClearTile.cpp \
diff --git a/src/gallium/drivers/swr/meson.build 
b/src/gallium/drivers/swr/meson.build
index b28abd6..3848232 100644
--- a/src/gallium/drivers/swr/meson.build
+++ b/src/gallium/drivers/swr/meson.build
@@ -79,6 +79,7 @@ files_swr_mesa = files(
   'rasterizer/jitter/JitManager.h',
   'rasterizer/jitter/streamout_jit.cpp',
   'rasterizer/jitter/streamout_jit.h',
+  'rasterizer/jitter/shader_lib/DebugOutput.cpp',
 )
 
 files_swr_arch = files(
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index c266018..54987c7 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -33,10 +33,10 @@
 
 #include 
 
+extern "C" void CallPrint(const char* fmt, ...);
+
 namespace SwrJit
 {
-void __cdecl CallPrint(const char* fmt, ...);
-
 //
 /// @brief Convert an IEEE 754 32-bit single precision float to an
 ///16 bit float with 5 exponent bits and a variable
@@ -846,24 +846,6 @@ namespace SwrJit
 /// @brief C functions called by LLVM IR
 //
 
-//
-/// @brief called in JIT code, inserted by PRINT
-/// output to both stdout and visual studio debug console
-void __cdecl CallPrint(const char* fmt, ...)
-{
-va_list args;
-va_start(args, fmt);
-vprintf(fmt, args);
-
-#if defined( _WIN32 )
-char strBuf[1024];
-vsnprintf_s(strBuf, _TRUNCATE, fmt, args);
-OutputDebugStringA(strBuf);
-#endif
-
-va_end(args);
-}
-
 Value *Builder::VEXTRACTI128(Value* a, Constant* imm8)
 {
 bool flag = !imm8->isZeroValue();
diff --git 
a/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
new file mode 100644
index 000..54d45e6
--- /dev/null
+++ b/src/gallium/drivers/swr/rasterizer/jitter/shader_lib/DebugOutput.cpp
@@ -0,0 +1,51 @@
+/
+* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+* IN THE SOFTWARE.
+*
+* @file DebugOutput.cpp
+*
+* @brief Shader support library implementation for printed Debug output
+*
+* Notes:
+*
+**/
+#include 
+#include "common/os.h"
+
+
+//
+/// @brief called in JIT code, inserted by PRINT
+/// output to both stdout and visual studio debug console
+extern "C" void CallPrint(const char* 

[Mesa-dev] [PATCH 23/45] swr/rast: Lower VGATHERPS and VGATHERPS_16 to x86.

2018-04-13 Thread George Kyriazis
Some more work to do before we can support simultaneous 8-wide and
16-wide and remove the VGATHERPS_16 version.
---
 .../drivers/swr/rasterizer/jitter/builder_mem.cpp  | 69 +-
 1 file changed, 2 insertions(+), 67 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp 
b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
index a27f02e..0550493 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp
@@ -153,79 +153,14 @@ namespace SwrJit
 {
 AssertRastyMemoryParams(pBase, usage);
 
-Value *vGather;
-Value *pBasePtr = INT_TO_PTR(pBase, PointerType::get(mInt8Ty, 0));
-
-// use avx2 gather instruction if available
-if (JM()->mArch.AVX2())
-{
-vGather = VGATHERPS(vSrc, pBasePtr, vIndices, vMask, C(scale));
-}
-else
-{
-Value* pStack = STACKSAVE();
-
-// store vSrc on the stack.  this way we can select between a 
valid load address and the vSrc address
-Value* vSrcPtr = ALLOCA(vSrc->getType());
-STORE(vSrc, vSrcPtr);
-
-vGather = VUNDEF_F();
-Value *vScaleVec = VIMMED1((uint32_t)scale);
-Value *vOffsets = MUL(vIndices, vScaleVec);
-for (uint32_t i = 0; i < mVWidth; ++i)
-{
-// single component byte index
-Value *offset = VEXTRACT(vOffsets, C(i));
-// byte pointer to component
-Value *loadAddress = GEP(pBasePtr, offset);
-loadAddress = BITCAST(loadAddress, PointerType::get(mFP32Ty, 
0));
-// pointer to the value to load if we're masking off a 
component
-Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
-Value *selMask = VEXTRACT(vMask, C(i));
-// switch in a safe address to load if we're trying to access 
a vertex 
-Value *validAddress = SELECT(selMask, loadAddress, 
maskLoadAddress);
-Value *val = LOAD(validAddress);
-vGather = VINSERT(vGather, val, C(i));
-}
-
-STACKRESTORE(pStack);
-}
-
-return vGather;
+return VGATHERPS(vSrc, pBase, vIndices, vMask, C(scale));
 }
 
 Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, 
Value *vMask, uint8_t scale, JIT_MEM_CLIENT usage)
 {
 AssertRastyMemoryParams(pBase, usage);
 
-Value *vGather = VUNDEF_F_16();
-
-// use AVX512F gather instruction if available
-if (JM()->mArch.AVX512F())
-{
-// force mask to , required by vgather2
-Value *mask = BITCAST(vMask, mInt16Ty);
-
-vGather = VGATHERPS_16(vSrc, pBase, vIndices, mask, 
C((uint32_t)scale));
-}
-else
-{
-Value *src0 = EXTRACT_16(vSrc, 0);
-Value *src1 = EXTRACT_16(vSrc, 1);
-
-Value *indices0 = EXTRACT_16(vIndices, 0);
-Value *indices1 = EXTRACT_16(vIndices, 1);
-
-Value *mask0 = EXTRACT_16(vMask, 0);
-Value *mask1 = EXTRACT_16(vMask, 1);
-
-Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
-Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);
-
-vGather = JOIN_16(gather0, gather1);
-}
-
-return vGather;
+return VGATHERPS_16(vSrc, pBase, vIndices, vMask, C(scale));
 }
 
 //
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] radeonsi: implement mechanism for IBs without partial flushes at the end (v6)

2018-04-13 Thread Marek Olšák
Ping

On Fri, Apr 6, 2018 at 10:31 PM, Marek Olšák  wrote:

> From: Marek Olšák 
>
> (This patch doesn't enable the behavior. It will be enabled in a later
> commit.)
>
> Draw calls from multiple IBs can be executed in parallel.
>
> v2: do emit partial flushes on SI
> v3: invalidate all shader caches at the beginning of IBs
> v4: don't call si_emit_cache_flush in si_flush_gfx_cs if not needed,
> only do this for flushes invoked internally
> v5: empty IBs should wait for idle if the flush requires it
> v6: split the commit
>
> If we artificially limit the number of draw calls per IB to 5, we'll get
> a lot more IBs, leading to a lot more partial flushes. Let's see how
> the removal of partial flushes changes GPU utilization in that scenario:
>
> With partial flushes (time busy):
> CP: 99%
> SPI: 86%
> CB: 73%
>
> Without partial flushes (time busy):
> CP: 99%
> SPI: 93%
> CB: 81%
> ---
>  src/gallium/drivers/radeon/radeon_winsys.h |  7 
>  src/gallium/drivers/radeonsi/si_gfx_cs.c   | 52
> ++
>  src/gallium/drivers/radeonsi/si_pipe.h |  1 +
>  3 files changed, 46 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h
> b/src/gallium/drivers/radeon/radeon_winsys.h
> index 157b2e40550..fae4fb7a95d 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -21,20 +21,27 @@
>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE. */
>
>  #ifndef RADEON_WINSYS_H
>  #define RADEON_WINSYS_H
>
>  /* The public winsys interface header for the radeon driver. */
>
> +/* Whether the next IB can start immediately and not wait for draws and
> + * dispatches from the current IB to finish. */
> +#define RADEON_FLUSH_START_NEXT_GFX_IB_NOW (1u << 31)
> +
> +#define RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW \
> +   (PIPE_FLUSH_ASYNC | RADEON_FLUSH_START_NEXT_GFX_IB_NOW)
> +
>  #include "pipebuffer/pb_buffer.h"
>
>  #include "amd/common/ac_gpu_info.h"
>  #include "amd/common/ac_surface.h"
>
>  /* Tiling flags. */
>  enum radeon_bo_layout {
>  RADEON_LAYOUT_LINEAR = 0,
>  RADEON_LAYOUT_TILED,
>  RADEON_LAYOUT_SQUARETILED,
> diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> index 2d5e510b19e..63bff29e63a 100644
> --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
> +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
> @@ -62,25 +62,42 @@ void si_need_gfx_cs_space(struct si_context *ctx)
> unsigned need_dwords = 2048 + ctx->num_cs_dw_queries_suspend;
> if (!ctx->ws->cs_check_space(cs, need_dwords))
> si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
>  }
>
>  void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
>  struct pipe_fence_handle **fence)
>  {
> struct radeon_winsys_cs *cs = ctx->gfx_cs;
> struct radeon_winsys *ws = ctx->ws;
> +   unsigned wait_flags = 0;
>
> if (ctx->gfx_flush_in_progress)
> return;
>
> -   if (!radeon_emitted(cs, ctx->initial_gfx_cs_size))
> +   if (ctx->chip_class == VI && ctx->screen->info.drm_minor <= 1) {
> +   /* DRM 3.1.0 doesn't flush TC for VI correctly. */
> +   wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> + SI_CONTEXT_CS_PARTIAL_FLUSH |
> + SI_CONTEXT_INV_GLOBAL_L2;
> +   } else if (ctx->chip_class == SI) {
> +   /* The kernel flushes L2 before shaders are finished. */
> +   wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> + SI_CONTEXT_CS_PARTIAL_FLUSH;
> +   } else if (!(flags & RADEON_FLUSH_START_NEXT_GFX_IB_NOW)) {
> +   wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
> + SI_CONTEXT_CS_PARTIAL_FLUSH;
> +   }
> +
> +   /* Drop this flush if it's a no-op. */
> +   if (!radeon_emitted(cs, ctx->initial_gfx_cs_size) &&
> +   (!wait_flags || !ctx->gfx_last_ib_is_busy))
> return;
>
> if (si_check_device_reset(ctx))
> return;
>
> if (ctx->screen->debug_flags & DBG(CHECK_VM))
> flags &= ~PIPE_FLUSH_ASYNC;
>
> /* If the state tracker is flushing the GFX IB, si_flush_from_st is
>  * responsible for flushing the DMA IB and merging the fences from
> both.
> @@ -96,27 +113,25 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned
> flags,
>
> if (!LIST_IS_EMPTY(>active_queries))
> si_suspend_queries(ctx);
>
> ctx->streamout.suspended = false;
> if (ctx->streamout.begin_emitted) {
> si_emit_streamout_end(ctx);
>  

[Mesa-dev] [AppVeyor] mesa master #7459 completed

2018-04-13 Thread AppVeyor


Build mesa 7459 completed



Commit 6a519a157b by Brian Paul on 4/13/2018 9:34 PM:

gallium/osmesa: link with winsock2 library on Windows\n\nTo fix the MSVC build.  The build broke because we started to compile\nthe ddebug code on Windows after the mtypes.h changes.  Building ddebug\ncaused us to also use the u_network.c code for the first time.\n\nReviewed-by: Charmaine Lee 


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106039] Undefined version strings in pc files with meson build

2018-04-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106039

Dylan Baker  changed:

   What|Removed |Added

 CC||baker.dyla...@gmail.com

--- Comment #1 from Dylan Baker  ---
This will be fixed in 18.0.1; it's due to a missing script in the source
tarball.

here's the commit:
bc2fdb9759dc702ec351a044b3fd408c0701fedb

which is queued for the 18.0.1 release.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] gallium/util: put (void) in a few function signatures

2018-04-13 Thread Marek Olšák
For patches 1 - 4:

Reviewed-by: Marek Olšák 

Marek

On Fri, Apr 13, 2018 at 5:35 PM, Brian Paul  wrote:

> To match the header file.
> ---
>  src/gallium/auxiliary/util/u_network.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_network.c
> b/src/gallium/auxiliary/util/u_network.c
> index e74293b..89395f5 100644
> --- a/src/gallium/auxiliary/util/u_network.c
> +++ b/src/gallium/auxiliary/util/u_network.c
> @@ -20,7 +20,7 @@
>  #endif
>
>  boolean
> -u_socket_init()
> +u_socket_init(void)
>  {
>  #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
> WORD wVersionRequested;
> @@ -44,7 +44,7 @@ u_socket_init()
>  }
>
>  void
> -u_socket_stop()
> +u_socket_stop(void)
>  {
>  #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
> WSACleanup();
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] gallium/osmesa: link with winsock2 library on Windows

2018-04-13 Thread Charmaine Lee


For the series, Reviewed-by: Charmaine Lee 


From: Brian Paul 
Sent: Friday, April 13, 2018 2:35:37 PM
To: mesa-dev@lists.freedesktop.org
Cc: Charmaine Lee; Neha Bhende
Subject: [PATCH 5/5] gallium/osmesa: link with winsock2 library on Windows

To fix the MSVC build.  The build broke because we started to compile
the ddebug code on Windows after the mtypes.h changes.  Building ddebug
caused us to also use the u_network.c code for the first time.
---
 src/gallium/targets/osmesa/SConscript | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/targets/osmesa/SConscript 
b/src/gallium/targets/osmesa/SConscript
index f49f1fe..3df5c50 100644
--- a/src/gallium/targets/osmesa/SConscript
+++ b/src/gallium/targets/osmesa/SConscript
@@ -39,6 +39,9 @@ if env['platform'] == 'windows':
 sources += ['osmesa.mingw.def']
 else:
 sources += ['osmesa.def']
+# Link with winsock2 library
+env.Append(LIBS = ['ws2_32'])
+

 gallium_osmesa = env.SharedLibrary(
 target ='osmesa',
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 100629] No mans sky renders white screen under wine in linux

2018-04-13 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=100629

--- Comment #7 from Timothy Arceri  ---
(In reply to Matteo Bruni from comment #6)
> Notice that No Man's Sky is an OpenGL game so Wine should have no effect on
> the game requiring compatibility profile or not.

In that case, the game's use of the compat profile is the problem, i.e. a known
missing feature in Mesa rather than a bug. There are plans to start adding
support to radeonsi, but it's still a little while off.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-13 Thread Marek Olšák
On Fri, Apr 13, 2018 at 5:26 PM, Dylan Baker  wrote:

> Okay, I've figured it out. On my system RTTI is enabled by default, on
> ubuntu
> 16.04 RTTI is disabled. meson doesn't account for this in its llvm module,
> because you would need to compile all C++ code with -fno-rtti to make it
> link
> reliably.
>
> short answer:
> add `-DLLVM_ENABLE_RTTI=1` to the cmake invocation for LLVM
> or
> `meson build --cpp_args="-fno-rtti" ...` for mesa
>
> I can probably fix this in mesa, but it will require bumping the meson
> version.
>

My meson doesn't have --cpp_args, so I guess 0.45.1 is too old.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium: move ddebug, noop, rbug, trace to auxiliary to improve build times

2018-04-13 Thread Dylan Baker
Quoting Marek Olšák (2018-04-13 15:33:02)
> On Fri, Apr 13, 2018 at 5:26 PM, Dylan Baker  wrote:
> 
> Okay, I've figured it out. On my system RTTI is enabled by default, on
> ubuntu
> 16.04 RTTI is disabled. meson doesn't account for this in its llvm 
> module,
> because you would need to compile all C++ code with -fno-rtti to make it
> link
> reliably.
> 
> short answer:
> add `-DLLVM_ENABLE_RTTI=1` to the cmake invocation for LLVM
> or
> `meson build --cpp_args="-fno-rtti" ...` for mesa
> 
> I can probably fix this in mesa, but it will require bumping the meson
> version.
> 
> 
> My meson doesn't have --cpp_args, so I guess 0.45.1 is too old.

Gah, those damn -- vs -D arguments again. I need to go yell at meson people
about this, again.

Use the CXXFLAGS variable like autotools for the initial meson configure, or
-Dcpp_args for `meson configure`. I'm going to go yell at meson people again
about the -D vs -- thing...

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   3   >