[Mesa-dev] [PATCH 3/3] st/va: flush the context before calling flush_frontbuffer(v2)

2016-09-19 Thread Nayan Deshmukh
so that the texture is rendered to back buffer before calling
flush_frontbuffer and can be copied to a different buffer in
the function

v2: change comment style

Signed-off-by: Nayan Deshmukh 
Reviewed-by: Michel Dänzer 
---
 src/gallium/state_trackers/va/surface.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/va/surface.c 
b/src/gallium/state_trackers/va/surface.c
index 00df69d..115db43 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -321,10 +321,14 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID 
surface_id, void* draw, short s
   return status;
}
 
+   /* flush before calling flush_frontbuffer so that rendering is flushed
+* to back buffer so the texture can be copied in flush_frontbuffer
+*/
+   drv->pipe->flush(drv->pipe, NULL, 0);
+
screen->flush_frontbuffer(screen, tex, 0, 0,
  vscreen->get_private(vscreen), NULL);
 
-   drv->pipe->flush(drv->pipe, NULL, 0);
 
pipe_resource_reference(, NULL);
pipe_surface_reference(_draw, NULL);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] st/vdpau: flush the context before calling flush_frontbuffer

2016-09-19 Thread Nayan Deshmukh
so that the texture is rendered to back buffer before calling
flush_frontbuffer and can be copied to a different buffer in
the function

Signed-off-by: Nayan Deshmukh 
Reviewed-by: Michel Dänzer 
---
 src/gallium/state_trackers/vdpau/presentation.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c 
b/src/gallium/state_trackers/vdpau/presentation.c
index 2862eaf..f35d73a 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -271,11 +271,14 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue 
presentation_queue,
}
 
vscreen->set_next_timestamp(vscreen, earliest_presentation_time);
-   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
-   vscreen->get_private(vscreen), NULL);
 
+   // flush before calling flush_frontbuffer so that rendering is flushed
+   //  to back buffer so the texture can be copied in flush_frontbuffer
pipe->screen->fence_reference(pipe->screen, >fence, NULL);
pipe->flush(pipe, >fence, 0);
+   pipe->screen->flush_frontbuffer(pipe->screen, tex, 0, 0,
+   vscreen->get_private(vscreen), NULL);
+
pq->last_surf = surf;
 
if (dump_window == -1) {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] vl/dri3: handle the case of different GPU(v4.2)

2016-09-19 Thread Nayan Deshmukh
In case of prime when rendering is done on a GPU other than the
server GPU, use a separate linear buffer for each back buffer
which will be displayed using the present extension.

v2: Use a separate linear buffer for each back buffer (Michel)
v3: Change variable names and fix coding style (Leo and Emil)
v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when
a separate linear buffer is used (Michel)
v4.1: remove empty line
v4.2: destroy the context and handle the case when
  create_context fails (Emil)

Signed-off-by: Nayan Deshmukh 
Reviewed-by: Leo Liu 
Acked-by: Michel Dänzer 
---
 src/gallium/auxiliary/vl/vl_winsys_dri3.c | 66 +--
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 3d596a6..191a64b 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -49,6 +49,7 @@
 struct vl_dri3_buffer
 {
struct pipe_resource *texture;
+   struct pipe_resource *linear_texture;
 
uint32_t pixmap;
uint32_t sync_fence;
@@ -69,6 +70,8 @@ struct vl_dri3_screen
xcb_present_event_t eid;
xcb_special_event_t *special_event;
 
+   struct pipe_context *pipe;
+
struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM];
int cur_back;
 
@@ -82,6 +85,7 @@ struct vl_dri3_screen
int64_t last_ust, ns_frame, last_msc, next_msc;
 
bool flushed;
+   bool is_different_gpu;
 };
 
 static void
@@ -102,6 +106,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn,
xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
xshmfence_unmap_shm(buffer->shm_fence);
pipe_resource_reference(>texture, NULL);
+   if (buffer->linear_texture)
+   pipe_resource_reference(>linear_texture, NULL);
FREE(buffer);
 }
 
@@ -209,7 +215,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
xcb_sync_fence_t sync_fence;
struct xshmfence *shm_fence;
int buffer_fd, fence_fd;
-   struct pipe_resource templ;
+   struct pipe_resource templ, *pixmap_buffer_texture;
struct winsys_handle whandle;
unsigned usage;
 
@@ -226,8 +232,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
   goto close_fd;
 
memset(, 0, sizeof(templ));
-   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
-PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
+   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
templ.format = PIPE_FORMAT_B8G8R8X8_UNORM;
templ.target = PIPE_TEXTURE_2D;
templ.last_level = 0;
@@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
templ.height0 = scrn->height;
templ.depth0 = 1;
templ.array_size = 1;
-   buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen,
- );
-   if (!buffer->texture)
-  goto unmap_shm;
 
+   if (scrn->is_different_gpu) {
+  buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen,
+);
+  if (!buffer->texture)
+ goto unmap_shm;
+
+  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
+PIPE_BIND_LINEAR;
+  buffer->linear_texture = 
scrn->base.pscreen->resource_create(scrn->base.pscreen,
+  );
+  pixmap_buffer_texture = buffer->linear_texture;
+
+  if (!buffer->linear_texture)
+ goto no_linear_texture;
+   } else {
+  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
+  buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen,
+);
+  if (!buffer->texture)
+ goto unmap_shm;
+  pixmap_buffer_texture = buffer->texture;
+   }
memset(, 0, sizeof(whandle));
whandle.type= DRM_API_HANDLE_TYPE_FD;
usage = PIPE_HANDLE_USAGE_EXPLICIT_FLUSH | PIPE_HANDLE_USAGE_READ;
scrn->base.pscreen->resource_get_handle(scrn->base.pscreen, NULL,
-   buffer->texture, ,
+   pixmap_buffer_texture, ,
usage);
buffer_fd = whandle.handle;
buffer->pitch = whandle.stride;
@@ -271,6 +294,8 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
 
return buffer;
 
+no_linear_texture:
+   pipe_resource_reference(>texture, NULL);
 unmap_shm:
xshmfence_unmap_shm(shm_fence);
 close_fd:
@@ -474,6 +499,7 @@ vl_dri3_flush_frontbuffer(struct pipe_screen *screen,
struct vl_dri3_screen *scrn = (struct vl_dri3_screen *)context_private;
uint32_t options = XCB_PRESENT_OPTION_NONE;
struct vl_dri3_buffer *back;
+   struct pipe_box src_box;
 
back = scrn->back_buffers[scrn->cur_back];
if (!back)
@@ -485,6 +511,16 @@ vl_dri3_flush_frontbuffer(struct 

Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)

2016-09-19 Thread Nayan Deshmukh
Hi Leo,



On Tue, Sep 20, 2016 at 1:56 AM, Leo Liu  wrote:

>
>
> On 09/17/2016 07:33 AM, Nayan Deshmukh wrote:
>
> Hi Leo,
>
> Could you push the patches? I don't have the push access.
>
>
>
> Can you rebase all your reviewed patches, and add RB to it, and then you
> can send them to me ?
>
> Surely. :)

I will also include the changes suggested by Emil.

> Sorry for being too busy to do this for you.
>
> Regards,
> Leo
>
>
>
> Regards,
> Nayan.
>
> On Fri, Sep 16, 2016 at 7:44 PM, Leo Liu  wrote:
>
>> This Patch is Reviewed-by: Leo Liu 
>>
>>
>> On 09/16/2016 08:51 AM, Nayan Deshmukh wrote:
>>
>>> In case of prime when rendering is done on a GPU other than the
>>> server GPU, use a separate linear buffer for each back buffer
>>> which will be displayed using the present extension.
>>>
>>> v2: Use a separate linear buffer for each back buffer (Michel)
>>> v3: Change variable names and fix coding style (Leo and Emil)
>>> v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when
>>>  a separate linear buffer is used (Michel)
>>> v4.1: remove empty line
>>>
>>> Signed-off-by: Nayan Deshmukh < 
>>> nayan26deshm...@gmail.com>
>>> ---
>>>   src/gallium/auxiliary/vl/vl_winsys_dri3.c | 61
>>> ---
>>>   1 file changed, 48 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>>> b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>>> index 3d596a6..e0aaad8 100644
>>> --- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>>> +++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
>>> @@ -49,6 +49,7 @@
>>>   struct vl_dri3_buffer
>>>   {
>>>  struct pipe_resource *texture;
>>> +   struct pipe_resource *linear_texture;
>>>uint32_t pixmap;
>>>  uint32_t sync_fence;
>>> @@ -69,6 +70,8 @@ struct vl_dri3_screen
>>>  xcb_present_event_t eid;
>>>  xcb_special_event_t *special_event;
>>>   +   struct pipe_context *pipe;
>>> +
>>>  struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM];
>>>  int cur_back;
>>>   @@ -82,6 +85,7 @@ struct vl_dri3_screen
>>>  int64_t last_ust, ns_frame, last_msc, next_msc;
>>>bool flushed;
>>> +   bool is_different_gpu;
>>>   };
>>> static void
>>> @@ -102,6 +106,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn,
>>>  xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
>>>  xshmfence_unmap_shm(buffer->shm_fence);
>>>  pipe_resource_reference(>texture, NULL);
>>> +   if (buffer->linear_texture)
>>> +   pipe_resource_reference(>linear_texture, NULL);
>>>  FREE(buffer);
>>>   }
>>>   @@ -209,7 +215,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
>>>  xcb_sync_fence_t sync_fence;
>>>  struct xshmfence *shm_fence;
>>>  int buffer_fd, fence_fd;
>>> -   struct pipe_resource templ;
>>> +   struct pipe_resource templ, *pixmap_buffer_texture;
>>>  struct winsys_handle whandle;
>>>  unsigned usage;
>>>   @@ -226,8 +232,7 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
>>> goto close_fd;
>>>memset(, 0, sizeof(templ));
>>> -   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
>>> -PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
>>> +   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
>>>  templ.format = PIPE_FORMAT_B8G8R8X8_UNORM;
>>>  templ.target = PIPE_TEXTURE_2D;
>>>  templ.last_level = 0;
>>> @@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
>>>  templ.height0 = scrn->height;
>>>  templ.depth0 = 1;
>>>  templ.array_size = 1;
>>> -   buffer->texture = scrn->base.pscreen->resource_c
>>> reate(scrn->base.pscreen,
>>> - );
>>> -   if (!buffer->texture)
>>> -  goto unmap_shm;
>>>   +   if (scrn->is_different_gpu) {
>>> +  buffer->texture = scrn->base.pscreen->resource_c
>>> reate(scrn->base.pscreen,
>>> +);
>>> +  if (!buffer->texture)
>>> + goto unmap_shm;
>>> +
>>> +  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
>>> +PIPE_BIND_LINEAR;
>>> +  buffer->linear_texture = scrn->base.pscreen->resource_c
>>> reate(scrn->base.pscreen,
>>> +
>>> );
>>> +  pixmap_buffer_texture = buffer->linear_texture;
>>> +
>>> +  if (!buffer->linear_texture)
>>> + goto no_linear_texture;
>>> +   } else {
>>> +  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
>>> +  buffer->texture = scrn->base.pscreen->resource_c
>>> reate(scrn->base.pscreen,
>>> +);
>>> +  if (!buffer->texture)
>>> + goto unmap_shm;
>>> +  pixmap_buffer_texture = buffer->texture;
>>> +   }
>>>  memset(, 0, sizeof(whandle));
>>>  whandle.type= DRM_API_HANDLE_TYPE_FD;
>>>  usage = PIPE_HANDLE_USAGE_EXPLICIT_FLUSH | PIPE_HANDLE_USAGE_READ;
>>>  

[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97863

Tapani Pälli  changed:

   What|Removed |Added

 CC||lem...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/va: flush the context before calling flush_frontbuffer(v2)

2016-09-19 Thread Michel Dänzer
On 16/09/16 07:33 PM, Nayan Deshmukh wrote:
> so that the texture is rendered to back buffer before calling
> flush_frontbuffer and can be copied to a different buffer in
> the function
> 
> v2: change comment style
> 
> Signed-off-by: Nayan Deshmukh 
> ---
>  src/gallium/state_trackers/va/surface.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/va/surface.c 
> b/src/gallium/state_trackers/va/surface.c
> index 00df69d..115db43 100644
> --- a/src/gallium/state_trackers/va/surface.c
> +++ b/src/gallium/state_trackers/va/surface.c
> @@ -321,10 +321,14 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID 
> surface_id, void* draw, short s
>return status;
> }
>  
> +   /* flush before calling flush_frontbuffer so that rendering is flushed
> +* to back buffer so the texture can be copied in flush_frontbuffer
> +*/
> +   drv->pipe->flush(drv->pipe, NULL, 0);
> +
> screen->flush_frontbuffer(screen, tex, 0, 0,
>   vscreen->get_private(vscreen), NULL);
>  
> -   drv->pipe->flush(drv->pipe, NULL, 0);
>  
> pipe_resource_reference(, NULL);
> pipe_surface_reference(_draw, NULL);
> 

This patch and patch 3 are

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/10] nir: Add a loop analysis pass

2016-09-19 Thread Timothy Arceri
On Fri, 2016-09-16 at 15:25 -0700, Jason Ekstrand wrote:
> > On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceri  wrote:
> > > > From: Thomas Helland 
> > 
> > 
> > This pass detects induction variables and calculates the
> > 
> > trip count of loops to be used for loop unrolling.
> > 
> > 
> > 
> > I've removed support for float induction values for now, for the
> > 
> > simple reason that they don't appear in my shader-db collection,
> > 
> > and so I don't see it as common enough that we want to pollute the
> > 
> > pass with this in the initial version.
> > 
> > 
> > 
> > V2: Rebase, adapt to removal of function overloads
> > 
> > 
> > 
> > V3: (Timothy Arceri)
> > 
> > > >  - don't try to find trip count if loop terminator conditional is a
phi
> > 
> >  - fix trip count for do-while loops
> > 
> >  - replace conditional type != alu assert with return
> > 
> >  - disable unrolling of loops with continues
> > 
> > > >  - multiple fixes to memory allocation, stop leaking and don't
destroy
> > 
> >    structs we want to use for unrolling.
> > 
> > > >  - fix iteration count bugs when induction var not on RHS of
condition
> > 
> >  - add FIXME for && conditions
> > 
> >  - calculate trip count for unsigned induction/limit vars
> > 
> > 
> > 
> > V4:
> > 
> > - count instructions in a loop
> > 
> > > > - set the limiting_terminator even if we can't find the trip count
for
> > 
> > > >  all terminators. This is needed for complex unrolling where we
handle
> > 
> >  2 terminators and the trip count is unknown for one of them.
> > 
> > - restruct structs so we don't keep information not required after
> > 
> >  analysis and remove dead fields.
> > 
> > > > - force unrolling in some cases as per the rules in the GLSL IR
pass
> > 
> > ---
> > 
> >  src/compiler/Makefile.sources       |    2 +
> > 
> >  src/compiler/nir/nir.h              |   36 +-
> > 
> > > >  src/compiler/nir/nir_loop_analyze.c | 1012
+++
> > 
> >  src/compiler/nir/nir_metadata.c     |    8 +-
> > 
> >  4 files changed, 1056 insertions(+), 2 deletions(-)
> > 
> >  create mode 100644 src/compiler/nir/nir_loop_analyze.c
> > 
> > 
> > 
> > > > diff --git a/src/compiler/Makefile.sources
b/src/compiler/Makefile.sources
> > 
> > index f5b4f9c..7ed26a9 100644
> > 
> > --- a/src/compiler/Makefile.sources
> > 
> > +++ b/src/compiler/Makefile.sources
> > 
> > @@ -190,6 +190,8 @@ NIR_FILES = \
> > 
> >         nir/nir_intrinsics.c \
> > 
> >         nir/nir_intrinsics.h \
> > 
> >         nir/nir_liveness.c \
> > 
> > +       nir/nir_loop_analyze.c \
> > 
> > +       nir/nir_loop_analyze.h \
> > 
> >         nir/nir_lower_alu_to_scalar.c \
> > 
> >         nir/nir_lower_atomics.c \
> > 
> >         nir/nir_lower_bitmap.c \
> > 
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > 
> > index ff7c422..49e8cd8 100644
> > 
> > --- a/src/compiler/nir/nir.h
> > 
> > +++ b/src/compiler/nir/nir.h
> > 
> > @@ -1549,9 +1549,36 @@ nir_if_last_else_node(nir_if *if_stmt)
> > 
> >  }
> > 
> > 
> > 
> >  typedef struct {
> > 
> > +   nir_if *nif;
> > 
> > +
> > 
> > +   nir_instr *conditional_instr;
> > 
> > +
> > 
> > +   struct list_head loop_terminator_link;
> > 
> > +} nir_loop_terminator;
> > 
> > +
> > 
> > +typedef struct {
> > 
> > +   /* Number of instructions in the loop */
> > 
> > +   unsigned num_instructions;
> > 
> > +
> > 
> > +   /* How many times the loop is run (if known) */
> > 
> > +   unsigned trip_count;
> > 
> > +   bool is_trip_count_known;
> > We could use 0 or -1 to indicate "I don't know trip count" instead of
an extra boolean.  Not sure that it matters much.
>  
> > +
> > 
> > +   /* Unroll the loop regardless of its size */
> > 
> > +   bool force_unroll;
> > It seems a bit odd to have this decide to force-unroll.  This is an
analysis pass, not a "make decisions" pass.
> 
> 
> 
>  
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > 

Re: [Mesa-dev] [PATCH 02/10] nir: Add a loop analysis pass

2016-09-19 Thread Timothy Arceri
I sent this reply on Saturday; however, it seems something went wrong and
it didn't make it out, so here it is again.

On Fri, 2016-09-16 at 15:25 -0700, Jason Ekstrand wrote:
> On Thu, Sep 15, 2016 at 12:03 AM, Timothy Arceri  abora.com> wrote:
> > From: Thomas Helland 
> > 
> > This pass detects induction variables and calculates the
> > trip count of loops to be used for loop unrolling.
> > 
> > I've removed support for float induction values for now, for the
> > simple reason that they don't appear in my shader-db collection,
> > and so I don't see it as common enough that we want to pollute the
> > pass with this in the initial version.
> > 
> > V2: Rebase, adapt to removal of function overloads
> > 
> > V3: (Timothy Arceri)
> >  - don't try to find trip count if loop terminator conditional is a
> > phi
> >  - fix trip count for do-while loops
> >  - replace conditional type != alu assert with return
> >  - disable unrolling of loops with continues
> >  - multiple fixes to memory allocation, stop leaking and don't
> > destroy
> >    structs we want to use for unrolling.
> >  - fix iteration count bugs when induction var not on RHS of
> > condition
> >  - add FIXME for && conditions
> >  - calculate trip count for unsigned induction/limit vars
> > 
> > V4:
> > - count instructions in a loop
> > - set the limiting_terminator even if we can't find the trip count
> > for
> >  all terminators. This is needed for complex unrolling where we
> > handle
> >  2 terminators and the trip count is unknown for one of them.
> > - restructure structs so we don't keep information not required after
> >  analysis and remove dead fields.
> > - force unrolling in some cases as per the rules in the GLSL IR
> > pass
> > ---
> >  src/compiler/Makefile.sources       |    2 +
> >  src/compiler/nir/nir.h              |   36 +-
> >  src/compiler/nir/nir_loop_analyze.c | 1012
> > +++
> >  src/compiler/nir/nir_metadata.c     |    8 +-
> >  4 files changed, 1056 insertions(+), 2 deletions(-)
> >  create mode 100644 src/compiler/nir/nir_loop_analyze.c
> > 
> > diff --git a/src/compiler/Makefile.sources
> > b/src/compiler/Makefile.sources
> > index f5b4f9c..7ed26a9 100644
> > --- a/src/compiler/Makefile.sources
> > +++ b/src/compiler/Makefile.sources
> > @@ -190,6 +190,8 @@ NIR_FILES = \
> >         nir/nir_intrinsics.c \
> >         nir/nir_intrinsics.h \
> >         nir/nir_liveness.c \
> > +       nir/nir_loop_analyze.c \
> > +       nir/nir_loop_analyze.h \
> >         nir/nir_lower_alu_to_scalar.c \
> >         nir/nir_lower_atomics.c \
> >         nir/nir_lower_bitmap.c \
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index ff7c422..49e8cd8 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -1549,9 +1549,36 @@ nir_if_last_else_node(nir_if *if_stmt)
> >  }
> > 
> >  typedef struct {
> > +   nir_if *nif;
> > +
> > +   nir_instr *conditional_instr;
> > +
> > +   struct list_head loop_terminator_link;
> > +} nir_loop_terminator;
> > +
> > +typedef struct {
> > +   /* Number of instructions in the loop */
> > +   unsigned num_instructions;
> > +
> > +   /* How many times the loop is run (if known) */
> > +   unsigned trip_count;
> > +   bool is_trip_count_known;
> 
> We could use 0 or -1 to indicate "I don't know trip count" instead of
> an extra boolean.  Not sure that it matters much.
>  
> > +
> > +   /* Unroll the loop regardless of its size */
> > +   bool force_unroll;
> 
> It seems a bit odd to have this decide to force-unroll.  This is an
> analysis pass, not a "make decisions" pass.
>  
> > +
> > +   nir_loop_terminator *limiting_terminator;
> > +
> > +   /* A list of loop_terminators terminating this loop. */
> > +   struct list_head loop_terminator_list;
> > +} nir_loop_info;
> > +
> > +typedef struct {
> >     nir_cf_node cf_node;
> > 
> >     struct exec_list body; /** < list of nir_cf_node */
> > +
> > +   nir_loop_info *info;
> >  } nir_loop;
> > 
> >  static inline nir_cf_node *
> > @@ -1576,6 +1603,7 @@ typedef enum {
> >     nir_metadata_dominance = 0x2,
> >     nir_metadata_live_ssa_defs = 0x4,
> >     nir_metadata_not_properly_reset = 0x8,
> > +   nir_metadata_loop_analysis = 0x16,
> >  } nir_metadata;
> > 
> >  typedef struct {
> > @@ -1758,6 +1786,8 @@ typedef struct nir_shader_compiler_options {
> >      * information must be inferred from the list of input
> > nir_variables.
> >      */
> >     bool use_interpolated_input_intrinsics;
> > +
> > +   unsigned max_unroll_iterations;
> >  } nir_shader_compiler_options;
> > 
> >  typedef struct nir_shader_info {
> > @@ -1962,7 +1992,7 @@ nir_loop *nir_loop_create(nir_shader
> > *shader);
> >  nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
> > 
> >  /** requests that the given pieces of metadata be generated */
> > -void nir_metadata_require(nir_function_impl *impl, nir_metadata
> > required);
> > +void 

Re: [Mesa-dev] [PATCH v5] clover: Introduce CLOVER_EXTRA_{COMPILER, LINKER}_OPTIONS

2016-09-19 Thread Francisco Jerez
Vedran Miletić  writes:

> The options specified in the CLOVER_EXTRA_COMPILER_OPTIONS shell
> variable are appended to the compiler options specified by the OpenCL
> program, if any.
> Analogously, the options specified in the CLOVER_EXTRA_LINKER_OPTIONS
> variable are appended to the linker options and the options specified
> in the CLOVER_EXTRA_COMPILER_OPTIONS variable.
>
> v2:
>  * rename to CLOVER_EXTRA_COMPILER_OPTIONS
>  * use debug_get_option
>  * append to linker options as well
>
> v3: code cleanups
>
> v4: separate CLOVER_EXTRA_LINKER_OPTIONS options
>
> v5:
>  * fix documentation typo
>  * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage
>
> Signed-off-by: Vedran Miletić 
> Reviewed-by[v1]: Edward O'Callaghan 
> ---
>  docs/envvars.html | 13 +
>  src/gallium/state_trackers/clover/llvm/invocation.cpp | 11 ---
>  2 files changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/docs/envvars.html b/docs/envvars.html
> index cf57ca5..252b783 100644
> --- a/docs/envvars.html
> +++ b/docs/envvars.html
> @@ -235,6 +235,19 @@ Setting to "tgsi", for example, will print all the TGSI 
> shaders.
>  See src/mesa/state_tracker/st_debug.c for other options.
>  
>  
> +Clover state tracker environment variables
> +
> +
> +CLOVER_EXTRA_COMPILER_OPTIONS - allows specifying additional compiler
> +options. Specified options are appended after the options set by the 
> OpenCL
> +program in clBuildProgram and/or clCompileProgram.
> +CLOVER_EXTRA_LINKER_OPTIONS - allows specifying additional linker
> +options. Specified options are appended after the options set by the 
> OpenCL
> +linker in clBuildProgram and/or clLinkProgram and options set in the
> +CLOVER_EXTRA_COMPILER_OPTIONS variable.
> +
> +
> +
>  Softpipe driver environment variables
>  
>  SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment 
> shaders
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index b5e8b52..68b9d2e 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -199,11 +199,13 @@ clover::llvm::compile_program(const std::string ,
>const std::string ,
>const std::string ,
>std::string _log) {
> +   const std::string all_opts = opts + " " +
> + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", 
> "");
> if (has_flag(debug::clc))
> -  debug::log(".cl", "// Options: " + opts + '\n' + source);
> +  debug::log(".cl", "// Options: " + all_opts + '\n' + source);
>  
> auto ctx = create_context(r_log);
> -   auto c = create_compiler_instance(target, tokenize(opts + " input.cl"),
> +   auto c = create_compiler_instance(target, tokenize(all_opts + " 
> input.cl"),
>   r_log);
> auto mod = compile(*ctx, *c, "input.cl", source, headers, target, opts,
>r_log);
> @@ -266,7 +268,10 @@ module
>  clover::llvm::link_program(const std::vector ,
> enum pipe_shader_ir ir, const std::string ,
> const std::string , std::string _log) {
> -   std::vector options = tokenize(opts + " input.cl");
> +   const std::string all_opts = opts + " " +
> + debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", 
> "") +
> + debug_get_option("CLOVER_EXTRA_LINKER_OPTIONS", "");

Why do you concatenate the two?  I thought the reason Jan suggested
splitting this was that a bunch of compiler options are invalid as
linker options or the other way around?

> +   std::vector options = tokenize(all_opts + " input.cl");
> const bool create_library = count("-create-library", options);
> erase_if(equals("-create-library"), options);
>  
> -- 
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2

2016-09-19 Thread Romain Failliot
2016-09-19 18:40 GMT-04:00 Marek Olšák :
> Do you mean the PC is frozen for 2 minutes or just occasional hiccups
> for 2 minutes?

Occasional hiccups for 2 minutes. The game works well, I wander around
and, from time to time, it freezes for a random duration between 2
seconds and 2 minutes (on average I'd say it's 30 seconds) and then
the game continues like nothing happened (the sound still works during
the freeze though).

I played yesterday for instance and I had 10 minutes without any
freeze and then 2 freezes in less than a minute. It's really random,
but it seems to be related to the new places I visit: once I have had one
or more freezes in an area, I won't have a freeze in that
area anymore. That's why I think it could be linked to shader
compilation.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2

2016-09-19 Thread Marek Olšák
On Mon, Sep 19, 2016 at 7:51 PM, Romain Failliot
 wrote:
> 2016-09-15 16:27 GMT-04:00 Marek Olšák :
>> Update your gcc I guess? Sorry, I don't know much about LLVM build
>> requirements. It works with gcc 5.4.0.
>
> I'm using a pretty recent gcc, and the 64-bit compilation works like a
> charm, it's only the 32-bit compilation that has trouble.
> Anyway, I've been advised to use a pre-built repo:
> https://copr.fedorainfracloud.org/coprs/mystro256/polaris-gfx/
>
> And it works pretty well now! I do have a bug with the game
> (i.e. Alien: Isolation) though: my PC hangs for up to 2 minutes. From
> my experience, it seems to be because of the shader compilation, but
> I'm not 100% sure. Is it a known bug? Is it a problem from the game or
> from the drivers?

Do you mean the PC is frozen for 2 minutes or just occasional hiccups
for 2 minutes?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] mesa: Implement ARB_shader_viewport_layer_array for i965

2016-09-19 Thread Dylan Baker
This extension is a combination of AMD_vertex_shader_viewport_index and
AMD_vertex_shader_layer, making it rather trivial to implement.

For gallium I *think* this needs a new cap because of the addition of
support in tessellation evaluation shaders, and since I don't have any
hardware to test it on, I've left that for someone else to wire up.

Signed-off-by: Dylan Baker 
Reviewed-by: Ilia Mirkin 
Reviewed-by: Kenneth Graunke 
---

v2: - changed messages to gen6+ instead of gen8+.
- remove GLL from EXT list.

 docs/features.txt|  2 +-
 docs/relnotes/12.1.0.html|  1 +
 src/compiler/glsl/builtin_variables.cpp  | 14 --
 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   |  2 ++
 src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
 src/mesa/main/extensions_table.h |  1 +
 src/mesa/main/mtypes.h   |  1 +
 8 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index d6c3240..ac93ec6 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -296,7 +296,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
   GL_ARB_shader_group_vote  DONE (nvc0)
   GL_ARB_shader_stencil_export  DONE (i965/gen9+, 
radeonsi, softpipe, llvmpipe, swr)
-  GL_ARB_shader_viewport_layer_arraynot started
+  GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+)
   GL_ARB_sparse_buffer  not started
   GL_ARB_sparse_texture not started
   GL_ARB_sparse_texture2not started
diff --git a/docs/relnotes/12.1.0.html b/docs/relnotes/12.1.0.html
index bb20e4f..65b8e4c 100644
--- a/docs/relnotes/12.1.0.html
+++ b/docs/relnotes/12.1.0.html
@@ -52,6 +52,7 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_indirect_parameters on radeonsi
 GL_ARB_shader_draw_parameters on radeonsi
 GL_ARB_shader_group_vote on nvc0
+GL_ARB_shader_viewport_layer_array on i965/gen6+
 GL_ARB_stencil_texturing on i965/hsw
 GL_ARB_texture_stencil8 on i965/hsw
 GL_EXT_window_rectangles on nv50, nvc0
diff --git a/src/compiler/glsl/builtin_variables.cpp 
b/src/compiler/glsl/builtin_variables.cpp
index 90278d6..8d6413e 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -1000,11 +1000,13 @@ builtin_variable_generator::generate_vs_special_vars()
   add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, 
"gl_BaseInstanceARB");
   add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
}
-   if (state->AMD_vertex_shader_layer_enable) {
+   if (state->AMD_vertex_shader_layer_enable ||
+   state->ARB_shader_viewport_layer_array_enable) {
   var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
   var->data.interpolation = INTERP_MODE_FLAT;
}
-   if (state->AMD_vertex_shader_viewport_index_enable) {
+   if (state->AMD_vertex_shader_viewport_index_enable ||
+   state->ARB_shader_viewport_layer_array_enable) {
   var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
   var->data.interpolation = INTERP_MODE_FLAT;
}
@@ -1066,6 +1068,8 @@ builtin_variable_generator::generate_tcs_special_vars()
 void
 builtin_variable_generator::generate_tes_special_vars()
 {
+   ir_variable *var;
+
add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn");
add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord");
@@ -1073,6 +1077,12 @@ builtin_variable_generator::generate_tes_special_vars()
 "gl_TessLevelOuter");
add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2),
 "gl_TessLevelInner");
+   if (state->ARB_shader_viewport_layer_array_enable) {
+  var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+  var->data.interpolation = INTERP_MODE_FLAT;
+  var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+  var->data.interpolation = INTERP_MODE_FLAT;
+   }
 }
 
 
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 436ddd0..a21ce50 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -608,6 +608,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_shader_subroutine),
EXT(ARB_shader_texture_image_samples),
EXT(ARB_shader_texture_lod),
+   EXT(ARB_shader_viewport_layer_array),
EXT(ARB_shading_language_420pack),
EXT(ARB_shading_language_packing),

Re: [Mesa-dev] [PATCH v3] clover: Pass unquoted compiler arguments to Clang

2016-09-19 Thread Francisco Jerez
Vedran Miletić  writes:

> OpenCL apps can quote arguments they pass to the OpenCL compiler, most
> commonly include paths containing spaces.
>
> If the Clang OpenCL compiler was called via a shell, the shell would
> split the arguments with respect to quotes and then remove quotes
> before passing the arguments to the compiler. Since we call Clang as a
> library, we have to split the argument with respect to quotes and then
> remove quotes before passing the arguments.
>
> v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS
>

Why did you remove the error checking?  Would it make sense to throw
invalid_build_options_error instead?  (which kind of replaced
error(CL_INVALID_COMPILER_OPTIONS) after the recent clLinkProgram
rework).

> v3: simplify parsing logic, use more C++11
> ---
>  src/gallium/state_trackers/clover/llvm/util.hpp | 33 
> ++---
>  1 file changed, 29 insertions(+), 4 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp 
> b/src/gallium/state_trackers/clover/llvm/util.hpp
> index 8db6f20..c770dd8 100644
> --- a/src/gallium/state_trackers/clover/llvm/util.hpp
> +++ b/src/gallium/state_trackers/clover/llvm/util.hpp
> @@ -42,11 +42,36 @@ namespace clover {
>inline std::vector
>    tokenize(const std::string &s) {
>   std::vector ss;
> - std::istringstream iss(s);
> - std::string t;
> + std::ostringstream oss;
>  
> - while (getline(iss, t, ' '))
> -ss.push_back(t);
> + // OpenCL programs can pass a single or double quoted argument, most
> + // frequently include path. This is useful so that the path 
> containing
> + // spaces is treated as a single argument, but we should anyhow 
> unquote
> + // quoted arguments before passing them to the compiler.
> + // We do not want to avoid using std::string::replace here, as 
> include
> + // path can contain quotes in file names.

The last sentence in the comment doesn't make much sense to me -- I
don't see how std::string::replace could be useful for this, nor why we
"don't want to avoid using" it.  Maybe just drop the last two lines?

> + bool escape_next = false;
> + bool in_quote_double = false;
> + bool in_quote_single = false;
> + for (auto c : s) {
> +if (escape_next) {
> +   oss.put(c);
> +   escape_next = false;
> +} else if (c == '\\') {
> +   escape_next = true;
> +} else if (c == '"' && !in_quote_single) {
> +   in_quote_double = !in_quote_double;
> +} else if (c == '\'' && !in_quote_double) {
> +   in_quote_single = !in_quote_single;
> +} else if (c != ' ' || in_quote_single || in_quote_double) {
> +   oss.put(c);
> +} else if (oss.tellp() > 0) {
> +   ss.emplace_back(oss.str());
> +   oss.str("");
> +}
> + }
> + if (oss.tellp() > 0)
> +ss.emplace_back(oss.str());
>  

Other than the two minor comments above, the code looks reasonable to
me.

>   return ss;
>}
> -- 
> 2.7.4


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] nv50/ir: optimize IMAD to SHLADD in presence of power of 2

2016-09-19 Thread Ilia Mirkin
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset
 wrote:
> Only if src1 is a power of 2 can we replace IMAD by SHLADD.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 13 +
>  1 file changed, 13 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 74a5a85..336f407 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -915,6 +915,7 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue 
> )
>  void
>  ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
>  {
> +   const Target *target = prog->getTarget();
> const int t = !s;
> const operation op = i->op;
> Instruction *newi = i;
> @@ -1016,6 +1017,18 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>   i->src(1).mod = i->src(2).mod;
>   i->setSrc(2, NULL);
>   i->op = OP_ADD;
> +  } else
> +  if (s == 1 && imm0.isPow2() && target->isOpSupported(i->op, i->dType)) 
> {
> + int32_t v;
> + switch (i->dType) {
> + case TYPE_S32: v = util_last_bit_signed(imm0.reg.data.s32) - 1; 
> break;
> + case TYPE_U32: v = util_last_bit(imm0.reg.data.u32) - 1; break;

Huh? Can the shift be a negative value? I think the shift immediate is
always positive.

> + default:
> +return;
> + }
> + bld.setPosition(i, false);
> + i->op = OP_SHLADD;
> + i->setSrc(1, bld.mkImm(v));
>}
>break;
> case OP_ADD:
> --
> 2.10.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD

2016-09-19 Thread Samuel Pitoiset



On 09/20/2016 12:16 AM, Ilia Mirkin wrote:

On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset
 wrote:

This instruction has been available since SM20 (Fermi) and allows doing
(a << b) + c in one shot. In some situations, IMAD should be
replaced by SHLADD when b is a power of 2, and ADD+SHL should be
replaced by SHLADD as well.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp   | 6 +++---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp  | 7 +--
 6 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d6011d9..bedbdcc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -57,6 +57,7 @@ enum operation
OP_MAD,
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
+   OP_SHLADD,
OP_ABS,
OP_NEG,
OP_NOT,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 22f2f5d..dbd0f7d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
"mad",
"fma",
"sad",
+   "shladd",
"abs",
"neg",
"not",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 7d7b315..273ec34 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] =
0, 0,   // NOP, PHI
0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
1, 1, 2,// MOV, LOAD, STORE
-   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
1, 1, 1,// ABS, NEG, NOT
2, 2, 2, 2, 2,  // AND, OR, XOR, SHL, SHR
2, 2, 1,// MAX, MIN, SAT
@@ -70,10 +70,10 @@ const OpClass Target::operationClass[] =
OPCLASS_MOVE,
OPCLASS_LOAD,
OPCLASS_STORE,
-   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
+   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
-   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
// ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index 6b8f767..cf8a08f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
case OP_DIV:
case OP_MOD:
   return false;
+   case OP_SHLADD:
+  if (isFloatType(ty))
+ return false;
+  break;
default:
   break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index b37ea73..5ab95fc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
case OP_EXTBF:
case OP_EXIT: // want exit modifier instead (on NOP if required)
case OP_MEMBAR:
+   case OP_SHLADD:
   return false;
case OP_SAD:
   return ty == TYPE_S32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index f75e395..d8fa285 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -105,6 +105,7 @@ static const struct opProperties _initProps[] =
{ OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+   { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 },
{ OP_MADSP,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_ABS,0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_NEG,0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
@@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo()
{
   // ADD, MUL, MAD, FMA, AND, OR, 

Re: [Mesa-dev] [PATCH 2/6] nvc0/ir: add emission for SHLADD

2016-09-19 Thread Ilia Mirkin
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset
 wrote:
> Unfortunately, we can't use the emit helpers for GF100/GK110
> because src1 and src2 are swapped.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 53 
> ++
>  .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 32 +
>  .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 44 ++
>  3 files changed, 129 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> index 61c450b..2c4e3a7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> @@ -96,6 +96,7 @@ private:
> void emitDMUL(const Instruction *);
> void emitIMAD(const Instruction *);
> void emitISAD(const Instruction *);
> +   void emitSHLADD(const Instruction *);
> void emitFMAD(const Instruction *);
> void emitDMAD(const Instruction *);
> void emitMADSP(const Instruction *i);
> @@ -757,6 +758,55 @@ CodeEmitterGK110::emitISAD(const Instruction *i)
>  }
>
>  void
> +CodeEmitterGK110::emitSHLADD(const Instruction *i)
> +{
> +   uint8_t addOp =
> +  (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ 
> i->src(1).mod.neg());
> +   const ImmediateValue *imm = i->src(1).get()->asImm();
> +   assert(imm);
> +
> +   if (i->src(2).getFile() == FILE_IMMEDIATE) {
> +  code[0] = 0x1;
> +  code[1] = 0xc0c << 20;
> +   } else {
> +  code[0] = 0x2;
> +  code[1] = 0x20c << 20;
> +   }
> +   code[1] |= addOp << 19;
> +
> +   emitPredicate(i);
> +
> +   defId(i->def(0), 2);
> +   srcId(i->src(0), 10);
> +
> +   if (i->flagsDef >= 0)
> +  code[1] |= 1 << 18;
> +
> +   assert(!(imm->reg.data.u32 & 0xffe0));
> +   code[1] |= imm->reg.data.u32 << 10;
> +
> +   switch (i->src(2).getFile()) {
> +   case FILE_GPR:
> +  assert(code[0] & 0x2);
> +  code[1] |= 0xc << 28;
> +  srcId(i->src(2), 23);
> +  break;
> +   case FILE_MEMORY_CONST:
> +  assert(code[0] & 0x2);
> +  code[1] |= 0x4 << 28;
> +  setCAddress14(i->src(2));
> +  break;
> +   case FILE_IMMEDIATE:
> +  assert(code[0] & 0x1);
> +  setShortImmediate(i, 2);
> +  break;
> +   default:
> +  assert(!"bad src2 file");
> +  break;
> +   }
> +}
> +
> +void
>  CodeEmitterGK110::emitNOT(const Instruction *i)
>  {
> code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
> @@ -2403,6 +2453,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
> case OP_SAD:
>emitISAD(insn);
>break;
> +   case OP_SHLADD:
> +  emitSHLADD(insn);
> +  break;
> case OP_NOT:
>emitNOT(insn);
>break;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> index cfde66c..973a105 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> @@ -152,6 +152,7 @@ private:
> void emitIADD();
> void emitIMUL();
> void emitIMAD();
> +   void emitSHLADD();
> void emitIMNMX();
> void emitICMP();
> void emitISET();
> @@ -1813,6 +1814,34 @@ CodeEmitterGM107::emitIMAD()
>  }
>
>  void
> +CodeEmitterGM107::emitSHLADD()

The convention in the GM107 emitter is to use the nvdisasm names here.
So ISCADD.

> +{
> +   switch (insn->src(2).getFile()) {
> +   case FILE_GPR:
> +  emitInsn(0x5c18);
> +  emitGPR (0x14, insn->src(2));
> +  break;
> +   case FILE_MEMORY_CONST:
> +  emitInsn(0x4c18);
> +  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
> +  break;
> +   case FILE_IMMEDIATE:
> +  emitInsn(0x3818);
> +  emitIMMD(0x14, 19, insn->src(2));
> +  break;
> +   default:
> +  assert(!"bad src1 file");
> +  break;
> +   }
> +   emitNEG (0x31, insn->src(0));
> +   emitNEG (0x30, insn->src(2));
> +   emitCC  (0x2f);
> +   emitIMMD(0x27, 5, insn->src(1));
> +   emitGPR (0x08, insn->src(0));
> +   emitGPR (0x00, insn->def(0));
> +}
> +
> +void
>  CodeEmitterGM107::emitIMNMX()
>  {
> switch (insn->src(1).getFile()) {
> @@ -3098,6 +3127,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
>   emitIMAD();
>}
>break;
> +   case OP_SHLADD:
> +  emitSHLADD();
> +  break;
> case OP_MIN:
> case OP_MAX:
>if (isFloatType(insn->dType)) {
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> index d8ca6ab..c874b86 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> @@ -101,6 +101,7 @@ private:
> void emitDMUL(const Instruction *);
> void emitIMAD(const Instruction *);
> 

Re: [Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD

2016-09-19 Thread Ilia Mirkin
On Mon, Sep 19, 2016 at 6:11 PM, Samuel Pitoiset
 wrote:
> This instruction has been available since SM20 (Fermi) and allows doing
> (a << b) + c in one shot. In some situations, IMAD should be
> replaced by SHLADD when b is a power of 2, and ADD+SHL should be
> replaced by SHLADD as well.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp   | 6 +++---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp  | 1 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp  | 7 +--
>  6 files changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index d6011d9..bedbdcc 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -57,6 +57,7 @@ enum operation
> OP_MAD,
> OP_FMA,
> OP_SAD, // abs(src0 - src1) + src2
> +   OP_SHLADD,
> OP_ABS,
> OP_NEG,
> OP_NOT,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index 22f2f5d..dbd0f7d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
> "mad",
> "fma",
> "sad",
> +   "shladd",
> "abs",
> "neg",
> "not",
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> index 7d7b315..273ec34 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
> @@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] =
> 0, 0,   // NOP, PHI
> 0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
> 1, 1, 2,// MOV, LOAD, STORE
> -   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
> +   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, 
> SHLADD
> 1, 1, 1,// ABS, NEG, NOT
> 2, 2, 2, 2, 2,  // AND, OR, XOR, SHL, SHR
> 2, 2, 1,// MAX, MIN, SAT
> @@ -70,10 +70,10 @@ const OpClass Target::operationClass[] =
> OPCLASS_MOVE,
> OPCLASS_LOAD,
> OPCLASS_STORE,
> -   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
> +   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
> OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
> OPCLASS_ARITH, OPCLASS_ARITH,
> -   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
> +   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
> // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
> OPCLASS_CONVERT, OPCLASS_CONVERT,
> OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> index 6b8f767..cf8a08f 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
> @@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
> case OP_DIV:
> case OP_MOD:
>return false;
> +   case OP_SHLADD:
> +  if (isFloatType(ty))
> + return false;
> +  break;
> default:
>break;
> }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index b37ea73..5ab95fc 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
> case OP_EXTBF:
> case OP_EXIT: // want exit modifier instead (on NOP if required)
> case OP_MEMBAR:
> +   case OP_SHLADD:
>return false;
> case OP_SAD:
>return ty == TYPE_S32;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index f75e395..d8fa285 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -105,6 +105,7 @@ static const struct opProperties _initProps[] =
> { OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
> { OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
> { OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] 
> constraint
> +   { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 },
> { OP_MADSP,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
> { OP_ABS,0x0, 

Re: [Mesa-dev] [PATCH] mesa: Implement ARB_shader_viewport_layer_array for i965

2016-09-19 Thread Kenneth Graunke
On Friday, September 9, 2016 4:14:55 PM PDT Dylan Baker wrote:
> This extension is a combination of AMD_vertex_shader_viewport_index and
> AMD_vertex_shader_layer, making it rather trivial to implement.
> 
> For gallium I *think* this needs a new cap because of the addition of
> support in tessellation evaluation shaders, and since I don't have any
> hardware to test it on, I've left that for someone else to wire up.
> 
> Since this requires GL 4.1, this is only available on gen8+.

You've actually enabled this on Gen6+, by virtue of:

> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 0f28546..6573bc2 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -330,6 +330,7 @@ intelInitExtensions(struct gl_context *ctx)
> */
>if (ctx->API == API_OPENGL_CORE) {
>   ctx->Extensions.ARB_shader_subroutine = true;
> + ctx->Extensions.ARB_shader_viewport_layer_array = true;
>   ctx->Extensions.ARB_viewport_array = true;
>   ctx->Extensions.AMD_vertex_shader_viewport_index = true;
>}

   ^^^ this is in a Gen6+ and core only block.

I agree with Ilia that this is the right thing to do - it makes sense
to expose it where AMD_vertex_shader_viewport_index is already exposed.

I'd just drop that sentence from your commit message.

> diff --git a/src/mesa/main/extensions_table.h 
> b/src/mesa/main/extensions_table.h
> index 75cdcb8..38636b4 100644
> --- a/src/mesa/main/extensions_table.h
> +++ b/src/mesa/main/extensions_table.h
> @@ -115,6 +115,7 @@ EXT(ARB_shader_storage_buffer_object, 
> ARB_shader_storage_buffer_object
>  EXT(ARB_shader_subroutine   , ARB_shader_subroutine  
> ,  x , GLC,  x ,  x , 2010)
>  EXT(ARB_shader_texture_image_samples, 
> ARB_shader_texture_image_samples   , GLL, GLC,  x ,  x , 2014)
>  EXT(ARB_shader_texture_lod  , ARB_shader_texture_lod 
> , GLL, GLC,  x ,  x , 2009)
> +EXT(ARB_shader_viewport_layer_array , 
> ARB_shader_viewport_layer_array, GLL, GLC,  x ,  x , 2015)
>  EXT(ARB_shading_language_100, dummy_true 
> , GLL,  x ,  x ,  x , 2003)
>  EXT(ARB_shading_language_420pack, ARB_shading_language_420pack   
> , GLL, GLC,  x ,  x , 2011)
>  EXT(ARB_shading_language_packing, ARB_shading_language_packing   
> , GLL, GLC,  x ,  x , 2011)

As Ilia mentioned, please drop "GLL", changing it to " x ".

Otherwise, this is:
Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] nvc0/ir: add emission for SHLADD

2016-09-19 Thread Samuel Pitoiset
Unfortunately, we can't use the emit helpers for GF100/GK110
because src1 and src2 are swapped.

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 53 ++
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 32 +
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 44 ++
 3 files changed, 129 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 61c450b..2c4e3a7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -96,6 +96,7 @@ private:
void emitDMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
+   void emitSHLADD(const Instruction *);
void emitFMAD(const Instruction *);
void emitDMAD(const Instruction *);
void emitMADSP(const Instruction *i);
@@ -757,6 +758,55 @@ CodeEmitterGK110::emitISAD(const Instruction *i)
 }
 
 void
+CodeEmitterGK110::emitSHLADD(const Instruction *i)
+{
+   uint8_t addOp =
+  (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+   const ImmediateValue *imm = i->src(1).get()->asImm();
+   assert(imm);
+
+   if (i->src(2).getFile() == FILE_IMMEDIATE) {
+  code[0] = 0x1;
+  code[1] = 0xc0c << 20;
+   } else {
+  code[0] = 0x2;
+  code[1] = 0x20c << 20;
+   }
+   code[1] |= addOp << 19;
+
+   emitPredicate(i);
+
+   defId(i->def(0), 2);
+   srcId(i->src(0), 10);
+
+   if (i->flagsDef >= 0)
+  code[1] |= 1 << 18;
+
+   assert(!(imm->reg.data.u32 & 0xffe0));
+   code[1] |= imm->reg.data.u32 << 10;
+
+   switch (i->src(2).getFile()) {
+   case FILE_GPR:
+  assert(code[0] & 0x2);
+  code[1] |= 0xc << 28;
+  srcId(i->src(2), 23);
+  break;
+   case FILE_MEMORY_CONST:
+  assert(code[0] & 0x2);
+  code[1] |= 0x4 << 28;
+  setCAddress14(i->src(2));
+  break;
+   case FILE_IMMEDIATE:
+  assert(code[0] & 0x1);
+  setShortImmediate(i, 2);
+  break;
+   default:
+  assert(!"bad src2 file");
+  break;
+   }
+}
+
+void
 CodeEmitterGK110::emitNOT(const Instruction *i)
 {
code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src
@@ -2403,6 +2453,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn)
case OP_SAD:
   emitISAD(insn);
   break;
+   case OP_SHLADD:
+  emitSHLADD(insn);
+  break;
case OP_NOT:
   emitNOT(insn);
   break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index cfde66c..973a105 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -152,6 +152,7 @@ private:
void emitIADD();
void emitIMUL();
void emitIMAD();
+   void emitSHLADD();
void emitIMNMX();
void emitICMP();
void emitISET();
@@ -1813,6 +1814,34 @@ CodeEmitterGM107::emitIMAD()
 }
 
 void
+CodeEmitterGM107::emitSHLADD()
+{
+   switch (insn->src(2).getFile()) {
+   case FILE_GPR:
+  emitInsn(0x5c18);
+  emitGPR (0x14, insn->src(2));
+  break;
+   case FILE_MEMORY_CONST:
+  emitInsn(0x4c18);
+  emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
+  break;
+   case FILE_IMMEDIATE:
+  emitInsn(0x3818);
+  emitIMMD(0x14, 19, insn->src(2));
+  break;
+   default:
+  assert(!"bad src1 file");
+  break;
+   }
+   emitNEG (0x31, insn->src(0));
+   emitNEG (0x30, insn->src(2));
+   emitCC  (0x2f);
+   emitIMMD(0x27, 5, insn->src(1));
+   emitGPR (0x08, insn->src(0));
+   emitGPR (0x00, insn->def(0));
+}
+
+void
 CodeEmitterGM107::emitIMNMX()
 {
switch (insn->src(1).getFile()) {
@@ -3098,6 +3127,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
  emitIMAD();
   }
   break;
+   case OP_SHLADD:
+  emitSHLADD();
+  break;
case OP_MIN:
case OP_MAX:
   if (isFloatType(insn->dType)) {
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index d8ca6ab..c874b86 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -101,6 +101,7 @@ private:
void emitDMUL(const Instruction *);
void emitIMAD(const Instruction *);
void emitISAD(const Instruction *);
+   void emitSHLADD(const Instruction *a);
void emitFMAD(const Instruction *);
void emitDMAD(const Instruction *);
void emitMADSP(const Instruction *);
@@ -759,6 +760,46 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
 }
 
 void
+CodeEmitterNVC0::emitSHLADD(const Instruction *i)
+{
+   uint8_t addOp =
+  (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+   const ImmediateValue *imm = 

Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()

2016-09-19 Thread Samuel Pitoiset



On 09/19/2016 11:59 PM, Brian Paul wrote:

Seems OK here.

Tested-by: Brian Paul 


Thanks for testing and reviewing guys.



On 09/19/2016 02:43 AM, Nicolai Hähnle wrote:

Reviewed-by: Nicolai Hähnle 

However, you might want to check with the VMWare guys. I seem to recall
that MSVC is a bit peculiar with some of these library functions.

Cheers,
Nicolai

On 14.09.2016 20:37, Samuel Pitoiset wrote:

This allows using hexadecimal numbers, which are automatically
detected by strtol() when the base is 0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/util/u_debug.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug.c
b/src/gallium/auxiliary/util/u_debug.c
index 4619526..dd3e167 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long
dfault)
const char *str;

str = os_get_option(name);
-   if (!str)
+   if (!str) {
   result = dfault;
-   else {
-  long sign;
-  char c;
-  c = *str++;
-  if (c == '-') {
- sign = -1;
- c = *str++;
-  }
-  else {
- sign = 1;
-  }
-  result = 0;
-  while ('0' <= c && c <= '9') {
- result = result*10 + (c - '0');
- c = *str++;
+   } else {
+  char *endptr;
+
+      result = strtol(str, &endptr, 0);
+  if (str == endptr) {
+ /* Restore the default value when no digits were found. */
+ result = dfault;
   }
-  result *= sign;
}

if (debug_get_option_should_print())




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] nv50/ir: teach insnCanLoad() about SHLADD

2016-09-19 Thread Samuel Pitoiset
Commutativity is not allowed with SHLADD, but src2 can accept
loads. To allow the load propagation pass to do its job, add a
special case like for SUCLAMP because src1 is always an immediate.

This IMAD to SHLADD optimization helps a bunch of shaders from Tomb
Raider, Victor Vran, UE4 demos (+15% perf with Elemental) and Shadow
Warrior.

GF100/GK104:

total instructions in shared programs :2838045 -> 2834712 (-0.12%)
total gprs used in shared programs:396684 -> 396386 (-0.08%)
total local used in shared programs   :34416 -> 34416 (0.00%)

localgpr   inst  bytes
helped   0 32611051105
  hurt   0  55   3   3

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index d8fa285..9bc5b8d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -334,6 +334,8 @@ TargetNVC0::insnCanLoad(const Instruction *i, int s,
   if (i->src(k).getFile() == FILE_IMMEDIATE) {
  if (k == 2 && i->op == OP_SUCLAMP) // special case
 continue;
+ if (k == 1 && i->op == OP_SHLADD) // special case
+continue;
  if (i->getSrc(k)->reg.data.u64 != 0)
 return false;
   } else
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] nv50/ir: optimize SHLADD(a, b, c) to MOV((a << b) + c)

2016-09-19 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 1b99ce7..75c448e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -778,6 +778,9 @@ ConstantFolding::expr(Instruction *i,
   }
   break;
}
+   case OP_SHLADD:
+  res.data.u32 = (a->data.u32 << b->data.u32) + c->data.u32;
+  break;
default:
   return;
}
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] nv50/ir: optimize IMAD to SHLADD in presence of power of 2

2016-09-19 Thread Samuel Pitoiset
If and only if src1 is a power of 2 can we replace IMAD by SHLADD.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 74a5a85..336f407 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -915,6 +915,7 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue )
 void
 ConstantFolding::opnd(Instruction *i, ImmediateValue , int s)
 {
+   const Target *target = prog->getTarget();
const int t = !s;
const operation op = i->op;
Instruction *newi = i;
@@ -1016,6 +1017,18 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
, int s)
  i->src(1).mod = i->src(2).mod;
  i->setSrc(2, NULL);
  i->op = OP_ADD;
+  } else
+  if (s == 1 && imm0.isPow2() && target->isOpSupported(i->op, i->dType)) {
+ int32_t v;
+ switch (i->dType) {
+ case TYPE_S32: v = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
+ case TYPE_U32: v = util_last_bit(imm0.reg.data.u32) - 1; break;
+ default:
+return;
+ }
+ bld.setPosition(i, false);
+ i->op = OP_SHLADD;
+ i->setSrc(1, bld.mkImm(v));
   }
   break;
case OP_ADD:
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] nv50/ir: optimize SHLADD(a, b, 0x0) to SHL(a, b)

2016-09-19 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 336f407..1b99ce7 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -907,6 +907,14 @@ ConstantFolding::opnd3(Instruction *i, ImmediateValue 
)
  return;
   }
   break;
+   case OP_SHLADD:
+  if (imm2.isInteger(0)) {
+ i->op = OP_SHL;
+ i->setSrc(2, NULL);
+ foldCount++;
+ return;
+  }
+  break;
default:
   return;
}
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] nv50/ir: add preliminary support for SHLADD

2016-09-19 Thread Samuel Pitoiset
This instruction has been available since SM20 (Fermi) and allows computing
(a << b) + c in one shot. In some situations, IMAD should be
replaced by SHLADD when b is a power of 2, and ADD+SHL should be
replaced by SHLADD as well.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h| 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp| 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp   | 6 +++---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp | 4 
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp  | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp  | 7 +--
 6 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index d6011d9..bedbdcc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -57,6 +57,7 @@ enum operation
OP_MAD,
OP_FMA,
OP_SAD, // abs(src0 - src1) + src2
+   OP_SHLADD,
OP_ABS,
OP_NEG,
OP_NOT,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 22f2f5d..dbd0f7d 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -86,6 +86,7 @@ const char *operationStr[OP_LAST + 1] =
"mad",
"fma",
"sad",
+   "shladd",
"abs",
"neg",
"not",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 7d7b315..273ec34 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -30,7 +30,7 @@ const uint8_t Target::operationSrcNr[] =
0, 0,   // NOP, PHI
0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT
1, 1, 2,// MOV, LOAD, STORE
-   2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+   2, 2, 2, 2, 2, 3, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD, SHLADD
1, 1, 1,// ABS, NEG, NOT
2, 2, 2, 2, 2,  // AND, OR, XOR, SHL, SHR
2, 2, 1,// MAX, MIN, SAT
@@ -70,10 +70,10 @@ const OpClass Target::operationClass[] =
OPCLASS_MOVE,
OPCLASS_LOAD,
OPCLASS_STORE,
-   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD
+   // ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD, SHLADD
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
-   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+   OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
// ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
OPCLASS_CONVERT, OPCLASS_CONVERT,
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
index 6b8f767..cf8a08f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp
@@ -61,6 +61,10 @@ TargetGM107::isOpSupported(operation op, DataType ty) const
case OP_DIV:
case OP_MOD:
   return false;
+   case OP_SHLADD:
+  if (isFloatType(ty))
+ return false;
+  break;
default:
   break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index b37ea73..5ab95fc 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -437,6 +437,7 @@ TargetNV50::isOpSupported(operation op, DataType ty) const
case OP_EXTBF:
case OP_EXIT: // want exit modifier instead (on NOP if required)
case OP_MEMBAR:
+   case OP_SHLADD:
   return false;
case OP_SAD:
   return ty == TYPE_S32;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index f75e395..d8fa285 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -105,6 +105,7 @@ static const struct opProperties _initProps[] =
{ OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x2 },
{ OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint
+   { OP_SHLADD, 0x3, 0x0, 0x0, 0x0, 0x4, 0x6 },
{ OP_MADSP,  0x0, 0x0, 0x0, 0x0, 0x6, 0x2 },
{ OP_ABS,0x0, 0x0, 0x0, 0x0, 0x1, 0x0 },
{ OP_NEG,0x0, 0x1, 0x0, 0x0, 0x1, 0x0 },
@@ -158,13 +159,13 @@ void TargetNVC0::initOpInfo()
{
   // ADD, MUL, MAD, FMA, AND, OR, XOR, MAX, MIN, SET_AND, SET_OR, SET_XOR,
   // SET, SELP, SLCT
-  0x0670ca00, 0x003f, 0x, 0x
+  

Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()

2016-09-19 Thread Brian Paul

Seems OK here.

Tested-by: Brian Paul 

On 09/19/2016 02:43 AM, Nicolai Hähnle wrote:

Reviewed-by: Nicolai Hähnle 

However, you might want to check with the VMWare guys. I seem to recall
that MSVC is a bit peculiar with some of these library functions.

Cheers,
Nicolai

On 14.09.2016 20:37, Samuel Pitoiset wrote:

This allows using hexadecimal numbers, which are automatically
detected by strtol() when the base is 0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/util/u_debug.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug.c
b/src/gallium/auxiliary/util/u_debug.c
index 4619526..dd3e167 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long dfault)
const char *str;

str = os_get_option(name);
-   if (!str)
+   if (!str) {
   result = dfault;
-   else {
-  long sign;
-  char c;
-  c = *str++;
-  if (c == '-') {
- sign = -1;
- c = *str++;
-  }
-  else {
- sign = 1;
-  }
-  result = 0;
-  while ('0' <= c && c <= '9') {
- result = result*10 + (c - '0');
- c = *str++;
+   } else {
+  char *endptr;
+
+  result = strtol(str, , 0);
+  if (str == endptr) {
+ /* Restore the default value when no digits were found. */
+ result = dfault;
   }
-  result *= sign;
}

if (debug_get_option_should_print())



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/ir: Test thread dispatch packing assumptions.

2016-09-19 Thread Francisco Jerez
Not intended for upstream.  Should cause a GPU hang if some thread is
executed with a non-contiguous dispatch mask breaking assumptions of
brw_stage_has_packed_dispatch().  Doesn't cause any CTS, DEQP or
Piglit regressions, while replacing brw_stage_has_packed_dispatch()
with a dummy implementation that unconditionally returns true on top
of this patch causes multiple GPU hangs.

v2: Drop VEC4 test and clean up slightly for upstream (Jason).
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 03d4f5f..c5fa3f7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6832,3 +6832,33 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
 
return g.get_assembly(final_assembly_size);
 }
+
+/**
+ * Test the dispatch mask packing assumptions of
+ * brw_stage_has_packed_dispatch().  Call this from e.g. the top of
+ * fs_visitor::emit_nir_code() to cause a GPU hang if any shader invocation is
+ * executed with an unexpected dispatch mask.
+ */
+static UNUSED void
+brw_fs_test_dispatch_packing(const fs_builder )
+{
+   const gl_shader_stage stage = bld.shader->stage;
+
+   if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage,
+ bld.shader->stage_prog_data)) {
+  const fs_builder ubld = bld.exec_all().group(1, 0);
+  const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
+  const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
+   brw_dmask_reg());
+
+  ubld.ADD(tmp, mask, brw_imm_ud(1));
+  ubld.AND(tmp, mask, tmp);
+
+  /* This will loop forever if the dispatch mask doesn't have the expected
+   * form '2^n-1', in which case tmp will be non-zero.
+   */
+  bld.emit(BRW_OPCODE_DO);
+  bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
+  set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE));
+   }
+}
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/12] anv: Implement HiZ for simple render passes

2016-09-19 Thread Nanley Chery
On Fri, Sep 02, 2016 at 05:27:11PM -0700, Jason Ekstrand wrote:
> On Wed, Aug 31, 2016 at 8:29 PM, Nanley Chery  wrote:
> 
> > This series enables Hierarchical depth buffer rendering and fast depth
> > clears
> > for render passes with a single subpass running on platforms BDW+.
> > Platforms
> > pre-BDW can test this feature with an environment variable. The FPS of some
> > demos are roughly estimated to increase by as much as ~50% on a SKL GT2.
> >
> > This feature was partially implemented by Chad and Jason. Where applicable,
> > I've tried to accurately note the modifications I've made to their patches
> > without being too verbose. I've also tried to maintain the authorship of
> > their
> > patches when the core of their work remained.
> >
> > The only patch which wasn't retained due to the core of the work being lost
> > was a patch to create a HiZ surface. This was replaced with my patch to
> > update
> > an existing function which does so. This diverged enough for me to feel at
> > risk of misrepresenting the original author's work.
> >
> > Any suggestions with respect to my annotating method, notices of
> > incorrectly
> > attributed credit, or general comments are welcome.
> >
> 
> Feel free to take more credit. :)  Chad and I wrote sketchy, untested,
> skeleton patches.  You were the one who got it working!
> 
> Patches 1, 3-7, and 10-12 have a few comments here and there.  Assuming
> those comments are addressed, those patches are
> 
> Reviewed-by: Jason Ekstrand 
> 

I've made more updates to patch 10 and 12 than your comments so
I'll wait for you to take a look at the V2 before applying your Rb.

> We talked about 2 offline and I sent my little 6-patch series that makes
> the original plan work.
> 
> On patch 9, I gave a bunch of comments but one thing was clear: We need
> tests.  In the interest of merging patches, I think I'd recommend that we
> disable HiZ for mipmapped surfaces (we can just not allocate the surface)
> and don't do fast-clears for anything other than full-RT clears.  That
> seems like the shortest path to getting the patches merged quickly with
> some guarantee of correctness.
> 

I had a local patch to disable gen8 multisampled and BDW+ mipmapped HiZ in
patch 9, but I prefer your plan of not allocating the surface at all.
Partial clears are currently tested by the CTS.

> For partial clears and mipmapped HiZ, I think we need more tests.  There
> may be CTS tests for partial depth clears (In particular, the subpass
> tests) but I'm not sure.  I'll leave it up to you as to whether you'd
> rather write CTS tests or crucible tests.  Crucible may be easier, but the
> CTS needs those tests too, so maybe we should be good citizens and put them
> there?
> 
> 

Yes, we do need more tests. The CTS is steadily increasing its test
coverage so I'm thinking of revisiting those cases once a test
exists for it. If I do write a test, it'd likely be a crucible one.

Nanley

> >
> > Chad Versace (4):
> >   anv: Add anv_image::hiz_surface
> >   anv: Add func anv_image_has_hiz()
> >   anv: Allocate hiz surface
> >   genX/cmd_buffer: Enable rendering to HiZ
> >
> > Jason Ekstrand (3):
> >   anv: Move BindImageMemory to anv_image.c
> >   anv/image: Memset hiz surfaces to 0 when binding memory
> >   anv/cmd_buffer: Add code for performing HZ operations
> >
> > Nanley Chery (5):
> >   isl: Correct a comment in the isl_format enum
> >   isl: Update isl_surf_get_hiz_surf()
> >   isl: Make MSAA pixel scaling function public
> >   genX/cmd_buffer: Enable fast depth clears
> >   anv/TODO: Update the HiZ task
> >
> >  src/intel/isl/isl.c|  41 ++--
> >  src/intel/isl/isl.h|   6 +-
> >  src/intel/vulkan/TODO  |   2 +-
> >  src/intel/vulkan/anv_device.c  |  20 --
> >  src/intel/vulkan/anv_genX.h|   3 +
> >  src/intel/vulkan/anv_image.c   |  67 ++-
> >  src/intel/vulkan/anv_pass.c|  11 +++
> >  src/intel/vulkan/anv_private.h |  18 +
> >  src/intel/vulkan/gen7_cmd_buffer.c |   5 ++
> >  src/intel/vulkan/gen8_cmd_buffer.c | 134 ++
> > +++
> >  src/intel/vulkan/genX_cmd_buffer.c |  45 +++--
> >  11 files changed, 313 insertions(+), 39 deletions(-)
> >
> > --
> > 2.9.3
> >
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.

2016-09-19 Thread Emil Velikov
On 19 September 2016 at 16:38, Jason Ekstrand  wrote:
> It all looks fine to me.  Feel free to add a
>
> Reviewed-by: Jason Ekstrand 
>
> That said, my knowledge of the details of the DRI vfuncs is very limited so
> I'd like to see Emil or Axel sign off on it too, especially since they were
> the ones who had all the comments.
>
Thanks for double-checking Jason.

Afaics patches have a few outstanding style issues (mentioned last
round), but I'll squash those just before committing tomorrow morning.

Regards,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)

2016-09-19 Thread Emil Velikov
Hi Nayan,

On 16 September 2016 at 13:51, Nayan Deshmukh  wrote:

> +   scrn->pipe = scrn->base.pscreen->context_create(scrn->base.pscreen,
> +   >base, 0);
> +
I think you're forgetting to destroy the context in
vl_dri3_screen_destroy and proper handling if context_create fails
seems to be missing.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/12] genX/cmd_buffer: Enable rendering to HiZ

2016-09-19 Thread Nanley Chery
On Fri, Sep 02, 2016 at 03:16:21PM -0700, Chad Versace wrote:
> On Wed 31 Aug 2016, Nanley Chery wrote:
> > From: Chad Versace 
> > 
> > Nanley Chery:
> > (rebase)
> >  - Resolve conflicts with new anv_batch_emit macro
> > (amend)
> >  - Remove wip! tag and handle a QPitch TODO
> >  - Emit 3DSTATE_HIER_DEPTH_BUFFER on pre-BDW systems
> >  - Only use HiZ for single-subpass renderpasses
> >  - Emit the HiZ instruction before the stencil instruction to follow the
> >optimized clear sequence specified in the PRMs
> >  - Don't modify clear params
> >  - Enable resolves when a HiZ buffer is used to ensure depth buffer validity
> > 
> > Provides an FPS increase of ~15% on the Sascha triangle and multisampling
> > demos.
> > 
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/gen8_cmd_buffer.c |  4 
> >  src/intel/vulkan/genX_cmd_buffer.c | 41 
> > ++
> >  2 files changed, 41 insertions(+), 4 deletions(-)
> > 
> > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c 
> > b/src/intel/vulkan/gen8_cmd_buffer.c
> > index 4f27350..7f65fe2 100644
> > --- a/src/intel/vulkan/gen8_cmd_buffer.c
> > +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> > @@ -414,6 +414,10 @@ genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer 
> > *cmd_buffer, enum anv_hz_op op)
> > if (iview == NULL || !anv_image_has_hiz(iview->image))
> >return;
> >  
> > +   /* FIXME: Implement multi-subpass HiZ */
> > +   if (cmd_buffer->state.pass->subpass_count > 1)
> > +  return;
> > +
> > const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> > const bool full_surface_op =
> >   cmd_state->render_area.extent.width == iview->extent.width &&
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > index 95ed5f2..349d2a4 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -1040,6 +1040,7 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
> > *cmd_buffer)
> >anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
> > const struct anv_image *image = iview ? iview->image : NULL;
> > const bool has_depth = image && (image->aspects & 
> > VK_IMAGE_ASPECT_DEPTH_BIT);
> > +   const bool has_hiz = image != NULL && anv_image_has_hiz(image);
> > const bool has_stencil =
> >image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
> 
> >  
> > @@ -1052,7 +1053,12 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
> > *cmd_buffer)
> >   db.SurfaceType   = SURFTYPE_2D;
> >   db.DepthWriteEnable  = true;
> >   db.StencilWriteEnable= has_stencil;
> > - db.HierarchicalDepthBufferEnable = false;
> > +
> > + if (cmd_buffer->state.pass->subpass_count == 1) {
> > +db.HierarchicalDepthBufferEnable = has_hiz;
> > + } else {
> > +anv_finishme("Multiple-subpass HiZ not implemented");
> > + }
> >  
> >   db.SurfaceFormat = isl_surf_get_depth_format(>isl_dev,
> >
> > >depth_surface.isl);
> > @@ -1104,6 +1110,34 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer 
> > *cmd_buffer)
> >}
> > }
> >  
> > +   if (has_hiz) {
> 
> Note: This codepath is hit sometimes when
> 3DSTATE_DEPTH_BUFFER.HierarchicalDepthBufferEnable is false.
> Specifically, when subpass_count > 1. It's weird, but I doubt it causes
> any harm. After all, all the surface data programmed by
> 3DSTATE_HIER_BUFFER is valid here regardless of the value of
> HierarchicalDepthBufferEnable.
> 
> > +  anv_batch_emit(_buffer->batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), 
> > hdb) {
> > + hdb.HierarchicalDepthBufferObjectControlState = GENX(MOCS);
> > + hdb.SurfacePitch = image->hiz_surface.isl.row_pitch - 1;
> > + hdb.SurfaceBaseAddress = (struct anv_address) {
> > +.bo = image->bo,
> > +.offset = image->offset + image->hiz_surface.offset,
> > + };
> > +#if GEN_GEN >= 8
> > + /* From the SKL PRM Vol2a:
> > +  *
> > +  *The interpretation of this field is dependent on Surface 
> > Type
> > +  *as follows:
> > +  *- SURFTYPE_1D: distance in pixels between array slices
> > +  *- SURFTYPE_2D/CUBE: distance in rows between array slices
> > +  *- SURFTYPE_3D: distance in rows between R - slices
> > +  *
> > +  * ISL implements HiZ surfaces for 1D depth buffers as 2D. 
> > Therefore
> > +  * the depth buffer needs to be checked for the dimension.
> > +  */
> > + hdb.SurfaceQPitch =
> > +image->depth_surface.isl.dim == ISL_SURF_DIM_1D ?
> > +   isl_surf_get_array_pitch_el(>hiz_surface.isl) >> 2 :
> > +   isl_surf_get_array_pitch_el_rows(>hiz_surface.isl) 
> > >> 2;
> > 

Re: [Mesa-dev] [PATCH] vl/dri3: handle the case of different GPU(v4.1)

2016-09-19 Thread Leo Liu



On 09/17/2016 07:33 AM, Nayan Deshmukh wrote:

Hi Leo,

Could you push the patches? I don't have the push access.



Can you rebase all your reviewed patches, and add RB to it, and then you 
can send them to me ?


Sorry for being too busy to do this for you.

Regards,
Leo



Regards,
Nayan.

On Fri, Sep 16, 2016 at 7:44 PM, Leo Liu > wrote:


This Patch is Reviewed-by: Leo Liu >


On 09/16/2016 08:51 AM, Nayan Deshmukh wrote:

In case of prime, when rendering is done on a GPU other than the
server GPU, use a separate linear buffer for each back buffer
which will be displayed using the present extension.

v2: Use a separate linear buffer for each back buffer (Michel)
v3: Change variable names and fix coding style (Leo and Emil)
v4: Use PIPE_BIND_SAMPLER_VIEW for back buffer in case when
 a separate linear buffer is used (Michel)
v4.1: remove empty line

Signed-off-by: Nayan Deshmukh >
---
  src/gallium/auxiliary/vl/vl_winsys_dri3.c | 61
---
  1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 3d596a6..e0aaad8 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -49,6 +49,7 @@
  struct vl_dri3_buffer
  {
 struct pipe_resource *texture;
+   struct pipe_resource *linear_texture;
   uint32_t pixmap;
 uint32_t sync_fence;
@@ -69,6 +70,8 @@ struct vl_dri3_screen
 xcb_present_event_t eid;
 xcb_special_event_t *special_event;
  +   struct pipe_context *pipe;
+
 struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM];
 int cur_back;
  @@ -82,6 +85,7 @@ struct vl_dri3_screen
 int64_t last_ust, ns_frame, last_msc, next_msc;
   bool flushed;
+   bool is_different_gpu;
  };
static void
@@ -102,6 +106,8 @@ dri3_free_back_buffer(struct
vl_dri3_screen *scrn,
 xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
 xshmfence_unmap_shm(buffer->shm_fence);
 pipe_resource_reference(>texture, NULL);
+   if (buffer->linear_texture)
+   pipe_resource_reference(>linear_texture, NULL);
 FREE(buffer);
  }
  @@ -209,7 +215,7 @@ dri3_alloc_back_buffer(struct
vl_dri3_screen *scrn)
 xcb_sync_fence_t sync_fence;
 struct xshmfence *shm_fence;
 int buffer_fd, fence_fd;
-   struct pipe_resource templ;
+   struct pipe_resource templ, *pixmap_buffer_texture;
 struct winsys_handle whandle;
 unsigned usage;
  @@ -226,8 +232,7 @@ dri3_alloc_back_buffer(struct
vl_dri3_screen *scrn)
goto close_fd;
   memset(, 0, sizeof(templ));
-   templ.bind = PIPE_BIND_RENDER_TARGET |
PIPE_BIND_SAMPLER_VIEW |
-PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
+   templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
 templ.format = PIPE_FORMAT_B8G8R8X8_UNORM;
 templ.target = PIPE_TEXTURE_2D;
 templ.last_level = 0;
@@ -235,16 +240,34 @@ dri3_alloc_back_buffer(struct
vl_dri3_screen *scrn)
 templ.height0 = scrn->height;
 templ.depth0 = 1;
 templ.array_size = 1;
-   buffer->texture =
scrn->base.pscreen->resource_create(scrn->base.pscreen,
-);
-   if (!buffer->texture)
-  goto unmap_shm;
  +   if (scrn->is_different_gpu) {
+  buffer->texture =
scrn->base.pscreen->resource_create(scrn->base.pscreen,
+   );
+  if (!buffer->texture)
+ goto unmap_shm;
+
+  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
+PIPE_BIND_LINEAR;
+  buffer->linear_texture =
scrn->base.pscreen->resource_create(scrn->base.pscreen,
+ );
+  pixmap_buffer_texture = buffer->linear_texture;
+
+  if (!buffer->linear_texture)
+ goto no_linear_texture;
+   } else {
+  templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED;
+  buffer->texture =
scrn->base.pscreen->resource_create(scrn->base.pscreen,
+   );
+  if (!buffer->texture)
+ goto unmap_shm;
+  pixmap_buffer_texture = buffer->texture;
+   }
 memset(, 0, 

Re: [Mesa-dev] [PATCH 11/12] genX/cmd_buffer: Enable fast depth clears

2016-09-19 Thread Nanley Chery
On Fri, Sep 02, 2016 at 05:12:58PM -0700, Jason Ekstrand wrote:
> On Wed, Aug 31, 2016 at 8:29 PM, Nanley Chery  wrote:
> 
> > Provides an FPS increase of ~30% on the Sascha triangle and multisampling
> > demos.
> >
> > Clears that happen within a render pass via vkCmdClearAttachments are safe
> > even if the clear color changes. This is because the meta implementation
> > does
> > not use LOAD_OP_CLEAR which avoids any conflicts with 3DSTATE_CLEAR_PARAMS.
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_pass.c| 11 +++
> >  src/intel/vulkan/gen8_cmd_buffer.c |  6 ++
> >  src/intel/vulkan/genX_cmd_buffer.c |  4 +---
> >  3 files changed, 18 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
> > index 69c3c7e..823f9cf 100644
> > --- a/src/intel/vulkan/anv_pass.c
> > +++ b/src/intel/vulkan/anv_pass.c
> > @@ -155,5 +155,16 @@ void anv_GetRenderAreaGranularity(
> >  VkRenderPassrenderPass,
> >  VkExtent2D* pGranularity)
> >  {
> > +   ANV_FROM_HANDLE(anv_render_pass, pass, renderPass);
> > +
> > +   /* This granularity is needed for HiZ fast clears */
> > +   for (unsigned i = 0; i < pass->subpass_count; ++i) {
> > +  if (pass->subpasses[i].depth_stencil_attachment !=
> > +  VK_ATTACHMENT_UNUSED) {
> > + *pGranularity = (VkExtent2D) { 8, 4 };
> > + return;
> > +  }
> > +   }
> >
> 
> Thanks for remembering this!  As mentioned in an earlier e-mail, I'm not
> sure this is needed for Sky Lake or Broadwell with a format other than
> D16_UNORM but setting it all the time doesn't hurt.  8x4 isn't onerous and
> once we get fast clears, it'll get much bigger anyway.
> 
> 

Np. It's a bit unfortunate that the Vulkan apps I've tested don't call
this function. I do need to update the comment as it's not exactly true
(gen8 can have smaller alignments depending on the sample count).

> > +
> > *pGranularity = (VkExtent2D) { 1, 1 };
> >  }
> > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c
> > b/src/intel/vulkan/gen8_cmd_buffer.c
> > index 7f65fe2..ec91ecd 100644
> > --- a/src/intel/vulkan/gen8_cmd_buffer.c
> > +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> > @@ -451,6 +451,12 @@ genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer
> > *cmd_buffer, enum anv_hz_op op)
> >   cmd_state->render_area.extent.height % align_h)
> >  return;
> >}
> > +
> > +  anv_batch_emit(_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp)
> > {
> > + cp.DepthClearValueValid = true;
> > + cp.DepthClearValue =
> > +cmd_buffer->state.attachments[ds].clear_value.depthStencil.
> > depth;
> > +  }
> >
> 
> Hrm... I'm not sure where the best place to set CLEAR_PARAMS is.  It might
> almost be better in BeginSubpass...  In any case, I think this works.  We
> can move it later if we want.
> 
> 
> >break;
> > case ANV_HZ_OP_DEPTH_RESOLVE:
> >if (cmd_buffer->state.pass->attachments[ds].store_op !=
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > index 349d2a4..7d2a6bd 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -1159,9 +1159,6 @@ cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer
> > *cmd_buffer)
> > } else {
> >anv_batch_emit(_buffer->batch, GENX(3DSTATE_STENCIL_BUFFER),
> > sb);
> > }
> > -
> > -   /* Clear the clear params. */
> > -   anv_batch_emit(_buffer->batch, GENX(3DSTATE_CLEAR_PARAMS), cp);
> >  }
> >
> >  /**
> > @@ -1196,6 +1193,7 @@ void genX(CmdBeginRenderPass)(
> >
> > genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
> > genX(cmd_buffer_do_hz_op)(cmd_buffer, ANV_HZ_OP_HIZ_RESOLVE);
> > +   genX(cmd_buffer_do_hz_op)(cmd_buffer, ANV_HZ_OP_CLEAR);
> > anv_cmd_buffer_clear_subpass(cmd_buffer);
> >  }
> >
> > --
> > 2.9.3
> >
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/12] anv/cmd_buffer: Add code for performing HZ operations

2016-09-19 Thread Nanley Chery
On Fri, Sep 02, 2016 at 05:01:28PM -0700, Jason Ekstrand wrote:
> On Wed, Aug 31, 2016 at 8:29 PM, Nanley Chery  wrote:
> 
> > From: Jason Ekstrand 
> >
> 
> First off, this is your patch not mine.  The patch of mine you based this
> on was little more than a skeleton that demonstrated how to use
> PIPE_CONTROL.  All of the interesting stuff in here is yours.
> 
> 

Thanks! V2's commit message will be a lot simpler.

> > Nanley Chery:
> > (rebase)
> >  - Resolve conflicts with the new anv_batch_emit macro
> > (amend)
> >  - Update commit title
> >  - Combine all HZ operations into one function
> >  - Add code for performing HiZ resolve operations
> >  - Add proper stencil and multisampling support
> >  - Set the proper clear rectangles
> >  - Add required cases for aborting an HZ operation
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_genX.h|   3 +
> >  src/intel/vulkan/anv_private.h |   6 ++
> >  src/intel/vulkan/gen7_cmd_buffer.c |   5 ++
> >  src/intel/vulkan/gen8_cmd_buffer.c | 124 ++
> > +++
> >  4 files changed, 138 insertions(+)
> >
> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
> > index cf5a232..16de990 100644
> > --- a/src/intel/vulkan/anv_genX.h
> > +++ b/src/intel/vulkan/anv_genX.h
> > @@ -54,6 +54,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct
> > anv_cmd_buffer *cmd_buffer);
> >
> >  void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer
> > *cmd_buffer);
> >
> > +void genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer,
> > +   enum anv_hz_op op);
> > +
> >  VkResult
> >  genX(graphics_pipeline_create)(VkDevice _device,
> > struct anv_pipeline_cache *cache,
> > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> > private.h
> > index 5718a19..40325fd 100644
> > --- a/src/intel/vulkan/anv_private.h
> > +++ b/src/intel/vulkan/anv_private.h
> > @@ -1401,6 +1401,12 @@ anv_cmd_buffer_get_depth_stencil_view(const struct
> > anv_cmd_buffer *cmd_buffer);
> >
> >  void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);
> >
> > +enum anv_hz_op {
> > +   ANV_HZ_OP_CLEAR,
> > +   ANV_HZ_OP_HIZ_RESOLVE,
> > +   ANV_HZ_OP_DEPTH_RESOLVE,
> > +};
> >
> 
> Now that blorp is in its own folder, we could use the blorp_hiz_op enum
> instead of rolling our own.  That'll make it easier to add gen7 support.
> 
> 

Sounds good.

> > +
> >  struct anv_fence {
> > struct anv_bo bo;
> > struct drm_i915_gem_execbuffer2 execbuf;
> > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c
> > b/src/intel/vulkan/gen7_cmd_buffer.c
> > index 61778aa..a057a04 100644
> > --- a/src/intel/vulkan/gen7_cmd_buffer.c
> > +++ b/src/intel/vulkan/gen7_cmd_buffer.c
> > @@ -323,6 +323,11 @@ genX(cmd_buffer_flush_dynamic_state)(struct
> > anv_cmd_buffer *cmd_buffer)
> > cmd_buffer->state.dirty = 0;
> >  }
> >
> > +void
> > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum
> > anv_hz_op op)
> > +{
> > +}
> > +
> >  void genX(CmdSetEvent)(
> >  VkCommandBuffer commandBuffer,
> >  VkEvent event,
> > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c
> > b/src/intel/vulkan/gen8_cmd_buffer.c
> > index e22b4e2..4f27350 100644
> > --- a/src/intel/vulkan/gen8_cmd_buffer.c
> > +++ b/src/intel/vulkan/gen8_cmd_buffer.c
> > @@ -399,6 +399,130 @@ genX(cmd_buffer_flush_compute_state)(struct
> > anv_cmd_buffer *cmd_buffer)
> > genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
> >  }
> >
> > +
> > +/**
> > + * Emit the HZ_OP packet in the sequence specified by the BDW PRM section
> > + * entitled: "Optimized Depth Buffer Clear and/or Stencil Buffer Clear."
> > + */
> > +void
> > +genX(cmd_buffer_do_hz_op)(struct anv_cmd_buffer *cmd_buffer, enum
> > anv_hz_op op)
> > +{
> > +   struct anv_cmd_state *cmd_state = _buffer->state;
> > +   const struct anv_image_view *iview =
> > +  anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
> > +
> > +   if (iview == NULL || !anv_image_has_hiz(iview->image))
> > +  return;
> >
> 
> This looks like something that would be better as an assert.  Silently
> doing nothing is probably fine for resolves.  For clears on the other hand,
> it means silently *not* clearing which would be bad.
> 
> 

We don't silently skip clearing. Clears are marked as having been
performed through the following line's execution later on in this
function:

 /* Mark aspects as cleared */
 cmd_state->attachments[ds].pending_clear_aspects = 0;

> > +
> > +   const uint32_t ds = cmd_state->subpass->depth_stencil_attachment;
> > +   const bool full_surface_op =
> > + cmd_state->render_area.extent.width == iview->extent.width
> > &&
> > + cmd_state->render_area.extent.height ==
> > iview->extent.height;
> >
> 
> I think you also need 

[Mesa-dev] [PATCH v5] clover: Introduce CLOVER_EXTRA_{COMPILER, LINKER}_OPTIONS

2016-09-19 Thread Vedran Miletić
The options specified in the CLOVER_EXTRA_COMPILER_OPTIONS shell
variable are appended to the compiler options specified by the OpenCL
program, if any.
Analogously, the options specified in the CLOVER_EXTRA_LINKER_OPTIONS
variable are appended to the linker options and the options specified
in the CLOVER_EXTRA_COMPILER_OPTIONS variable.

v2:
 * rename to CLOVER_EXTRA_COMPILER_OPTIONS
 * use debug_get_option
 * append to linker options as well

v3: code cleanups

v4: separate CLOVER_EXTRA_LINKER_OPTIONS options

v5:
 * fix documentation typo
 * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage

Signed-off-by: Vedran Miletić 
Reviewed-by[v1]: Edward O'Callaghan 
---
 docs/envvars.html | 13 +
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 11 ---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/docs/envvars.html b/docs/envvars.html
index cf57ca5..252b783 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -235,6 +235,19 @@ Setting to "tgsi", for example, will print all the TGSI 
shaders.
 See src/mesa/state_tracker/st_debug.c for other options.
 
 
+Clover state tracker environment variables
+
+
+CLOVER_EXTRA_COMPILER_OPTIONS - allows specifying additional compiler
+options. Specified options are appended after the options set by the OpenCL
+program in clBuildProgram and/or clCompileProgram.
+CLOVER_EXTRA_LINKER_OPTIONS - allows specifying additional linker
+options. Specified options are appended after the options set by the OpenCL
+linker in clBuildProgram and/or clLinkProgram and options set in the
+CLOVER_EXTRA_COMPILER_OPTIONS variable.
+
+
+
 Softpipe driver environment variables
 
 SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index b5e8b52..68b9d2e 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -199,11 +199,13 @@ clover::llvm::compile_program(const std::string ,
   const std::string ,
   const std::string ,
   std::string _log) {
+   const std::string all_opts = opts + " " +
+ debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", "");
if (has_flag(debug::clc))
-  debug::log(".cl", "// Options: " + opts + '\n' + source);
+  debug::log(".cl", "// Options: " + all_opts + '\n' + source);
 
auto ctx = create_context(r_log);
-   auto c = create_compiler_instance(target, tokenize(opts + " input.cl"),
+   auto c = create_compiler_instance(target, tokenize(all_opts + " input.cl"),
  r_log);
auto mod = compile(*ctx, *c, "input.cl", source, headers, target, opts,
   r_log);
@@ -266,7 +268,10 @@ module
 clover::llvm::link_program(const std::vector ,
enum pipe_shader_ir ir, const std::string ,
const std::string , std::string _log) {
-   std::vector options = tokenize(opts + " input.cl");
+   const std::string all_opts = opts + " " +
+ debug_get_option("CLOVER_EXTRA_COMPILER_OPTIONS", "") 
+
+ debug_get_option("CLOVER_EXTRA_LINKER_OPTIONS", "");
+   std::vector options = tokenize(all_opts + " input.cl");
const bool create_library = count("-create-library", options);
erase_if(equals("-create-library"), options);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] Rename the DEBUG macro to MESA_DEBUG

2016-09-19 Thread Vedran Miletić
On 09/07/2016 06:52 PM, Vedran Miletić wrote:
> LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
> general practice, we should avoid using such generic names when it is
> possible to do so.
> 
> This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
> and removes workarounds previously used to enable building Mesa with
> LLVM (pop_macro() and push_macro() function calls).
> 
> v2:
>  * Rename remaining occurrences found by git grep '\<DEBUG\>'
>  * Use /* !MESA_DEBUG */ with #else instead of /* MESA_DEBUG */
> 
> Signed-off-by: Vedran Miletić 
> Acked-by: Christian König 
> ---

Anyone?

Regards,
Vedran

-- 
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] clover: Pass unquoted compiler arguments to Clang

2016-09-19 Thread Vedran Miletić
OpenCL apps can quote arguments they pass to the OpenCL compiler, most
commonly include paths containing spaces.

If the Clang OpenCL compiler was called via a shell, the shell would
split the arguments with respect to quotes and then remove quotes
before passing the arguments to the compiler. Since we call Clang as a
library, we have to split the argument with respect to quotes and then
remove quotes before passing the arguments.

v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS

v3: simplify parsing logic, use more C++11
---
 src/gallium/state_trackers/clover/llvm/util.hpp | 33 ++---
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp 
b/src/gallium/state_trackers/clover/llvm/util.hpp
index 8db6f20..c770dd8 100644
--- a/src/gallium/state_trackers/clover/llvm/util.hpp
+++ b/src/gallium/state_trackers/clover/llvm/util.hpp
@@ -42,11 +42,36 @@ namespace clover {
   inline std::vector
   tokenize(const std::string ) {
  std::vector ss;
- std::istringstream iss(s);
- std::string t;
+ std::ostringstream oss;
 
- while (getline(iss, t, ' '))
-ss.push_back(t);
+ // OpenCL programs can pass a single or double quoted argument, most
+ // frequently include path. This is useful so that the path containing
+ // spaces is treated as a single argument, but we should anyhow 
unquote
+ // quoted arguments before passing them to the compiler.
+ // We want to avoid using std::string::replace here, as include
+ // path can contain quotes in file names.
+ bool escape_next = false;
+ bool in_quote_double = false;
+ bool in_quote_single = false;
+ for (auto c : s) {
+if (escape_next) {
+   oss.put(c);
+   escape_next = false;
+} else if (c == '\\') {
+   escape_next = true;
+} else if (c == '"' && !in_quote_single) {
+   in_quote_double = !in_quote_double;
+} else if (c == '\'' && !in_quote_double) {
+   in_quote_single = !in_quote_single;
+} else if (c != ' ' || in_quote_single || in_quote_double) {
+   oss.put(c);
+} else if (oss.tellp() > 0) {
+   ss.emplace_back(oss.str());
+   oss.str("");
+}
+ }
+ if (oss.tellp() > 0)
+ss.emplace_back(oss.str());
 
  return ss;
   }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Problem with RX 480 on Alien: Isolation and Dota 2

2016-09-19 Thread Romain Failliot
2016-09-15 16:27 GMT-04:00 Marek Olšák :
> Update your gcc I guess? Sorry, I don't know much about LLVM build
> requirements. It works with gcc 5.4.0.

I'm using a pretty recent gcc, and the 64-bit compilation works like a
charm, it's only the 32-bit compilation that has trouble.
Anyway, I've been advised to use a pre-built repo:
https://copr.fedorainfracloud.org/coprs/mystro256/polaris-gfx/

And it works pretty well now! I do have a bug with the game
(i.e. Alien: Isolation) though: my PC hangs for up to 2 minutes. From
my experience, it seems to be because of shader compilation, but
I'm not 100% sure. Is it a known bug? Is it a problem from the game or
from the drivers?

Thanks!
Romain
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images

2016-09-19 Thread Jason Ekstrand
This looks good to me.

Reviewed-by: Jason Ekstrand 

Do we have tests for this? If not, we should write some.  I know there are
other image atomic tests in the CTS. They shouldn't be hard to extend.

On Sep 19, 2016 9:36 AM, "Lionel Landwerlin"  wrote:

> OpAtomicLoad/Store should have pointer to images just like the rest of the
> atomic operators. These couple of lines were poorly copied from the
> ssbo/shared_vars cases (the only ones currently tested by the CTS).
>
> Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for OpAtomicLoad/Store"
> Cc: Timothy Arceri 
> Cc: Jason Ekstrand 
> ---
>  src/compiler/spirv/spirv_to_nir.c | 13 +++--
>  1 file changed, 3 insertions(+), 10 deletions(-)
>
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 49338b2..12b43ee 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
> case SpvOpAtomicIDecrement:
> case SpvOpAtomicIAdd:
> case SpvOpAtomicISub:
> +   case SpvOpAtomicLoad:
> case SpvOpAtomicSMin:
> case SpvOpAtomicUMin:
> case SpvOpAtomicSMax:
> @@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp
> opcode,
>image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
>break;
>
> -   case SpvOpAtomicLoad: {
> -  image.image =
> - vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
> -  break;
> -   }
> -
> -   case SpvOpAtomicStore: {
> -  image.image =
> - vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
> +   case SpvOpAtomicStore:
> +  image = *vtn_value(b, w[1], vtn_value_type_image_pointer)->image;
>break;
> -   }
>
> case SpvOpImageQuerySize:
>image.image =
> --
> 2.9.3
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] clover: assert struct argument is compiled usably

2016-09-19 Thread Vedran Miletić


On 09/19/2016 07:08 PM, Vedran Miletić wrote:
> On 07/28/2016 07:52 AM, Francisco Jerez wrote:
>> Emil Velikov  writes:
>>
>>> On 6 June 2016 at 00:02, Vedran Miletić  wrote:
 On 06/04/2016 04:18 AM, Francisco Jerez wrote:
>
> Serge Martin  writes:
>
>> From: Vedran Miletić 
>>
>> Make sure that a struct argument did not get compiled into a pointer
>> type with the byval attribute. If we try to handle the pointer with
>> byval, we end up with the pointer size instead of the struct size.
>>
> Ugh, is that a bug in the code below?  How are byval pointers supposed
> to be handled here?  Exactly as if the argument wasn't a pointer at all
> by providing a copy of the pointed-to object as-is in the kernel input
> buffer?  In that case wouldn't the code below need to pass the correct
> size of the pointed-to object as target/api size rather than the size of
> the pointer?
>

 Yes, byval+pointer should be handled as there is no pointer at all.

 I have tried passing the correct size, but IIRC LLVM AMDGPU backend does 
 not
 generate correct asm for byval+pointer variant. The simple solution is to
 fail with an assert here unless Clang generates code both Clover and the
 backend can handle.

>>> Gents, can anyone confirm if the series is still applicable for master
>>> or it's been superseded ?
>>>
>> Hi Emil, I don't think PATCH 1 is useful, but v1.1 of PATCH 2 still
>> makes sense.  It looks like it's going to need some minor rework though
>> for it to apply cleanly on master.
>>
>>> Thanks
>>> Emil
> 
> Hi Emil, Francisco, Serge,
> 
> now that PATCH 1 is merged, can we also merge PATCH 2?
> 
> Thanks,
> Vedran
> 

Oops, it's the other way round. Anyhow, Serge's patch "clover: fix
getting struct args api size" got merged, and I am asking to merge this one.

Regards,
Vedran

-- 
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] clover: assert struct argument is compiled usably

2016-09-19 Thread Vedran Miletić
On 07/28/2016 07:52 AM, Francisco Jerez wrote:
> Emil Velikov  writes:
> 
>> On 6 June 2016 at 00:02, Vedran Miletić  wrote:
>>> On 06/04/2016 04:18 AM, Francisco Jerez wrote:

 Serge Martin  writes:

> From: Vedran Miletić 
>
> Make sure that a struct argument did not get compiled into a pointer
> type with the byval attribute. If we try to handle the pointer with
> byval, we end up with the pointer size instead of the struct size.
>
 Ugh, is that a bug in the code below?  How are byval pointers supposed
 to be handled here?  Exactly as if the argument wasn't a pointer at all
 by providing a copy of the pointed-to object as-is in the kernel input
 buffer?  In that case wouldn't the code below need to pass the correct
 size of the pointed-to object as target/api size rather than the size of
 the pointer?

>>>
>>> Yes, byval+pointer should be handled as there is no pointer at all.
>>>
>>> I have tried passing the correct size, but IIRC LLVM AMDGPU backend does not
>>> generate correct asm for byval+pointer variant. The simple solution is to
>>> fail with an assert here unless Clang generates code both Clover and the
>>> backend can handle.
>>>
>> Gents, can anyone confirm if the series is still applicable for master
>> or it's been superseded ?
>>
> Hi Emil, I don't think PATCH 1 is useful, but v1.1 of PATCH 2 still
> makes sense.  It looks like it's going to need some minor rework though
> for it to apply cleanly on master.
> 
>> Thanks
>> Emil

Hi Emil, Francisco, Serge,

now that PATCH 1 is merged, can we also merge PATCH 2?

Thanks,
Vedran

-- 
Vedran Miletić
vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images

2016-09-19 Thread Lionel Landwerlin

On 19/09/16 18:02, Jason Ekstrand wrote:


This looks good to me.

Reviewed-by: Jason Ekstrand


Do we have tests for this? If not, we should write some.  I know there 
are other image atomic tests in the CTS. They shouldn't be hard to extend.




Not that I saw. I'll add some.




On Sep 19, 2016 9:36 AM, "Lionel Landwerlin" wrote:


OpAtomicLoad/Store should have pointer to images just like the
rest of the
atomic operators. These couple of lines were poorly copied from the
ssbo/shared_vars cases (the only ones currently tested by the CTS).

Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for
OpAtomicLoad/Store"
Cc: Timothy Arceri
Cc: Jason Ekstrand
---
 src/compiler/spirv/spirv_to_nir.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c
b/src/compiler/spirv/spirv_to_nir.c
index 49338b2..12b43ee 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b,
SpvOp opcode,
case SpvOpAtomicIDecrement:
case SpvOpAtomicIAdd:
case SpvOpAtomicISub:
+   case SpvOpAtomicLoad:
case SpvOpAtomicSMin:
case SpvOpAtomicUMin:
case SpvOpAtomicSMax:
@@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b,
SpvOp opcode,
   image = *vtn_value(b, w[3],
vtn_value_type_image_pointer)->image;
   break;

-   case SpvOpAtomicLoad: {
-  image.image =
- vtn_value(b, w[3],
vtn_value_type_access_chain)->access_chain;
-  break;
-   }
-
-   case SpvOpAtomicStore: {
-  image.image =
- vtn_value(b, w[1],
vtn_value_type_access_chain)->access_chain;
+   case SpvOpAtomicStore:
+  image = *vtn_value(b, w[1],
vtn_value_type_image_pointer)->image;
   break;
-   }

case SpvOpImageQuerySize:
   image.image =
--
2.9.3



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] spirv: fix AtomicLoad/Store on images

2016-09-19 Thread Lionel Landwerlin
OpAtomicLoad/Store should have pointer to images just like the rest of the
atomic operators. These couple of lines were poorly copied from the
ssbo/shared_vars cases (the only ones currently tested by the CTS).

Fixes 2afb950161f847d9b0a7 "spirv/nir: Add support for OpAtomicLoad/Store"
Cc: Timothy Arceri 
Cc: Jason Ekstrand 
---
 src/compiler/spirv/spirv_to_nir.c | 13 +++--
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/compiler/spirv/spirv_to_nir.c 
b/src/compiler/spirv/spirv_to_nir.c
index 49338b2..12b43ee 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1671,6 +1671,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
case SpvOpAtomicIDecrement:
case SpvOpAtomicIAdd:
case SpvOpAtomicISub:
+   case SpvOpAtomicLoad:
case SpvOpAtomicSMin:
case SpvOpAtomicUMin:
case SpvOpAtomicSMax:
@@ -1681,17 +1682,9 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
   image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image;
   break;
 
-   case SpvOpAtomicLoad: {
-  image.image =
- vtn_value(b, w[3], vtn_value_type_access_chain)->access_chain;
-  break;
-   }
-
-   case SpvOpAtomicStore: {
-  image.image =
- vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+   case SpvOpAtomicStore:
+  image = *vtn_value(b, w[1], vtn_value_type_image_pointer)->image;
   break;
-   }
 
case SpvOpImageQuerySize:
   image.image =
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97863

--- Comment #2 from Elio  ---
Forgot to include the website:

www.khronos.org/registry/webgl/conformance-suites/1.0.3/webgl-conformance-tests.html

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions, textures, uniforms and more

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97863

Elio  changed:

   What|Removed |Added

Summary|[BXT] Webglc is failing a   |[BXT] Webglc is failing a
   |lot of tests related to |lot of tests related to
   |extensions textures |extensions, textures,
   |uniforms and more   |uniforms and more

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions textures uniforms and more

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97863

Bug ID: 97863
   Summary: [BXT] Webglc is failing a lot of tests related to
extensions textures uniforms and more
   Product: Mesa
   Version: unspecified
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: elio.martinez.mon...@intel.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 126626
  --> https://bugs.freedesktop.org/attachment.cgi?id=126626=edit
Results summary

Webglc execution is suffering several failures (Google Chrome), with
about 1425 failures out of 22644 available tests.

Google chrome is sending the message "Rats! WebGL hit a snag."

Software configuration:
OS: Ubuntu 16.04
Kernel: 4.7.2 from kernel.org
Graphic stack:


Component : drm

tag   : libdrm-2.4.68

Component : mesa

tag   : mesa-12.0.1

Component : xf86-video-intel

tag   : 2.99.917-701-g205146b

Component : libva

tag   : libva-1.7.2.pre1


Component : intel-driver

tag   : 1.7.2.pre1



Component : cairo

tag   : 1.15.2


Component : xserver

tag   : xorg-server-1.18.3

Component : macros

tag   : util-macros-1.19.0-2-gd7acec2

Component : intel-gpu-tools

tag   : intel-gpu-tools-1.16

Execution:

1.-Download google chrome latest stable version
2.-Open Google chrome with the following command line in terminal "
google-chrome --enable-webgl --ignore-gpu-blacklist"
3.-On the browser click over "run test" button

Expected result:

The tests should run smoothly without pauses or error messages on the status
bar.

Actual result:

As described above, the browser reports many failures in the results HTML
and constantly shows "Rats! WebGL hit a snag."

Attaching logs and results

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97863] [BXT] Webglc is failing a lot of tests related to extensions textures uniforms and more

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97863

--- Comment #1 from Elio  ---
Created attachment 126627
  --> https://bugs.freedesktop.org/attachment.cgi?id=126627=edit
Dmesg

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.

2016-09-19 Thread Roland Scheidegger
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie 
> 
> This enables 64-bit integer support in gallivm and
> llvmpipe.
> 
> v2: add conversion opcodes.
> v3:
> - PIPE_CAP_INT64 is not there yet
> - restrict DIV/MOD defaults to the CPU, as for 32 bits
> - TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64
> 
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.c|   2 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   4 +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 
> +
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|  40 +-
>  src/gallium/auxiliary/tgsi/tgsi_info.h |   3 +-
>  5 files changed, 515 insertions(+), 5 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> index 1ef6ae4..b397261 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
> @@ -357,20 +357,22 @@ lp_build_emit_fetch(
> if (reg->Register.Absolute) {
>switch (stype) {
>case TGSI_TYPE_FLOAT:
>case TGSI_TYPE_DOUBLE:
>case TGSI_TYPE_UNTYPED:
>/* modifiers on movs assume data is float */
>   res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
>   break;
>case TGSI_TYPE_UNSIGNED:
>case TGSI_TYPE_SIGNED:
> +  case TGSI_TYPE_UNSIGNED64:
> +  case TGSI_TYPE_SIGNED64:
>case TGSI_TYPE_VOID:
>default:
>   /* abs modifier is only legal on floating point types */
>   assert(0);
>   break;
>}
> }
>  
> if (reg->Register.Negate) {
>switch (stype) {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> index de1150c..b6b3fe3 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
> @@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct 
> lp_build_tgsi_context *,
>  unsigned);
>  
>  struct lp_build_tgsi_context
>  {
> struct lp_build_context base;
>  
> struct lp_build_context uint_bld;
> struct lp_build_context int_bld;
>  
> struct lp_build_context dbl_bld;
> +
> +   struct lp_build_context uint64_bld;
> +   struct lp_build_context int64_bld;
> +
> /** This array stores functions that are used to transform TGSI opcodes to
>   * LLVM instructions.
>   */
> struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
>  
> /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
>  * should compute 1 / sqrt (src0.x) */
> struct lp_build_tgsi_action rsq_action;
>  
> struct lp_build_tgsi_action sqrt_action;
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
> b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1ee9704..d924770 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -1086,20 +1086,230 @@ static void dfrac_emit(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
>  {
> LLVMValueRef tmp;
> tmp = lp_build_floor(&bld_base->dbl_bld,
>   emit_data->args[0]);
> emit_data->output[emit_data->chan] =  
> LLVMBuildFSub(bld_base->base.gallivm->builder,
> emit_data->args[0], 
> tmp, "");
>  }
>  
> +/* TGSI_OPCODE_U64MUL */
> +static void
> +u64mul_emit(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   emit_data->output[emit_data->chan] = lp_build_mul(&bld_base->uint64_bld,
> +   emit_data->args[0], emit_data->args[1]);
> +}
> +
> +/* TGSI_OPCODE_U64MOD  */
> +static void
> +u64mod_emit_cpu(
> +   const struct lp_build_tgsi_action * action,
> +   struct lp_build_tgsi_context * bld_base,
> +   struct lp_build_emit_data * emit_data)
> +{
> +   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +   LLVMValueRef div_mask = lp_build_cmp(&bld_base->uint64_bld,
> +PIPE_FUNC_EQUAL, emit_data->args[1],
> +bld_base->uint64_bld.zero);
> +   /* We want to make sure that we never divide/mod by zero to not
> +* generate sigfpe. We don't want to crash just because the
> +* shader is doing something weird. */
> +   LLVMValueRef divisor = LLVMBuildOr(builder,
> +  div_mask,
> +  emit_data->args[1], "");
> +   LLVMValueRef result = lp_build_mod(&bld_base->uint64_bld,
> +  emit_data->args[0], divisor);
> +   /* umod by zero doesn't have a guaranteed return value chose 

Re: [Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)

2016-09-19 Thread Roland Scheidegger
Am 19.09.2016 um 15:08 schrieb Nicolai Hähnle:
> From: Dave Airlie 
> 
> This just adds the basic support for 64-bit opcodes,
> and the new types.
> 
> v2: add conversion opcodes.
> add documentation.
> v3:
> - make docs more consistent
> - change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64
> 
> Reviewed-by: Marek Olšák  (v2)
> Signed-off-by: Dave Airlie 
> ---
>  src/gallium/auxiliary/tgsi/tgsi_info.c |  92 +--
>  src/gallium/auxiliary/tgsi/tgsi_info.h |   4 +-
>  src/gallium/docs/source/tgsi.rst   | 240 
> +
>  src/gallium/include/pipe/p_shader_tokens.h |  46 --
>  4 files changed, 362 insertions(+), 20 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
> b/src/gallium/auxiliary/tgsi/tgsi_info.c
> index 60e0f2c..18e1bc8 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
> @@ -52,61 +52,61 @@ static const struct tgsi_opcode_info 
> opcode_info[TGSI_OPCODE_LAST] =
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
> { 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 },  /* removed */
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
> { 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
> -   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
> { 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
> { 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
> { 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
> { 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
> { 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
> { 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 },  /* removed */
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
> { 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
> -   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 },  /* removed */
> +   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
> { 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
> { 0, 0, 0, 0, 0, 0, 0, NONE, "RET", 

Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.

2016-09-19 Thread Jason Ekstrand
It all looks fine to me.  Feel free to add a

Reviewed-by: Jason Ekstrand 

That said, my knowledge of the details of the DRI vfuncs is very limited so
I'd like to see Emil or Axel sign off on it too, especially since they were
the ones who had all the comments.

--Jason

On Mon, Sep 19, 2016 at 3:55 AM, Weng, Chuanbo 
wrote:

> Seems they haven't got lost, because I see these patches in the
> mailing-list webpage:
> https://lists.freedesktop.org/archives/mesa-dev/2016-September/128847.html
> https://lists.freedesktop.org/archives/mesa-dev/2016-September/128845.html
> https://lists.freedesktop.org/archives/mesa-dev/2016-September/128846.html
> https://lists.freedesktop.org/archives/mesa-dev/2016-September/128844.html
>
> And my gmail account also receives these patches.
>
> Thanks,
> Chuanbo Weng
>
>
> -Original Message-
> From: Nicolai Hähnle [mailto:nhaeh...@gmail.com]
> Sent: Monday, September 19, 2016 4:53 PM
> To: Weng, Chuanbo ; mesa-dev@lists.freedesktop.org;
> emil.l.veli...@gmail.com
> Subject: Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA
> return corresponding offset.
>
> Those patches got lost somehow? Maybe they weren't sent out as replies to
> your first email, check the git configuration for sendemail.thread or the
> git send-email --thread flag.
>
> Cheers,
> Nicolai
>
> On 18.09.2016 09:04, Weng, Chuanbo wrote:
> > Ping for review. Thanks.
> >
> > -Original Message-
> > From: Weng, Chuanbo
> > Sent: Wednesday, September 14, 2016 1:07 AM
> > To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com
> > Cc: Weng, Chuanbo 
> > Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return
> corresponding offset.
> >
> > This patchset makes eglExportDMABUFImageMESA return corresponding offset
of EGLImage instead of 0 on intel platform with classic dri driver (i965).
> >
> > v2: Add version check of __DRIimageExtension implementation in egl
> loader (Suggested by Axel Davy).
> >
> > v3: Don't add version check of __DRIimageExtension implementation in
> > egl loader. Set the offset only when queryImage() succeeds. (Suggested
> > by Emil
> > Velikov)
> >
> > Chuanbo Weng (3):
> >   dri: add offset attribute and bump version of EGLImage extensions.
> >   egl: return corresponding offset of EGLImage instead of 0.
> >   i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET.
> >
> >  include/GL/internal/dri_interface.h  | 4 +++-
> >  src/egl/drivers/dri2/egl_dri2.c  | 8 +++-
> >  src/mesa/drivers/dri/i965/intel_screen.c | 9 +++--
> >  3 files changed, 17 insertions(+), 4 deletions(-)
> >
> > --
> > 1.9.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 88354] glXSwapBuffers() can cause BadMatch or lock X when performed repeatedly

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=88354

Eero Tamminen  changed:

   What|Removed |Added

 Status|NEW |NEEDINFO

--- Comment #3 from Eero Tamminen  ---
On SKL with Ubuntu 16.04, using latest Mesa from Git everything seems to work
fine, same with older Mesa 11.2 coming with Ubuntu.  No crashes / locks either
with Intel DDX or modesetting, with DRI3 or DRI2.

I would think that the issue is either fixed in Mesa, or the culprit is something
other than Mesa. Can you try a newer Mesa and, if that doesn't help, a newer Intel
DDX version?


Btw, your test program shows an interesting difference between DRI3 & DRI2.

I increased the test loop count a bit.  With Intel DDX / DRI2, the test goes through
10 000 rounds "instantly".

With DRI3, it takes 5-10x longer, and perf reports the following for Xorg's 100% CPU
usage:
-
23.83%  Xorg Xorg  [.] SyncAddTriggerToSyncObject  
18.20%  Xorg intel_drv.so  [.] 0x0010a5e5  
16.83%  Xorg Xorg  [.] TimerSet
16.49%  Xorg Xorg  [.] present_pixmap  
14.41%  Xorg Xorg  [.] present_event_notify
 7.96%  Xorg Xorg  [.] SyncDeleteTriggerFromSyncObject 
...
-


With modesetting instead of Intel DDX:
- test takes even longer and seems to be limited to 60 FPS
- with DRI2, CPU usage is ~1% (LIBGL_DRI3_DISABLE=1 Mesa option)
- with DRI3, CPU usage is 100%
-
Overview:
 98.99% Xorg
 80.49% [kernel.kallsyms]
  6.66% [unknown]  (I think this is on kernel side also)
  5.12% [vdso]
  3.99% modesetting_drv.so
  1.68% libc-2.23.so
  1.64% libdrm.so.2.4.0

Details:
 16.55%  Xorg   [kernel.kallsyms][k] copy_user_enhanced_fast_string   
  9.27%  Xorg   [kernel.kallsyms][k] do_sys_poll  
  8.13%  Xorg   [kernel.kallsyms][.] entry_SYSCALL_64_fastpath   
  5.43%  Xorg   [kernel.kallsyms][k] _raw_spin_unlock_irqrestore 
  4.51%  Xorg   [kernel.kallsyms][k] entry_SYSCALL_64_after_swapgs
  4.44%  Xorg   [kernel.kallsyms][k] _raw_spin_lock_irqsave 
  4.20%  Xorg   [kernel.kallsyms][k] kfree   
  3.79%  Xorg   [kernel.kallsyms][k] drm_ioctl
  3.48%  Xorg   [kernel.kallsyms][k] drm_wait_vblank   
  3.44%  Xorg   [kernel.kallsyms][k] kmem_cache_alloc_trace
-

In general, with DRI3, first (few thousand) swaps go faster than the later
ones.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 0/6] gallium/tgsi: 64-bit integer foundations

2016-09-19 Thread Edward O'Callaghan
This series is,
Reviewed-by: Edward O'Callaghan 

On 09/19/2016 11:08 PM, Nicolai Hähnle wrote:
> Hi everybody,
> 
> here's a v2 of the series. Compared to previously, I have now squashed my
> changes in. I have also included Roland's comments on the tgsi.rst docs,
> and, following his comment, I have changed the 32-bit to 64-bit conversion
> so that there is now a TGSI_OPCODE_I2I64 and a TGSI_OPCODE_U2I64. The
> former does sign extension, the latter does zero extension (and the latter
> could be called U2U64).
> 
> Since this doesn't actually turn any extensions on yet, I plan to push this
> tomorrow unless there are objections or further comments before then.
> 
> Cheers,
> Nicolai
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 



signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/9] gallium/tgsi: 64-bit integer foundations

2016-09-19 Thread Nicolai Hähnle

On 16.09.2016 19:19, Ian Romanick wrote:

On 09/16/2016 06:48 AM, Nicolai Hähnle wrote:

Hi all,

this is really Dave's work, with a few touch-ups from me that I think make
sense. I've kept those separate with the intention to squash. I'd like to
land these in master even before the main ARB_gpu_shader_int64 stuff lands
(that is currently in Ian's court).


If you guys are comfortable enabling it in radeonsi, I think the rest of
the code is close enough to ready to land.  I'm sure that we'll find
more bugs as more tests become available, but that's always the case.
I've updated my arb_gpu_shader_int64 tree, but it's intertwined with
some other stuff.  I can de-tangle it easy enough.


It makes sense to re-test with whatever additional test coverage you've 
come up with by now. Although I suppose the next release is far enough 
off that there's plenty of time to fix things up.


Cheers,
Nicolai




The reason is that radeonsi's ARB_query_buffer_object support needs 64-bit
integers in shaders, and for that it's convenient to have all the TGSI
opcodes and gallivm bits in place already.

Any objections? Reviews?
Thanks,
Nicolai

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/6] gallivm: support negation on 64-bit integers

2016-09-19 Thread Nicolai Hähnle
From: Nicolai Hähnle 

This should be analogous to 32-bit integers.
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index b397261..68ac695 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -382,20 +382,24 @@ lp_build_emit_fetch(
  res = lp_build_negate( _base->base, res );
  break;
   case TGSI_TYPE_DOUBLE:
  /* no double build context */
  assert(0);
  break;
   case TGSI_TYPE_SIGNED:
   case TGSI_TYPE_UNSIGNED:
  res = lp_build_negate( _base->int_bld, res );
  break;
+  case TGSI_TYPE_SIGNED64:
+  case TGSI_TYPE_UNSIGNED64:
+ res = lp_build_negate( _base->int64_bld, res );
+ break;
   case TGSI_TYPE_VOID:
   default:
  assert(0);
  break;
   }
}
 
/*
 * Swizzle the argument
 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/6] tgsi/softpipe: prepare ARB_gpu_shader_int64 support. (v3)

2016-09-19 Thread Nicolai Hähnle
From: Dave Airlie 

This adds all the opcodes to tgsi_exec for softpipe to use.

v2: add conversion opcodes.
v3:
- no PIPE_CAP_INT64 yet
- change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c | 673 ++---
 1 file changed, 541 insertions(+), 132 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 37f3fc7..7b5c56d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -687,25 +687,265 @@ micro_trunc(union tgsi_exec_channel *dst,
 static void
 micro_u2d(union tgsi_double_channel *dst,
   const union tgsi_exec_channel *src)
 {
dst->d[0] = (double)src->u[0];
dst->d[1] = (double)src->u[1];
dst->d[2] = (double)src->u[2];
dst->d[3] = (double)src->u[3];
 }
 
+static void
+micro_i64abs(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
+   dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
+   dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
+   dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
+}
+
+static void
+micro_i64sgn(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
+   dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
+   dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
+   dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
+}
+
+static void
+micro_i64neg(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = -src->i64[0];
+   dst->i64[1] = -src->i64[1];
+   dst->i64[2] = -src->i64[2];
+   dst->i64[3] = -src->i64[3];
+}
+
+static void
+micro_u64seq(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sne(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64slt(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_i64sge(union tgsi_double_channel *dst,
+   const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64sge(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
+   dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
+   dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
+   dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
+}
+
+static void
+micro_u64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
+   dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
+   dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
+   dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
+}
+
+static void
+micro_i64max(union tgsi_double_channel *dst,
+ const union tgsi_double_channel *src)
+{
+   dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
+   dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
+   dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
+   dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : 

[Mesa-dev] [PATCH v2 5/6] radeonsi: prepare 64-bit integer support. (v2)

2016-09-19 Thread Nicolai Hähnle
From: Dave Airlie 

v2:
- no PIPE_CAP_INT64 yet
- emit DIV/MOD without the divide-by-zero workaround

Reviewed-by: Marek Olšák  (v1)
Signed-off-by: Dave Airlie 
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c| 69 +++---
 1 file changed, 62 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 4fa43cd..bcb3143 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -44,20 +44,23 @@
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
  enum tgsi_opcode_type type)
 {
LLVMContextRef ctx = bld_base->base.gallivm->context;
 
switch (type) {
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
+   case TGSI_TYPE_UNSIGNED64:
+   case TGSI_TYPE_SIGNED64:
+   return LLVMInt64TypeInContext(ctx);
case TGSI_TYPE_DOUBLE:
return LLVMDoubleTypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
case TGSI_TYPE_FLOAT:
return LLVMFloatTypeInContext(ctx);
default: break;
}
return 0;
 }
 
@@ -1173,26 +1176,32 @@ void radeon_llvm_emit_prepare_cube_coords(struct 
lp_build_tgsi_context *bld_base
 
 static void emit_icmp(const struct lp_build_tgsi_action *action,
  struct lp_build_tgsi_context *bld_base,
  struct lp_build_emit_data *emit_data)
 {
unsigned pred;
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
LLVMContextRef context = bld_base->base.gallivm->context;
 
switch (emit_data->inst->Instruction.Opcode) {
-   case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
-   case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
-   case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
-   case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
-   case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
-   case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
default:
assert(!"unknown instruction");
pred = 0;
break;
}
 
LLVMValueRef v = LLVMBuildICmp(builder, pred,
emit_data->args[0], emit_data->args[1],"");
 
v = LLVMBuildSExtOrBitCast(builder, v,
@@ -1434,21 +1443,26 @@ static void emit_xor(const struct lp_build_tgsi_action 
*action,
 }
 
 static void emit_ssg(const struct lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data)
 {
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 
LLVMValueRef cmp, val;
 
-   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+   if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
+   cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], 
bld_base->int64_bld.zero, "");
+   val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, 
emit_data->args[0], "");
+   cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, 
bld_base->int64_bld.zero, "");
+   val = LLVMBuildSelect(builder, cmp, val, 
LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
+   } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], 
bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, 
emit_data->args[0], "");
cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, 
bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, val, 
LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
} else { // float SSG
cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], 
bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->base.one, 
emit_data->args[0], "");
cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, 
bld_base->base.zero, "");
val = LLVMBuildSelect(builder, cmp, val, 
LLVMConstReal(bld_base->base.elem_type, -1), "");
}
@@ -1698,29 +1712,33 @@ static void emit_minmax_int(const struct 

[Mesa-dev] [PATCH v2 4/6] gallivm/llvmpipe: prepare support for ARB_gpu_shader_int64.

2016-09-19 Thread Nicolai Hähnle
From: Dave Airlie 

This enables 64-bit integer support in gallivm and
llvmpipe.

v2: add conversion opcodes.
v3:
- PIPE_CAP_INT64 is not there yet
- restrict DIV/MOD defaults to the CPU, as for 32 bits
- TGSI_OPCODE_I2U64 becomes TGSI_OPCODE_U2I64

Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c|   2 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h|   4 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 471 +
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c|  40 +-
 src/gallium/auxiliary/tgsi/tgsi_info.h |   3 +-
 5 files changed, 515 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 1ef6ae4..b397261 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -357,20 +357,22 @@ lp_build_emit_fetch(
if (reg->Register.Absolute) {
   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_DOUBLE:
   case TGSI_TYPE_UNTYPED:
   /* modifiers on movs assume data is float */
  res = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS, res);
  break;
   case TGSI_TYPE_UNSIGNED:
   case TGSI_TYPE_SIGNED:
+  case TGSI_TYPE_UNSIGNED64:
+  case TGSI_TYPE_SIGNED64:
   case TGSI_TYPE_VOID:
   default:
  /* abs modifier is only legal on floating point types */
  assert(0);
  break;
   }
}
 
if (reg->Register.Negate) {
   switch (stype) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index de1150c..b6b3fe3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -330,20 +330,24 @@ typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct 
lp_build_tgsi_context *,
 unsigned);
 
 struct lp_build_tgsi_context
 {
struct lp_build_context base;
 
struct lp_build_context uint_bld;
struct lp_build_context int_bld;
 
struct lp_build_context dbl_bld;
+
+   struct lp_build_context uint64_bld;
+   struct lp_build_context int64_bld;
+
/** This array stores functions that are used to transform TGSI opcodes to
  * LLVM instructions.
  */
struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST];
 
/* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action
 * should compute 1 / sqrt (src0.x) */
struct lp_build_tgsi_action rsq_action;
 
struct lp_build_tgsi_action sqrt_action;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 1ee9704..d924770 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1086,20 +1086,230 @@ static void dfrac_emit(
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
 {
LLVMValueRef tmp;
tmp = lp_build_floor(_base->dbl_bld,
emit_data->args[0]);
emit_data->output[emit_data->chan] =  
LLVMBuildFSub(bld_base->base.gallivm->builder,
emit_data->args[0], 
tmp, "");
 }
 
+/* TGSI_OPCODE_U64MUL */
+static void
+u64mul_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_mul(_base->uint64_bld,
+   emit_data->args[0], emit_data->args[1]);
+}
+
+/* TGSI_OPCODE_U64MOD  */
+static void
+u64mod_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef div_mask = lp_build_cmp(_base->uint64_bld,
+PIPE_FUNC_EQUAL, emit_data->args[1],
+bld_base->uint64_bld.zero);
+   /* We want to make sure that we never divide/mod by zero to not
+* generate sigfpe. We don't want to crash just because the
+* shader is doing something weird. */
+   LLVMValueRef divisor = LLVMBuildOr(builder,
+  div_mask,
+  emit_data->args[1], "");
+   LLVMValueRef result = lp_build_mod(_base->uint64_bld,
+  emit_data->args[0], divisor);
+   /* umod by zero doesn't have a guaranteed return value chose -1 for now. */
+   emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
+div_mask,
+result, "");
+}
+
+/* TGSI_OPCODE_MOD (CPU Only) */
+static void
+i64mod_emit_cpu(
+   const struct 

[Mesa-dev] [PATCH v2 2/6] gallium/tgsi: add support for 64-bit integer immediates.

2016-09-19 Thread Nicolai Hähnle
From: Dave Airlie 

This adds support to TGSI for 64-bit integer immediates.

Reviewed-by: Marek Olšák 
Reviewed-by: Nicolai Hähnle 
Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_dump.c | 14 ++
 src/gallium/auxiliary/tgsi/tgsi_exec.c |  2 ++
 src/gallium/auxiliary/tgsi/tgsi_parse.c|  2 ++
 src/gallium/auxiliary/tgsi/tgsi_text.c | 44 +
 src/gallium/auxiliary/tgsi/tgsi_ureg.c | 45 --
 src/gallium/auxiliary/tgsi/tgsi_ureg.h | 10 +++
 src/gallium/include/pipe/p_shader_tokens.h |  2 ++
 7 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index d59b7ff..614bcb2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -247,20 +247,34 @@ dump_imm_data(struct tgsi_iterate_context *iter,
assert( num_tokens <= 4 );
for (i = 0; i < num_tokens; i++) {
   switch (data_type) {
   case TGSI_IMM_FLOAT64: {
  union di d;
  d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
  DBL( d.d );
  i++;
  break;
   }
+  case TGSI_IMM_INT64: {
+ union di d;
+ d.i = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.i );
+ i++;
+ break;
+  }
+  case TGSI_IMM_UINT64: {
+ union di d;
+ d.ui = data[i].Uint | (uint64_t)data[i+1].Uint << 32;
+ UID( d.ui );
+ i++;
+ break;
+  }
   case TGSI_IMM_FLOAT32:
  if (ctx->dump_float_as_hex)
 HFLT( data[i].Float );
  else
 FLT( data[i].Float );
  break;
   case TGSI_IMM_UINT32:
  UID(data[i].Uint);
  break;
   case TGSI_IMM_INT32:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index aff35e6..37f3fc7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -70,20 +70,22 @@
 #define FAST_MATH 0
 
 #define TILE_TOP_LEFT 0
 #define TILE_TOP_RIGHT1
 #define TILE_BOTTOM_LEFT  2
 #define TILE_BOTTOM_RIGHT 3
 
 union tgsi_double_channel {
double d[TGSI_QUAD_SIZE];
unsigned u[TGSI_QUAD_SIZE][2];
+   uint64_t u64[TGSI_QUAD_SIZE];
+   int64_t i64[TGSI_QUAD_SIZE];
 };
 
 struct tgsi_double_vector {
union tgsi_double_channel xy;
union tgsi_double_channel zw;
 };
 
 static void
 micro_abs(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c 
b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 16564dd..940af7d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -148,26 +148,28 @@ tgsi_parse_token(
 
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
   case TGSI_IMM_FLOAT64:
  for (i = 0; i < imm_count; i++) {
 next_token(ctx, >u[i].Float);
  }
  break;
 
   case TGSI_IMM_UINT32:
+  case TGSI_IMM_UINT64:
  for (i = 0; i < imm_count; i++) {
 next_token(ctx, >u[i].Uint);
  }
  break;
 
   case TGSI_IMM_INT32:
+  case TGSI_IMM_INT64:
  for (i = 0; i < imm_count; i++) {
 next_token(ctx, >u[i].Int);
  }
  break;
 
   default:
  assert( 0 );
   }
 
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c 
b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 8bdec06..be80842 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -288,20 +288,56 @@ static boolean parse_double( const char **pcur, uint32_t 
*val0, uint32_t *val1)
v.dval = strtod(cur, (char**)pcur);
if (*pcur == cur)
   return FALSE;
 
*val0 = v.uval[0];
*val1 = v.uval[1];
 
return TRUE;
 }
 
+static boolean parse_int64( const char **pcur, uint32_t *val0, uint32_t *val1)
+{
+   const char *cur = *pcur;
+   union {
+  int64_t i64val;
+  uint32_t uval[2];
+   } v;
+
+   v.i64val = strtoll(cur, (char**)pcur, 0);
+   if (*pcur == cur)
+  return FALSE;
+
+   *val0 = v.uval[0];
+   *val1 = v.uval[1];
+
+   return TRUE;
+}
+
+static boolean parse_uint64( const char **pcur, uint32_t *val0, uint32_t *val1)
+{
+   const char *cur = *pcur;
+   union {
+  uint64_t u64val;
+  uint32_t uval[2];
+   } v;
+
+   v.u64val = strtoull(cur, (char**)pcur, 0);
+   if (*pcur == cur)
+  return FALSE;
+
+   *val0 = v.uval[0];
+   *val1 = v.uval[1];
+
+   return TRUE;
+}
+
 struct translate_ctx
 {
const char *text;
const char *cur;
struct tgsi_token *tokens;
struct tgsi_token *tokens_cur;
struct tgsi_token *tokens_end;
struct tgsi_header *header;
unsigned processor : 4;
unsigned implied_array_size : 6;

[Mesa-dev] [PATCH v2 0/6] gallium/tgsi: 64-bit integer foundations

2016-09-19 Thread Nicolai Hähnle
Hi everybody,

here's a v2 of the series. Compared to previously, I have now squashed my
changes in. I have also included Roland's comments on the tgsi.rst docs,
and, following his comment, I have changed the 32-bit to 64-bit conversion
so that there is now a TGSI_OPCODE_I2I64 and a TGSI_OPCODE_U2I64. The
former does sign extension, the latter does zero extension (and the latter
could be called U2U64).

Since this doesn't actually turn any extensions on yet, I plan to push this
tomorrow unless there are objections or further comments before then.

Cheers,
Nicolai

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/6] gallium: add opcode and types for 64-bit integers. (v3)

2016-09-19 Thread Nicolai Hähnle
From: Dave Airlie 

This just adds the basic support for 64-bit opcodes,
and the new types.

v2: add conversion opcodes.
add documentation.
v3:
- make docs more consistent
- change TGSI_OPCODE_I2U64 to TGSI_OPCODE_U2I64

Reviewed-by: Marek Olšák  (v2)
Signed-off-by: Dave Airlie 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c |  92 +--
 src/gallium/auxiliary/tgsi/tgsi_info.h |   4 +-
 src/gallium/docs/source/tgsi.rst   | 240 +
 src/gallium/include/pipe/p_shader_tokens.h |  46 --
 4 files changed, 362 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 60e0f2c..18e1bc8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -52,61 +52,61 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MIN", TGSI_OPCODE_MIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MAX", TGSI_OPCODE_MAX },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLT", TGSI_OPCODE_SLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGE", TGSI_OPCODE_SGE },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "MAD", TGSI_OPCODE_MAD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SUB", TGSI_OPCODE_SUB },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "LRP", TGSI_OPCODE_LRP },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "FMA", TGSI_OPCODE_FMA },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SQRT", TGSI_OPCODE_SQRT },
{ 1, 3, 0, 0, 0, 0, 0, REPL, "DP2A", TGSI_OPCODE_DP2A },
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 22 },  /* removed */
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 23 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2U64", TGSI_OPCODE_F2U64 },
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "F2I64", TGSI_OPCODE_F2I64 },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FRC", TGSI_OPCODE_FRC },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CLAMP", TGSI_OPCODE_CLAMP },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "FLR", TGSI_OPCODE_FLR },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ROUND", TGSI_OPCODE_ROUND },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "EX2", TGSI_OPCODE_EX2 },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "LG2", TGSI_OPCODE_LG2 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "POW", TGSI_OPCODE_POW },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XPD", TGSI_OPCODE_XPD },
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 32 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "U2I64", TGSI_OPCODE_U2I64 },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ABS", TGSI_OPCODE_ABS },
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 34 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "I2I64", TGSI_OPCODE_I2I64 },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DPH", TGSI_OPCODE_DPH },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "COS", TGSI_OPCODE_COS },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX", TGSI_OPCODE_DDX },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY", TGSI_OPCODE_DDY },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "KILL", TGSI_OPCODE_KILL },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2H", TGSI_OPCODE_PK2H },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK2US", TGSI_OPCODE_PK2US },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4B", TGSI_OPCODE_PK4B },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "PK4UB", TGSI_OPCODE_PK4UB },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 44 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2U64", TGSI_OPCODE_D2U64 },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SEQ", TGSI_OPCODE_SEQ },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 46 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "D2I64", TGSI_OPCODE_D2I64 },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SGT", TGSI_OPCODE_SGT },
{ 1, 1, 0, 0, 0, 0, 0, REPL, "SIN", TGSI_OPCODE_SIN },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SLE", TGSI_OPCODE_SLE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SNE", TGSI_OPCODE_SNE },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 51 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642D", TGSI_OPCODE_U642D },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TEX", TGSI_OPCODE_TEX },
{ 1, 4, 1, 0, 0, 0, 0, OTHR, "TXD", TGSI_OPCODE_TXD },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXP", TGSI_OPCODE_TXP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2H", TGSI_OPCODE_UP2H },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP2US", TGSI_OPCODE_UP2US },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4B", TGSI_OPCODE_UP4B },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "UP4UB", TGSI_OPCODE_UP4UB },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 59 },  /* removed */
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 60 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "U642F", TGSI_OPCODE_U642F },
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642F", TGSI_OPCODE_I642F },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "ARR", TGSI_OPCODE_ARR },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 62 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 1, COMP, "I642D", TGSI_OPCODE_I642D },
{ 0, 0, 0, 0, 1, 0, 0, NONE, "CAL", TGSI_OPCODE_CAL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "RET", TGSI_OPCODE_RET },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "SSG", TGSI_OPCODE_SSG },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", 

Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter

2016-09-19 Thread Nayan Deshmukh
Hi Andy,



On Mon, Sep 19, 2016 at 3:27 PM, Andy Furniss  wrote:

> Nayan Deshmukh wrote:
>
>> Hi Andy,
>>
>> Thanks for testing the patches!!
>>
>> On Mon, Sep 19, 2016 at 5:24 AM, Andy Furniss 
>> wrote:
>>
>> Andy Furniss wrote:
>>>
>>> Nayan Deshmukh wrote:

 This series implements lanczos interpolation filter.
>
> Andy, I have made some changes to the code. Can you test the
> patches. I hope the artifacts are reduced this time.
>
>
 The artifacts are still there.


>>> :(
>>>
>>
>> The higher levels involve a hell of a lot of calculations per pixel so the
>> decreased fps is expected. I was thinking of having only 2 levels for
>> lanczos filter ie. 2 and 4.
>>
>> Christian, will it be fine if we only have 2 levels (with kernel size
>> 2 and 4) of lanczos filter corresponding to HIGH_QUALITY_SCALING_L2
>> and L3?
>>
>
> 2 and 4 still have the offset issue, which does sometimes cause the
> white line.
>

Hi Andy

I am able to reproduce the offset issue, I will try to work on the patch
tonight.

Regards,
Nayan.

>
> I've found another issue with 2 and 4 = unscaled with a raster locked
> res test the 1 pix detail will be lost. The detail does re-appear if you
> scale up.
>
> bz2 compressed vid showing the issue -
>
> https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.

2016-09-19 Thread Weng, Chuanbo
Seems they haven't got lost, because I see these patches in the mailing-list 
webpage:
https://lists.freedesktop.org/archives/mesa-dev/2016-September/128847.html
https://lists.freedesktop.org/archives/mesa-dev/2016-September/128845.html
https://lists.freedesktop.org/archives/mesa-dev/2016-September/128846.html
https://lists.freedesktop.org/archives/mesa-dev/2016-September/128844.html

And my gmail account also receives these patches.

Thanks,
Chuanbo Weng


-Original Message-
From: Nicolai Hähnle [mailto:nhaeh...@gmail.com] 
Sent: Monday, September 19, 2016 4:53 PM
To: Weng, Chuanbo ; mesa-dev@lists.freedesktop.org; 
emil.l.veli...@gmail.com
Subject: Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return 
corresponding offset.

Those patches got lost somehow? Maybe they weren't sent out as replies to your 
first email, check the git configuration for sendemail.thread or the git 
send-email --thread flag.

Cheers,
Nicolai

On 18.09.2016 09:04, Weng, Chuanbo wrote:
> Ping for review. Thanks.
>
> -Original Message-
> From: Weng, Chuanbo
> Sent: Wednesday, September 14, 2016 1:07 AM
> To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com
> Cc: Weng, Chuanbo 
> Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding 
> offset.
>
> This patchset makes eglExportDMABUFImageMESA return corresponding offset of 
> EGLImage instead of 0 on intel platform with classic dri driver(i965).
>
> v2: Add version check of __DRIimageExtension implementation in egl loader 
> (Suggested by Axel Davy).
>
> v3: Don't add version check of __DRIimageExtension implementation in 
> egl loader. Set the offset only when queryImage() succeeds. (Suggested 
> by Emil
> Velikov)
>
> Chuanbo Weng (3):
>   dri: add offset attribute and bump version of EGLImage extensions.
>   egl: return corresponding offset of EGLImage instead of 0.
>   i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET.
>
>  include/GL/internal/dri_interface.h  | 4 +++-
>  src/egl/drivers/dri2/egl_dri2.c  | 8 +++-
>  src/mesa/drivers/dri/i965/intel_screen.c | 9 +++--
>  3 files changed, 17 insertions(+), 4 deletions(-)
>
> --
> 1.9.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter

2016-09-19 Thread Andy Furniss

Andy Furniss wrote:

Andy Furniss wrote:


bz2 compressed vid showing the issue -

https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms


To be clear this is just the test rez vid I used so you can
possibly recreate the issue yourself.


Hmm, interesting, with bicubic, hqscaling=1 this vid behaves
differently with mplayer compared to mpv, the latter looks like
it's scaling a bit, maybe there's an off by one somewhere in mpv
code.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter

2016-09-19 Thread Andy Furniss

Andy Furniss wrote:


bz2 compressed vid showing the issue -

https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms


To be clear this is just the test rez vid I used so you can
possibly recreate the issue yourself.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Implement lanczos interpolation filter

2016-09-19 Thread Andy Furniss

Nayan Deshmukh wrote:

Hi Andy,

Thanks for testing the patches!!

On Mon, Sep 19, 2016 at 5:24 AM, Andy Furniss 
wrote:


Andy Furniss wrote:


Nayan Deshmukh wrote:


This series implements lanczos interpolation filter.

Andy, I have made some changes to the code. Can you test the
patches. I hope the artifacts are reduced this time.



The artifacts are still there.



:(


The higher levels involve a hell of a lot of calculations per pixel so the
decreased fps is expected. I was thinking of having only 2 levels for
lanczos filter ie. 2 and 4.

Christian, will it be fine if we only have 2 levels (with kernel size
2 and 4) of lanczos filter corresponding to HIGH_QUALITY_SCALING_L2
and L3?


2 and 4 still have the offset issue, which does sometimes cause the
white line.

I've found another issue with 2 and 4 = unscaled with a raster locked
res test the 1 pix detail will be lost. The detail does re-appear if you
scale up.

bz2 compressed vid showing the issue -

https://drive.google.com/open?id=0BxP5-S1t9VEEUE5sbUFBV20zSms
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding offset.

2016-09-19 Thread Nicolai Hähnle
Those patches got lost somehow? Maybe they weren't sent out as replies 
to your first email, check the git configuration for sendemail.thread or 
the git send-email --thread flag.


Cheers,
Nicolai

On 18.09.2016 09:04, Weng, Chuanbo wrote:

Ping for review. Thanks.

-Original Message-
From: Weng, Chuanbo
Sent: Wednesday, September 14, 2016 1:07 AM
To: mesa-dev@lists.freedesktop.org; emil.l.veli...@gmail.com
Cc: Weng, Chuanbo 
Subject: [PATCH v3 0/3] Make eglExportDMABUFImageMESA return corresponding 
offset.

This patchset makes eglExportDMABUFImageMESA return corresponding offset of 
EGLImage instead of 0 on intel platform with classic dri driver(i965).

v2: Add version check of __DRIimageExtension implementation in egl loader 
(Suggested by Axel Davy).

v3: Don't add version check of __DRIimageExtension implementation in egl 
loader. Set the offset only when queryImage() succeeds. (Suggested by Emil
Velikov)

Chuanbo Weng (3):
  dri: add offset attribute and bump version of EGLImage extensions.
  egl: return corresponding offset of EGLImage instead of 0.
  i965: implement querying __DRI_IMAGE_ATTRIB_OFFSET.

 include/GL/internal/dri_interface.h  | 4 +++-
 src/egl/drivers/dri2/egl_dri2.c  | 8 +++-
 src/mesa/drivers/dri/i965/intel_screen.c | 9 +++--
 3 files changed, 17 insertions(+), 4 deletions(-)

--
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: make use of strtol() in debug_get_num_option()

2016-09-19 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

However, you might want to check with the VMWare guys. I seem to recall 
that MSVC is a bit peculiar with some of these library functions.


Cheers,
Nicolai

On 14.09.2016 20:37, Samuel Pitoiset wrote:

This allows the use of hexadecimal numbers, which are automatically
detected by strtol() when the base is 0.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/util/u_debug.c | 25 -
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_debug.c 
b/src/gallium/auxiliary/util/u_debug.c
index 4619526..dd3e167 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -203,25 +203,16 @@ debug_get_num_option(const char *name, long dfault)
const char *str;

str = os_get_option(name);
-   if (!str)
+   if (!str) {
   result = dfault;
-   else {
-  long sign;
-  char c;
-  c = *str++;
-  if (c == '-') {
-sign = -1;
-c = *str++;
-  }
-  else {
-sign = 1;
-  }
-  result = 0;
-  while ('0' <= c && c <= '9') {
-result = result*10 + (c - '0');
-c = *str++;
+   } else {
+  char *endptr;
+
+  result = strtol(str, &endptr, 0);
+  if (str == endptr) {
+ /* Restore the default value when no digits were found. */
+ result = dfault;
   }
-  result *= sign;
}

if (debug_get_option_should_print())


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 97230] MATLAB hangs if DRI3 enabled with intel driver

2016-09-19 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=97230

Eero Tamminen  changed:

   What|Removed |Added

 CC||eero.t.tammi...@intel.com
 Status|NEW |NEEDINFO

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 30/30] egl/dri2: set WL_bind_wayland_display in a consistent way

2016-09-19 Thread Daniel Stone
On 16 September 2016 at 18:02, Emil Velikov  wrote:
> Introduce a helper and use it throughout the platform code. This allows
> us to reduce the amount of ifdef(s) and (potentially) use
> kms_swrast_dri.so for !drm platforms (namely wayland and x11).

Reviewed-by: Daniel Stone 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev