Re: [Mesa-dev] Mesa Library questions

2013-07-28 Thread Pierre Moreau
Hello Li Zhi,

I have done a quick search, and it seems that glBegin is mapped to 
vbo_exec_Begin. 

I'm not using the same version of Mesa as you, but here is how it works for me. 
In vbo_exec_api.c, function vbo_exec_vtxfmt_init, you get
vfmt-Begin = vbo_exec_Begin;

And after, in vtxfmt.c, function install_vtxfmt
SET_Begin(tab, vfmt-Begin);

SET_Begin calls a macro (SET_by_offset) mapping the vbo_exec_Begin to glBegin. 
The macro is located in dispatch.h

Hope it helps you,
Pierre Moreau

 On 27 juil. 2013, at 03:18, ZhiLi ditto_zh...@hotmail.com wrote:
 
 Hello everyone,
 
 I am an university student working on school project related to OpenGL. 
 Now I am using mesa library (version 9.0.3) as an implementation of OpenGL. 
 I have a few questions about Mesa library.
 
 glBegin()/glEnd() pair is used for immediate mode rendering, so I study the 
 source code to find out 
 how these functions work.
 
 In file vbo_exec_api.c line 800, the comment says function vbo_exec_Begin 
 is called via glBegin().
 
 However, in all library files, I cannot find where and how glBegin() calls 
 the function.
 
 Could you give some hints and explanations? I am really appreciating your 
 help. Thanks.
 
 Li Zhi
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH] nv50/ir: only enable mul saturate on G200+

2015-05-10 Thread Pierre Moreau
Tested-by: Pierre Moreau pierre.mor...@free.fr

- Mail original -
 Commit 44673512a84 enabled support for saturating fmul. However
 experimentally this does not seem to work on the older chips.
 Restrict
 the feature to G200 (NVA0) and later.
 
 Reported-by: Pierre Moreau pierre.mor...@free.fr
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90350
 Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
 Cc: mesa-sta...@lists.freedesktop.org
 ---
  src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5
  -
  1 file changed, 4 insertions(+), 1 deletion(-)
 
 diff --git
 a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
 b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
 index 70180eb..ca545a6 100644
 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
 +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
 @@ -84,7 +84,7 @@ static const struct opProperties _initProps[] =
 //   neg  abs  not  sat  c[]  s[], a[], imm
 { OP_ADD,0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
 { OP_SUB,0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
 -   { OP_MUL,0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
 +   { OP_MUL,0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
 { OP_MAX,0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
 { OP_MIN,0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
 { OP_MAD,0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special
 constraint
 @@ -188,6 +188,9 @@ void TargetNV50::initOpInfo()
if (prop-mSat  8)
   opInfo[prop-op].dstMods = NV50_IR_MOD_SAT;
 }
 +
 +   if (chipset = 0xa0)
 +  opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
  }
  
  unsigned int
 --
 2.3.6
 
 ___
 Nouveau mailing list
 nouv...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 3/4] nv50: move HW queries to nv50_query_hw.c/h files

2015-10-28 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 06:24 PM - Oct 28 2015, Samuel Pitoiset wrote:
> Changes since v2:
> - remove unused 'nv50_hw_query_funcs' struct
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/Makefile.sources   |   2 +
>  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 354 ++-
>  src/gallium/drivers/nouveau/nv50/nv50_query.h  |  26 +-
>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.c   | 389 
> +
>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.h   |  39 +++
>  .../drivers/nouveau/nv50/nv50_shader_state.c   |   7 +-
>  src/gallium/drivers/nouveau/nv50/nv50_state.c  |   3 +-
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c|   5 +-
>  8 files changed, 476 insertions(+), 349 deletions(-)
>  create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
>  create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
> 
> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> b/src/gallium/drivers/nouveau/Makefile.sources
> index 06d9d97..83f8113 100644
> --- a/src/gallium/drivers/nouveau/Makefile.sources
> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> @@ -74,6 +74,8 @@ NV50_C_SOURCES := \
>   nv50/nv50_push.c \
>   nv50/nv50_query.c \
>   nv50/nv50_query.h \
> + nv50/nv50_query_hw.c \
> + nv50/nv50_query_hw.h \
>   nv50/nv50_resource.c \
>   nv50/nv50_resource.h \
>   nv50/nv50_screen.c \
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> index 1b4abdb..dd9b85b 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> @@ -26,334 +26,45 @@
>  
>  #include "nv50/nv50_context.h"
>  #include "nv50/nv50_query.h"
> -#include "nv_object.xml.h"
> -
> -#define NV50_QUERY_STATE_READY   0
> -#define NV50_QUERY_STATE_ACTIVE  1
> -#define NV50_QUERY_STATE_ENDED   2
> -#define NV50_QUERY_STATE_FLUSHED 3
> -
> -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts
> - * (since we use only a single GPU channel per screen) will not work 
> properly.
> - *
> - * The first is not that big of an issue because OpenGL does not allow nested
> - * queries anyway.
> - */
> -
> -#define NV50_QUERY_ALLOC_SPACE 256
> -
> -static bool
> -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int 
> size)
> -{
> -   struct nv50_screen *screen = nv50->screen;
> -   int ret;
> -
> -   if (q->bo) {
> -  nouveau_bo_ref(NULL, >bo);
> -  if (q->mm) {
> - if (q->state == NV50_QUERY_STATE_READY)
> -nouveau_mm_free(q->mm);
> - else
> -nouveau_fence_work(screen->base.fence.current, 
> nouveau_mm_free_work,
> -   q->mm);
> -  }
> -   }
> -   if (size) {
> -  q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, >bo, 
> >base);
> -  if (!q->bo)
> - return false;
> -  q->offset = q->base;
> -
> -  ret = nouveau_bo_map(q->bo, 0, screen->base.client);
> -  if (ret) {
> - nv50_query_allocate(nv50, q, 0);
> - return false;
> -  }
> -  q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
> -   }
> -   return true;
> -}
> -
> -static void
> -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
> -{
> -   nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0);
> -   nouveau_fence_ref(NULL, _query(pq)->fence);
> -   FREE(nv50_query(pq));
> -}
> +#include "nv50/nv50_query_hw.h"
>  
>  static struct pipe_query *
> -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
> +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
>  {
> struct nv50_context *nv50 = nv50_context(pipe);
> struct nv50_query *q;
>  
> -   q = CALLOC_STRUCT(nv50_query);
> -   if (!q)
> -  return NULL;
> -
> -   if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) {
> -  FREE(q);
> -  return NULL;
> -   }
> -
> -   q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
> - type == PIPE_QUERY_PRIMITIVES_EMITTED ||
> - type == PIPE_QUERY_SO_STATISTICS ||
> - type == PIPE_QUERY_PIPELINE_STATISTICS);
> -   q->type = type;
> -
> -   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
> -  q->offset -= 32;
> -  q->data -= 32 / sizeof(*q->data); /* we advanc

Re: [Mesa-dev] [PATCH 2/4] nv50: move nva0_so_target_save_offset() to its correct location

2015-10-28 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 11:30 PM - Oct 18 2015, Samuel Pitoiset wrote:
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_query.c | 18 --
>  src/gallium/drivers/nouveau/nv50/nv50_query.h |  3 ---
>  src/gallium/drivers/nouveau/nv50/nv50_state.c | 18 ++
>  3 files changed, 18 insertions(+), 21 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> index 7718d69..1b4abdb 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> @@ -444,24 +444,6 @@ nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, 
> uint16_t method,
>  }
>  
>  void
> -nva0_so_target_save_offset(struct pipe_context *pipe,
> -   struct pipe_stream_output_target *ptarg,
> -   unsigned index, bool serialize)
> -{
> -   struct nv50_so_target *targ = nv50_so_target(ptarg);
> -
> -   if (serialize) {
> -  struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
> -  PUSH_SPACE(push, 2);
> -  BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
> -  PUSH_DATA (push, 0);
> -   }
> -
> -   nv50_query(targ->pq)->index = index;
> -   nv50_query_end(pipe, targ->pq);
> -}
> -
> -void
>  nv50_init_query_functions(struct nv50_context *nv50)
>  {
> struct pipe_context *pipe = >base.pipe;
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_query.h
> index 722af0c..a703013 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h
> @@ -33,8 +33,5 @@ void nv50_init_query_functions(struct nv50_context *);
>  void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t,
> struct nv50_query *, unsigned result_offset);
>  void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *);
> -void nva0_so_target_save_offset(struct pipe_context *,
> -struct pipe_stream_output_target *,
> -unsigned, bool);
>  
>  #endif
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> index 410e631..8af2add 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> @@ -1057,6 +1057,24 @@ nv50_so_target_create(struct pipe_context *pipe,
>  }
>  
>  static void
> +nva0_so_target_save_offset(struct pipe_context *pipe,
> +   struct pipe_stream_output_target *ptarg,
> +   unsigned index, bool serialize)
> +{
> +   struct nv50_so_target *targ = nv50_so_target(ptarg);
> +
> +   if (serialize) {
> +  struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
> +  PUSH_SPACE(push, 2);
> +  BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
> +  PUSH_DATA (push, 0);
> +   }
> +
> +   nv50_query(targ->pq)->index = index;
> +   pipe->end_query(pipe, targ->pq);
> +}
> +
> +static void
>  nv50_so_target_destroy(struct pipe_context *pipe,
> struct pipe_stream_output_target *ptarg)
>  {
> -- 
> 2.6.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 0/6] gallium: allow drivers to report debug info to st

2015-10-31 Thread Pierre Moreau
On 01:15 AM - Oct 31 2015, Ilia Mirkin wrote:
> I've switched the st/mesa impl around to be set in the manager, based
> on whether it's a debug context. I've also added a st/clover impl
> (entirely untested beyond compilation) as the OpenCL API appears to

Tested on a Tesla card using the hello_world program from [here] [0], and I did
get the debug message from Nouveau with shader compilation stats.

[0]: http://cgit.freedesktop.org/~tstellar/opencl-example/


Pierre

> have similar allowances. Finally, I've added some nouveau patches that
> demonstrate how I intend this to be used. Not exhaustive, but a
> start. I was able to get the fence wait message to trigger by turning
> off buffer range tracking, so I know it's working. (And the compiler
> message has already revealed that st/mesa is double-compiling FF
> programs in some cases.)
> 
> Brian, Marek -- please come to an agreement about how things should be
> named -- I really don't care but I also don't want to go back and
> change it 10 times. Let's figure out something mutually agreeable and
> then I'll change it.
> 
> I believe I've addressed all previous feedback.
> 
> Ilia Mirkin (6):
>   gallium: expose a debug message callback settable by context owner
>   st/mesa: set debug callback for debug contexts
>   st/clover: provide a path for drivers to call through to pfn_notify
>   nouveau: add support for sending debug messages via KHR_debug
>   nv50,nvc0: provide debug messages with shader compilation stats
>   nouveau: send back a debug message when waiting for a fence to
> complete
> 
>  src/gallium/auxiliary/util/u_debug.c   | 16 ++
>  src/gallium/auxiliary/util/u_debug.h   | 24 +++
>  src/gallium/docs/source/context.rst|  3 ++
>  .../drivers/nouveau/codegen/nv50_ir_driver.h   |  1 +
>  .../drivers/nouveau/codegen/nv50_ir_target.cpp |  2 ++
>  src/gallium/drivers/nouveau/nouveau_buffer.c   | 13 
>  src/gallium/drivers/nouveau/nouveau_context.h  |  5 
>  src/gallium/drivers/nouveau/nouveau_fence.c| 14 +++--
>  src/gallium/drivers/nouveau/nouveau_fence.h|  4 ++-
>  src/gallium/drivers/nouveau/nouveau_screen.c   | 21 -
>  src/gallium/drivers/nouveau/nv30/nv30_context.c|  1 +
>  src/gallium/drivers/nouveau/nv30/nv30_screen.c |  2 +-
>  src/gallium/drivers/nouveau/nv50/nv50_context.c|  1 +
>  src/gallium/drivers/nouveau/nv50/nv50_program.c|  8 -
>  src/gallium/drivers/nouveau/nv50/nv50_program.h|  3 +-
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c |  2 +-
>  .../drivers/nouveau/nv50/nv50_shader_state.c   |  2 +-
>  src/gallium/drivers/nouveau/nv50/nv50_state.c  |  3 +-
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c|  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c|  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.c|  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  3 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c|  8 -
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  2 +-
>  .../drivers/nouveau/nvc0/nvc0_shader_state.c   |  2 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c  |  3 +-
>  src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c   |  4 +--
>  src/gallium/include/pipe/p_context.h   |  4 +++
>  src/gallium/include/pipe/p_defines.h   | 35 
> ++
>  src/gallium/include/pipe/p_state.h | 29 ++
>  src/gallium/state_trackers/clover/api/context.cpp  |  2 +-
>  src/gallium/state_trackers/clover/core/context.cpp | 18 +--
>  src/gallium/state_trackers/clover/core/context.hpp | 13 +++-
>  src/gallium/state_trackers/clover/core/queue.cpp   | 20 +
>  src/mesa/state_tracker/st_manager.c| 18 +++
>  35 files changed, 262 insertions(+), 29 deletions(-)
> 
> -- 
> 2.4.10
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type

2015-10-19 Thread Pierre Moreau
Hi Samuel,

(some comments below)

On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote:
> While we are at it, store the rotate offset for occlusion queries to
> nv50_hw_query like on nvc0.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 
> +---
>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
>  2 files changed, 35 insertions(+), 13 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> index fcdd183..6260410 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> @@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
> nv50_query *q)
>  * query might set the initial render condition to false even *after* we 
> re-
>  * initialized it to true.
>  */
> -   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
> -  hq->offset += 32;
> -  hq->data += 32 / sizeof(*hq->data);
> +   if (hq->rotate) {
> +  hq->offset += hq->rotate;
> +  hq->data += hq->rotate / sizeof(*hq->data);
>if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
>   nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
>  
> @@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, unsigned 
> type, unsigned index)
>  {
> struct nv50_hw_query *hq;
> struct nv50_query *q;
> +   unsigned space;
>  
> hq = CALLOC_STRUCT(nv50_hw_query);
> if (!hq)
> @@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, 
> unsigned type, unsigned index)
> q->funcs = _query_funcs;
> q->type = type;
>  
> -   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
> +   switch (q->type) {
> +   case PIPE_QUERY_OCCLUSION_COUNTER:
> +  hq->rotate = 32;

You should have `hq->rotate` default to 0 in other cases, as IIRC, you have no
guaranty about the value of an uninitialised variable.

> +  space = NV50_HW_QUERY_ALLOC_SPACE;
> +  break;
> +   case PIPE_QUERY_PRIMITIVES_GENERATED:
> +   case PIPE_QUERY_PRIMITIVES_EMITTED:
> +   case PIPE_QUERY_SO_STATISTICS:
> +   case PIPE_QUERY_PIPELINE_STATISTICS:
> +  hq->is64bit = true;

Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`.

> +  space = NV50_HW_QUERY_ALLOC_SPACE;
> +  break;
> +   case PIPE_QUERY_TIME_ELAPSED:
> +   case PIPE_QUERY_TIMESTAMP:
> +   case PIPE_QUERY_TIMESTAMP_DISJOINT:
> +   case PIPE_QUERY_GPU_FINISHED:
> +   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
> +  space = NV50_HW_QUERY_ALLOC_SPACE;
> +  break;
> +   default:
> +  debug_printf("invalid query type: %u\n", type);
> +  FREE(q);
> +  return NULL;
> +   }
> +
> +   if (!nv50_hw_query_allocate(nv50, q, space)) {

`space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to
introducing this `space` variable? Do you plan to later add other possible
values to it?

Pierre


>FREE(hq);
>return NULL;
> }
>  
> -   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
> +   if (hq->rotate) {
>/* we advance before query_begin ! */
> -  hq->offset -= 32;
> -  hq->data -= 32 / sizeof(*hq->data);
> +  hq->offset -= hq->rotate;
> +  hq->data -= hq->rotate / sizeof(*hq->data);
> }
>  
> -   hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED ||
> - type == PIPE_QUERY_PRIMITIVES_EMITTED ||
> - type == PIPE_QUERY_SO_STATISTICS ||
> - type == PIPE_QUERY_PIPELINE_STATISTICS);
> -
> return q;
>  }
>  
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
> index ea2bf24..3a53e8a 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h
> @@ -24,9 +24,10 @@ struct nv50_hw_query {
> uint32_t sequence;
> struct nouveau_bo *bo;
> uint32_t base_offset;
> -   uint32_t offset; /* base + i * 32 */
> +   uint32_t offset; /* base + i * rotate */
> uint8_t state;
> bool is64bit;
> +   uint8_t rotate;
> int nesting; /* only used for occlusion queries */
> struct nouveau_mm_allocation *mm;
> struct nouveau_fence *fence;
> -- 
> 2.6.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] nv50: add a header file for nv50_query

2015-10-19 Thread Pierre Moreau
Hi Samuel,

(some comments further down)

On 11:30 PM - Oct 18 2015, Samuel Pitoiset wrote:
> Like for nvc0, this will allow to split different types of queries and
> to prepare the way for both global performance counters and MP counters.
> 
> While we are at it, make use of nv50_query struct instead of pipe_query.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/Makefile.sources   |  1 +
>  src/gallium/drivers/nouveau/nv50/nv50_context.h| 12 +--
>  src/gallium/drivers/nouveau/nv50/nv50_query.c  | 29 ++--
>  src/gallium/drivers/nouveau/nv50/nv50_query.h  | 40 
> ++
>  .../drivers/nouveau/nv50/nv50_shader_state.c   |  4 +--
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c|  3 +-
>  6 files changed, 49 insertions(+), 40 deletions(-)
>  create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query.h
> 
> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> b/src/gallium/drivers/nouveau/Makefile.sources
> index c18e9f5..06d9d97 100644
> --- a/src/gallium/drivers/nouveau/Makefile.sources
> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> @@ -73,6 +73,7 @@ NV50_C_SOURCES := \
>   nv50/nv50_program.h \
>   nv50/nv50_push.c \
>   nv50/nv50_query.c \
> + nv50/nv50_query.h \
>   nv50/nv50_resource.c \
>   nv50/nv50_resource.h \
>   nv50/nv50_screen.c \
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> index 69c1212..fb74a97 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> @@ -16,6 +16,7 @@
>  #include "nv50/nv50_program.h"
>  #include "nv50/nv50_resource.h"
>  #include "nv50/nv50_transfer.h"
> +#include "nv50/nv50_query.h"
>  
>  #include "nouveau_context.h"
>  #include "nouveau_debug.h"
> @@ -195,17 +196,6 @@ void nv50_default_kick_notify(struct nouveau_pushbuf *);
>  /* nv50_draw.c */
>  extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
>  
> -/* nv50_query.c */
> -void nv50_init_query_functions(struct nv50_context *);
> -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
> -   struct pipe_query *, unsigned result_offset);
> -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
> -void nva0_so_target_save_offset(struct pipe_context *,
> -struct pipe_stream_output_target *,
> -unsigned index, bool seralize);
> -
> -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
> -
>  /* nv50_shader_state.c */
>  void nv50_vertprog_validate(struct nv50_context *);
>  void nv50_gmtyprog_validate(struct nv50_context *);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> index 5368ee7..7718d69 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
> @@ -25,6 +25,7 @@
>  #define NV50_PUSH_EXPLICIT_SPACE_CHECKING
>  
>  #include "nv50/nv50_context.h"
> +#include "nv50/nv50_query.h"
>  #include "nv_object.xml.h"
>  
>  #define NV50_QUERY_STATE_READY   0
> @@ -39,29 +40,8 @@
>   * queries anyway.
>   */
>  
> -struct nv50_query {
> -   uint32_t *data;
> -   uint16_t type;
> -   uint16_t index;
> -   uint32_t sequence;
> -   struct nouveau_bo *bo;
> -   uint32_t base;
> -   uint32_t offset; /* base + i * 32 */
> -   uint8_t state;
> -   bool is64bit;
> -   int nesting; /* only used for occlusion queries */
> -   struct nouveau_mm_allocation *mm;
> -   struct nouveau_fence *fence;
> -};
> -
>  #define NV50_QUERY_ALLOC_SPACE 256
>  
> -static inline struct nv50_query *
> -nv50_query(struct pipe_query *pipe)
> -{
> -   return (struct nv50_query *)pipe;
> -}
> -
>  static bool
>  nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int 
> size)
>  {
> @@ -363,9 +343,8 @@ nv50_query_result(struct pipe_context *pipe, struct 
> pipe_query *pq,
>  }
>  
>  void
> -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
> +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q)
>  {
> -   struct nv50_query *q = nv50_query(pq);
> unsigned offset = q->offset;
>  
> PUSH_SPACE(push, 5);
> @@ -453,10 +432,8 @@ nv50_render_condition(struct pipe_context *pipe,
>  
>  void
>  nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
> -  struct pipe_query *pq, unsigned result_offset)
> +  struct nv50_query *q, unsigned result_offset)
>  {
> -   struct nv50_query *q = nv50_query(pq);
> -
> nv50_query_update(q);
> if (q->state != NV50_QUERY_STATE_READY)
>nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_query.h
> new file mode 

Re: [Mesa-dev] [PATCH 4/4] nv50: do not create an invalid HW query type

2015-10-19 Thread Pierre Moreau
On 11:06 AM - Oct 19 2015, Samuel Pitoiset wrote:
> 
> 
> On 10/19/2015 11:01 AM, Pierre Moreau wrote:
> >Hi Samuel,
> >
> >(some comments below)
> >
> >On 11:36 PM - Oct 18 2015, Samuel Pitoiset wrote:
> >>While we are at it, store the rotate offset for occlusion queries to
> >>nv50_hw_query like on nvc0.
> >>
> >>Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> >>---
> >>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 45 
> >> +---
> >>  src/gallium/drivers/nouveau/nv50/nv50_query_hw.h |  3 +-
> >>  2 files changed, 35 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c 
> >>b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> >>index fcdd183..6260410 100644
> >>--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> >>+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
> >>@@ -126,9 +126,9 @@ nv50_hw_begin_query(struct nv50_context *nv50, struct 
> >>nv50_query *q)
> >>  * query might set the initial render condition to false even *after* 
> >> we re-
> >>  * initialized it to true.
> >>  */
> >>-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
> >>-  hq->offset += 32;
> >>-  hq->data += 32 / sizeof(*hq->data);
> >>+   if (hq->rotate) {
> >>+  hq->offset += hq->rotate;
> >>+  hq->data += hq->rotate / sizeof(*hq->data);
> >>if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE)
> >>   nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE);
> >>@@ -330,6 +330,7 @@ nv50_hw_create_query(struct nv50_context *nv50, 
> >>unsigned type, unsigned index)
> >>  {
> >> struct nv50_hw_query *hq;
> >> struct nv50_query *q;
> >>+   unsigned space;
> >> hq = CALLOC_STRUCT(nv50_hw_query);
> >> if (!hq)
> >>@@ -339,22 +340,42 @@ nv50_hw_create_query(struct nv50_context *nv50, 
> >>unsigned type, unsigned index)
> >> q->funcs = _query_funcs;
> >> q->type = type;
> >>-   if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) {
> >>+   switch (q->type) {
> >>+   case PIPE_QUERY_OCCLUSION_COUNTER:
> >>+  hq->rotate = 32;
> >You should have `hq->rotate` default to 0 in other cases, as IIRC, you have 
> >no
> >guaranty about the value of an uninitialised variable.
> 
> CALLOC_STRUCT will be initialize all fields to 0.

Oh, that's nice! Didn't know about it.

> 
> >
> >>+  space = NV50_HW_QUERY_ALLOC_SPACE;
> >>+  break;
> >>+   case PIPE_QUERY_PRIMITIVES_GENERATED:
> >>+   case PIPE_QUERY_PRIMITIVES_EMITTED:
> >>+   case PIPE_QUERY_SO_STATISTICS:
> >>+   case PIPE_QUERY_PIPELINE_STATISTICS:
> >>+  hq->is64bit = true;
> >Same comment as for `hq->rotate`: have `hq->is64bit` default to `false`.
> >
> >>+  space = NV50_HW_QUERY_ALLOC_SPACE;
> >>+  break;
> >>+   case PIPE_QUERY_TIME_ELAPSED:
> >>+   case PIPE_QUERY_TIMESTAMP:
> >>+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
> >>+   case PIPE_QUERY_GPU_FINISHED:
> >>+   case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
> >>+  space = NV50_HW_QUERY_ALLOC_SPACE;
> >>+  break;
> >>+   default:
> >>+  debug_printf("invalid query type: %u\n", type);
> >>+  FREE(q);
> >>+  return NULL;
> >>+   }
> >>+
> >>+   if (!nv50_hw_query_allocate(nv50, q, space)) {
> >`space` is always `NV50_HW_QUERY_ALLOC_SPACE`. Is there an advantage to
> >introducing this `space` variable? Do you plan to later add other possible
> >values to it?
> 
> I have a patch locally which reduces the size of that buffer for some
> queries,
> but this is not really related to this series. I'll submit it later (with
> other patches).

One could argue then that you should introduce `space` in those later patches.

Anyway,
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

> 
> >
> >Pierre
> >
> >
> >>FREE(hq);
> >>return NULL;
> >> }
> >>-   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
> >>+   if (hq->rotate) {
> >>/* we advance before query_begin ! */
> >>-  hq->offset -= 32;
> >>-  hq->data -= 32 / sizeof(*hq->data);
> >>+  hq->offset -

Re: [Mesa-dev] [PATCH 13/16] nvc0: enable compute support by default on Fermi

2015-10-18 Thread Pierre Moreau
An LLVM IR <-> SPIR-V was in the plans (see [first proposal][0] and [second 
proposal][1]). My guess is, it is still being worked on and we should hear some 
more around Siggraph Asia (2--5 November), if Khronos is to announce Vulkan and 
SPIR-V's final spec at that conference.

Pierre


PS: Just found that in Siggraph Asia program, on day 2:

>   12:00 – 14:00
>   Kobe Int’l Exhibition Hall No. 2, Convention Hall, Exhibitor Talk Stage, 
> Level 1
>   Exhibition Talks Ex BC FC1 FC
>   Presented by Khronos Group
>   Khronos Graphics, Compute and Vision APIs – including Vulkan Next 
> Generation GPU Acceleration
>   -Neil Trevett, President, Khronos Group

Hopefully they will release the final specs shortly after.


[0]: 
http://llvm.1065342.n5.nabble.com/RFC-Upstreaming-LLVM-SPIR-V-converter-td81206.html
[1]: http://lists.llvm.org/pipermail/llvm-dev/2015-June/086848.html


- Mail original -
> On Sat, Oct 17, 2015 at 4:31 PM, Ilia Mirkin <imir...@alum.mit.edu>
> wrote:
> > On Sat, Oct 17, 2015 at 4:24 PM, Jan Vesely
> > <jan.ves...@rutgers.edu> wrote:
> >> On Sat, 2015-10-17 at 15:24 -0400, Ilia Mirkin wrote:
> >>> "compute" in this context is "initialize the compute engine so
> >>> that
> >>> kernels may be executed", not "convert the llvm ir bitcode that
> >>> clover
> >>> hands us into nv50 ir". The former has actually been around for
> >>> years,
> >>> Samuel just fixed up a few fermi-specific bits.
> >>
> >> Can't we use LLVM IR-> TGSI -> nv IR for that?
> >
> > Sure, there's no LLVM IR -> TGSI conversion though.
> 
> BTW, Pierre Moreau is working on a SPIR-V -> nv50 ir adapter, which
> will hopefully mean that once a SPIR-V llvm backend exists (such a
> thing *is* in the plans by... someone, right?) that would be able to
> be used. He hasn't made a lot of progress though.
> 
> Among other things, SPIR-V is SSA, and nv50 ir input has to be
> non-ssa
> (because, among other things, the various lowering passes generate
> non-ssa code, futz with control flow, etc).
> 
>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] nouveau: remove logically dead code in nouveau_vpe_mb_mv_header()

2015-12-14 Thread Pierre Moreau
On 11:14 AM - Dec 14 2015, Samuel Pitoiset wrote:
> frame cannot be NULL in that branch. Spotted by Coverity.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nouveau_video.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nouveau_video.c 
> b/src/gallium/drivers/nouveau/nouveau_video.c
> index 8bb12b2..fe19bce 100644
> --- a/src/gallium/drivers/nouveau/nouveau_video.c
> +++ b/src/gallium/drivers/nouveau/nouveau_video.c
> @@ -317,8 +317,6 @@ nouveau_vpe_mb_mv_header(struct nouveau_decoder *dec,
>case PIPE_MPEG12_MO_TYPE_16x8: goto mv2;
>case PIPE_MPEG12_MO_TYPE_DUAL_PRIME: {
>base = NV17_MPEG_CMD_CHROMA_MV_HEADER_MV_SPLIT_HALF_MB;
> - if (frame)
> -base |= NV17_MPEG_CMD_CHROMA_MV_HEADER_TYPE_FRAME;

If frame can't be NULL, shouldn't you only remove the `if` statement as you're
otherwise removing used code as well?


Pierre


>   if (forward)
>  nouveau_vpe_mb_mv(dec, base, luma, frame, true,
>dec->picture_structure != 
> PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP,
> -- 
> 2.6.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/3] nvc0: check return value of nvc0_program_validate()

2015-12-14 Thread Pierre Moreau
On 11:56 AM - Dec 14 2015, Ilia Mirkin wrote:
> No, gp->code_size is set by the validation. You need to put that last.

IIRC, you can't assume in which order the compiler will decide to evaluate the
different expressions being AND'ed.

> 
> On Mon, Dec 14, 2015 at 11:51 AM, Samuel Pitoiset
>  wrote:
> > Spotted by Coverity.
> >
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c | 5 +
> >  1 file changed, 1 insertion(+), 4 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
> > b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> > index 7e2e999..7c3d03e 100644
> > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> > @@ -236,11 +236,8 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
> > struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> > struct nvc0_program *gp = nvc0->gmtyprog;
> >
> > -   if (gp)
> > -  nvc0_program_validate(nvc0, gp);
> > -
> > /* we allow GPs with no code for specifying stream output state only */
> > -   if (gp && gp->code_size) {
> > +   if (gp && gp->code_size && nvc0_program_validate(nvc0, gp)) {
> >const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
> >
> >BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
> > --
> > 2.6.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: Fix scratch allocation size and file

2016-01-01 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 4 ++--
 src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index dca799d..c6d5400 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -407,7 +407,7 @@ BuildUtil::loadImm(Value *dst, float f)
 Value *
 BuildUtil::loadImm(Value *dst, double d)
 {
-   return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(), mkImm(d));
+   return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(8), mkImm(d));
 }
 
 Value *
@@ -499,7 +499,7 @@ BuildUtil::DataArray::acquire(ValueMap , int i, int c)
 
   return v;
} else {
-  return up->getScratch();
+  return up->getScratch(eltSize, file);
}
 }
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
index 8f3bf77..d171f64 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
@@ -295,7 +295,7 @@ BuildUtil::mkOp3v(operation op, DataType ty, Value *dst,
 inline LValue *
 BuildUtil::mkLoadv(DataType ty, Symbol *mem, Value *ptr)
 {
-   LValue *dst = getScratch();
+   LValue *dst = getScratch(typeSizeof(ty));
mkLoad(ty, dst, mem, ptr);
return dst;
 }
-- 
2.6.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Enable LTO compilation

2016-05-30 Thread Pierre Moreau
Ok, I found the answer to my question regarding the `-fno-lto`. IMHO that
should be in the commit message, since the patch only checks for availability
and disables LTO in one case, never enabling it explicitely.
Please ignore my previous mail.

Pierre

On 11:06 AM - May 30 2016, Matt Turner wrote:
> On Mon, May 30, 2016 at 11:02 AM, ⚛ <0xe2.0x9a.0...@gmail.com> wrote:
> >
> > On Mon, May 30, 2016 at 7:39 PM, Matt Turner  wrote:
> >>
> >> On Mon, May 30, 2016 at 10:28 AM, <0xe2.0x9a.0...@gmail.com> wrote:
> >> > This patch enables compilation with -flto.
> >> >
> >> > The performance benefits of LTO (GCC 4.9 & 6.1) are about 1% for 
> >> > glxgears.
> >> > Performance changes in OpenGL games haven't been measured yet, my 
> >> > feeling is
> >> > that they are negligible.
> >>
> >> Without a compelling reason, I don't think the build system should be
> >> adding compiler flags like this.
> >
> >
> > What does it mean "like this". The patched build system only checks whether 
> > -flto works because it needs to be disabled in mesa/src/mapi.
> >
> >>
> >> -flto makes debugging impossible, at least the last time I tried it
> >> with gcc. I don't think that's something we would want enabled
> >> whenever it's supported.
> >
> >
> > "Enable LTO compilation" means the person compiling Mesa can choose whether 
> > to use -flto.
> 
> Oh, I see. I misunderstood the patch initially.
> 
> Because src/mapi cannot be compiled with -flto, this patch *disables*
> -flto in that directory if it is enabled. Interesting.
> 
> (Please keep mesa-dev@ in Cc on your replies, and don't sent HTML mail)
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Enable LTO compilation

2016-05-30 Thread Pierre Moreau
On 07:28 PM - May 30 2016, ⚛ wrote:
> This patch enables compilation with -flto.
> 
> The performance benefits of LTO (GCC 4.9 & 6.1) are about 1% for glxgears.
> Performance changes in OpenGL games haven't been measured yet, my feeling
> is that they are negligible.

> diff --git a/configure.ac b/configure.ac
> index fc0b1db..e84a1ad 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -362,6 +362,8 @@ if test "x$GXX" = xyes; then
>  CXXFLAGS="$CXXFLAGS -fno-builtin-memcmp"
>  fi
>  
> +AX_CHECK_COMPILE_FLAG([-flto], AM_CONDITIONAL(COMPILER_UNDERSTANDS_LTO, 
> true), AM_CONDITIONAL(COMPILER_UNDERSTANDS_LTO, false))
> +
>  AC_SUBST([MSVC2013_COMPAT_CFLAGS])
>  AC_SUBST([MSVC2013_COMPAT_CXXFLAGS])
>  
> diff --git a/src/mapi/Makefile.am b/src/mapi/Makefile.am
> index 68a28a2..4b7d266 100644
> --- a/src/mapi/Makefile.am
> +++ b/src/mapi/Makefile.am
> @@ -51,6 +51,11 @@ AM_CPPFLAGS =  
> \
>  
>  include Makefile.sources
>  
> +if COMPILER_UNDERSTANDS_LTO
> +CFLAGS += -fno-lto
> +CXXFLAGS += -fno-lto

This should be `-flto` if I’m not mistaken.

Pierre

> +endif
> +
>  MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
>  PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
>  

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] nvc0: rework nvc0_compute_validate_program()

2016-02-24 Thread Pierre Moreau
Hi Samuel,

On 06:44 PM - Feb 24 2016, Samuel Pitoiset wrote:
> Reduce the amount of duplicated code by re-using
> nvc0_program_validate(). While we are at it, change the prototype
> to return void and remove nvc0_compute.h which is now useless.

Why don't you want to know whether the validation worked or not? If the
validation failed, the program has a bug and Nouveau shouldn't try to run it,
so allocating buffers that will be unused seems wasteful. Am I missing
something here?

Pierre

> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/Makefile.sources   |  1 -
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c| 34 
> ++
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.h|  9 --
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  1 +
>  .../drivers/nouveau/nvc0/nvc0_shader_state.c   | 15 ++
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c|  4 +--
>  6 files changed, 20 insertions(+), 44 deletions(-)
>  delete mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
> 
> diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> b/src/gallium/drivers/nouveau/Makefile.sources
> index 43ffce6..65f08c7 100644
> --- a/src/gallium/drivers/nouveau/Makefile.sources
> +++ b/src/gallium/drivers/nouveau/Makefile.sources
> @@ -150,7 +150,6 @@ NVC0_C_SOURCES := \
>   nvc0/gm107_texture.xml.h \
>   nvc0/nvc0_3d.xml.h \
>   nvc0/nvc0_compute.c \
> - nvc0/nvc0_compute.h \
>   nvc0/nvc0_compute.xml.h \
>   nvc0/nvc0_context.c \
>   nvc0/nvc0_context.h \
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index a664aaf..060f59d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -23,7 +23,8 @@
>   */
>  
>  #include "nvc0/nvc0_context.h"
> -#include "nvc0/nvc0_compute.h"
> +
> +#include "nvc0/nvc0_compute.xml.h"
>  
>  int
>  nvc0_screen_compute_setup(struct nvc0_screen *screen,
> @@ -120,34 +121,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
> return 0;
>  }
>  
> -bool
> -nvc0_compute_validate_program(struct nvc0_context *nvc0)
> -{
> -   struct nvc0_program *prog = nvc0->compprog;
> -
> -   if (prog->mem)
> -  return true;
> -
> -   if (!prog->translated) {
> -  prog->translated = nvc0_program_translate(
> - prog, nvc0->screen->base.device->chipset, >base.debug);
> -  if (!prog->translated)
> - return false;
> -   }
> -   if (unlikely(!prog->code_size))
> -  return false;
> -
> -   if (likely(prog->code_size)) {
> -  if (nvc0_program_upload_code(nvc0, prog)) {
> - struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> - BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
> - PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
> - return true;
> -  }
> -   }
> -   return false;
> -}
> -
>  static void
>  nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
>  {
> @@ -292,8 +265,7 @@ nvc0_compute_validate_globals(struct nvc0_context *nvc0)
>  static bool
>  nvc0_compute_state_validate(struct nvc0_context *nvc0)
>  {
> -   if (!nvc0_compute_validate_program(nvc0))
> -  return false;
> +   nvc0_compprog_validate(nvc0);
> if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF)
>nvc0_compute_validate_constbufs(nvc0);
> if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST)
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
> deleted file mode 100644
> index a23f7f3..000
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
> +++ /dev/null
> @@ -1,9 +0,0 @@
> -#ifndef NVC0_COMPUTE_H
> -#define NVC0_COMPUTE_H
> -
> -#include "nvc0/nvc0_compute.xml.h"
> -
> -bool
> -nvc0_compute_validate_program(struct nvc0_context *nvc0);
> -
> -#endif /* NVC0_COMPUTE_H */
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 7aa4b62..0f1ebb0 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -254,6 +254,7 @@ void nvc0_tctlprog_validate(struct nvc0_context *);
>  void nvc0_tevlprog_validate(struct nvc0_context *);
>  void nvc0_gmtyprog_validate(struct nvc0_context *);
>  void nvc0_fragprog_validate(struct nvc0_context *);
> +void nvc0_compprog_validate(struct nvc0_context *);
>  
>  void nvc0_tfb_validate(struct nvc0_context *);
>  
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> index 2f46c43..6b02ed5 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
> @@ -28,6 +28,8 @@
>  #include "nvc0/nvc0_context.h"
>  #include "nvc0/nvc0_query_hw.h"
>  
> +#include "nvc0/nvc0_compute.xml.h"
> +
>  static inline void
>  

Re: [Mesa-dev] [PATCH 1/3] nvc0: move nvc0_validate_global_residents() to nvc0_compute.c

2016-02-24 Thread Pierre Moreau
Hello Samuel,

On 06:44 PM - Feb 24 2016, Samuel Pitoiset wrote:
> While we are at it, rename it to nvc0_compute_validate_globals() and
> update its prototype.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c| 15 +++
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  3 +--
>  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 15 ---
>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c|  3 +--
>  4 files changed, 17 insertions(+), 19 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 0f1265f..7809a11 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -274,6 +274,21 @@ nvc0_compute_validate_buffers(struct nvc0_context *nvc0)
> }
>  }
>  
> +void
> +nvc0_compute_validate_globals(struct nvc0_context *nvc0)
> +{
> +   unsigned i;
> +
> +   for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource 
> *);

I get that renaming `nvc0_validate_global_residents()` to
`nvc0_compte_validate_globals()` brings more consistency with the other
`nvXY_compute_validate_*()` functions, but then one might be tempted to rename
`global_residents` to only `globals`. So if we can't remove the other
`*_residents?`, I would probably keep it in
`nvc0_compute_validate_global_residents()`.

Other than that, Acked-by: Pierre Moreau <pierre.mor...@free.fr>

Pierre

> +++i) {
> +  struct pipe_resource *res = *util_dynarray_element(
> + >global_residents, struct pipe_resource *, i);
> +  if (res)
> + nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL,
> +   nv04_resource(res), NOUVEAU_BO_RDWR);
> +   }
> +}
> +
>  static bool
>  nvc0_compute_state_validate(struct nvc0_context *nvc0)
>  {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index d3e3a81..7aa4b62 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -261,8 +261,6 @@ void nvc0_tfb_validate(struct nvc0_context *);
>  extern void nvc0_init_state_functions(struct nvc0_context *);
>  
>  /* nvc0_state_validate.c */
> -void nvc0_validate_global_residents(struct nvc0_context *,
> -struct nouveau_bufctx *, int bin);
>  bool nvc0_state_validate(struct nvc0_context *, uint32_t state_mask);
>  
>  /* nvc0_surface.c */
> @@ -342,5 +340,6 @@ void nve4_launch_grid(struct pipe_context *, const struct 
> pipe_grid_info *);
>  
>  /* nvc0_compute.c */
>  void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
> +void nvc0_compute_validate_globals(struct nvc0_context *);
>  
>  #endif
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index 18e79e36..fbf45ce 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -559,21 +559,6 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0)
> nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST;
>  }
>  
> -void
> -nvc0_validate_global_residents(struct nvc0_context *nvc0,
> -   struct nouveau_bufctx *bctx, int bin)
> -{
> -   unsigned i;
> -
> -   for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource 
> *);
> -++i) {
> -  struct pipe_resource *res = *util_dynarray_element(
> - >global_residents, struct pipe_resource *, i);
> -  if (res)
> - nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
> -   }
> -}
> -
>  static void
>  nvc0_validate_derived_1(struct nvc0_context *nvc0)
>  {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> index 652bc6d..5c73740 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
> @@ -317,8 +317,7 @@ nve4_compute_state_validate(struct nvc0_context *nvc0)
> if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
>nve4_compute_validate_surfaces(nvc0);
> if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
> -  nvc0_validate_global_residents(nvc0,
> - nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
> +  nvc0_compute_validate_globals(nvc0);
>  
> nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
>  
> -- 

Re: [Mesa-dev] [PATCH 2/3] nvc0: make sure to validate compute global buffers on Fermi

2016-02-24 Thread Pierre Moreau
Acked-by: Pierre Moreau <pierre.mor...@free.fr>

On 06:44 PM - Feb 24 2016, Samuel Pitoiset wrote:
> No reason to not validate those global buffers and this might avoid
> fails if someone try to use the global memory from compute programs.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> index 7809a11..a664aaf 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> @@ -304,8 +304,10 @@ nvc0_compute_state_validate(struct nvc0_context *nvc0)
>nvc0_compute_validate_textures(nvc0);
> if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
>nvc0_compute_validate_samplers(nvc0);
> +   if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
> +  nvc0_compute_validate_globals(nvc0);
>  
> -   /* TODO: surfaces, global memory buffers */
> +   /* TODO: surfaces */
>  
> nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false);
>  
> -- 
> 2.6.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] nvc0: rework nvc0_compute_validate_program()

2016-02-24 Thread Pierre Moreau
On 08:44 PM - Feb 24 2016, Samuel Pitoiset wrote:
> 
> 
> On 02/24/2016 08:30 PM, Pierre Moreau wrote:
> >Hi Samuel,
> >
> >On 06:44 PM - Feb 24 2016, Samuel Pitoiset wrote:
> >>Reduce the amount of duplicated code by re-using
> >>nvc0_program_validate(). While we are at it, change the prototype
> >>to return void and remove nvc0_compute.h which is now useless.
> >
> >Why don't you want to know whether the validation worked or not? If the
> >validation failed, the program has a bug and Nouveau shouldn't try to run it,
> >so allocating buffers that will be unused seems wasteful. Am I missing
> >something here?
> 
> Because it's useless? If the program can't be correctly validated it's going
> to break the universe. If you look at nvc0_gmtyprog_validate() for example
> you will see that I just follow the same design.

I should have checked the other `*_prog_validate()` functions indeed. And I had
forgotten that when those `*_prog_validate()` functions are called, the shaders
have already been transformed from GLSL to TGSI, so user errors in the input
shaders should have been catched already and only translation errors could
occur at this point.

Pierre


Acked-by: Pierre Moreau <pierre.mor...@free.fr>


> >
> >Pierre
> >
> >>
> >>Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> >>---
> >>  src/gallium/drivers/nouveau/Makefile.sources   |  1 -
> >>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c| 34 
> >> ++
> >>  src/gallium/drivers/nouveau/nvc0/nvc0_compute.h|  9 --
> >>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  1 +
> >>  .../drivers/nouveau/nvc0/nvc0_shader_state.c   | 15 ++
> >>  src/gallium/drivers/nouveau/nvc0/nve4_compute.c|  4 +--
> >>  6 files changed, 20 insertions(+), 44 deletions(-)
> >>  delete mode 100644 src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
> >>
> >>diff --git a/src/gallium/drivers/nouveau/Makefile.sources 
> >>b/src/gallium/drivers/nouveau/Makefile.sources
> >>index 43ffce6..65f08c7 100644
> >>--- a/src/gallium/drivers/nouveau/Makefile.sources
> >>+++ b/src/gallium/drivers/nouveau/Makefile.sources
> >>@@ -150,7 +150,6 @@ NVC0_C_SOURCES := \
> >>nvc0/gm107_texture.xml.h \
> >>nvc0/nvc0_3d.xml.h \
> >>nvc0/nvc0_compute.c \
> >>-   nvc0/nvc0_compute.h \
> >>nvc0/nvc0_compute.xml.h \
> >>nvc0/nvc0_context.c \
> >>nvc0/nvc0_context.h \
> >>diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
> >>b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> >>index a664aaf..060f59d 100644
> >>--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> >>+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
> >>@@ -23,7 +23,8 @@
> >>   */
> >>
> >>  #include "nvc0/nvc0_context.h"
> >>-#include "nvc0/nvc0_compute.h"
> >>+
> >>+#include "nvc0/nvc0_compute.xml.h"
> >>
> >>  int
> >>  nvc0_screen_compute_setup(struct nvc0_screen *screen,
> >>@@ -120,34 +121,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
> >> return 0;
> >>  }
> >>
> >>-bool
> >>-nvc0_compute_validate_program(struct nvc0_context *nvc0)
> >>-{
> >>-   struct nvc0_program *prog = nvc0->compprog;
> >>-
> >>-   if (prog->mem)
> >>-  return true;
> >>-
> >>-   if (!prog->translated) {
> >>-  prog->translated = nvc0_program_translate(
> >>- prog, nvc0->screen->base.device->chipset, >base.debug);
> >>-  if (!prog->translated)
> >>- return false;
> >>-   }
> >>-   if (unlikely(!prog->code_size))
> >>-  return false;
> >>-
> >>-   if (likely(prog->code_size)) {
> >>-  if (nvc0_program_upload_code(nvc0, prog)) {
> >>- struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> >>- BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
> >>- PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
> >>- return true;
> >>-  }
> >>-   }
> >>-   return false;
> >>-}
> >>-
> >>  static void
> >>  nvc0_compute_validate_samplers(struct nvc0_context *nvc0)
> >>  {
> >>@@ -292,8 +265,7 @@ nvc0_compute_validate_globals(struct nvc0_context *nvc0)
> >>  static bool
> >>  nvc0_compute_state_validate(struct nvc0_co

[Mesa-dev] [PATCH] nv50/ir: Check for valid insn instead of defs size

2016-02-24 Thread Pierre Moreau
On Tesla cards, the first register $r0 contains the thread id; later
generations use a specialised register for it. In order to prevent the register
from being given to anyone, and thus lose the thread id information, an lvalue
is created to represent $r0 and is passed as an argument to the `main`
function.

However, since the inputs and outputs of a function are stored as value
definitions, a definition is added onto the previously created lvalue without
it being associated to an instruction. Therefore, checking the number of
definitions of an lvalue do not ensure that it is associated to an instruction.

Fixes a nullptr dereference in the register allocation pass, while running
compute kernels that do not use $r0.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index d877c25..500ab89 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
 static bool
 isShortRegVal(LValue *lval)
 {
-   if (lval->defs.size() == 0)
+   if (lval->getInsn() == NULL)
   return false;
for (Value::DefCIterator def = lval->defs.begin();
 def != lval->defs.end(); ++def)
@@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
  nodes[i].init(regs, lval);
  RIG.insert([i]);
 
- if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
+ if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
  prog->getTarget()->getChipset() < 0xc0) {
 Instruction *insn = lval->getInsn();
 if (insn->op == OP_MAD || insn->op == OP_SAD)
-- 
2.7.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50: do not advertise about compute shaders

2016-02-19 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 08:25 PM - Feb 19 2016, Samuel Pitoiset wrote:
> Compute shaders are totally unsupported. This avoids Clover to
> report that OpenCL is supported on Tesla because it's a lie.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> index 06b7bf9..8d11dd7 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
> @@ -264,8 +264,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, 
> unsigned shader,
> case PIPE_SHADER_VERTEX:
> case PIPE_SHADER_GEOMETRY:
> case PIPE_SHADER_FRAGMENT:
> -   case PIPE_SHADER_COMPUTE:
>break;
> +   case PIPE_SHADER_COMPUTE:
> default:
>return 0;
> }
> -- 
> 2.6.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH mesa 3/3] nouveau: Add support for clover / OpenCL kernel input parameters

2016-03-10 Thread Pierre Moreau
On 11:05 AM - Mar 10 2016, Ilia Mirkin wrote:
> On Thu, Mar 10, 2016 at 11:03 AM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > You might want to increment the address by at least
> > `info->prop.cp.inputOffset`, and if inputs still end up in shared on Tesla,
> 
> There's a cp.sharedOffset just for that :) However it doesn't appear
> to get set anywhere...

Oh really?! I completely missed that one… Well, I have some changes to make on
my own code then! :-D Thanks for pointing that out!

Pierre


> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Nouveau] [PATCH mesa 3/3] nouveau: Add support for clover / OpenCL kernel input parameters

2016-03-10 Thread Pierre Moreau
On 04:27 PM - Mar 10 2016, Samuel Pitoiset wrote:
> 
> 
> On 03/10/2016 04:23 PM, Ilia Mirkin wrote:
> >On Thu, Mar 10, 2016 at 10:14 AM, Hans de Goede  wrote:
> >>Add support for clover / OpenCL kernel input parameters.
> >>
> >>Signed-off-by: Hans de Goede 
> >>---
> >>  .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 18 
> >> +++---
> >>  1 file changed, 15 insertions(+), 3 deletions(-)
> >>
> >>diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> >>b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >>index a8258af..de0c72b 100644
> >>--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >>+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >>@@ -1523,9 +1523,21 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int 
> >>idx, int c, uint32_t address)
> >>
> >> sym->reg.fileIndex = fileIdx;
> >>
> >>-   if (tgsiFile == TGSI_FILE_MEMORY &&
> >>-   code->memoryFiles[fileIdx].mem_type == TGSI_MEMORY_TYPE_SHARED)
> >>-  sym->setFile(FILE_MEMORY_SHARED);
> >>+   if (tgsiFile == TGSI_FILE_MEMORY) {
> >>+  switch (code->memoryFiles[fileIdx].mem_type) {
> >>+  case TGSI_MEMORY_TYPE_SHARED:
> >>+ sym->setFile(FILE_MEMORY_SHARED);

You might want to increment the address by at least
`info->prop.cp.inputOffset`, and if inputs still end up in shared on Tesla,
then increment further by the input size. This input offset of 0x10 (or is it
0x20?) is due to the card sticking the size of a block and of the grid inside
`s[0x0..0x10]` (or maybe Nouveau is doing that, but I doubt it.). So even if
the user inputs end up somewhere else in memory, you most likely still don't
want to overwrite the grid information. This should be necessary only for Tesla
cards.

> >>+ break;
> >>+  case TGSI_MEMORY_TYPE_INPUT:
> >>+ assert(prog->getType() == Program::TYPE_COMPUTE);
> >>+ assert(idx == -1);
> >>+ sym->setFile(FILE_SHADER_INPUT);
> >>+ address += info->prop.cp.inputOffset;
> >
> >What's the idea here? i.e. what is the inputOffset, how is it set, and why?
> 
> I don't get the idea too, btw.
> 
> But prop.cp.inputOffset is only defined for compute on Kepler. It's the
> offset of input parameters in the screen->parm BO but as you already know,
> it is going to be removed because the idea is to use screen->uniform_bo
> instead. I'll do this change *after* the compute shaders support on Kepler.

If I understand correctly, the goal is to have user inputs in a
`screen->uniform_bo`, and so for all generations?

Pierre


> 
> >
> >   -ilia
> >
> >>+ break;
> >>+  default:
> >>+ assert(0); /* TODO: Add support for global and local memory */
> >>+  }
> >>+   }
> >>
> >> if (idx >= 0) {
> >>if (sym->reg.file == FILE_SHADER_INPUT)
> >>--
> >>2.7.2
> >>
> 
> -- 
> -Samuel
> ___
> Nouveau mailing list
> nouv...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] nvc0: avoid using magic numbers for the uniform_bo offsets

2016-03-18 Thread Pierre Moreau
CLIP_PLANES * 4 * 4)
> +/* 8 sets of 32-bits integer pairs sample offsets */
> +#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */
> +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
> +/* draw parameters (index bais, base instance, drawid) */
> +#define NVC0_CB_AUX_DRAW_INFO   0x180 /* VP */

What is the status of this one? Is the region from 0x180 and after considered
as the `AUX_DRAW_INFO`, and has a subpart named as `AUX_SAMPLE_INFO`, or should
one of those be at 0x180 and the other at 0x1c0, both with a size of 0x40?

Apart from these nitpicks, this is
Acked-by: Pierre Moreau <pierre.mor...@free.fr>

Pierre

> +/* 32 user buffers, at 4 32-bits integers each */
> +#define NVC0_CB_AUX_BUF_INFO(i) 0x200 + (i) * 4 * 4
> +#define NVC0_CB_AUX_BUF_SIZE(NVC0_MAX_BUFFERS * 4 * 4)
> +/* 4 32-bits floats for the vertex runout, put at the end */
> +#define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + NVC0_CB_AUX_SIZE
>  
>  struct nvc0_blitctx;
>  
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 48e3475..b7c6faf 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -535,8 +535,8 @@ nvc0_program_translate(struct nvc0_program *prog, 
> uint16_t chipset,
>  
> info->io.genUserClip = prog->vp.num_ucps;
> info->io.auxCBSlot = 15;
> -   info->io.ucpBase = 256;
> -   info->io.drawInfoBase = 256 + 128;
> +   info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
> +   info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
>  
> if (prog->type == PIPE_SHADER_COMPUTE) {
>if (chipset >= NVISA_GK104_CHIPSET) {
> @@ -545,17 +545,17 @@ nvc0_program_translate(struct nvc0_program *prog, 
> uint16_t chipset,
>   info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
>   info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
>} else {
> - info->io.suInfoBase = 512;
> + info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>}
>info->io.msInfoCBSlot = 0;
>info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
> } else {
>if (chipset >= NVISA_GK104_CHIPSET) {
> - info->io.texBindBase = 0x20;
> + info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
>   info->io.suInfoBase = 0; /* TODO */
>}
> -  info->io.sampleInfoBase = 256 + 128;
> -  info->io.suInfoBase = 512;
> +  info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
> +  info->io.suInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>info->io.msInfoCBSlot = 15;
>info->io.msInfoBase = 0; /* TODO */
> }
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index d316235..741b5ce 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -922,8 +922,8 @@ nvc0_screen_create(struct nouveau_device *dev)
> for (i = 0; i < 5; ++i) {
>BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>PUSH_DATA (push, 1024);
> -  PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
> -  PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10));
> +  PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
> +  PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(i));
>BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
>PUSH_DATA (push, (15 << 4) | 1);
>if (screen->eng3d->oclass < NVE4_3D_CLASS) {
> @@ -937,8 +937,8 @@ nvc0_screen_create(struct nouveau_device *dev)
> /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
> BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> PUSH_DATA (push, 256);
> -   PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> -   PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> +   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
> +   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_RUNOUT_INFO);
> BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
> PUSH_DATA (push, 0);
> PUSH_DATAf(push, 0.0f);
> @@ -946,8 +946,8 @@ nvc0_screen_create(struct nouveau_device *dev)
> PUSH_DATAf(push, 0.0f);
> PUSH_DATAf(push, 0.0f);
> BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
> -   PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> -   PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10));
> +   PUSH_DATAh(push, 

[Mesa-dev] [PATCH] nv50/ra: `isinf()` is in namespace `std` since C++11

2016-03-18 Thread Pierre Moreau
This fixes a compile error while building Nouveau with C++11 enabled (and
glibc >= 2.23). This happens if SWR is enabled, as it forces C++11.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 500ab89..1b595ae 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1327,7 +1327,11 @@ GCRA::simplify()
bestScore = score;
 }
  }
+#if __cplusplus >= 201103L
+ if (std::isinf(bestScore)) {
+#else
  if (isinf(bestScore)) {
+#endif
 ERROR("no viable spill candidates left\n");
 break;
  }
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50, nvc0: Set only NEW_CP_GLOBALS upon binding

2016-03-13 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/nv50/nv50_state.c | 2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index c73e3ba..b9efb3f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -1246,7 +1246,7 @@ nv50_set_global_bindings(struct pipe_context *pipe,
 
nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_GLOBAL);
 
-   nv50->dirty_cp = NV50_NEW_CP_GLOBALS;
+   nv50->dirty_cp |= NV50_NEW_CP_GLOBALS;
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index c279093..36e3546 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1343,7 +1343,7 @@ nvc0_set_global_bindings(struct pipe_context *pipe,
 
nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
 
-   nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS;
+   nvc0->dirty_cp |= NVC0_NEW_CP_GLOBALS;
 }
 
 void
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50: Mark compute states as dirty on context switch

2016-03-13 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/nv50/nv50_state_validate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
index 5536978..d06ba4a 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -437,6 +437,7 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)
   ctx_to->state = ctx_to->screen->save_state;
 
ctx_to->dirty = ~0;
+   ctx_to->dirty_cp = ~0;
ctx_to->viewports_dirty = ~0;
ctx_to->scissors_dirty = ~0;
 
-- 
2.7.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nouveau: Fix clang reserved-user-defined-literal error.

2016-03-09 Thread Pierre Moreau
I did hit that issue as well, but I have C++11 forced on my SPIR-V branch.

I guess adding the whitespace will still result in code that works with older
C++ version, so the fix can still be accepted even if we do not plan to switch
to C++11 by default.

Pierre


On 11:16 AM - Mar 09 2016, Samuel Pitoiset wrote:
> Nouveau doesn't use c++11 except the codegen part.
> How do you hit that issue? Pretty sure that you forced c++11, right?
> 
> I can't reproduce that compilation error with clang 3.9 btw.
> 
> On 03/09/2016 09:57 AM, Vinson Lee wrote:
> >   CXX  codegen/nv50_ir.lo
> >In file included from codegen/nv50_ir.cpp:28:
> >./nouveau_debug.h:19:30: error: invalid suffix on literal; C++11 requires a 
> >space between literal and identifier
> >   [-Wreserved-user-defined-literal]
> >fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args)
> >  ^
> >
> >Signed-off-by: Vinson Lee 
> >---
> >  src/gallium/drivers/nouveau/nouveau_debug.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> >diff --git a/src/gallium/drivers/nouveau/nouveau_debug.h 
> >b/src/gallium/drivers/nouveau/nouveau_debug.h
> >index d17df81..546a4ad 100644
> >--- a/src/gallium/drivers/nouveau/nouveau_debug.h
> >+++ b/src/gallium/drivers/nouveau/nouveau_debug.h
> >@@ -16,7 +16,7 @@
> >  #define NOUVEAU_DEBUG 0
> >
> >  #define NOUVEAU_ERR(fmt, args...) \
> >-   fprintf(stderr, "%s:%d - "fmt, __FUNCTION__, __LINE__, ##args)
> >+   fprintf(stderr, "%s:%d - " fmt, __FUNCTION__, __LINE__, ##args)
> >
> >  #define NOUVEAU_DBG(ch, args...)   \
> > if ((NOUVEAU_DEBUG) & (NOUVEAU_DEBUG_##ch))\
> >
> 
> -- 
> -Samuel
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/11] nv50/ir: Check for valid insn instead of def size

2016-03-13 Thread Pierre Moreau
On Tesla cards, the first register $r0 contains the thread id; later
generations use a specialised register for it. In order to prevent the register
from being given to anyone, and thus lose the thread id information, an lvalue
is created to represent $r0 and is passed as an argument to the `main`
function.

However, since the inputs and outputs of a function are stored as value
definitions, a definition is added onto the previously created lvalue without
it being associated to an instruction. Therefore, checking the number of
definitions of an lvalue do not ensure that it is associated to an instruction.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 62b0aa1..7c319df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
 static bool
 isShortRegVal(LValue *lval)
 {
-   if (lval->defs.size() == 0)
+   if (lval->getInsn() == NULL)
   return false;
for (Value::DefCIterator def = lval->defs.begin();
 def != lval->defs.end(); ++def)
@@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
  nodes[i].init(regs, lval);
  RIG.insert([i]);
 
- if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
+ if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
  prog->getTarget()->getChipset() < 0xc0) {
 Instruction *insn = lval->getInsn();
 if (insn->op == OP_MAD || insn->op == OP_SAD)
-- 
2.7.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/11] nv50/ir: Check for valid insn instead of def size

2016-03-13 Thread Pierre Moreau
Hum… Something went wrong, sorry. This is the same as the previous patch and
not the updated version…

Pierre

On 02:16 PM - Mar 13 2016, Pierre Moreau wrote:
> On Tesla cards, the first register $r0 contains the thread id; later
> generations use a specialised register for it. In order to prevent the 
> register
> from being given to anyone, and thus lose the thread id information, an lvalue
> is created to represent $r0 and is passed as an argument to the `main`
> function.
> 
> However, since the inputs and outputs of a function are stored as value
> definitions, a definition is added onto the previously created lvalue without
> it being associated to an instruction. Therefore, checking the number of
> definitions of an lvalue do not ensure that it is associated to an 
> instruction.
> 
> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 62b0aa1..7c319df 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
>  static bool
>  isShortRegVal(LValue *lval)
>  {
> -   if (lval->defs.size() == 0)
> +   if (lval->getInsn() == NULL)
>return false;
> for (Value::DefCIterator def = lval->defs.begin();
>  def != lval->defs.end(); ++def)
> @@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
>   nodes[i].init(regs, lval);
>   RIG.insert([i]);
>  
> - if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
> + if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
>   prog->getTarget()->getChipset() < 0xc0) {
>  Instruction *insn = lval->getInsn();
>  if (insn->op == OP_MAD || insn->op == OP_SAD)
> -- 
> 2.7.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] nv50/ir: Check for valid insn instead of def size

2016-03-13 Thread Pierre Moreau
Functions arguments get a definition from the function itself, a definition
which is therefore not linked to any instruction. If a value ends up having a
definition but no linked instruction, the register allocation pass can skip
that value since it is not being used.

This fixes a null pointer dereference during the register allocation pass, if
a function had unused arguments.

v2: Rewrite commit message based on Ilia Mirkin's comment

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 62b0aa1..7c319df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
 static bool
 isShortRegVal(LValue *lval)
 {
-   if (lval->defs.size() == 0)
+   if (lval->getInsn() == NULL)
   return false;
for (Value::DefCIterator def = lval->defs.begin();
 def != lval->defs.end(); ++def)
@@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
  nodes[i].init(regs, lval);
  RIG.insert([i]);
 
- if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
+ if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
  prog->getTarget()->getChipset() < 0xc0) {
 Instruction *insn = lval->getInsn();
 if (insn->op == OP_MAD || insn->op == OP_SAD)
-- 
2.7.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] math: Import isinf and others to global namespace

2016-04-13 Thread Pierre Moreau
:-( I'll have another look this evening. Which version of glibc did it failed 
with?

(I saw your reply to the other patch, I'll dig deeper this evening.)

Thanks!
Pierre

> On 13 Apr 2016, at 08:22, Jose Fonseca <jfons...@vmware.com> wrote:
> 
>> On 01/04/16 13:18, Pierre Moreau wrote:
>>> On 01:11 PM - Apr 01 2016, Jose Fonseca wrote:
>>>> On 31/03/16 23:08, Pierre Moreau wrote:
>>>> Starting from C++11, several math functions, like isinf, moved into the std
>>>> namespace. Since cmath undefines those functions before redefining them 
>>>> inside
>>>> the namespace, and glibc 2.23 defines the C variants as macros, the C 
>>>> variants
>>>> in global namespace are not accessible any longer.
>>>> 
>>>> v2: Move the fix outside of Nouveau, as suggested by Jose Fonseca, since 
>>>> anyone
>>>> might need it when GCC switches to C++14 by default with GCC 6.0.
>>>> 
>>>> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
>>>> ---
>>>>  include/cpp11_math.h| 61 
>>>> +
>>>>  src/gallium/auxiliary/util/u_math.h |  3 ++
>>>>  2 files changed, 64 insertions(+)
>>>>  create mode 100644 include/cpp11_math.h
>>>> 
>>>> diff --git a/include/cpp11_math.h b/include/cpp11_math.h
>>>> new file mode 100644
>>>> index 000..1f4aa3c
>>>> --- /dev/null
>>>> +++ b/include/cpp11_math.h
>>> 
>>> I'm not sure a new header is necessary for this.
>>> 
>>> What this is doing is making C++11 math functions "appear" like C99 ones.
>>> It's not making things matching C++11 standard.
>>> 
>>> So IMO the right place for this is the end of c99_math.h
>> 
>> I was planning to put it in the c99 header first, but as I was including some
>> C++ header, I felt that having a separate header which would only be included
>> by C++ files would be better. I could have the `#ifdef __cplusplus` around 
>> the
>> whole block inside the c99_math.h instead, if you prefer it that way.
>> 
>> Thanks,
>> Pierre
> 
> I moved this to c99_math.h and was about to commit, but this causes build 
> failures with gcc 5.0:
> 
>  $ scons
>  [...]
> In file included from src/gallium/auxiliary/util/u_math.h:44:0,
> from src/mesa/main/macros.h:35,
> from src/compiler/glsl/lower_vec_index_to_swizzle.cpp:36:
> include/c99_math.h:198:12: error: ‘constexpr bool std::isinf(double)’ 
> conflicts with a previous declaration
> using std::isinf;
>^
> In file included from /usr/include/features.h:364:0,
> from /usr/include/stdio.h:27,
> from src/compiler/glsl/ir.h:29,
> from src/compiler/glsl/lower_vec_index_to_swizzle.cpp:32:
> /usr/include/x86_64-linux-gnu/bits/mathcalls.h:201:1: note: previous 
> declaration ‘int isinf(double)’
> __MATHDECL_1 (int,isinf,, (_Mdouble_ __value)) __attribute__ ((__const__));
> ^
> 
> 
> 
> 
> It looks like newer GLIBC already does this somehow..  I'm not sure what's 
> the exact trigger.
> 
> 
> I wonder if the problem can be avoided by including math.h at the top:
> 
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index 500ab89..e61f59e7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -20,6 +20,7 @@
>  * OTHER DEALINGS IN THE SOFTWARE.
>  */
> 
> +#include 
> #include "codegen/nv50_ir.h"
> #include "codegen/nv50_ir_target.h"
> 
> 
> 
> 
> 
> 
> Jose
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] nv50/ir: Check for valid insn instead of def size

2016-03-19 Thread Pierre Moreau
Functions arguments get a definition from the function itself, a definition
which is therefore not linked to any instruction. If a value ends up having a
definition but no linked instruction, the register allocation pass doesn't
need to consider that value.

This fixes a null pointer dereference during the register allocation pass, if
a function had unused arguments.

v2: Rewrite commit message based on Ilia Mirkin's comment
v3: Rewrite an incorrect statement in the commit message that was pointed out
by Ilia Mirkin

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index d877c25..500ab89 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
 static bool
 isShortRegVal(LValue *lval)
 {
-   if (lval->defs.size() == 0)
+   if (lval->getInsn() == NULL)
   return false;
for (Value::DefCIterator def = lval->defs.begin();
 def != lval->defs.end(); ++def)
@@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
  nodes[i].init(regs, lval);
  RIG.insert([i]);
 
- if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
+ if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
  prog->getTarget()->getChipset() < 0xc0) {
 Instruction *insn = lval->getInsn();
 if (insn->op == OP_MAD || insn->op == OP_SAD)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50,nvc0: Fix invalid constant.

2016-03-19 Thread Pierre Moreau
On 06:34 PM - Mar 18 2016, Vinson Lee wrote:
> Fix clang build error.
> 
>   CXX  codegen/nv50_ir_lowering_nvc0.lo
> codegen/nv50_ir_lowering_nvc0.cpp:1783:42: error: invalid suffix 'd' on 
> floating constant
>   Value *zero = bld.loadImm(NULL, 0.0d);
>  ^
> 
> Fixes: c1e4a6bfbf01 ("nv50,nvc0: handle SQRT lowering inside the driver")
> Signed-off-by: Vinson Lee 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index d0936d8..01364b3 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1780,7 +1780,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
>  {
> if (i->dType == TYPE_F64) {
>Value *pred = bld.getSSA(1, FILE_PREDICATE);
> -  Value *zero = bld.loadImm(NULL, 0.0d);
> +  Value *zero = bld.loadImm(NULL, 0);

Shouldn't it rather be: `Value *zero = bld.loadImm(NULL, 0.0);` as you want a
double, not an int?

>Value *dst = bld.getSSA(8);
>bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
>bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
> -- 
> 2.7.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] nv50, nvc0: replace resInfoCBSlot by auxCBSlot

2016-03-19 Thread Pierre Moreau
Acked-by: Pierre Moreau <pierre.mor...@free.fr>

On 09:55 PM - Mar 15 2016, Samuel Pitoiset wrote:
> Having two different variables for the driver constant buffer slot
> is confusing and really useless.
> 
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h |  3 +--
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp|  4 ++--
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp| 12 
> ++--
>  src/gallium/drivers/nouveau/nouveau_compiler.c   |  2 --
>  src/gallium/drivers/nouveau/nv50/nv50_program.c  |  1 -
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c  |  4 +---
>  6 files changed, 10 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 9f7d257..21523a2 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -160,7 +160,7 @@ struct nv50_ir_prog_info
>uint8_t clipDistances; /* number of clip distance outputs */
>uint8_t cullDistances; /* number of cull distance outputs */
>int8_t genUserClip;/* request user clip planes for ClipVertex 
> */
> -  uint8_t auxCBSlot; /* constant buffer index of UCP/draw data */
> +  uint8_t auxCBSlot; /* driver constant buffer slot */
>uint16_t ucpBase;  /* base address for UCPs */
>uint16_t drawInfoBase; /* base address for draw parameters */
>uint8_t pointSize; /* output index for PointSize */
> @@ -175,7 +175,6 @@ struct nv50_ir_prog_info
>uint8_t globalAccess;  /* 1 for read, 2 for wr, 3 for rw */
>bool fp64; /* program uses fp64 math */
>bool nv50styleSurfaces;/* generate gX[] access for raw buffers */
> -  uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info 
> */
>uint16_t texBindBase;  /* base address for tex handles (nve4) */
>uint16_t suInfoBase;   /* base address for surface info (nve4) */
>uint16_t sampleInfoBase;   /* base address for sample positions */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> index 12c5f69..5a46ede 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> @@ -682,7 +682,7 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, 
> Value **ms,
> Value **ms_x, Value **ms_y) {
> // This loads the texture-indexed ms setting from the constant buffer
> Value *tmp = new_LValue(func, FILE_GPR);
> -   uint8_t b = prog->driver->io.resInfoCBSlot;
> +   uint8_t b = prog->driver->io.auxCBSlot;
> off += prog->driver->io.suInfoBase;
> if (prog->getType() > Program::TYPE_VERTEX)
>off += 16 * 2 * 4;
> @@ -1174,7 +1174,7 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i)
>bld.mkLoad(TYPE_F32,
>   def,
>   bld.mkSymbol(
> -   FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot,
> +   FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx),
>   off);
>break;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index d0936d8..d879339 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -600,7 +600,7 @@ NVC0LoweringPass::visit(BasicBlock *bb)
>  inline Value *
>  NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot)
>  {
> -   uint8_t b = prog->driver->io.resInfoCBSlot;
> +   uint8_t b = prog->driver->io.auxCBSlot;
> uint32_t off = prog->driver->io.texBindBase + slot * 4;
> return bld.
>mkLoadv(TYPE_U32, bld.mkSymbol(FILE_MEMORY_CONST, b, TYPE_U32, off), 
> ptr);
> @@ -1204,7 +1204,7 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool 
> needCctl)
>  inline Value *
>  NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off)
>  {
> -   uint8_t b = prog->driver->io.resInfoCBSlot;
> +   uint8_t b = prog->driver->io.auxCBSlot;
> off += prog->driver->io.suInfoBase;
> return bld.
>mkLoadv(TYPE_U32, bld.mkSymbol(F

Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-03-19 Thread Pierre Moreau
On 06:06 PM - Mar 19 2016, Ilia Mirkin wrote:
> Where are these coming from? Could you perhaps not generate them in
> the first place?

Those are coming from the generated SPIR-V, of the following kernel for
example:

__kernel void global_id(__global int * out)
{
  unsigned id = get_global_id(0);
  out[id] = id;
}

But I don't see any reason why there should be cvt generated in this case. I'll
have to investigate the SPIR-V generation.

However, you could have some `long bar; char foo = convert_char_sat(bar);` in
the OpenCL kernel.

> 
> On Sat, Mar 19, 2016 at 5:56 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the 
> > GPU
> > unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
> > values, and use `cvt` to convert between the original target (resp. source)
> > and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
> >
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 59 
> > ++
> >  .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  1 +
> >  2 files changed, 60 insertions(+)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > index 2719f2c..c419a68 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> > @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
> > return true;
> >  }
> >
> > +bool
> > +NVC0LoweringPass::handleCVT(Instruction *i)
> > +{
> > +   if (isFloatType(i->dType) || isFloatType(i->sType) ||
> > + isSignedIntType(i->dType) xor isSignedIntType(i->sType))
> > +  return false;
> > +
> > +   if (typeSizeof(i->sType) == 8) {
> > +  Value *toSplit = i->getSrc(0);
> > +  if (i->saturate) {
> > + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
> > + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
> > + if (isSignedType(i->sType)) {
> > +minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
> > +maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
> > + }
> > + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), 
> > toSplit,
> > +minValue);
> > + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
> > +  maxValue);
> > +  }
> > +
> > +  Value *value32[2] = { bld.getSSA(), bld.getSSA() };
> > +  bld.mkSplit(value32, 4, toSplit);
> > +  if (typeSizeof(i->dType) == 4) {
> > + bld.mkMov(i->getDef(0), value32[0], i->dType);
> > + delete_Instruction(prog, i);
> > + return true;
> > +  }
> > +
> > +  i->setSrc(0, bld.getSSA());
> > +  i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> > +  bld.mkMov(i->getSrc(0), value32[0], i->sType);
> > +   } else if (typeSizeof(i->dType) == 8) {
> > +  bld.setPosition(i, true);
> > +  Value *res = i->getDef(0);
> > +  Value *high32 = bld.loadImm(bld.getSSA(),
> > +  isSignedType(i->sType) ? UINT32_MAX : 
> > 0u);
> > +  Value *low32 = i->getSrc(0);
> > +  DataType resType = i->dType;
> > +
> > +  if (typeSizeof(i->sType) <= 2) {
> > + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> > + low32 = bld.getSSA();
> > + i->setDef(0, low32);
> > +  } else if (typeSizeof(i->sType) == 4) {
> > + delete_Instruction(prog, i);
> > +  }
> > +
> > +  Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32,
> > +   high32);
> > +  bld.mkMov(res, merged64, resType);
> > +   }
> > +
> > +   return true;
> > +}
> > +
> >  // Generate a binary predicate if an instruction is predicated by
> >  // e.g. an f32 value.
> >  void
> > @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
> >checkPredicate(i);
> >
> > switch (i->op) {
> > +   case OP_CVT:
> > +  return handleCVT(i);
> > case OP_TEX:
> > case OP_TXB:
> 

[Mesa-dev] [PATCH] nv50/ir: Split 64-bit MAD and MUL operations

2016-03-19 Thread Pierre Moreau
Two 32-bit MAD or MUL operations are generated in place of the original 64-bit
operation. All operands can either be signed or unsigned, but they have to be
integers.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
index 84ebfdb..0b37fcf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -586,6 +586,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
   srcNr = 2;
   break;
case OP_SELP: srcNr = 3; break;
+   case OP_MAD: /* fallthrough */
+   case OP_MUL:
+  if (!carry || isFloatType(i->dType) || isFloatType(i->sType))
+ return NULL;
+  srcNr = (i->op == OP_MAD) ? 3 : 2;
+  break;
default:
   // TODO when needed
   return NULL;
@@ -600,6 +606,9 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
 
hi->getDef(0)->reg.data.id++;
 
+   if (i->op == OP_MAD || i->op == OP_MUL)
+  hi->subOp = NV50_IR_SUBOP_MUL_HIGH;
+
for (int s = 0; s < srcNr; ++s) {
   if (lo->getSrc(s)->reg.size < 8) {
  if (s == 2)
@@ -629,7 +638,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
  }
   }
}
-   if (srcNr == 2) {
+   if (srcNr >= 2) {
   lo->setFlagsDef(1, carry);
   hi->setFlagsSrc(hi->srcCount(), carry);
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-03-19 Thread Pierre Moreau
Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU
unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
values, and use `cvt` to convert between the original target (resp. source)
and 32-bit value. This happens to be the behaviour of NVIDIA's driver.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 59 ++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  1 +
 2 files changed, 60 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 2719f2c..c419a68 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
return true;
 }
 
+bool
+NVC0LoweringPass::handleCVT(Instruction *i)
+{
+   if (isFloatType(i->dType) || isFloatType(i->sType) ||
+ isSignedIntType(i->dType) xor isSignedIntType(i->sType))
+  return false;
+
+   if (typeSizeof(i->sType) == 8) {
+  Value *toSplit = i->getSrc(0);
+  if (i->saturate) {
+ Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
+ Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
+ if (isSignedType(i->sType)) {
+minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
+maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
+ }
+ Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), toSplit,
+minValue);
+ toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
+  maxValue);
+  }
+
+  Value *value32[2] = { bld.getSSA(), bld.getSSA() };
+  bld.mkSplit(value32, 4, toSplit);
+  if (typeSizeof(i->dType) == 4) {
+ bld.mkMov(i->getDef(0), value32[0], i->dType);
+ delete_Instruction(prog, i);
+ return true;
+  }
+
+  i->setSrc(0, bld.getSSA());
+  i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
+  bld.mkMov(i->getSrc(0), value32[0], i->sType);
+   } else if (typeSizeof(i->dType) == 8) {
+  bld.setPosition(i, true);
+  Value *res = i->getDef(0);
+  Value *high32 = bld.loadImm(bld.getSSA(),
+  isSignedType(i->sType) ? UINT32_MAX : 0u);
+  Value *low32 = i->getSrc(0);
+  DataType resType = i->dType;
+
+  if (typeSizeof(i->sType) <= 2) {
+ i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
+ low32 = bld.getSSA();
+ i->setDef(0, low32);
+  } else if (typeSizeof(i->sType) == 4) {
+ delete_Instruction(prog, i);
+  }
+
+  Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32,
+   high32);
+  bld.mkMov(res, merged64, resType);
+   }
+
+   return true;
+}
+
 // Generate a binary predicate if an instruction is predicated by
 // e.g. an f32 value.
 void
@@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
   checkPredicate(i);
 
switch (i->op) {
+   case OP_CVT:
+  return handleCVT(i);
case OP_TEX:
case OP_TXB:
case OP_TXL:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 6eb8aff..9fc24d9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -96,6 +96,7 @@ protected:
bool handleMOD(Instruction *);
bool handleSQRT(Instruction *);
bool handlePOW(Instruction *);
+   bool handleCVT(Instruction *);
bool handleTEX(TexInstruction *);
bool handleTXD(TexInstruction *);
bool handleTXQ(TexInstruction *);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Split 64-bit MAD and MUL operations

2016-03-19 Thread Pierre Moreau
On 06:24 PM - Mar 19 2016, Ilia Mirkin wrote:
> On Sat, Mar 19, 2016 at 6:15 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > On 06:05 PM - Mar 19 2016, Ilia Mirkin wrote:
> >> Not 100% sure, but pretty sure this is wrong. Can you provide the
> >> generated sequence of instructions in response to a 64-bit mul and
> >> mad?
> >
> > For the given mul:
> >
> >   mul u64 %r55d %r42d %r52d
> >
> > the following is generated:
> >
> >mul u32 { $r0 $c0 } $r0 $r2
> >mul (SUBOP:1) u32 $r1 $r1 $r3 $c0
> 
> That's not enough though... you need 4 mul's... if you have numbers
> (ab) * (cd) where a/b are the high/low of the 64-bit int, that results
> in
> 
> b*d + (a*d + b * d) * (1 << 32)
> 
> See expandIntegerMultiply in there -- it's meant for splitting a
> 32-bit multiply into 16x16 muls (which is what nv50 can do), but the
> same principle applies to splitting 64x64 into 32x32's.

Oops… that's definitely true. I'll fix that.

Pierre

> 
>   -ilia
> 
> >
> >
> > Whereas for the mad, I need to first find how to tell Nouveau to stop 
> > splitting
> > each of my mads to mul + add…
> >
> >>
> >> On Sat, Mar 19, 2016 at 5:56 PM, Pierre Moreau <pierre.mor...@free.fr> 
> >> wrote:
> >> > Two 32-bit MAD or MUL operations are generated in place of the original 
> >> > 64-bit
> >> > operation. All operands can either be signed or unsigned, but they have 
> >> > to be
> >> > integers.
> >> >
> >> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> >> > ---
> >> >  src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 11 
> >> > ++-
> >> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >> >
> >> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp 
> >> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> >> > index 84ebfdb..0b37fcf 100644
> >> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> >> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> >> > @@ -586,6 +586,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, 
> >> > Instruction *i,
> >> >srcNr = 2;
> >> >break;
> >> > case OP_SELP: srcNr = 3; break;
> >> > +   case OP_MAD: /* fallthrough */
> >> > +   case OP_MUL:
> >> > +  if (!carry || isFloatType(i->dType) || isFloatType(i->sType))
> >> > + return NULL;
> >> > +  srcNr = (i->op == OP_MAD) ? 3 : 2;
> >> > +  break;
> >> > default:
> >> >// TODO when needed
> >> >return NULL;
> >> > @@ -600,6 +606,9 @@ BuildUtil::split64BitOpPostRA(Function *fn, 
> >> > Instruction *i,
> >> >
> >> > hi->getDef(0)->reg.data.id++;
> >> >
> >> > +   if (i->op == OP_MAD || i->op == OP_MUL)
> >> > +  hi->subOp = NV50_IR_SUBOP_MUL_HIGH;
> >> > +
> >> > for (int s = 0; s < srcNr; ++s) {
> >> >if (lo->getSrc(s)->reg.size < 8) {
> >> >   if (s == 2)
> >> > @@ -629,7 +638,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, 
> >> > Instruction *i,
> >> >   }
> >> >}
> >> > }
> >> > -   if (srcNr == 2) {
> >> > +   if (srcNr >= 2) {
> >> >lo->setFlagsDef(1, carry);
> >> >hi->setFlagsSrc(hi->srcCount(), carry);
> >> > }
> >> > --
> >> > 2.7.4
> >> >
> >> > ___
> >> > mesa-dev mailing list
> >> > mesa-dev@lists.freedesktop.org
> >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-03-19 Thread Pierre Moreau
On 06:26 PM - Mar 19 2016, Ilia Mirkin wrote:
> On Sat, Mar 19, 2016 at 6:26 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > However, you could have some `long bar; char foo = convert_char_sat(bar);` 
> > in
> > the OpenCL kernel.
> 
> Sure, but the SPIR-V -> nv50/ir converter could be smarter about when
> it generates the converts, no?

It should be possible, but then I'm unsure what ends up in the SPIR-V ->
nv50/ir converter and what ends up in the nv50/ir backend. Should I also do
constant folding in the converter? I was assuming the backend would take care
of the optimisations, removing useless converts, but maybe my assumptions were
wrong and I have to take care of more things than just translating from SPIR-V
to nv50/ir?

Pierre

> 
>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-03-19 Thread Pierre Moreau
On 06:41 PM - Mar 19 2016, Ilia Mirkin wrote:
> On Sat, Mar 19, 2016 at 6:36 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > On 06:26 PM - Mar 19 2016, Ilia Mirkin wrote:
> >> On Sat, Mar 19, 2016 at 6:26 PM, Pierre Moreau <pierre.mor...@free.fr> 
> >> wrote:
> >> > However, you could have some `long bar; char foo = 
> >> > convert_char_sat(bar);` in
> >> > the OpenCL kernel.
> >>
> >> Sure, but the SPIR-V -> nv50/ir converter could be smarter about when
> >> it generates the converts, no?
> >
> > It should be possible, but then I'm unsure what ends up in the SPIR-V ->
> > nv50/ir converter and what ends up in the nv50/ir backend. Should I also do
> > constant folding in the converter? I was assuming the backend would take 
> > care
> > of the optimisations, removing useless converts, but maybe my assumptions 
> > were
> > wrong and I have to take care of more things than just translating from 
> > SPIR-V
> > to nv50/ir?
> 
> Well, the nv50 ir is not a well-specified IR. So it's kind of up to us
> what to do. However if the hw hates converts with src/dst types like
> that, I'm perfectly happy to decree that such converts shall never
> make it into the IR. That said, if you feel strongly about it, we can
> leave it as a fix-up pass. What about nv50, need the same logic there
> too right?

If such converts are not permitted in nv50/ir, then that means each converter
to nv50/ir will need to do the fixup themselves, resulting in, most likely,
duplicate code between them, as they will handle it more or less the same way.
(Well, there aren't many converters to nv50/ir, so not really a big issue
here.) Whereas if we have it as a fixup pass, we need the code only once, and
have it handled for all existing (and future) converters.
But you have way more experience with nv50/ir and compilers than I have, so,
your call.

Most likely, but I haven't tried it. I should probably have this code in an
earlier pass then, that is not family dependent.

Pierre

> 
>   -ilia
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Split 64-bit MAD and MUL operations

2016-03-19 Thread Pierre Moreau
On 06:05 PM - Mar 19 2016, Ilia Mirkin wrote:
> Not 100% sure, but pretty sure this is wrong. Can you provide the
> generated sequence of instructions in response to a 64-bit mul and
> mad?

For the given mul:

  mov u64 %r42d 0x0004
  mov u64 %r52d 0x0002
  mul u64 %r55d %r42d %r52d

the following is generated:

   mov u32 $r0 0x0004
   mov u32 $r1 0x
   mov u32 $r2 0x0002
   mov u32 $r3 0x
   mul u32 { $r0 $c0 } $r0 $r2
   mul (SUBOP:1) u32 $r1 $r1 $r3 $c0


Whereas for the mad, I need to first find how to tell Nouveau to stop splitting
each of my mads to mul + add…

> 
> On Sat, Mar 19, 2016 at 5:56 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > Two 32-bit MAD or MUL operations are generated in place of the original 
> > 64-bit
> > operation. All operands can either be signed or unsigned, but they have to 
> > be
> > integers.
> >
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp | 11 ++-
> >  1 file changed, 10 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> > index 84ebfdb..0b37fcf 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> > @@ -586,6 +586,12 @@ BuildUtil::split64BitOpPostRA(Function *fn, 
> > Instruction *i,
> >srcNr = 2;
> >break;
> > case OP_SELP: srcNr = 3; break;
> > +   case OP_MAD: /* fallthrough */
> > +   case OP_MUL:
> > +  if (!carry || isFloatType(i->dType) || isFloatType(i->sType))
> > + return NULL;
> > +  srcNr = (i->op == OP_MAD) ? 3 : 2;
> > +  break;
> > default:
> >// TODO when needed
> >return NULL;
> > @@ -600,6 +606,9 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction 
> > *i,
> >
> > hi->getDef(0)->reg.data.id++;
> >
> > +   if (i->op == OP_MAD || i->op == OP_MUL)
> > +  hi->subOp = NV50_IR_SUBOP_MUL_HIGH;
> > +
> > for (int s = 0; s < srcNr; ++s) {
> >if (lo->getSrc(s)->reg.size < 8) {
> >   if (s == 2)
> > @@ -629,7 +638,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction 
> > *i,
> >   }
> >}
> > }
> > -   if (srcNr == 2) {
> > +   if (srcNr >= 2) {
> >lo->setFlagsDef(1, carry);
> >hi->setFlagsSrc(hi->srcCount(), carry);
> > }
> > --
> > 2.7.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] nv50/ir: make use of auxCBSlot instead of magic numbers

2016-03-19 Thread Pierre Moreau
Hello,

On 10:38 AM - Mar 16 2016, Hans de Goede wrote:
> Hi,
> 
> On 15-03-16 21:55, Samuel Pitoiset wrote:
> >This avoids using magic numbers for the driver constbuf slot which
> >is always 15 except for compute shaders on gk104+ where the slot 0
> >is used.
> >
> >For gk104+, some special compute-related values like the thread
> >index are uploaded to screen->parm which is currently bound on c0.
> >
> >Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> >---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 3 ++-
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 3 ++-
> >  2 files changed, 4 insertions(+), 2 deletions(-)
> >
> >diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> >b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >index d284446..4bebfdc 100644
> >--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> >@@ -2178,7 +2178,8 @@ Converter::getResourceBase(const int r)
> >
> > switch (r) {
> > case TGSI_RESOURCE_GLOBAL:
> >-  sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);
> >+  sym = new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL,
> >+   info->io.auxCBSlot);
> 
> Note this is dead code, see patch 6/6 of the patch-set I just send.
> 
> Also do we need to specify a slot here at all? The new code paths
> to re-enable global mem with clover do not use this and work fine
> in my testing on a gk107.

On Fermi+, the global memory is united, but it is not the case on Tesla. So the
slot remains needed for Tesla. (Tesla will keep hunting you! :-p)

Global and const memory are not completely the same, so I'm somewhat reluctant
to reuse the same variable slot for both of them. But, I'm just giving my
2cents here, so feel free to keep as is.

Regardless of your pick, this is
Acked-by: Pierre Moreau <pierre.mor...@free.fr>

> 
> 
> >break;
> > case TGSI_RESOURCE_LOCAL:
> >assert(prog->getType() == Program::TYPE_COMPUTE);
> >diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> >b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >index d879339..e0af4c0 100644
> >--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >@@ -1698,7 +1698,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
> >}
> >addr += prog->driver->prop.cp.gridInfoBase;
> >bld.mkLoad(TYPE_U32, i->getDef(0),
> >- bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL);
> >+ bld.mkSymbol(FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
> >+  TYPE_U32, addr), NULL);
> >break;
> 
> You're changing functionality here not just replacing a magic number,
> the commit msg does not reflect this. Maybe do this in a separate patch ?

Why is it changing functionality? This code is only run for GK104+, in which
case `prog->driver->io.auxCBSlot == 0`, cf patch 1.

Regards,
Pierre

> 
> Regards,
> 
> Hans
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nvc0/ir: Use double constant in `handleSQRT()`

2016-03-19 Thread Pierre Moreau
Fixes:

commit a100d89d09981d2ebb42a7e4643a48e78db8dfe3
Author: Vinson Lee <v...@freedesktop.org>
Date:   Fri Mar 18 18:28:28 2016 -0700

nv50,nvc0: Fix invalid constant.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 01364b3..b88e351 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1780,7 +1780,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
 {
if (i->dType == TYPE_F64) {
   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-  Value *zero = bld.loadImm(NULL, 0);
+  Value *zero = bld.loadImm(NULL, 0.0);
   Value *dst = bld.getSSA(8);
   bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
   bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] nvc0/ir: Use double constant in `handleSQRT()`

2016-03-19 Thread Pierre Moreau
v2: Use "Fixes: sha1 (subject)" format for the fixes section, as pointed out
by Ilia Mirin

Fixes: a100d89d0998 ("nv50,nvc0: Fix invalid constant.")
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 01364b3..b88e351 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1780,7 +1780,7 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
 {
if (i->dType == TYPE_F64) {
   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-  Value *zero = bld.loadImm(NULL, 0);
+  Value *zero = bld.loadImm(NULL, 0.0);
   Value *dst = bld.getSSA(8);
   bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
   bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/7] nv50: rework compute/3d validation path

2016-03-19 Thread Pierre Moreau
This serie is:
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>
Tested-by: Pierre Moreau <pierre.mor...@free.fr>

Pierre

On 08:54 PM - Mar 15 2016, Samuel Pitoiset wrote:
> From: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> 
> Hi,
> 
> This is loosely based on what I did for nvc0 few weeks ago. I have not tested
> this series because I don't have access to a Tesla card, but this should not
> break anything. By the way, doing almost the same series twice is not so cool
> but... refactoring nv50 and nvc0 drivers seems to be worse. :-)
> 
> Please review,
> Thanks.
> 
> Samuel Pitoiset (7):
>   nv50: rename nv50_context::dirty to nv50_context::dirty_3d
>   nv50: rename NV50_COMPUTE to NV50_CP
>   nv50: rename 3d dirty flags to NV50_NEW_3D_XXX
>   nv50: rename 3d binding points to NV50_BIND_3D_XXX
>   nv50: rework the validation path for 3D
>   nv50: rework nv50_compute_validate_program()
>   nv50: add a new validation path for compute
> 
>  src/gallium/drivers/nouveau/nv50/nv50_compute.c| 145 
> +
>  src/gallium/drivers/nouveau/nv50/nv50_context.c|  34 ++---
>  src/gallium/drivers/nouveau/nv50/nv50_context.h|  77 ++-
>  .../drivers/nouveau/nv50/nv50_query_hw_sm.c|  10 +-
>  .../drivers/nouveau/nv50/nv50_shader_state.c   |  37 --
>  src/gallium/drivers/nouveau/nv50/nv50_state.c  |  54 
>  .../drivers/nouveau/nv50/nv50_state_validate.c | 136 ++-
>  src/gallium/drivers/nouveau/nv50/nv50_surface.c|  40 +++---
>  src/gallium/drivers/nouveau/nv50/nv50_tex.c|   2 +-
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c|  14 +-
>  src/gallium/drivers/nouveau/nv50/nv50_winsys.h |   4 +-
>  11 files changed, 280 insertions(+), 273 deletions(-)
> 
> -- 
> 2.7.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ra: `isinf()` is in namespace `std` since C++11

2016-03-25 Thread Pierre Moreau
I'll resend a patch following Jose's suggestion, probably on Sunday. There are 
just too many things happening before that. 

Pierre

> On 25 Mar 2016, at 00:36, Jose Fonseca <jfons...@vmware.com> wrote:
> 
>> On 24/03/16 22:07, Ilia Mirkin wrote:
>>> On Sat, Mar 19, 2016 at 6:30 PM, Jose Fonseca <jfons...@vmware.com> wrote:
>>>> On 19/03/16 22:25, Ilia Mirkin wrote:
>>>> 
>>>>> On Sat, Mar 19, 2016 at 6:23 PM, Jose Fonseca <jfons...@vmware.com> wrote:
>>>>> 
>>>>>> On 18/03/16 04:00, Ilia Mirkin wrote:
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>> On Mar 17, 2016 8:27 PM, "Matt Turner" <matts...@gmail.com
>>>>>> <mailto:matts...@gmail.com>> wrote:
>>>>>>   >
>>>>>>   > On Thu, Mar 17, 2016 at 5:17 PM, Pierre Moreau
>>>>>> <pierre.mor...@free.fr
>>>>>> <mailto:pierre.mor...@free.fr>> wrote:
>>>>>>   > > This fixes a compile error while building Nouveau with C++11
>>>>>> enabled (and
>>>>>>   > > glibc >= 2.23). This happens if SWR is enabled, as it forces
>>>>>> C++11.
>>>>>>   >
>>>>>>   > That seems bad, right?
>>>>>>   >
>>>>>>   > Enabling OpenSWR should affect how any other drivers are built. Why
>>>>>>   > does this happen?
>>>>>> 
>>>>>> Yeah, the fix here is to fix the build not to add random unrelated
>>>>>> options from one driver when building another.
>>>>> 
>>>>> 
>>>>> 
>>>>> Although I agree in principle, that drivers should not interfere with
>>>>> others
>>>>> build,  C++14 will soon be the default [1].
>>>>> 
>>>>> So, in this particular case, it seems a missed opportunity not to try to
>>>>> fix
>>>>> this generically.
>>>>> 
>>>>> 
>>>>> What about adding to include/c99_math.h something like
>>>>> 
>>>>> #if __cplusplus >= 201103L
>>>>> using std::isinf;
>>>>> #endif
>>>> 
>>>> 
>>>> Why is isinf() becoming unavailable by the way? I don't think anyone's
>>>> given a clear answer on that. At least I haven't heard one.
>>> 
>>> 
>>> It's unavoidable.
>>> 
>>> On C99 isinf is a macro.
>>> 
>>> On C++11 isinf is an function inside std namespace.
>>> 
>>> You can't have a macro inside a C++ namespace -- macros have no namespaces.
>>> 
>>> So, even if you `#include ` instead of `#include `, the
>>> math.h must not `#define isinf` so that C-prepposeccor doesn't expan
>>> `std::isinf` to invalid code.
>> 
>> OK, so then we should drop this logic in whereever we define isinf
>> (iirc there's a definition for some platform that doesn't have it) or
>> in c99_math.h as Matt suggests.
> 
> No, there's no isinf definition anywhere anymore.
> 
> But as I said c99_math.h sounds a good place for it.
> 
> Jose
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Check for valid insn instead of defs size

2016-03-09 Thread Pierre Moreau
On 06:10 PM - Mar 08 2016, Ilia Mirkin wrote:
> Patch is fine, description is wrong (or at least inaccurate).
> 
> The real issue is that function arguments have defs, but no defining
> instruction. As a result, there's nothing to do when allocating
> registers. This has nothing to do with $r0, but it does have something
> to do with the fact that nv50 compute makes use of function arguments
> for compute programs.

That's what I meant to write, but reading it again, it is confusing. I'll
rewrite it.

Pierre

> 
>   -ilia
> 
> On Wed, Feb 24, 2016 at 8:03 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > On Tesla cards, the first register $r0 contains the thread id; later
> > generations use a specialised register for it. In order to prevent the 
> > register
> > from being given to anyone, and thus lose the thread id information, an 
> > lvalue
> > is created to represent $r0 and is passed as an argument to the `main`
> > function.
> >
> > However, since the inputs and outputs of a function are stored as value
> > definitions, a definition is added onto the previously created lvalue 
> > without
> > it being associated to an instruction. Therefore, checking the number of
> > definitions of an lvalue do not ensure that it is associated to an 
> > instruction.
> >
> > Fixes a nullptr dereference in the register allocation pass, while running
> > compute kernels that do not use $r0.
> >
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > index d877c25..500ab89 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> > @@ -853,7 +853,7 @@ isShortRegOp(Instruction *insn)
> >  static bool
> >  isShortRegVal(LValue *lval)
> >  {
> > -   if (lval->defs.size() == 0)
> > +   if (lval->getInsn() == NULL)
> >return false;
> > for (Value::DefCIterator def = lval->defs.begin();
> >  def != lval->defs.end(); ++def)
> > @@ -1467,7 +1467,7 @@ GCRA::allocateRegisters(ArrayList& insns)
> >   nodes[i].init(regs, lval);
> >   RIG.insert([i]);
> >
> > - if (lval->inFile(FILE_GPR) && lval->defs.size() > 0 &&
> > + if (lval->inFile(FILE_GPR) && lval->getInsn() != NULL &&
> >   prog->getTarget()->getChipset() < 0xc0) {
> >  Instruction *insn = lval->getInsn();
> >  if (insn->op == OP_MAD || insn->op == OP_SAD)
> > --
> > 2.7.1
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] math: Import isinf and others to global namespace

2016-03-31 Thread Pierre Moreau
Starting from C++11, several math functions, like isinf, moved into the std
namespace. Since cmath undefines those functions before redefining them inside
the namespace, and glibc 2.23 defines the C variants as macros, the C variants
in global namespace are not accessible any longer.

v2: Move the fix outside of Nouveau, as suggested by Jose Fonseca, since anyone
might need it when GCC switches to C++14 by default with GCC 6.0.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 include/cpp11_math.h| 61 +
 src/gallium/auxiliary/util/u_math.h |  3 ++
 2 files changed, 64 insertions(+)
 create mode 100644 include/cpp11_math.h

diff --git a/include/cpp11_math.h b/include/cpp11_math.h
new file mode 100644
index 000..1f4aa3c
--- /dev/null
+++ b/include/cpp11_math.h
@@ -0,0 +1,61 @@
+/**
+ *
+ * Copyright 2016 Pierre Moreau
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+/**
+ * Wrapper for cmath which makes sure we maintain source compatibility with
+ * newer versions of C++.
+ */
+
+
+#ifndef _CPP11_MATH_H_
+#define _CPP11_MATH_H_
+
+
+/* Since C++11, the following functions are part of the std namespace. Their C
+ * counteparts should still exist in the global namespace, however cmath
+ * undefines those functions, which in glibc 2.23, are defined as macros rather
+ * than functions as in glibc 2.22.
+ */
+#if __cplusplus >= 201103L
+#include 
+
+using std::fpclassify;
+using std::isfinite;
+using std::isinf;
+using std::isnan;
+using std::isnormal;
+using std::signbit;
+using std::isgreater;
+using std::isgreaterequal;
+using std::isless;
+using std::islessequal;
+using std::islessgreater;
+using std::isunordered;
+#endif
+
+
+#endif /* #define _CPP11_MATH_H_ */
diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index e92f83a..12a3780 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -42,6 +42,9 @@
 #include "pipe/p_compiler.h"
 
 #include "c99_math.h"
+#ifdef __cplusplus
+#include "cpp11_math.h"
+#endif
 #include 
 #include 
 #include 
-- 
2.8.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] math: Import isinf and others to global namespace

2016-04-01 Thread Pierre Moreau
On 01:11 PM - Apr 01 2016, Jose Fonseca wrote:
> On 31/03/16 23:08, Pierre Moreau wrote:
> >Starting from C++11, several math functions, like isinf, moved into the std
> >namespace. Since cmath undefines those functions before redefining them 
> >inside
> >the namespace, and glibc 2.23 defines the C variants as macros, the C 
> >variants
> >in global namespace are not accessible any longer.
> >
> >v2: Move the fix outside of Nouveau, as suggested by Jose Fonseca, since 
> >anyone
> > might need it when GCC switches to C++14 by default with GCC 6.0.
> >
> >Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> >---
> >  include/cpp11_math.h| 61 
> > +
> >  src/gallium/auxiliary/util/u_math.h |  3 ++
> >  2 files changed, 64 insertions(+)
> >  create mode 100644 include/cpp11_math.h
> >
> >diff --git a/include/cpp11_math.h b/include/cpp11_math.h
> >new file mode 100644
> >index 000..1f4aa3c
> >--- /dev/null
> >+++ b/include/cpp11_math.h
> 
> I'm not sure a new header is necessary for this.
> 
> What this is doing is making C++11 math functions "appear" like C99 ones.
> It's not making things matching C++11 standard.
> 
> So IMO the right place for this is the end of c99_math.h

I was planning to put it in the c99 header first, but as I was including some
C++ header, I felt that having a separate header which would only be included
by C++ files would be better. I could have the `#ifdef __cplusplus` around the
whole block inside the c99_math.h instead, if you prefer it that way.

Thanks,
Pierre

> 
> Jose
> 
> >@@ -0,0 +1,61 @@
> >+/**
> >+ *
> >+ * Copyright 2016 Pierre Moreau
> >+ * All Rights Reserved.
> >+ *
> >+ * Permission is hereby granted, free of charge, to any person obtaining a
> >+ * copy of this software and associated documentation files (the
> >+ * "Software"), to deal in the Software without restriction, including
> >+ * without limitation the rights to use, copy, modify, merge, publish,
> >+ * distribute, sub license, and/or sell copies of the Software, and to
> >+ * permit persons to whom the Software is furnished to do so, subject to
> >+ * the following conditions:
> >+ *
> >+ * The above copyright notice and this permission notice (including the
> >+ * next paragraph) shall be included in all copies or substantial portions
> >+ * of the Software.
> >+ *
> >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> >+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> >+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
> >+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> >+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> >+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> >+ *
> >+ **/
> >+
> >+/**
> >+ * Wrapper for cmath which makes sure we maintain source compatibility with
> >+ * newer versions of C++.
> >+ */
> >+
> >+
> >+#ifndef _CPP11_MATH_H_
> >+#define _CPP11_MATH_H_
> >+
> >+
> >+/* Since C++11, the following functions are part of the std namespace. 
> >Their C
> >+ * counteparts should still exist in the global namespace, however cmath
> >+ * undefines those functions, which in glibc 2.23, are defined as macros 
> >rather
> >+ * than functions as in glibc 2.22.
> >+ */
> >+#if __cplusplus >= 201103L
> >+#include 
> >+
> >+using std::fpclassify;
> >+using std::isfinite;
> >+using std::isinf;
> >+using std::isnan;
> >+using std::isnormal;
> >+using std::signbit;
> >+using std::isgreater;
> >+using std::isgreaterequal;
> >+using std::isless;
> >+using std::islessequal;
> >+using std::islessgreater;
> >+using std::isunordered;
> >+#endif
> >+
> >+
> >+#endif /* #define _CPP11_MATH_H_ */
> >diff --git a/src/gallium/auxiliary/util/u_math.h 
> >b/src/gallium/auxiliary/util/u_math.h
> >index e92f83a..12a3780 100644
> >--- a/src/gallium/auxiliary/util/u_math.h
> >+++ b/src/gallium/auxiliary/util/u_math.h
> >@@ -42,6 +42,9 @@
> >  #include "pipe/p_compiler.h"
> >
> >  #include "c99_math.h"
> >+#ifdef __cplusplus
> >+#include "cpp11_math.h"
> >+#endif
> >  #include 
> >  #include 
> >  #include 
> >
> 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nvc0: clear out surfaces bufctx before rebinding everything

2016-05-19 Thread Pierre Moreau
On 09:28 PM - May 18 2016, Ilia Mirkin wrote:
> Otherwise we can end up in a situation where that bin just grows and
> grows.
> 
> Signed-off-by: Ilia Mirkin 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index cc5ea5e..2523c20 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -963,6 +963,11 @@ nvc0_validate_suf(struct nvc0_context *nvc0, int s)
> struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> struct nvc0_screen *screen = nvc0->screen;
>  
> +   if (s == 5)

This is not specific to this patch, but after seeing various patches with a
special case for `s == 5`, wouldn’t it make sense to have a define for that
index, both to make it a bit clearer to the reader why it is handled
differently, and to avoid typos? Same with having a define for the number of
surfaces (both 3D and CP) and another one for the number of 3D surfaces (or any
other method to avoid looping too many or too few times due to a typo)?

> +  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
> +   else
> +  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
> +
> for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
>struct pipe_image_view *view = >images[s][i];
>int width, height, depth;
> -- 
> 2.7.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: Add missing handling of U64/S64 in inlines

2016-05-19 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---

U64/S64 support is missing in other places of codegen (like in
nv50_ir_peephole.cpp for example), however its absence will result in code not
being as optimised as it could have. Adding that support is not as straight
forward as this patch, and will need a thorough testing to avoid any
regressions, therefore I’m only sending this patch for now.

 src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
index 4c5de2e..4cb53ab 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
@@ -126,7 +126,7 @@ static inline bool isFloatType(DataType ty)
 
 static inline bool isSignedIntType(DataType ty)
 {
-   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32);
+   return (ty == TYPE_S8 || ty == TYPE_S16 || ty == TYPE_S32 || ty == 
TYPE_S64);
 }
 
 static inline bool isSignedType(DataType ty)
@@ -136,6 +136,7 @@ static inline bool isSignedType(DataType ty)
case TYPE_U8:
case TYPE_U16:
case TYPE_U32:
+   case TYPE_U64:
case TYPE_B96:
case TYPE_B128:
   return false;
@@ -147,6 +148,7 @@ static inline bool isSignedType(DataType ty)
 static inline DataType intTypeToSigned(DataType ty)
 {
switch (ty) {
+   case TYPE_U64: return TYPE_S64;
case TYPE_U32: return TYPE_S32;
case TYPE_U16: return TYPE_S16;
case TYPE_U8: return TYPE_S8;
-- 
2.8.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] math: Import isinf and others to global namespace

2016-04-14 Thread Pierre Moreau
Starting from C++11, several math functions, like isinf, moved into the std
namespace. Since cmath undefines those functions before redefining them inside
the namespace, and glibc 2.23 defines the C variants as macros, the C variants
in global namespace are not accessible any longer.

v2: Move the fix outside of Nouveau, as suggested by Jose Fonseca, since anyone
might need it when GCC switches to C++14 by default with GCC 6.0.

v3:
*   Put the code directly inside c99_math.h rather than creating a new header
file, as asked by Jose Fonseca;
*   Guard the code behind glibc version checks, as only glibc > =2.23 defines
isinf & co. as functions, as suggested by Jose Fonseca.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 include/c99_math.h | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/include/c99_math.h b/include/c99_math.h
index 250e08d..192ff13 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -185,4 +185,27 @@ fpclassify(double x)
 #endif
 
 
+/* Since C++11, the following functions are part of the std namespace. Their C
+ * counteparts should still exist in the global namespace, however cmath
+ * undefines those functions, which in glibc 2.23, are defined as macros rather
+ * than functions as in glibc 2.22.
+ */
+#if __cplusplus >= 201103L && __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 23
+#include 
+
+using std::fpclassify;
+using std::isfinite;
+using std::isinf;
+using std::isnan;
+using std::isnormal;
+using std::signbit;
+using std::isgreater;
+using std::isgreaterequal;
+using std::isless;
+using std::islessequal;
+using std::islessgreater;
+using std::isunordered;
+#endif
+
+
 #endif /* #define _C99_MATH_H_ */
-- 
2.8.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 04/24] nvc0: bind images on 3D shaders for Kepler

2016-04-14 Thread Pierre Moreau
On 01:56 AM - Apr 13 2016, Samuel Pitoiset wrote:
> Similar to surfaces validation for compute shaders.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_program.c |  4 +++-
>  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 26 
> -
>  2 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index ced8130..8e73227 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -561,12 +561,14 @@ nvc0_program_translate(struct nvc0_program *prog, 
> uint16_t chipset,
> } else {
>if (chipset >= NVISA_GK104_CHIPSET) {
>   info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> + info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
> +  } else {
> + info->io.suInfoBase = 0; /* TODO */
>}
>info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
>info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
>info->io.msInfoCBSlot = 15;
>info->io.msInfoBase = 0; /* TODO */
> -  info->io.suInfoBase = 0; /* TODO */
> }
>  
> info->assignSlots = nvc0_program_assign_varying_slots;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> index 585b1e5..7cac31d 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
> @@ -875,7 +875,31 @@ nvc0_update_surface_bindings(struct nvc0_context *nvc0)
>  static inline void
>  nve4_update_surface_bindings(struct nvc0_context *nvc0)
>  {
> -   /* TODO */
> +   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> +   struct nvc0_screen *screen = nvc0->screen;
> +   int i, j, s;
> +
> +   for (s = 0; s < 5; s++) {

Similar to my comment on patch 1, shouldn’t you have `s < 6` here? (Except if
you follow Ilia’s suggestion.)

Pierre

> +  BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +  PUSH_DATA (push, 2048);
> +  PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> +  PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
> +  BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16 * NVC0_MAX_IMAGES);
> +  PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(0));
> +
> +  for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
> + struct pipe_image_view *view = >images[s][i];
> + if (view->resource) {
> +struct nv04_resource *res = nv04_resource(view->resource);
> +
> +nve4_set_surface_info(push, view, screen);
> +BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
> + } else {
> +for (j = 0; j < 16; j++)
> +   PUSH_DATA(push, 0);
> + }
> +  }
> +   }
>  }
>  
>  void
> -- 
> 2.8.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] math: Import isinf and others to global namespace

2016-04-18 Thread Pierre Moreau
Thanks a lot Jose! If it hasn’t been done yet, could you please revert the
commit you pushed that only affected Nouveau, since it’s now unnecessary, and
furthermore, didn’t had the glibc check.

Thanks!
Pierre

On 11:21 AM - Apr 18 2016, Jose Fonseca wrote:
> Thanks. I've tweak the version check logic and pushed.
> 
> Jose
> 
> On 14/04/16 19:43, Pierre Moreau wrote:
> >Starting from C++11, several math functions, like isinf, moved into the std
> >namespace. Since cmath undefines those functions before redefining them 
> >inside
> >the namespace, and glibc 2.23 defines the C variants as macros, the C 
> >variants
> >in global namespace are not accessible any longer.
> >
> >v2: Move the fix outside of Nouveau, as suggested by Jose Fonseca, since 
> >anyone
> > might need it when GCC switches to C++14 by default with GCC 6.0.
> >
> >v3:
> >*   Put the code directly inside c99_math.h rather than creating a new header
> > file, as asked by Jose Fonseca;
> >*   Guard the code behind glibc version checks, as only glibc > =2.23 defines
> > isinf & co. as functions, as suggested by Jose Fonseca.
> >
> >Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> >---
> >  include/c99_math.h | 23 +++
> >  1 file changed, 23 insertions(+)
> >
> >diff --git a/include/c99_math.h b/include/c99_math.h
> >index 250e08d..192ff13 100644
> >--- a/include/c99_math.h
> >+++ b/include/c99_math.h
> >@@ -185,4 +185,27 @@ fpclassify(double x)
> >  #endif
> >
> >
> >+/* Since C++11, the following functions are part of the std namespace. 
> >Their C
> >+ * counteparts should still exist in the global namespace, however cmath
> >+ * undefines those functions, which in glibc 2.23, are defined as macros 
> >rather
> >+ * than functions as in glibc 2.22.
> >+ */
> >+#if __cplusplus >= 201103L && __GLIBC__ >= 2 && __GLIBC_MINOR__ >= 23
> >+#include 
> >+
> >+using std::fpclassify;
> >+using std::isfinite;
> >+using std::isinf;
> >+using std::isnan;
> >+using std::isnormal;
> >+using std::signbit;
> >+using std::isgreater;
> >+using std::isgreaterequal;
> >+using std::isless;
> >+using std::islessequal;
> >+using std::islessgreater;
> >+using std::isunordered;
> >+#endif
> >+
> >+
> >  #endif /* #define _C99_MATH_H_ */
> >
> 


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-04-18 Thread Pierre Moreau
On 11:21 AM - Apr 18 2016, Hans de Goede wrote:
> Hi,
> 
> On 17-04-16 22:27, Pierre Moreau wrote:
> >On 04:17 PM - Apr 17 2016, Ilia Mirkin wrote:
> >>On Sun, Apr 17, 2016 at 4:07 PM, Pierre Moreau <pierre.mor...@free.fr> 
> >>wrote:
> >>>Ping :-)
> >>>
> >>>On 10:56 PM - Mar 19 2016, Pierre Moreau wrote:
> >>>>Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the 
> >>>>GPU
> >>>>unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
> >>>>values, and use `cvt` to convert between the original target (resp. 
> >>>>source)
> >>>>and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
> >>>>
> >>>>Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> >>>>---
> >>>>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 59 
> >>>> ++
> >>>>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  1 +
> >>>>  2 files changed, 60 insertions(+)
> >>>>
> >>>>diff --git 
> >>>>a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> >>>>b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >>>>index 2719f2c..c419a68 100644
> >>>>--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >>>>+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >>>>@@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
> >>>> return true;
> >>>>  }
> >>>>
> >>>>+bool
> >>>>+NVC0LoweringPass::handleCVT(Instruction *i)
> >>>>+{
> >>>>+   if (isFloatType(i->dType) || isFloatType(i->sType) ||
> >>>>+ isSignedIntType(i->dType) xor isSignedIntType(i->sType))
> >>
> >>I know pre-C89 features are cool, but let's avoid using them. I know
> >>characters like ^ were uncommon on the 1960's and 1970's teletypes,
> >>but I think we're past those days now.
> >
> >Yeah… Will fix that.
> 
> So "xor" or "^" is bitwise not logical, since isSignedIntType() returns
> a bool, which when cast to an int is guaranteed to be 0 or 1, this
> should work fine.
> 
> And being a bitwise op its presedence means it will get evaluated
> before the "||" operators in your condition which I believe is what
> we want here, but can we please have a pair of parenthesis around the
> "^" and its operands to make this more clear ?

Sure, I’ll add a pair of parenthesis around it.

Regards,
Pierre

> 
> Regards,
> 
> Hans
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-04-17 Thread Pierre Moreau
On 04:17 PM - Apr 17 2016, Ilia Mirkin wrote:
> On Sun, Apr 17, 2016 at 4:07 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > Ping :-)
> >
> > On 10:56 PM - Mar 19 2016, Pierre Moreau wrote:
> >> Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the 
> >> GPU
> >> unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
> >> values, and use `cvt` to convert between the original target (resp. source)
> >> and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
> >>
> >> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> >> ---
> >>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 59 
> >> ++
> >>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  1 +
> >>  2 files changed, 60 insertions(+)
> >>
> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >> index 2719f2c..c419a68 100644
> >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> >> @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
> >> return true;
> >>  }
> >>
> >> +bool
> >> +NVC0LoweringPass::handleCVT(Instruction *i)
> >> +{
> >> +   if (isFloatType(i->dType) || isFloatType(i->sType) ||
> >> + isSignedIntType(i->dType) xor isSignedIntType(i->sType))
> 
> I know pre-C89 features are cool, but let's avoid using them. I know
> characters like ^ were uncommon on the 1960's and 1970's teletypes,
> but I think we're past those days now.

Yeah… Will fix that.

> 
> >> +  return false;
> >> +
> >> +   if (typeSizeof(i->sType) == 8) {
> >> +  Value *toSplit = i->getSrc(0);
> >> +  if (i->saturate) {
> >> + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
> >> + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
> >> + if (isSignedType(i->sType)) {
> >> +minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
> >> +maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
> >> + }
> >> + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), 
> >> toSplit,
> >> +minValue);
> >> + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
> >> +  maxValue);
> 
> Aren't you assuming that i->dType == 4 here? It could be an unsigned
> <-> signed conversion, at 64-bit. So the clamp values would be

I am assuming `i->dType <= 4`: remember the ^ from before! ;-) But, it
could be a U64 <=> U64 or S64 <=> S64 conversion, which would then fail…

> different. Handling ALL the cases is quite annoying... can you figure
> out what the hw doesn't support and just handle that? I doubt it'll be
> any slower, and definitely simpler.

I don’t remember if I checked U64 <=> S64 conversions… Will need to refresh my
memory and log which combinations fail.

Thanks!
Pierre

> 
>   -ilia
> 
> >> +  }
> >> +
> >> +  Value *value32[2] = { bld.getSSA(), bld.getSSA() };
> >> +  bld.mkSplit(value32, 4, toSplit);
> >> +  if (typeSizeof(i->dType) == 4) {
> >> + bld.mkMov(i->getDef(0), value32[0], i->dType);
> >> + delete_Instruction(prog, i);
> >> + return true;
> >> +  }
> >> +
> >> +  i->setSrc(0, bld.getSSA());
> >> +  i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> >> +  bld.mkMov(i->getSrc(0), value32[0], i->sType);
> >> +   } else if (typeSizeof(i->dType) == 8) {
> >> +  bld.setPosition(i, true);
> >> +  Value *res = i->getDef(0);
> >> +  Value *high32 = bld.loadImm(bld.getSSA(),
> >> +  isSignedType(i->sType) ? UINT32_MAX : 
> >> 0u);
> >> +  Value *low32 = i->getSrc(0);
> >> +  DataType resType = i->dType;
> >> +
> >> +  if (typeSizeof(i->sType) <= 2) {
> >> + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> >> + low32 = bld.getSSA();
> >> + i->setDef(0, low32);
> >> +  } else if (typeSizeof(i->sType) == 4) {
> >> + delete_Instruction(p

Re: [Mesa-dev] [PATCH] nvc0/lowering: Handle conversions to U64/S64 manually

2016-04-17 Thread Pierre Moreau
Ping :-)

On 10:56 PM - Mar 19 2016, Pierre Moreau wrote:
> Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU
> unhappy. Instead, manually handle the conversion between 64-bit and 32-bit
> values, and use `cvt` to convert between the original target (resp. source)
> and 32-bit value. This happens to be the behaviour of NVIDIA's driver.
> 
> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> ---
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 59 
> ++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h|  1 +
>  2 files changed, 60 insertions(+)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 2719f2c..c419a68 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i)
> return true;
>  }
>  
> +bool
> +NVC0LoweringPass::handleCVT(Instruction *i)
> +{
> +   if (isFloatType(i->dType) || isFloatType(i->sType) ||
> + isSignedIntType(i->dType) xor isSignedIntType(i->sType))
> +  return false;
> +
> +   if (typeSizeof(i->sType) == 8) {
> +  Value *toSplit = i->getSrc(0);
> +  if (i->saturate) {
> + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul);
> + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX);
> + if (isSignedType(i->sType)) {
> +minValue = bld.loadImm(bld.getSSA(8), INT32_MIN);
> +maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX);
> + }
> + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), toSplit,
> +minValue);
> + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes,
> +  maxValue);
> +  }
> +
> +  Value *value32[2] = { bld.getSSA(), bld.getSSA() };
> +  bld.mkSplit(value32, 4, toSplit);
> +  if (typeSizeof(i->dType) == 4) {
> + bld.mkMov(i->getDef(0), value32[0], i->dType);
> + delete_Instruction(prog, i);
> + return true;
> +  }
> +
> +  i->setSrc(0, bld.getSSA());
> +  i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> +  bld.mkMov(i->getSrc(0), value32[0], i->sType);
> +   } else if (typeSizeof(i->dType) == 8) {
> +  bld.setPosition(i, true);
> +  Value *res = i->getDef(0);
> +  Value *high32 = bld.loadImm(bld.getSSA(),
> +  isSignedType(i->sType) ? UINT32_MAX : 0u);
> +  Value *low32 = i->getSrc(0);
> +  DataType resType = i->dType;
> +
> +  if (typeSizeof(i->sType) <= 2) {
> + i->dType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32;
> + low32 = bld.getSSA();
> + i->setDef(0, low32);
> +  } else if (typeSizeof(i->sType) == 4) {
> + delete_Instruction(prog, i);
> +  }
> +
> +  Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32,
> +   high32);
> +  bld.mkMov(res, merged64, resType);
> +   }
> +
> +   return true;
> +}
> +
>  // Generate a binary predicate if an instruction is predicated by
>  // e.g. an f32 value.
>  void
> @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i)
>checkPredicate(i);
>  
> switch (i->op) {
> +   case OP_CVT:
> +  return handleCVT(i);
> case OP_TEX:
> case OP_TXB:
> case OP_TXL:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index 6eb8aff..9fc24d9 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -96,6 +96,7 @@ protected:
> bool handleMOD(Instruction *);
> bool handleSQRT(Instruction *);
> bool handlePOW(Instruction *);
> +   bool handleCVT(Instruction *);
> bool handleTEX(TexInstruction *);
> bool handleTXD(TexInstruction *);
> bool handleTXQ(TexInstruction *);
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 01/24] nvc0: add preliminary support for images

2016-04-14 Thread Pierre Moreau
On 01:56 AM - Apr 13 2016, Samuel Pitoiset wrote:
> This implements set_shader_images() and resource invalidation for
> images. As OpenGL requires at least 8 images, we are going to expose
> this minimum value even if this might be raised for Kepler, but this
> limit is mainly for Fermi because the hardware only accepts 8 images.
> 
> Based on original patch by Ilia Mirkin.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.c| 17 +++
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h|  4 ++
>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.h |  1 +
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c  | 53 
> +-
>  .../drivers/nouveau/nvc0/nvc0_state_validate.c |  1 +
>  5 files changed, 74 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> index fcb8289..3e25572 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
> @@ -301,6 +301,23 @@ nvc0_invalidate_resource_storage(struct nouveau_context 
> *ctx,
>   }
>}
>}
> +
> +  for (s = 0; s < 5; ++s) {

Should be `s < 6` here I guess, since you later test for `s == 5`.

> +  for (i = 0; i < NVC0_MAX_IMAGES; ++i) {

Any reason not to intend this second for loop?

Pierre

> + if (nvc0->images[s][i].resource == res) {
> +nvc0->images_dirty[s] |= 1 << i;
> +if (unlikely(s == 5)) {
> +   nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
> +   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
> +} else {
> +   nvc0->dirty_3d |= NVC0_NEW_3D_SURFACES;
> +   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
> +}
> + }
> + if (!--ref)
> +return ref;
> +  }
> +  }
> }
>  
> return ref;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index 91dffa1..617f4c2 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -237,6 +237,10 @@ struct nvc0_context {
> uint32_t buffers_dirty[6];
> uint32_t buffers_valid[6];
>  
> +   struct pipe_image_view images[6][NVC0_MAX_IMAGES];
> +   uint16_t images_dirty[6];
> +   uint16_t images_valid[6];
> +
> struct util_dynarray global_residents;
>  };
>  
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> index 0f78220..750bba0 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> @@ -23,6 +23,7 @@
>  
>  #define NVC0_MAX_BUFFERS 32
>  
> +#define NVC0_MAX_IMAGES 8
>  
>  struct nvc0_context;
>  
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> index a100fc4..e437a64 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
> @@ -1232,10 +1232,59 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
>  }
>  
>  static void
> +nvc0_bind_images_range(struct nvc0_context *nvc0, const unsigned s,
> +   unsigned start, unsigned nr,
> +   struct pipe_image_view *pimages)
> +{
> +   const unsigned end = start + nr;
> +   const unsigned mask = ((1 << nr) - 1) << start;
> +   unsigned i;
> +
> +   assert(s < 6);
> +
> +   if (pimages) {
> +  for (i = start; i < end; ++i) {
> + const unsigned p = i - start;
> + if (pimages[p].resource)
> +nvc0->images_valid[s] |= (1 << i);
> + else
> +nvc0->images_valid[s] &= ~(1 << i);
> +
> + nvc0->images[s][i].format = pimages[p].format;
> + nvc0->images[s][i].access = pimages[p].access;
> + if (pimages[p].resource->target == PIPE_BUFFER)
> +nvc0->images[s][i].u.buf = pimages[p].u.buf;
> + else
> +nvc0->images[s][i].u.tex = pimages[p].u.tex;
> +
> + pipe_resource_reference(
> +   >images[s][i].resource, pimages[p].resource);
> +  }
> +   } else {
> +  for (i = start; i < end; ++i)
> + pipe_resource_reference(>images[s][i].resource, NULL);
> +  nvc0->images_valid[s] &= ~mask;
> +   }
> +   nvc0->images_dirty[s] |= mask;
> +
> +   if (s == 5)
> +  nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
> +   else
> +  nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF);
> +}
> +
> +static void
>  nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
> -   unsigned start_slot, unsigned count,
> -   struct pipe_image_view *views)
> +   unsigned start, unsigned nr,
> +   struct 

Re: [Mesa-dev] [RFC 1/3] tgsi: Add WORK_DIM System Value

2016-04-28 Thread Pierre Moreau
On 05:43 PM - Apr 27 2016, Samuel Pitoiset wrote:
> 
> 
> On 04/27/2016 05:24 PM, Ilia Mirkin wrote:
> >On Wed, Apr 27, 2016 at 11:19 AM, Hans de Goede  wrote:
> >>Hi,
> >>
> >>On 27-04-16 16:49, Ilia Mirkin wrote:
> >>>
> >>>Please add this semantic to src/gallium/docs and explain what it
> >>>means.
> >>
> >>
> >>Ah, I was under the impression these were not documented, but I
> >>now see they are, will fix.
> >>
> >>>(I'm not even sure what this is, and the easily-found opencl
> >>>
> >>>docs helpfully indicate that get_work_dim returns the work_dim...)
> >>>This wasn't done for some of the other compute-specific semantics.
> >>
> >>
> >>When launching a grid through clEnqueueNDRangeKernel the user
> >>can tell how many dimensions of the pipe_grid_info->grid /
> >>pipe_grid_info->block equivalent the user is actually passing in
> >>this is done through a parameter to that function called work_dim:
> >>
> >>https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html
> >>
> >>Clover pads the y and z values (if unused) to 1, so it always
> >>ends up giving a 3 dimensional grid and block sizes to the driver.
> >>
> >>But an opencl kernel can use the opencl builtin get_work_dim
> >>function, which returns how much dimensions where actually
> >>passed into clEnqueueNDRangeKernel.
> >>
> >>Note that passing in e.g. work_dim = 3 with a grid size of { 4, 1, 1 }
> >>is perfectly legal and then get_work_dim should actually return 3.
> >>
> >>So we need to store the clEnqueueNDRangeKernel workdim parameter
> >>somewhere, and allow getting it from a kernel. Using a system-value
> >>seems the best solution for this to me.
> >>
> >>I hope this helps to explain things, if not keep asking :)
> >>
> >>Also if you've a better solution, I'm all ears.
> >
> >Sounds like there's no real way to distinguish it from the kernel, the
> >parameters given to the hw are all the same, so it's just a thing that
> >needs to be passed along. I think that what you have is a fine
> >solution, although I question the usefulness of this being an OpenCL
> >builtin. But we're not about to change that now.
> >
> >An alternative would be for clover to stick it into a uniform and have
> >the kernel read it out from there. If there are going to be a ton of
> >these, that might be preferable. If it's just the one (or maybe two),
> >not worth it.
> 
> I think your solution is fine too. Storing the number of dimensions passed
> to clEnqueueNDRangeKernel() as well as the local/global sizes makes sense.

Later versions of OpenCL are adding a few more of those (see [OpenCL 1.0][1]
compared to [OpenCL 2.0][2]). So putting it in a uniform would be better, but
we can just switch to a uniform the day we reach OpenCL 2.0.

Pierre


[1]: 
https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/workItemFunctions.html
[2]: 
https://www.khronos.org/registry/cl/sdk/2.0/docs/man/xhtml/workItemFunctions.html

> 
> >
> >  -ilia
> >
> 
> -- 
> -Samuel
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: always return 0 when trying to read thread id along unit dim

2017-01-26 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 10:20 pm - Jan 25 2017, Ilia Mirkin wrote:
> Many many many compute shaders only define a 1- or 2-dimensional block,
> but then continue to use system values that take the full 3d into
> account (like gl_LocalInvocationIndex, etc). So for the special case
> that a dimension is exactly 1, we know that the thread id along that
> axis will always be 0, so return it as such and allow constant folding
> to fix things up.
> 
> Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp   |  6 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h  |  2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 10 --
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target.h  |  4 +++-
>  4 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 186c9fd..b67a1dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
>info->prop.gp.instanceCount = 1;
>info->prop.gp.maxVertices = 1;
> }
> -   info->prop.cp.numThreads = 1;
> +   if (info->type == PIPE_SHADER_COMPUTE) {
> +  info->prop.cp.numThreads[0] =
> +  info->prop.cp.numThreads[1] =
> +  info->prop.cp.numThreads[2] = 1;
> +   }
> info->io.pointSize = 0xff;
> info->io.instanceId = 0xff;
> info->io.vertexId = 0xff;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 65d0904..e7d840d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -152,7 +152,7 @@ struct nv50_ir_prog_info
>   uint32_t inputOffset; /* base address for user args */
>   uint32_t sharedOffset; /* reserved space in s[] */
>   uint32_t gridInfoBase;  /* base address for NTID,NCTAID */
> - uint32_t numThreads; /* max number of threads */
> + uint16_t numThreads[3]; /* max number of threads */
>} cp;
> } prop;
>  
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 6320e52..51f8b29 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1047,7 +1047,6 @@ bool Source::scanSource()
> }
>  
> info->io.viewportId = -1;
> -   info->prop.cp.numThreads = 1;
>  
> info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
> info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
> @@ -1150,9 +1149,13 @@ void Source::scanProperty(const struct 
> tgsi_full_property *prop)
>   info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but 
> points */
>break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
> +  info->prop.cp.numThreads[0] = prop->u[0].Data;
> +  break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
> +  info->prop.cp.numThreads[1] = prop->u[0].Data;
> +  break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
> -  info->prop.cp.numThreads *= prop->u[0].Data;
> +  info->prop.cp.numThreads[2] = prop->u[0].Data;
>break;
> case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
>info->io.clipDistances = prop->u[0].Data;
> @@ -1941,6 +1944,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, 
> int c, Value *ptr)
>return ld->getDef(0);
> case TGSI_FILE_SYSTEM_VALUE:
>assert(!ptr);
> +  if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
> +  info->prop.cp.numThreads[swz] == 1)
> + return zero;
>ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
>ld->perPatch = info->sv[idx].patch;
>return ld->getDef(0);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> index eaf50cc..e9d1057 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> @@ -174,7 +174,9 @@ public:
> virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const 
> = 0;
>  
> virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
> -  threads = info->prop.cp.nu

[Mesa-dev] [PATCH] nv50/ir: check for origin insn in findOriginForTestWithZero

2017-02-18 Thread Pierre Moreau
Function arguments do not have an "origin" instruction, causing a
NULL-pointer dereference without this check.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 79403c93df..d358abc5bd 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -410,6 +410,8 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
if (!value)
   return NULL;
Instruction *insn = value->getInsn();
+   if (!insn)
+  return NULL;
 
if (insn->asCmp() && insn->op != OP_SLCT)
   return insn->asCmp();
-- 
2.11.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: allow TK1 (NVEA) queries to work

2017-01-16 Thread Pierre Moreau
On 07:13 pm - Jan 15 2017, Samuel Pitoiset wrote:
> 
> 
> On 01/14/2017 02:35 AM, Ilia Mirkin wrote:
> > The NVEA 3D class is numerically larger than the NVF0 3D class. The TK1
> > chip uses the SM35 ISA and likely has the same hw counters. Allow these
> > to be used like on all the other supported chips.
> 
> This actually needs more testing. Perf counters are pretty different for
> each generation. The kernel used for reading the counters will work though,
> but the configuration has to be double checked.
> 
> More comments inline.
> 
> > 
> > Signed-off-by: Ilia Mirkin 
> > ---
> >  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  4 ++--
> >  .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c   |  3 +++
> >  src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c   | 19 
> > ++-
> >  3 files changed, 15 insertions(+), 11 deletions(-)
> > 
> > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
> > b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
> > index 8b9e6b6..6bf2285 100644
> > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
> > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
> > @@ -205,7 +205,7 @@ nvc0_screen_get_driver_query_group_info(struct 
> > pipe_screen *pscreen,
> > 
> > if (screen->base.drm->version >= 0x01000101) {
> >if (screen->compute) {
> > - if (screen->base.class_3d <= NVF0_3D_CLASS) {
> > + if (screen->base.class_3d < GM107_3D_CLASS) {
> >  count += 2;
> >   }
> >}
> > @@ -229,7 +229,7 @@ nvc0_screen_get_driver_query_group_info(struct 
> > pipe_screen *pscreen,
> > } else
> > if (id == NVC0_HW_METRIC_QUERY_GROUP) {
> >if (screen->compute) {
> > -  if (screen->base.class_3d <= NVF0_3D_CLASS) {
> > +  if (screen->base.class_3d < GM107_3D_CLASS) {
> 
> Oops, I forgot to expose these groups when I added Maxwell support. These
> groups are only used for AMD_performance_monitor. Presumably this ext
> currently doesn't expose the counters on Maxwell.
> 
> We should enable them in a separate patch just before this one (for
> maxwell).

Do you need me to test anything on Maxwell?

Pierre

> 
> Otherwise, looks good.
> 
> >  info->name = "Performance metrics";
> >  info->max_active_queries = 4; /* A metric uses at least 2 
> > queries */
> >  info->num_queries = nvc0_hw_metric_get_num_queries(screen);
> > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 
> > b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> > index 089af61..494f2dd 100644
> > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> > @@ -403,6 +403,7 @@ nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
> > case GM200_3D_CLASS:
> > case GM107_3D_CLASS:
> >return sm50_hw_metric_queries;
> > +   case NVEA_3D_CLASS:
> > case NVF0_3D_CLASS:
> >return sm35_hw_metric_queries;
> > case NVE4_3D_CLASS:
> > @@ -425,6 +426,7 @@ nvc0_hw_metric_get_num_queries(struct nvc0_screen 
> > *screen)
> > case GM200_3D_CLASS:
> > case GM107_3D_CLASS:
> >return ARRAY_SIZE(sm50_hw_metric_queries);
> > +   case NVEA_3D_CLASS:
> > case NVF0_3D_CLASS:
> >return ARRAY_SIZE(sm35_hw_metric_queries);
> > case NVE4_3D_CLASS:
> > @@ -684,6 +686,7 @@ nvc0_hw_metric_get_query_result(struct nvc0_context 
> > *nvc0,
> > switch (screen->base.class_3d) {
> > case GM200_3D_CLASS:
> > case GM107_3D_CLASS:
> > +   case NVEA_3D_CLASS:
> > case NVF0_3D_CLASS:
> >value = sm35_hw_metric_calc_result(hq, res64);
> >break;
> > diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 
> > b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> > index df5723d..440e5d3 100644
> > --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> > +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
> > @@ -2239,6 +2239,7 @@ nvc0_hw_sm_get_queries(struct nvc0_screen *screen)
> >return sm52_hw_sm_queries;
> > case GM107_3D_CLASS:
> >return sm50_hw_sm_queries;
> > +   case NVEA_3D_CLASS:
> > case NVF0_3D_CLASS:
> >return sm35_hw_sm_queries;
> > case NVE4_3D_CLASS:
> > @@ -2262,6 +2263,7 @@ nvc0_hw_sm_get_num_queries(struct nvc0_screen *screen)
> >return ARRAY_SIZE(sm52_hw_sm_queries);
> > case GM107_3D_CLASS:
> >return ARRAY_SIZE(sm50_hw_sm_queries);
> > +   case NVEA_3D_CLASS:
> > case NVF0_3D_CLASS:
> >return ARRAY_SIZE(sm35_hw_sm_queries);
> > case NVE4_3D_CLASS:
> > @@ -2475,15 +2477,14 @@ nvc0_hw_sm_get_program(struct nvc0_screen *screen)
> >prog->code_size = sizeof(gm107_read_hw_sm_counters_code);
> >prog->num_gprs = 14;
> > } else
> > -   if (screen->base.class_3d == NVE4_3D_CLASS ||
> > -   screen->base.class_3d == NVF0_3D_CLASS) {
> > -  if (screen->base.class_3d == NVE4_3D_CLASS) {

Re: [Mesa-dev] [PATCH] nv50/ir: optimize sub(a, 0) to a

2016-10-06 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 12:36 am - Oct 06 2016, Karol Herbst wrote:
> helped some ue4 demos and divinity OS shaders
> 
> total instructions in shared programs : 2818674 -> 2818606 (-0.00%)
> total gprs used in shared programs: 379273 -> 379273 (0.00%)
> total local used in shared programs   : 9505 -> 9505 (0.00%)
> total bytes used in shared programs   : 25837792 -> 25837192 (-0.00%)
> 
> localgpr   inst  bytes
> helped   0   0  33  33
>   hurt   0   0   0   0
> 
> Signed-off-by: Karol Herbst <karolher...@gmail.com>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 9875738..1c71155 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -1037,12 +1037,15 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> , int s)
>}
>break;
> case OP_ADD:
> +   case OP_SUB:
>if (i->usesFlags())
>   break;
>if (imm0.isInteger(0)) {
>   if (s == 0) {
>  i->setSrc(0, i->getSrc(1));
>  i->src(0).mod = i->src(1).mod;
> +if (i->op == OP_SUB)
> +   i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
>   }
>   i->setSrc(1, NULL);
>   i->op = i->src(0).mod.getOp();
> -- 
> 2.10.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Where to place the SPIR-V headers

2016-11-06 Thread Pierre Moreau
Hello everyone,

I have been working on translating SPIR-V to NV50 IR inside Nouveau in order to
run OpenCL kernels, received as SPIR-V binaries, on Nouveau. I have some
patches for clover as well as gallium, but before sending those for review, I
would like to know how to handle the SPIR-V header files.

Currently, some of the SPIR-V headers (the C version + the GLSL instruction-
set) can be found in `src/compiler/spirv`. Clover and Nouveau will both use the
C++ version of the SPIR-V header and the OpenCL instruction-set header; to
support ARB_gl_spirv, Nouveau will also need the GLSL instruction-set header.
Should all headers be moved to a common folder, or should each project have its
own copy?

Regards,
Pierre


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Split 64-bit integer MAD/MUL operations

2016-10-18 Thread Pierre Moreau
Hello Ian,

Since I am working on a direct SPIR-V to NV50 IR translator, ultimately to be
used for OpenCL kernels, I will still need the patch for that work. (I even
wrote that patch because I needed it when handling 64-bit addresses. :-) )
But thanks for the heads-up!

Pierre


On 02:07 pm - Oct 17 2016, Ian Romanick wrote:
> I know know if it will make this patch unnecessary, but I have a GLSL
> IR-level lowering pass for 64-bit multiplication.  I'm going to send
> that out with the rest of the GL_ARB_gpu_shader_int64 series within the
> next day or so.
> 
> On 10/15/2016 03:24 PM, Pierre Moreau wrote:
> > Hardware does not support 64-bit integers MAD and MUL operations, so we need
> > to transform them in 32-bit operations.
> > 
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 121 
> > +
> >  1 file changed, 121 insertions(+)
> > 
> > Tested with (the GPU result was compared to the CPU result):
> > * 0xfff3lu * 0xfff2lu + 0x80070002lu
> > * 0xfff3lu * 0x80070002lu + 0x80070002lu
> > * 0x80010003lu * 0xfff2lu + 0x80070002lu
> > * 0x80010003lu * 0x80070002lu + 0x80070002lu
> > 
> > * -523456791234l * 929835793793l + -15793793l
> > *  523456791234l * 929835793793l + -15793793l
> > * -523456791234l * -929835793793l + -15793793l
> > *  523456791234l * -929835793793l + -15793793l
> > 
> > v2:
> > * Completely re-write the patch, as it was completely flawed (Ilia Mirkin)
> > * Move pass prior to Register Allocation, as some temporaries need to
> >   be created.
> > 
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > index d88bb34..a610eb5 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > @@ -2218,6 +2218,126 @@ LateAlgebraicOpt::visit(Instruction *i)
> >  
> >  // 
> > =
> >  
> > +// Split 64-bit MUL and MAD
> > +class Split64BitOpPreRA : public Pass
> > +{
> > +private:
> > +   virtual bool visit(BasicBlock *);
> > +   void split64BitReg(Function *, Instruction *, Instruction *,
> > +  Instruction *, Value *, int);
> > +   void split64MulMad(Function *, Instruction *, DataType);
> > +
> > +   BuildUtil bld;
> > +};
> > +
> > +bool
> > +Split64BitOpPreRA::visit(BasicBlock *bb)
> > +{
> > +   Instruction *i, *next;
> > +   Modifier mod;
> > +
> > +   for (i = bb->getEntry(); i; i = next) {
> > +  next = i->next;
> > +
> > +  if (typeSizeof(i->dType) != 8)
> > + continue;
> > +
> > +  DataType hTy;
> > +  switch (i->dType) {
> > +  case TYPE_U64: hTy = TYPE_U32; break;
> > +  case TYPE_S64: hTy = TYPE_S32; break;
> > +  default:
> > + continue;
> > +  }
> > +
> > +  if (i->op == OP_MAD || i->op == OP_MUL)
> > + split64MulMad(bb->getFunction(), i, hTy);
> > +   }
> > +
> > +   return true;
> > +}
> > +
> > +void
> > +Split64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType 
> > hTy)
> > +{
> > +   assert(i->op == OP_MAD || i->op == OP_MUL);
> > +   if (isFloatType(i->dType) || isFloatType(i->sType))
> > +  return;
> > +
> > +   bld.setPosition(i, true);
> > +
> > +   Value *zero = bld.mkImm(0u);
> > +   Value *carry = bld.getSSA(1, FILE_FLAGS);
> > +
> > +   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
> > +   // values (a, b and c might be 32-bit values), using 32-bit operations. 
> > This
> > +   // gives the following operations:
> > +   // * `d.low = low(a.low * b.low) (+ c.low)?`
> > +   // * `d.high = low(a.high * b.low) + low(a.low * b.high)
> > +   //   + high(a.low * b.low) (+ c.high)?`
> > +   //
> > +   // To compute the high bits, we can split in the following operations:
> > +   // * `tmp1   = low(a.high * b.low) (+ c.high)?`
> > +   // * `tmp2   = low(a.low * b.high) + tmp1`
> > +   // * `d.high = high(a.low * b.low) + tmp2`
> > +   //
> > +   // mkSplit put lower bits at index 0 and higher bits at index 1
> > +
> > +   V

Re: [Mesa-dev] [PATCH v4] clover: restore support for LLVM <= 3.9

2016-11-18 Thread Pierre Moreau
Mesa master builds again against LLVM 3.6.

Tested-by: Pierre Moreau <pierre.mor...@free.fr>

On 07:57 pm - Nov 18 2016, Vedran Miletić wrote:
> The commit 8e430ff8b060b4e8e922bae24b3c57837da6ea77 support for LLVM
> 3.9 and older versionsin  Clover. This patch restores it and refactors
> the support using Clover compatibility layer for LLVM.
> 
> Signed-off-by: Vedran Miletić <ved...@miletic.net>
> ---
>  .../state_trackers/clover/llvm/codegen/bitcode.cpp |  9 ++
>  src/gallium/state_trackers/clover/llvm/compat.hpp  | 35 
> ++
>  2 files changed, 37 insertions(+), 7 deletions(-)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp 
> b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
> index 5dcc4f8..4b4ae41 100644
> --- a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
> @@ -32,6 +32,7 @@
>  ///
>  
>  #include "llvm/codegen.hpp"
> +#include "llvm/compat.hpp"
>  #include "llvm/metadata.hpp"
>  #include "core/error.hpp"
>  #include "util/algorithm.hpp"
> @@ -99,13 +100,7 @@ clover::llvm::parse_module_library(const module , 
> ::llvm::LLVMContext ,
> auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef(
>  as_string(m.secs[0].data), " "), 
> ctx);
>  
> -   if (::llvm::Error err = mod.takeError()) {
> -  std::string msg;
> -  ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase 
> ) {
> - msg = EIB.message();
> - fail(r_log, error(CL_INVALID_PROGRAM), msg.c_str());
> -  });
> -   }
> +   compat::handle_module_error(mod, r_log);
>  
> return std::unique_ptr<::llvm::Module>(std::move(*mod));
>  }
> diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp 
> b/src/gallium/state_trackers/clover/llvm/compat.hpp
> index a963cff..b29100f 100644
> --- a/src/gallium/state_trackers/clover/llvm/compat.hpp
> +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
> @@ -39,6 +39,11 @@
>  #include 
>  #include 
>  #include 
> +#if HAVE_LLVM >= 0x0400
> +#include 
> +#else
> +#include 
> +#endif
>  
>  #if HAVE_LLVM >= 0x0307
>  #include 
> @@ -53,6 +58,14 @@
>  #include 
>  #include 
>  
> +#if HAVE_LLVM >= 0x0307
> +#include 
> +#endif
> +
> +namespace llvm {
> +   class Module;
> +}
> +
>  namespace clover {
> namespace llvm {
>namespace compat {
> @@ -158,6 +171,28 @@ namespace clover {
>  #else
>   const auto default_reloc_model = ::llvm::Reloc::Default;
>  #endif
> +
> +#if HAVE_LLVM >= 0x0400
> + typedef ::llvm::Expected<std::unique_ptr<::llvm::Module>> 
> bitcode_module;
> +#elif HAVE_LLVM >= 0x0307
> + typedef ::llvm::ErrorOr<std::unique_ptr<::llvm::Module>> 
> bitcode_module;
> +#else
> + typedef ::llvm::ErrorOr<::llvm::Module *> bitcode_module;
> +#endif
> +
> + inline void
> + handle_module_error(bitcode_module , std::string _log) {
> +#if HAVE_LLVM >= 0x0400
> +if (::llvm::Error err = mod.takeError()) {
> +   ::llvm::handleAllErrors(std::move(err), 
> [&](::llvm::ErrorInfoBase ) {
> +  fail(r_log, error(CL_INVALID_PROGRAM), 
> EIB.message().c_str());
> +   });
> +}
> +#else
> +if (!mod)
> +   fail(r_log, error(CL_INVALID_PROGRAM), 
> mod.getError().message());
> +#endif
> + }
>}
> }
>  }
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v3] nv50/ir: Split 64-bit integer MAD/MUL operations

2016-10-30 Thread Pierre Moreau
Hardware does not support 64-bit integers MAD and MUL operations, so we need
to transform them in 32-bit operations.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 116 +
 1 file changed, 116 insertions(+)

Tested with (the GPU result was compared to the CPU result):
* 0xfff3lu * 0xfff2lu + 0x80070002lu
* 0xfff3lu * 0x80070002lu + 0x80070002lu
* 0x80010003lu * 0xfff2lu + 0x80070002lu
* 0x80010003lu * 0x80070002lu + 0x80070002lu

* -523456791234l * 929835793793l + -15793793l
*  523456791234l * 929835793793l + -15793793l
* -523456791234l * -929835793793l + -15793793l
*  523456791234l * -929835793793l + -15793793l

v2:
* Completely re-write the patch, as it was completely flawed (Ilia Mirkin)
* Move pass prior to Register Allocation, as some temporaries need to
  be created.

v3:
* Remove left-over prototype `split64Reg()`
* Remove redundant check for 64-bit destination type in `visit()` (Ilia Mirkin)
* Use the `func` attribute when calling split64MulMad (Ilia Mirkin)
* Change test of source and destination as float types, to an assert (Ilia
  Mirkin)
* Replace `typeSizeof(hTy)` by 4, as it will always be the case, and add an
  assert for it in `split64MulMad()`

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 0fb1a78..da6bbc4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2234,6 +2234,121 @@ LateAlgebraicOpt::visit(Instruction *i)
 
 // 
=
 
+// Split 64-bit MUL and MAD
+class Split64BitOpPreRA : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+   void split64MulMad(Function *, Instruction *, DataType);
+
+   BuildUtil bld;
+};
+
+bool
+Split64BitOpPreRA::visit(BasicBlock *bb)
+{
+   Instruction *i, *next;
+   Modifier mod;
+
+   for (i = bb->getEntry(); i; i = next) {
+  next = i->next;
+
+  DataType hTy;
+  switch (i->dType) {
+  case TYPE_U64: hTy = TYPE_U32; break;
+  case TYPE_S64: hTy = TYPE_S32; break;
+  default:
+ continue;
+  }
+
+  if (i->op == OP_MAD || i->op == OP_MUL)
+ split64MulMad(func, i, hTy);
+   }
+
+   return true;
+}
+
+void
+Split64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType hTy)
+{
+   assert(i->op == OP_MAD || i->op == OP_MUL);
+   assert(!isFloatType(i->dType) && !isFloatType(i->sType));
+   assert(typeSizeof(hTy) == 4);
+
+   bld.setPosition(i, true);
+
+   Value *zero = bld.mkImm(0u);
+   Value *carry = bld.getSSA(1, FILE_FLAGS);
+
+   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
+   // values (a, b and c might be 32-bit values), using 32-bit operations. This
+   // gives the following operations:
+   // * `d.low = low(a.low * b.low) (+ c.low)?`
+   // * `d.high = low(a.high * b.low) + low(a.low * b.high)
+   //   + high(a.low * b.low) (+ c.high)?`
+   //
+   // To compute the high bits, we can split in the following operations:
+   // * `tmp1   = low(a.high * b.low) (+ c.high)?`
+   // * `tmp2   = low(a.low * b.high) + tmp1`
+   // * `d.high = high(a.low * b.low) + tmp2`
+   //
+   // mkSplit put lower bits at index 0 and higher bits at index 1
+
+   Value *op1[2];
+   if (i->getSrc(0)->reg.size == 8)
+  bld.mkSplit(op1, 4, i->getSrc(0));
+   else {
+  op1[0] = i->getSrc(0);
+  op1[1] = zero;
+   }
+   Value *op2[2];
+   if (i->getSrc(1)->reg.size == 8)
+  bld.mkSplit(op2, 4, i->getSrc(1));
+   else {
+  op2[0] = i->getSrc(1);
+  op2[1] = zero;
+   }
+
+   Value *op3[2] = { NULL, NULL };
+   if (i->op == OP_MAD) {
+  if (i->getSrc(2)->reg.size == 8)
+ bld.mkSplit(op3, 4, i->getSrc(2));
+  else {
+ op3[0] = i->getSrc(2);
+ op3[1] = zero;
+  }
+   }
+
+   Value *tmpRes1Hi = bld.getSSA();
+   if (i->op == OP_MAD)
+  bld.mkOp3(OP_MAD, hTy, tmpRes1Hi, op1[1], op2[0], op3[1]);
+   else
+  bld.mkOp2(OP_MUL, hTy, tmpRes1Hi, op1[1], op2[0]);
+
+   Value *tmpRes2Hi = bld.mkOp3v(OP_MAD, hTy, bld.getSSA(), op1[0], op2[1], 
tmpRes1Hi);
+
+   Value *def[2] = { bld.getSSA(), bld.getSSA() };
+
+   // If it was a MAD, add the carry from the low bits
+   // It is not needed if it was a MUL, since we added high(a.low * b.low) to
+   // d.high
+   if (i->op == OP_MAD)
+  bld.mkOp3(OP_MAD, hTy, def[0], op1[0], op2[0], op3[0])->setFlagsDef(1, 
carry);
+   else
+  bld.mkOp2(OP_MUL, hTy, def[0], op1[0], op2[0]);
+
+   Instruction *hiPart3 = bld.mkOp3(OP_MAD, hTy, def[1], op1[0], op2[0], 
tmpRes2Hi);
+   hiPart3->subOp = NV50_IR_SUBOP_MUL_HIGH;
+   if (i->op 

Re: [Mesa-dev] [PATCH] nvc0: do not duplicate similar performance metrics

2016-10-31 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 02:54 pm - Oct 31 2016, Samuel Pitoiset wrote:
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c| 50 
> +++---
>  1 file changed, 7 insertions(+), 43 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c 
> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> index 2f85c32..36534ba 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
> @@ -257,24 +257,6 @@ static const struct nvc0_hw_metric_query_cfg 
> *sm21_hw_metric_queries[] =
>  
>  /*  Compute capability 3.0 (GK104/GK106/GK107)  */
>  static const struct nvc0_hw_metric_query_cfg
> -sm30_achieved_occupancy =
> -{
> -   .type= NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,
> -   .queries[0]  = _SM(ACTIVE_WARPS),
> -   .queries[1]  = _SM(ACTIVE_CYCLES),
> -   .num_queries = 2,
> -};
> -
> -static const struct nvc0_hw_metric_query_cfg
> -sm30_branch_efficiency =
> -{
> -   .type= NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,
> -   .queries[0]  = _SM(BRANCH),
> -   .queries[1]  = _SM(DIVERGENT_BRANCH),
> -   .num_queries = 2,
> -};
> -
> -static const struct nvc0_hw_metric_query_cfg
>  sm30_inst_issued =
>  {
> .type= NVC0_HW_METRIC_QUERY_INST_ISSUED,
> @@ -284,15 +266,6 @@ sm30_inst_issued =
>  };
>  
>  static const struct nvc0_hw_metric_query_cfg
> -sm30_inst_per_wrap =
> -{
> -   .type= NVC0_HW_METRIC_QUERY_INST_PER_WRAP,
> -   .queries[0]  = _SM(INST_EXECUTED),
> -   .queries[1]  = _SM(WARPS_LAUNCHED),
> -   .num_queries = 2,
> -};
> -
> -static const struct nvc0_hw_metric_query_cfg
>  sm30_inst_replay_overhead =
>  {
> .type= NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,
> @@ -332,15 +305,6 @@ sm30_issue_slot_utilization =
>  };
>  
>  static const struct nvc0_hw_metric_query_cfg
> -sm30_ipc =
> -{
> -   .type= NVC0_HW_METRIC_QUERY_IPC,
> -   .queries[0]  = _SM(INST_EXECUTED),
> -   .queries[1]  = _SM(ACTIVE_CYCLES),
> -   .num_queries = 2,
> -};
> -
> -static const struct nvc0_hw_metric_query_cfg
>  sm30_shared_replay_overhead =
>  {
> .type= NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,
> @@ -352,29 +316,29 @@ sm30_shared_replay_overhead =
>  
>  static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =
>  {
> -   _achieved_occupancy,
> -   _branch_efficiency,
> +   _achieved_occupancy,
> +   _branch_efficiency,
> _inst_issued,
> -   _inst_per_wrap,
> +   _inst_per_wrap,
> _inst_replay_overhead,
> _issued_ipc,
> _issue_slots,
> _issue_slot_utilization,
> -   _ipc,
> +   _ipc,
> _shared_replay_overhead,
>  };
>  
>  /*  Compute capability 3.5 (GK110)  */
>  static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =
>  {
> -   _achieved_occupancy,
> +   _achieved_occupancy,
> _inst_issued,
> -   _inst_per_wrap,
> +   _inst_per_wrap,
> _inst_replay_overhead,
> _issued_ipc,
> _inst_issued,
> _issue_slot_utilization,
> -   _ipc,
> +   _ipc,
> _shared_replay_overhead,
>  };
>  
> -- 
> 2.10.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gm107/ir: emit RED instead of ATOM when no dst

2016-11-04 Thread Pierre Moreau
Are reduction doable on shared atomics as well?

Pierre

On 08:08 pm - Nov 04 2016, Samuel Pitoiset wrote:
> This is similar to NVC0 and GK110 emitters where we emit
> reduction operations instead of atomic operations when the
> destination is not used.
> 
> Found after writing some tests which check if performance counters
> return the expected value. In that case, gred_count returned 0
> on gm107 while at least gk106 returned the correct value.
> 
> Signed-off-by: Samuel Pitoiset 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp | 29 
> +-
>  1 file changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> index 5ed2ad4..5bd0fa0 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
> @@ -180,6 +180,7 @@ private:
> void emitIPA();
> void emitATOM();
> void emitATOMS();
> +   void emitRED();
> void emitCCTL();
>  
> void emitPIXLD();
> @@ -2496,6 +2497,29 @@ CodeEmitterGM107::emitATOMS()
>  }
>  
>  void
> +CodeEmitterGM107::emitRED()
> +{
> +   unsigned dType;
> +
> +   switch (insn->dType) {
> +   case TYPE_U32: dType = 0; break;
> +   case TYPE_S32: dType = 1; break;
> +   case TYPE_U64: dType = 2; break;
> +   case TYPE_F32: dType = 3; break;
> +   case TYPE_B128: dType = 4; break;
> +   case TYPE_S64: dType = 5; break;
> +   default: assert(!"unexpected dType"); dType = 0; break;
> +   }
> +
> +   emitInsn (0xebf8);
> +   emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
> +   emitField(0x17, 3, insn->subOp);
> +   emitField(0x14, 3, dType);
> +   emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
> +   emitGPR  (0x00, insn->src(1));
> +}
> +
> +void
>  CodeEmitterGM107::emitCCTL()
>  {
> unsigned width;
> @@ -3237,7 +3261,10 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
>if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
>   emitATOMS();
>else
> - emitATOM();
> + if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
> +emitRED();
> + else
> +emitATOM();
>break;
> case OP_CCTL:
>emitCCTL();
> -- 
> 2.10.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: Split 64-bit integer MAD/MUL operations

2016-10-15 Thread Pierre Moreau
Hardware does not support 64-bit integers MAD and MUL operations, so we need
to transform them in 32-bit operations.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 121 +
 1 file changed, 121 insertions(+)

Tested with (the GPU result was compared to the CPU result):
* 0xfff3lu * 0xfff2lu + 0x80070002lu
* 0xfff3lu * 0x80070002lu + 0x80070002lu
* 0x80010003lu * 0xfff2lu + 0x80070002lu
* 0x80010003lu * 0x80070002lu + 0x80070002lu

* -523456791234l * 929835793793l + -15793793l
*  523456791234l * 929835793793l + -15793793l
* -523456791234l * -929835793793l + -15793793l
*  523456791234l * -929835793793l + -15793793l

v2:
* Completely re-write the patch, as it was completely flawed (Ilia Mirkin)
* Move pass prior to Register Allocation, as some temporaries need to
  be created.

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d88bb34..a610eb5 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2218,6 +2218,126 @@ LateAlgebraicOpt::visit(Instruction *i)
 
 // 
=
 
+// Split 64-bit MUL and MAD
+class Split64BitOpPreRA : public Pass
+{
+private:
+   virtual bool visit(BasicBlock *);
+   void split64BitReg(Function *, Instruction *, Instruction *,
+  Instruction *, Value *, int);
+   void split64MulMad(Function *, Instruction *, DataType);
+
+   BuildUtil bld;
+};
+
+bool
+Split64BitOpPreRA::visit(BasicBlock *bb)
+{
+   Instruction *i, *next;
+   Modifier mod;
+
+   for (i = bb->getEntry(); i; i = next) {
+  next = i->next;
+
+  if (typeSizeof(i->dType) != 8)
+ continue;
+
+  DataType hTy;
+  switch (i->dType) {
+  case TYPE_U64: hTy = TYPE_U32; break;
+  case TYPE_S64: hTy = TYPE_S32; break;
+  default:
+ continue;
+  }
+
+  if (i->op == OP_MAD || i->op == OP_MUL)
+ split64MulMad(bb->getFunction(), i, hTy);
+   }
+
+   return true;
+}
+
+void
+Split64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType hTy)
+{
+   assert(i->op == OP_MAD || i->op == OP_MUL);
+   if (isFloatType(i->dType) || isFloatType(i->sType))
+  return;
+
+   bld.setPosition(i, true);
+
+   Value *zero = bld.mkImm(0u);
+   Value *carry = bld.getSSA(1, FILE_FLAGS);
+
+   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
+   // values (a, b and c might be 32-bit values), using 32-bit operations. This
+   // gives the following operations:
+   // * `d.low = low(a.low * b.low) (+ c.low)?`
+   // * `d.high = low(a.high * b.low) + low(a.low * b.high)
+   //   + high(a.low * b.low) (+ c.high)?`
+   //
+   // To compute the high bits, we can split in the following operations:
+   // * `tmp1   = low(a.high * b.low) (+ c.high)?`
+   // * `tmp2   = low(a.low * b.high) + tmp1`
+   // * `d.high = high(a.low * b.low) + tmp2`
+   //
+   // mkSplit put lower bits at index 0 and higher bits at index 1
+
+   Value *op1[2];
+   if (i->getSrc(0)->reg.size == 8)
+  bld.mkSplit(op1, typeSizeof(hTy), i->getSrc(0));
+   else {
+  op1[0] = i->getSrc(0);
+  op1[1] = zero;
+   }
+   Value *op2[2];
+   if (i->getSrc(1)->reg.size == 8)
+  bld.mkSplit(op2, typeSizeof(hTy), i->getSrc(1));
+   else {
+  op2[0] = i->getSrc(1);
+  op2[1] = zero;
+   }
+
+   Value *op3[2] = { NULL, NULL };
+   if (i->op == OP_MAD) {
+  if (i->getSrc(2)->reg.size == 8)
+ bld.mkSplit(op3, typeSizeof(hTy), i->getSrc(2));
+  else {
+ op3[0] = i->getSrc(2);
+ op3[1] = zero;
+  }
+   }
+
+   Value *tmpRes1Hi = bld.getSSA();
+   if (i->op == OP_MAD)
+  bld.mkOp3(OP_MAD, hTy, tmpRes1Hi, op1[1], op2[0], op3[1]);
+   else
+  bld.mkOp2(OP_MUL, hTy, tmpRes1Hi, op1[1], op2[0]);
+
+   Value *tmpRes2Hi = bld.mkOp3v(OP_MAD, hTy, bld.getSSA(), op1[0], op2[1], 
tmpRes1Hi);
+
+   Value *def[2] = { bld.getSSA(), bld.getSSA() };
+
+   // If it was a MAD, add the carry from the low bits
+   // It is not needed if it was a MUL, since we added high(a.low * b.low) to
+   // d.high
+   if (i->op == OP_MAD)
+  bld.mkOp3(OP_MAD, hTy, def[0], op1[0], op2[0], op3[0])->setFlagsDef(1, 
carry);
+   else
+  bld.mkOp2(OP_MUL, hTy, def[0], op1[0], op2[0]);
+
+   Instruction *hiPart3 = bld.mkOp3(OP_MAD, hTy, def[1], op1[0], op2[0], 
tmpRes2Hi);
+   hiPart3->subOp = NV50_IR_SUBOP_MUL_HIGH;
+   if (i->op == OP_MAD)
+  hiPart3->setFlagsSrc(3, carry);
+
+   bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
+
+   delete_Instruction(fn->getProgram(), i);
+}
+

Re: [Mesa-dev] [PATCH] nv50/ir: Split 64-bit integer MAD/MUL operations

2016-10-15 Thread Pierre Moreau
Sorry, there should have been a v2 next to PATCH in the subject…

Pierre


On 12:24 am - Oct 16 2016, Pierre Moreau wrote:
> Hardware does not support 64-bit integers MAD and MUL operations, so we need
> to transform them in 32-bit operations.
> 
> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 121 
> +
>  1 file changed, 121 insertions(+)
> 
> Tested with (the GPU result was compared to the CPU result):
> * 0xfff3lu * 0xfff2lu + 0x80070002lu
> * 0xfff3lu * 0x80070002lu + 0x80070002lu
> * 0x80010003lu * 0xfff2lu + 0x80070002lu
> * 0x80010003lu * 0x80070002lu + 0x80070002lu
> 
> * -523456791234l * 929835793793l + -15793793l
> *  523456791234l * 929835793793l + -15793793l
> * -523456791234l * -929835793793l + -15793793l
> *  523456791234l * -929835793793l + -15793793l
> 
> v2:
> * Completely re-write the patch, as it was completely flawed (Ilia Mirkin)
> * Move pass prior to Register Allocation, as some temporaries need to
>   be created.
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index d88bb34..a610eb5 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -2218,6 +2218,126 @@ LateAlgebraicOpt::visit(Instruction *i)
>  
>  // 
> =
>  
> +// Split 64-bit MUL and MAD
> +class Split64BitOpPreRA : public Pass
> +{
> +private:
> +   virtual bool visit(BasicBlock *);
> +   void split64BitReg(Function *, Instruction *, Instruction *,
> +  Instruction *, Value *, int);
> +   void split64MulMad(Function *, Instruction *, DataType);
> +
> +   BuildUtil bld;
> +};
> +
> +bool
> +Split64BitOpPreRA::visit(BasicBlock *bb)
> +{
> +   Instruction *i, *next;
> +   Modifier mod;
> +
> +   for (i = bb->getEntry(); i; i = next) {
> +  next = i->next;
> +
> +  if (typeSizeof(i->dType) != 8)
> + continue;
> +
> +  DataType hTy;
> +  switch (i->dType) {
> +  case TYPE_U64: hTy = TYPE_U32; break;
> +  case TYPE_S64: hTy = TYPE_S32; break;
> +  default:
> + continue;
> +  }
> +
> +  if (i->op == OP_MAD || i->op == OP_MUL)
> + split64MulMad(bb->getFunction(), i, hTy);
> +   }
> +
> +   return true;
> +}
> +
> +void
> +Split64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType hTy)
> +{
> +   assert(i->op == OP_MAD || i->op == OP_MUL);
> +   if (isFloatType(i->dType) || isFloatType(i->sType))
> +  return;
> +
> +   bld.setPosition(i, true);
> +
> +   Value *zero = bld.mkImm(0u);
> +   Value *carry = bld.getSSA(1, FILE_FLAGS);
> +
> +   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
> +   // values (a, b and c might be 32-bit values), using 32-bit operations. 
> This
> +   // gives the following operations:
> +   // * `d.low = low(a.low * b.low) (+ c.low)?`
> +   // * `d.high = low(a.high * b.low) + low(a.low * b.high)
> +   //   + high(a.low * b.low) (+ c.high)?`
> +   //
> +   // To compute the high bits, we can split in the following operations:
> +   // * `tmp1   = low(a.high * b.low) (+ c.high)?`
> +   // * `tmp2   = low(a.low * b.high) + tmp1`
> +   // * `d.high = high(a.low * b.low) + tmp2`
> +   //
> +   // mkSplit put lower bits at index 0 and higher bits at index 1
> +
> +   Value *op1[2];
> +   if (i->getSrc(0)->reg.size == 8)
> +  bld.mkSplit(op1, typeSizeof(hTy), i->getSrc(0));
> +   else {
> +  op1[0] = i->getSrc(0);
> +  op1[1] = zero;
> +   }
> +   Value *op2[2];
> +   if (i->getSrc(1)->reg.size == 8)
> +  bld.mkSplit(op2, typeSizeof(hTy), i->getSrc(1));
> +   else {
> +  op2[0] = i->getSrc(1);
> +  op2[1] = zero;
> +   }
> +
> +   Value *op3[2] = { NULL, NULL };
> +   if (i->op == OP_MAD) {
> +  if (i->getSrc(2)->reg.size == 8)
> + bld.mkSplit(op3, typeSizeof(hTy), i->getSrc(2));
> +  else {
> + op3[0] = i->getSrc(2);
> + op3[1] = zero;
> +  }
> +   }
> +
> +   Value *tmpRes1Hi = bld.getSSA();
> +   if (i->op == OP_MAD)
> +  bld.mkOp3(OP_MAD, hTy, tmpRes1Hi, op1[1], op2[0], op3[1]);
> +   else
> +  bld.mkOp2(OP_MUL, hTy, tmpRes1Hi, op1[1], op2[0]);
> +
> +   Value *

Re: [Mesa-dev] [PATCH] nv50/ir: Split 64-bit integer MAD/MUL operations

2016-10-16 Thread Pierre Moreau
On 06:38 pm - Oct 15 2016, Ilia Mirkin wrote:
> On Sat, Oct 15, 2016 at 6:24 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > Hardware does not support 64-bit integers MAD and MUL operations, so we need
> > to transform them in 32-bit operations.
> >
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 121 
> > +
> >  1 file changed, 121 insertions(+)
> >
> > Tested with (the GPU result was compared to the CPU result):
> > * 0xfff3lu * 0xfff2lu + 0x80070002lu
> > * 0xfff3lu * 0x80070002lu + 0x80070002lu
> > * 0x80010003lu * 0xfff2lu + 0x80070002lu
> > * 0x80010003lu * 0x80070002lu + 0x80070002lu
> >
> > * -523456791234l * 929835793793l + -15793793l
> > *  523456791234l * 929835793793l + -15793793l
> > * -523456791234l * -929835793793l + -15793793l
> > *  523456791234l * -929835793793l + -15793793l
> >
> > v2:
> > * Completely re-write the patch, as it was completely flawed (Ilia Mirkin)
> > * Move pass prior to Register Allocation, as some temporaries need to
> >   be created.
> 
> In principle I like this approach. I don't remember what your old one
> was, but this is good. I think that nearly all of our "legalize" step
> items, including the gpu-family specific ones, need to be moved to
> this type of pass.

The old one inserted itself within the existing
`BuildUtil::split64BitOpPostRA()`.

> 
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > index d88bb34..a610eb5 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > @@ -2218,6 +2218,126 @@ LateAlgebraicOpt::visit(Instruction *i)
> >
> >  // 
> > =
> >
> > +// Split 64-bit MUL and MAD
> > +class Split64BitOpPreRA : public Pass
> > +{
> > +private:
> > +   virtual bool visit(BasicBlock *);
> > +   void split64BitReg(Function *, Instruction *, Instruction *,
> > +  Instruction *, Value *, int);

Oops, forgot to remove the above prototype, will do it for v3.

> > +   void split64MulMad(Function *, Instruction *, DataType);
> > +
> > +   BuildUtil bld;
> > +};
> > +
> > +bool
> > +Split64BitOpPreRA::visit(BasicBlock *bb)
> > +{
> > +   Instruction *i, *next;
> > +   Modifier mod;
> > +
> > +   for (i = bb->getEntry(); i; i = next) {
> > +  next = i->next;
> > +
> > +  if (typeSizeof(i->dType) != 8)
> > + continue;
> 
> Is this necessary? You exclusively operate on U64/S64 below.

The above was added as I thought this pass could be reused for other 64-bit
operations that need to be split, while the below switch statement is more of a
remaining from when the code was in `BuildUtil::split64BitOpPostRA()`.
I guess that even if the pass gets support for more operations, FP64 are not
going to be part of it as the hardware supports them. In which case, only
64-bit integers are left, and the below switch statement would indeed be
enough.

> 
> > +
> > +  DataType hTy;
> > +  switch (i->dType) {
> > +  case TYPE_U64: hTy = TYPE_U32; break;
> > +  case TYPE_S64: hTy = TYPE_S32; break;
> > +  default:
> > + continue;
> > +  }
> > +
> > +  if (i->op == OP_MAD || i->op == OP_MUL)
> > + split64MulMad(bb->getFunction(), i, hTy);
> 
> There's an instance variable "func" (and "prog") you can use.

Oh, nice! Will use it.

> 
> > +   }
> > +
> > +   return true;
> > +}
> > +
> > +void
> > +Split64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType 
> > hTy)
> > +{
> > +   assert(i->op == OP_MAD || i->op == OP_MUL);
> > +   if (isFloatType(i->dType) || isFloatType(i->sType))
> > +  return;
> 
> I'd make this into an assert. Given the checks before calling this
> function, it can't really happen.

True, I’ll change that.

> 
> > +
> > +   bld.setPosition(i, true);
> > +
> > +   Value *zero = bld.mkImm(0u);
> > +   Value *carry = bld.getSSA(1, FILE_FLAGS);
> > +
> > +   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
> > 

[Mesa-dev] [PATCH] clover: Check for executables before enqueueing a kernel

2016-12-29 Thread Pierre Moreau
Without this check, the kernel::bind() method would fail with a
std::out_of_range exception, letting an exception escape from the
library into the client, rather than returning the corresponding error
code CL_INVALID_PROGRAM_EXECUTABLE.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/state_trackers/clover/api/kernel.cpp | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 73ba34abe8..e3d75af972 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -215,7 +215,10 @@ namespace {
 }, kern.args()))
  throw error(CL_INVALID_KERNEL_ARGS);
 
-  if (!count(q.device(), kern.program().devices()))
+  // If the command queue's device is not associated to the program, we get
+  // a module, with no sections, which will also fail the following test.
+  auto  = kern.program().build(q.device()).binary;
+  if (!any_of(type_equals(module::section::text_executable), m.secs))
  throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Leaked hardware event if kernel launch fails?

2016-12-25 Thread Pierre Moreau
Hello,

I noticed that, if trying to enqueue a kernel which had no
`module::section::text_executable` attached to its clover module, I would get a
`std::out_of_range` exception, instead of the expected
CL_INVALID_PROGRAM_EXECUTABLE (see [0]; I tried enqueueing using
`clEnqueueNDRangeKernel). I modified the `kernel::exec_context::bind()` method
to catch the out-of-range exceptions, and throw the proper clover exception
instead:

```
diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
b/src/gallium/state_trackers/clover/core/kernel.cpp
index 328323b6b0..1bb4f612cb 100644
--- a/src/gallium/state_trackers/clover/core/kernel.cpp
+++ b/src/gallium/state_trackers/clover/core/kernel.cpp
@@ -161,8 +161,18 @@ kernel::exec_context::bind(intrusive_ptr _q,
 
// Bind kernel arguments.
auto  = kern.program().build(q->device()).binary;
-   auto margs = find(name_equals(kern.name()), m.syms).args;
-   auto msec = find(type_equals(module::section::text_executable), m.secs);
+   std::vector margs;
+   try {
+  margs = find(name_equals(kern.name()), m.syms).args;
+   } catch (const std::out_of_range ) {
+  throw error(CL_INVALID_KERNEL);
+   }
+   module::section msec;
+   try {
+  msec = find(type_equals(module::section::text_executable), m.secs);
+   } catch (const std::out_of_range ) {
+  throw error(CL_INVALID_PROGRAM_EXECUTABLE);
+   }
auto explicit_arg = kern._args.begin();
 
for (auto  : margs) {
```

But now, when my OpenCL program exists after the error, the destruction of the
`cl::CommandQueue` object doesn’t happen in a peaceful manner:

```
Program received signal SIGSEGV, Segmentation fault.
0x00652e40 in ?? ()
(gdb) bt
#0  0x00652e40 in ?? ()
#1  0x77b2d29a in clover::command_queue::flush 
(this=this@entry=0x653310) at 
../../../../../mesa_spirv/src/gallium/state_trackers/clover/core/queue.cpp:77
#2  0x77b0ebc0 in clReleaseCommandQueue (d_q=0x653318) at 
../../../../../mesa_spirv/src/gallium/state_trackers/clover/api/queue.cpp:63
#3  0x00405125 in 
cl::detail::ReferenceHandler<_cl_command_queue*>::release (queue=0x653318) at 
/usr/include/CL/cl.hpp:1686
#4  0x00405108 in cl::detail::Wrapper<_cl_command_queue*>::release 
(this=0x7fffda98) at /usr/include/CL/cl.hpp:1863
#5  0x004050c7 in cl::detail::Wrapper<_cl_command_queue*>::~Wrapper 
(this=0x7fffda98) at /usr/include/CL/cl.hpp:1802
#6  0x004047e5 in cl::CommandQueue::~CommandQueue (this=0x7fffda98) 
at /usr/include/CL/cl.hpp:5482
#7  0x004046f8 in main () at instruction-set_OpenCL-std.cpp:58
(gdb) up
#1  0x77b2d29a in clover::command_queue::flush 
(this=this@entry=0x653310) at 
../../../../../mesa_spirv/src/gallium/state_trackers/clover/core/queue.cpp:77
77   queued_events.front()().fence(fence);
```

(I am using the OpenCL-C++ binding, and Mesa is quite recent (88b5acfa09) with
custom patches to get some OpenCL support for Nouveau.)

Looking around a bit with the debugger, it seems like the event created by the
`clEnqueueNDRangeKernel()` function still exists within the command queue:
shouldn’t it have been automatically removed from the queue as the enqueue
function failed?


Thank you for your help!
Pierre


[0]: 
https://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clEnqueueNDRangeKernel.html


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Leaked hardware event if kernel launch fails?

2016-12-25 Thread Pierre Moreau
Hello Francisco!

Thank you for your quick reply!

On 02:33 pm - Dec 25 2016, Francisco Jerez wrote:
> Pierre Moreau <pierre.mor...@free.fr> writes:
> 
> > Hello,
> >
> Hi Pierre!
> 
> > I noticed that, if trying to enqueue a kernel which had no
> > `module::section::text_executable` attached to its clover module, I would 
> > get a
> > `std::out_of_range` exception, instead of the expected
> > CL_INVALID_PROGRAM_EXECUTABLE (see [0]; I tried enqueueing using
> > `clEnqueueNDRangeKernel). I modified the `kernel::exec_context::bind()` 
> > method
> > to catch the out-of-range exceptions, and throw the proper clover exception
> > instead:
> >
> > ```
> > diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp 
> > b/src/gallium/state_trackers/clover/core/kernel.cpp
> > index 328323b6b0..1bb4f612cb 100644
> > --- a/src/gallium/state_trackers/clover/core/kernel.cpp
> > +++ b/src/gallium/state_trackers/clover/core/kernel.cpp
> > @@ -161,8 +161,18 @@ 
> > kernel::exec_context::bind(intrusive_ptr _q,
> >  
> > // Bind kernel arguments.
> > auto  = kern.program().build(q->device()).binary;
> > -   auto margs = find(name_equals(kern.name()), m.syms).args;
> > -   auto msec = find(type_equals(module::section::text_executable), m.secs);
> > +   std::vector margs;
> > +   try {
> > +  margs = find(name_equals(kern.name()), m.syms).args;
> > +   } catch (const std::out_of_range ) {
> > +  throw error(CL_INVALID_KERNEL);
> > +   }
> > +   module::section msec;
> > +   try {
> > +  msec = find(type_equals(module::section::text_executable), m.secs);
> > +   } catch (const std::out_of_range ) {
> > +  throw error(CL_INVALID_PROGRAM_EXECUTABLE);
> > +   }
> 
> I think we should be validating this condition beforehand in the
> clEnqueueNDRangeKernel() entry point.  The reason is that
> clover::kernel::launch() will in general be executed asynchronously, so
> the exceptions you're throwing above won't necessarily cause
> clEnqueueNDRangeKernel() to return a failure status code as you'd expect
> [Consider e.g. what would happen if the clEnqueueNDRangeKernel() call
> was made explicitly dependent on a user event which wasn't initially
> signalled, what would delay the execution of clover::kernel::launch()
> until *after* clEnqueueNDRangeKernel() has returned control to the
> user].

I hadn’t really thought of what would happen if the launch happened after
clEnqueueNDRangeKernel() had returned…
I had looked at validate_common() too quickly, and missed that it was already
returning CL_INVALID_PROGRAM_EXECUTABLE, but only when the program was not set
up for the current device. This new patch should be less intrusive than the
previous one, and it does work better. :-) I’ll send a proper patch tomorrow.

Pierre


diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 73ba34abe8..61737ede5e 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -215,7 +215,8 @@ namespace {
 }, kern.args()))
  throw error(CL_INVALID_KERNEL_ARGS);
 
-  if (!count(q.device(), kern.program().devices()))
+  auto  = kern.program().build(q.device()).binary;
+  if (!any_of(type_equals(module::section::text_executable), m.secs))
  throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
 

> 
> > auto explicit_arg = kern._args.begin();
> >  
> > for (auto  : margs) {
> > ```
> >
> > But now, when my OpenCL program exists after the error, the destruction of 
> > the
> > `cl::CommandQueue` object doesn’t happen in a peaceful manner:
> >
> > ```
> > Program received signal SIGSEGV, Segmentation fault.
> > 0x00652e40 in ?? ()
> > (gdb) bt
> > #0  0x00652e40 in ?? ()
> > #1  0x77b2d29a in clover::command_queue::flush 
> > (this=this@entry=0x653310) at 
> > ../../../../../mesa_spirv/src/gallium/state_trackers/clover/core/queue.cpp:77
> > #2  0x77b0ebc0 in clReleaseCommandQueue (d_q=0x653318) at 
> > ../../../../../mesa_spirv/src/gallium/state_trackers/clover/api/queue.cpp:63
> > #3  0x00405125 in 
> > cl::detail::ReferenceHandler<_cl_command_queue*>::release (queue=0x653318) 
> > at /usr/include/CL/cl.hpp:1686
> > #4  0x00405108 in cl::detail::Wrapper<_cl_command_queue*>::release 
> > (this=0x7fffda98) at /usr/include/CL/cl.hpp:1863
> > #5  0x004050c7 in cl::detail::Wrapper<_cl_command_queue*>::~Wrapper 
> > (this=0x7fffda98) at /usr/include/CL/

Re: [Mesa-dev] [PATCH] nv50/ir: Change chipset constants to ISA constants.

2017-04-11 Thread Pierre Moreau
I would agree with Ilia on making it an enum (maybe even an enum class?),
rather than a define.
If keeping it as a define, maybe we should also change the value while at it,
and have NVISA_SM30 equal 0x1d or 0x30 (or by numbering from 0 and on), rather
than keeping it linked to a chipset.

Pierre

On 01:03 am - Apr 11 2017, Samuel Pitoiset wrote:
> Karol told me that over IRC. Introducing ->getIsa() looks good to me.
> 
> On 04/11/2017 01:01 AM, Ilia Mirkin wrote:
> > I wanted to flip things over and use smxx notation...
> > 
> > On Apr 10, 2017 6:20 PM, "Samuel Pitoiset"  > > wrote:
> > 
> > Not sure why you get confused here. The chipset names are globally
> > consistent inside the codegen part and we never use SMxx. Maybe add
> > a comment like:
> > 
> > #define NVISA_GK104_CHIPSET0xe0 /* SM30 */
> > 
> > If you really need this?
> > 
> > On 04/10/2017 11:41 PM, Matthew Mondazzi wrote:
> > 
> > Define references to chipset did not actually use chipset,
> > leading to confusion. More relevant ISA constants put in place
> > of chipset compares.
> > 
> > Signed-off-by: Matthew Mondazzi  > >
> > ---
> >.../drivers/nouveau/codegen/nv50_ir_driver.h   |  7 ++--
> >.../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 24
> > +--
> >.../nouveau/codegen/nv50_ir_lowering_nvc0.cpp  | 46
> > +++---
> >.../nouveau/codegen/nv50_ir_target_nvc0.cpp|  6 +--
> >4 files changed, 42 insertions(+), 41 deletions(-)
> > 
> > diff --git
> > a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> > index e7d840d..76c815e 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> > @@ -75,9 +75,10 @@ struct nv50_ir_prog_symbol
> >   uint32_t offset;
> >};
> >-#define NVISA_GK104_CHIPSET0xe0
> > -#define NVISA_GK20A_CHIPSET0xea
> > -#define NVISA_GM107_CHIPSET0x110
> > +#define NVISA_SM30   0xe0
> > +#define NVISA_SM35   0xea
> > +#define NVISA_SM50   0x110
> > +
> >  struct nv50_ir_prog_info
> >{
> > diff --git
> > a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> > index 5467447..ed29661 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> > @@ -806,7 +806,7 @@ CodeEmitterNVC0::emitSHLADD(const
> > Instruction *i)
> >void
> >CodeEmitterNVC0::emitMADSP(const Instruction *i)
> >{
> > -   assert(targ->getChipset() >= NVISA_GK104_CHIPSET);
> > +   assert(targ->getChipset() >= NVISA_SM30);
> > emitForm_A(i, HEX64(, 0003));
> >@@ -1852,7 +1852,7 @@ CodeEmitterNVC0::emitSTORE(const
> > Instruction *i)
> >   case FILE_MEMORY_LOCAL:  opc = 0xc800; break;
> >   case FILE_MEMORY_SHARED:
> >  if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
> > - if (targ->getChipset() >= NVISA_GK104_CHIPSET)
> > + if (targ->getChipset() >= NVISA_SM30)
> >opc = 0xb800;
> > else
> >opc = 0xcc00;
> > @@ -1868,7 +1868,7 @@ CodeEmitterNVC0::emitSTORE(const
> > Instruction *i)
> >   code[0] = 0x0005;
> >   code[1] = opc;
> >-   if (targ->getChipset() >= NVISA_GK104_CHIPSET) {
> > +   if (targ->getChipset() >= NVISA_SM30) {
> >  // Unlocked store on shared memory can fail.
> >  if (i->src(0).getFile() == FILE_MEMORY_SHARED &&
> >  i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
> > @@ -1901,7 +1901,7 @@ CodeEmitterNVC0::emitLOAD(const
> > Instruction *i)
> >   case FILE_MEMORY_LOCAL:  opc = 0xc000; break;
> >   case FILE_MEMORY_SHARED:
> >  if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
> > - if (targ->getChipset() >= NVISA_GK104_CHIPSET)
> > + if (targ->getChipset() >= NVISA_SM30)
> >opc = 0xa800;
> > else
> >opc = 0xc400;
> > @@ -1944,7 +1944,7 @@ CodeEmitterNVC0::emitLOAD(const
> > Instruction *i)
> >  code[0] |= 63 << 14;
> > if (p >= 0) {
> > -  

Re: [Mesa-dev] [PATCH 4/5] clover/llvm: Use -cl-std and device version to select language defaults

2017-07-22 Thread Pierre Moreau
Hi Aaron,

On 2017-07-21 — 23:19, Aaron Watry wrote:
> According to section 5.8.4.5 of the 2.0 spec, the CL C version is chosen by:
>  1) If you have -cl-std=CL1.1+ use the version specified
>  2) If not, use the highest 1.x version that the device supports

According to that same part of the spec, clBuildProgram and clCompileProgram
should fail if the specified CL C version is strictly greater than the version
the device supports. You could add a check in `get_language_version()` to
compare `ver` and `device_version`, and throw a `build_error()` exception if
`ver > device_version`.

I have two more comments further down.

> Curiously, there is no valid value for -cl-std=CL1.0
> 
> Signed-off-by: Aaron Watry 
> ---
>  .../state_trackers/clover/llvm/invocation.cpp  | 48 
> --
>  1 file changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 364aaf1517..92d72e5b73 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -93,6 +93,48 @@ namespace {
>return ctx;
> }
>  
> +   clang::LangStandard::Kind
> +   get_language_from_version_str(const std::string _str,
> + bool is_opt = false) {
> +   /**
> +* Per CL 2.0 spec, section 5.8.4.5:
> +* If it's an option, use the value directly.
> +* If it's a device version, clamp to max 1.x version, a.k.a. 1.2
> +*/
> +   if (version_str == "1.1")
> +  return clang::LangStandard::lang_opencl11;
> +   if (version_str == "1.2")
> +  return clang::LangStandard::lang_opencl12;
> +   if (version_str == "2.0"){
> +  if (is_opt) return clang::LangStandard::lang_opencl20;
> +  else return clang::LangStandard::lang_opencl12;
> +   }
> +
> +   /*
> +* At this point, it's not a recognized language version option or
> +* 1.1+ device version, which just leaves 1.0 as a possible device
> +* version (or an invalid version string).
> +*/
> +   return clang::LangStandard::lang_opencl10;
> +  }
> +
> +   clang::LangStandard::Kind
> +   get_language_version(const std::vector ,
> +const std::string _version) {
> +
> +  const std::string search = "-cl-std=CL";
> +
> +   for(auto opt: opts){
> +   auto pos = opt.find(search);
> +   if (pos == 0){
> +   auto ver = opt.substr(pos+search.size());
> +   return get_language_from_version_str(ver, true);
> +   }
> +   }
> +
> +   return get_language_from_version_str(device_version);
> +}
> +
> std::unique_ptr
> create_compiler_instance(const target ,
>  const std::vector ,
> @@ -129,7 +171,7 @@ namespace {
>compat::set_lang_defaults(c->getInvocation(), c->getLangOpts(),
>  compat::ik_opencl, 
> ::llvm::Triple(target.triple),
>  c->getPreprocessorOpts(),
> -clang::LangStandard::lang_opencl11);
> +get_language_version(opts, device_version));
>  
>c->createDiagnostics(new clang::TextDiagnosticPrinter(
>*new raw_string_ostream(r_log),
> @@ -211,7 +253,7 @@ clover::llvm::compile_program(const std::string ,
>  
> auto ctx = create_context(r_log);
> auto c = create_compiler_instance(target, tokenize(opts + " input.cl"),
> - r_log);
> + device_version, r_log);

This should be part of patch 3 as that patch doesn't build otherwise.

> auto mod = compile(*ctx, *c, "input.cl", source, headers, target, opts,
>r_log);
>  
> @@ -280,7 +322,7 @@ clover::llvm::link_program(const std::vector 
> ,
> erase_if(equals("-create-library"), options);
>  
> auto ctx = create_context(r_log);
> -   auto c = create_compiler_instance(target, options, r_log);
> +   auto c = create_compiler_instance(target, options, device_version, r_log);

Same here, this should be in patch 3.

Thank you,
Pierre

> auto mod = link(*ctx, *c, modules, r_log);
>  
> optimize(*mod, c->getCodeGenOpts().OptimizationLevel, !create_library);
> -- 
> 2.11.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] A few clover fixes for both CTS and eventual 1.2 support

2017-07-22 Thread Pierre Moreau
With the comments in patch 4 taken care of, this series is

Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>


On 2017-07-21 — 23:19, Aaron Watry wrote:
> The first patch is one I've been sitting on for a few weeks while
> I've tried to chase down other issues with clover/llvm/libclc. It
> fixes at least one CTS test that I know of for CL 1.2.
> 
> The other 4 patches move the device version declaration to core/device
> and then use that along with the -cl-std option to determine which
> OpenCL language version to enable in clang.
> 
> I've done a full piglit run before/after, and there are no changes for me
> on radeonsi/pitcairn if the device is left at CL 1.1.
> 
> When I bump my platform/device versions to 1.2, the clang instance has
> been confirmed to enable 1.2 language features (like the static keyword
> required in test/cl/program/execute/static.cl, which goes skip->pass).
> 
> Anyway, happy reviewing.
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 01/17] auxiliary: Introduce utilities for SPIR-V binaries

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/auxiliary/Makefile.am |  1 +
 src/gallium/auxiliary/Makefile.sources|  4 ++
 src/gallium/auxiliary/spirv/spirv_utils.c | 75 +++
 src/gallium/auxiliary/spirv/spirv_utils.h | 86 +++
 4 files changed, 166 insertions(+)
 create mode 100644 src/gallium/auxiliary/spirv/spirv_utils.c
 create mode 100644 src/gallium/auxiliary/spirv/spirv_utils.h

diff --git a/src/gallium/auxiliary/Makefile.am 
b/src/gallium/auxiliary/Makefile.am
index dc4bd4a40c..d2530a1f90 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -19,6 +19,7 @@ AM_CXXFLAGS = \
 libgallium_la_SOURCES = \
$(C_SOURCES) \
$(NIR_SOURCES) \
+   $(SPIRV_SOURCES) \
$(GENERATED_SOURCES)
 
 if HAVE_LIBDRM
diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index dbdb3ca815..f4817742ff 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -312,6 +312,10 @@ NIR_SOURCES := \
nir/tgsi_to_nir.c \
nir/tgsi_to_nir.h
 
+SPIRV_SOURCES := \
+   spirv/spirv_utils.c \
+   spirv/spirv_utils.h
+
 VL_SOURCES := \
vl/vl_bicubic_filter.c \
vl/vl_bicubic_filter.h \
diff --git a/src/gallium/auxiliary/spirv/spirv_utils.c 
b/src/gallium/auxiliary/spirv/spirv_utils.c
new file mode 100644
index 00..a2334d6909
--- /dev/null
+++ b/src/gallium/auxiliary/spirv/spirv_utils.c
@@ -0,0 +1,75 @@
+/**
+ *
+ * Copyright 2017 Pierre Moreau
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include "spirv_utils.h"
+
+#include "compiler/spirv/spirv.h"
+#include "util/u_math.h"
+
+spirv_word
+spirv_get_word(const char *binary, unsigned word_offset)
+{
+   return ((spirv_word *) binary)[word_offset];
+}
+
+void
+spirv_set_word(char *binary, unsigned word_offset, spirv_word word)
+{
+   ((spirv_word *) binary)[word_offset] = word;
+}
+
+const char *
+spirv_get_string(const char *binary, unsigned word_offset)
+{
+   return binary + word_offset * sizeof(spirv_word);
+}
+
+bool
+spirv_is_binary_spirv(const char *binary)
+{
+   const spirv_word first_word = spirv_get_word(binary, 0u);
+   const bool ret = (first_word == SpvMagicNumber) ||
+(util_bswap32(first_word) == SpvMagicNumber);
+   return ret;
+}
+
+char *
+spirv_spirv_to_cpu(const char *binary, size_t length)
+{
+   spirv_word word = spirv_get_word(binary, 0u);
+   size_t i = 0;
+   char *cpu_endianness_binary = malloc(length);
+   if (word == SpvMagicNumber)
+  return memcpy(cpu_endianness_binary, binary, length);
+
+   for (i = 0; i < length; i += 4) {
+  word = spirv_get_word(binary, i);
+  spirv_set_word(cpu_endianness_binary, i, util_bswap32(word));
+   }
+
+   return cpu_endianness_binary;
+}
diff --git a/src/gallium/auxiliary/spirv/spirv_utils.h 
b/src/gallium/auxiliary/spirv/spirv_utils.h
new file mode 100644
index 00..2db7f3b9dd
--- /dev/null
+++ b/src/gallium/auxiliary/spirv/spirv_utils.h
@@ -0,0 +1,86 @@
+/**********
+ *
+ * Copyright 2017 Pierre Moreau
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so,

[Mesa-dev] [RFC PATCH 05/17] include/CL: Add clCreateProgramWithIL from OpenCL 2.1

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 include/CL/cl.h  | 6 ++
 include/CL/cl_platform.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/include/CL/cl.h b/include/CL/cl.h
index 316565d6e4..44d7aedc3e 100644
--- a/include/CL/cl.h
+++ b/include/CL/cl.h
@@ -757,6 +757,12 @@ clCreateProgramWithBuiltInKernels(cl_context/* 
context */,
   const char *  /* kernel_names */,
   cl_int *  /* errcode_ret */) 
CL_API_SUFFIX__VERSION_1_2;
 
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithIL(cl_context/* context */,
+ const void*/* il */,
+ size_t /* length */,
+ cl_int*/* errcode_ret */) 
CL_API_SUFFIX__VERSION_2_1;
+
 extern CL_API_ENTRY cl_int CL_API_CALL
 clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
 
diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h
index 7f6f5e8a74..105d3cc1f0 100644
--- a/include/CL/cl_platform.h
+++ b/include/CL/cl_platform.h
@@ -75,6 +75,7 @@ extern "C" {
 #define CL_EXT_SUFFIX__VERSION_1_1
 #define CL_API_SUFFIX__VERSION_1_2
 #define CL_EXT_SUFFIX__VERSION_1_2
+#define CL_API_SUFFIX__VERSION_2_1
 
 #ifdef __GNUC__
 #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 02/17] auxiliary: Implement a linker for SPIR-V binaries

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/auxiliary/Makefile.sources |4 +-
 src/gallium/auxiliary/spirv/spirv_linker.c | 1324 
 src/gallium/auxiliary/spirv/spirv_linker.h |   67 ++
 3 files changed, 1394 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/auxiliary/spirv/spirv_linker.c
 create mode 100644 src/gallium/auxiliary/spirv/spirv_linker.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index f4817742ff..91aac49dfb 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -314,7 +314,9 @@ NIR_SOURCES := \
 
 SPIRV_SOURCES := \
spirv/spirv_utils.c \
-   spirv/spirv_utils.h
+   spirv/spirv_utils.h \
+   spirv/spirv_linker.c \
+   spirv/spirv_linker.h
 
 VL_SOURCES := \
vl/vl_bicubic_filter.c \
diff --git a/src/gallium/auxiliary/spirv/spirv_linker.c 
b/src/gallium/auxiliary/spirv/spirv_linker.c
new file mode 100644
index 00..9d060be0cc
--- /dev/null
+++ b/src/gallium/auxiliary/spirv/spirv_linker.c
@@ -0,0 +1,1324 @@
+/**
+ *
+ * Copyright 2017 Pierre Moreau
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include "spirv_linker.h"
+#include "spirv_utils.h"
+
+#include "compiler/spirv/spirv.h"
+#include "util/u_debug.h"
+#include "util/u_hash_table.h"
+#include "util/u_pointer.h"
+
+#include 
+#include 
+#include 
+
+#define PTR_TO_UINT(x) ((unsigned)pointer_to_uintptr(x))
+#define UINT_TO_PTR(x) (uintptr_to_pointer((uintptr_t)(x)))
+
+/**
+ * Extracts the opcode and the number of words making up this instruction.
+ *
+ * @param binary binary to extract the information from
+ * @param word_id index of the word to extract
+ * @param word_count if not null, will be set to the number of words making up
+ *   the instruction, otherwise will be left untouched
+ * @return the opcode
+ */
+static SpvOp
+spirv_get_opcode(const char *binary, size_t word_offset, unsigned *word_count)
+{
+   const unsigned desc_word = spirv_get_word(binary, word_offset);
+   if (word_count)
+  *word_count = desc_word >> SpvWordCountShift;
+   return (SpvOp) (desc_word & SpvOpCodeMask);
+}
+
+static unsigned
+spirv_spvid_hash(void *id)
+{
+   return PTR_TO_UINT(id);
+}
+
+static int
+spirv_spvid_compare(void *id1, void *id2)
+{
+   return PTR_TO_UINT(id1) != PTR_TO_UINT(id2);
+}
+
+/**
+ * Adds a specified base ID to the ID found at a specified position in the
+ * binary.
+ */
+static void
+spirv_bump_id(char *binary, unsigned word_offset, void *base_id)
+{
+   SpvId old_id = spirv_get_word(binary, word_offset);
+   spirv_set_word(binary, word_offset, PTR_TO_UINT(base_id) + old_id);
+}
+
+/**
+ * Replaces an ID with another one, if found in the link table.
+ */
+static void
+spirv_link_ids(char *binary, unsigned word_offset, void *link_table)
+{
+   SpvId old_id = spirv_get_word(binary, word_offset);
+   void *new_id_ptr = util_hash_table_get((struct util_hash_table *) 
link_table,
+  UINT_TO_PTR(old_id));
+   SpvId new_id = PTR_TO_UINT(new_id_ptr);
+   if (new_id_ptr != NULL)
+  spirv_set_word(binary, word_offset, new_id);
+}
+
+/**
+ * Associates the given variable to its width, if found.
+ */
+static void
+spirv_register_variable(char *binary, unsigned type_offset,
+unsigned variable_offset, struct util_hash_table 
*types,
+struct util_hash_table *variables)
+{
+   SpvId type_id = spirv_get_word(binary, type_offset);
+   SpvId var_id = spirv_get_word(binary, variable_offset);
+

[Mesa-dev] [RFC PATCH 07/17] configure.ac: Check for SPIRV-Tools header and library

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 configure.ac | 16 
 1 file changed, 16 insertions(+)

diff --git a/configure.ac b/configure.ac
index ba042791ad..602aeb279d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2064,6 +2064,11 @@ AC_ARG_WITH([clang-libdir],
 
 PKG_CHECK_EXISTS([libclc], [have_libclc=yes], [have_libclc=no])
 
+AC_LANG_PUSH([C++])
+AC_SEARCH_LIBS([_ZNK8spvtools10SpirvTools8ValidateEPKjm], [SPIRV-Tools], 
[have_spirv_tools=yes], [have_spirv_tools=no])
+AC_CHECK_HEADER([spirv-tools/libspirv.hpp], [have_spirv_tools_headers=yes; 
break;])
+AC_LANG_POP([C++])
+
 if test "x$enable_opencl" = xyes; then
 if test -z "$with_gallium_drivers"; then
 AC_MSG_ERROR([cannot enable OpenCL without Gallium])
@@ -2123,6 +2128,17 @@ if test "x$enable_opencl" = xyes; then
 llvm_add_component "objcarcopts" "opencl"
 llvm_add_component "profiledata" "opencl"
 
+if test "x$have_spirv_tools_headers" != xyes; then
+   AC_MSG_ERROR([Failed to find spirv-tools/libspirv.hpp, which is
+ required to build clover])
+fi
+
+if test "x$have_spirv_tools" != xyes; then
+   AC_MSG_ERROR([Failed to find a library implementing
+ _ZNK8spvtools10SpirvTools8ValidateEPKjm which is required
+ to build clover])
+fi
+
 dnl Check for Clang internal headers
 if test -z "$CLANG_LIBDIR"; then
 CLANG_LIBDIR=${LLVM_LIBDIR}
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 06/17] include/CL: Add new option to clGetProgramInfo from OpenCL 2.1

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 include/CL/cl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/CL/cl.h b/include/CL/cl.h
index 44d7aedc3e..cc8d7ddf60 100644
--- a/include/CL/cl.h
+++ b/include/CL/cl.h
@@ -455,6 +455,7 @@ typedef struct _cl_buffer_region {
 #define CL_PROGRAM_BINARIES 0x1166
 #define CL_PROGRAM_NUM_KERNELS  0x1167
 #define CL_PROGRAM_KERNEL_NAMES 0x1168
+#define CL_PROGRAM_IL   0x1169
 
 /* cl_program_build_info */
 #define CL_PROGRAM_BUILD_STATUS 0x1181
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 00/17] Introducing SPIR-V support to clover

2017-05-03 Thread Pierre Moreau
Hello everyone,

I have been working on converting SPIR-V to NVIR in order to run OpenCL kernels
on Nouveau, and I would like to submit the first part of that work for review.
Pieces from the SPIR-V to NVIR conversion work will be submitted once I have
cleaned it up and this series has progressed through the reviewing process.


What’s in this Series?
--

The focus of this series is to let clover accept SPIR-V binaries, either
through `clCreateProgramWithBinary()`, or through `clCreateProgramWithIL()`.
The latter function is the proper way to feed SPIR-V binaries using the OpenCL
API, however it was only introduced in OpenCL 2.1 (more on “why supporting
SPIR-V through `clCreateProgramWithBinary()` can be interesting” further down).

As several SPIR-V binaries can be linked together using the OpenCL API, I
implemented a SPIR-V linker, which is not perfect, but does the job. I tested
linking against a variable, a function, a library, and a function containing a
switch statement; switch-statements require you to keep some extra stuff around
to be properly parsed.
I also added a few “utilities” functions for retrieving and setting a word /
retrieving a string from a SPIR-V binary, and converting a SPIR-V binary to the
same endianness as the host CPU.

For validating SPIR-V binaries, I use an external tool, SPIRV-Tools [1]. It
could also be used in anv, and possibly radv if there is no validation done
already, but I haven’t looked into that.

A few modifications have been made to the pipe interface, to add a define for
the SPIR-V IR, and store the program’s byte-size along the program in
`struct pipe_compute_state`. The latter will only be needed by the consumer of
the SPIR-V, which is not part of this series. However, since clover needs to
fill that information in and I was modifying clover already, I decided to add
the new attribute in this series.


Missing
---

* As there is no upstream version of LLVM which can produce SPIR-V out of
  OpenCL code, clCreateProgramWithSource will refuse to work if the target IR
  is SPIR-V, for now.

* Optimisation linking options are parsed by the SPIR-V code in clover but
  are not passed along to the linker as it does not support them.


To Improve
--

The SPIR-V binary resulting from the linking of multiple SPIR-V binaries could
be cleaned up:

* As capabilities are simply copied from all the involved binaries, you can end
  up with multiple times the same capabilities in the resulting binary; this
  shouldn’t have any impact though.

* Similarly, types can end up being duplicated under different IDs, which
  should have no other impact than making SPIR-V validators unhappy.


Misc.
-

Being able to feed SPIR-V binaries through `clCreateProgramWithBinary()` is not
really useful at the moment: the same can be achieved using
`clCreateProgramWithIL()`. However it will be interesting once there is an
upstream version of LLVM which can generate SPIR-V binaries, as the application
could query the binary created by `clCreateProgramWithSource()` on the first
run, and give it to `clCreateProgramWithBinary()`on later runs.

Once NIR supports pointers, and anything else that could be missing to support
OpenCL kernels, it should be possible and easy to convert input SPIR-V
binaries to NIR, for drivers that do not accept SPIR-V as IR.


I have sent patches to Mesa in the past, but never series, so the splitting of
the patches in the series could be completely wrong, and I apologise for that
in advance.
Also, I am sure I abused of macros, gotos and manual memory managements, as I
am not that comfortable at writing too much C code: I’ll try to learn from
your comments.


Thank you in advance for reviewing/commenting,
Pierre


[1]: https://github.com/KhronosGroup/SPIRV-Tools/


Pierre Moreau (17):
  auxiliary: Introduce utilities for SPIR-V binaries
  auxiliary: Implement a linker for SPIR-V binaries
  include/pipe: Define SPIRV as an IR
  include/pipe: Store the byte-size of a SPIR-V binary
  include/CL: Add clCreateProgramWithIL from OpenCL 2.1
  include/CL: Add new option to clGetProgramInfo from OpenCL 2.1
  configure.ac: Check for SPIRV-Tools header and library
  clover: Fill in the program byte-size in pipe_compute_state
  clover: Add additional functions to query supported IRs
  clover/spirv: Import spirv.hpp11 version 1.0 (rev 10)
  clover/spirv: Add functions for parsing arguments, linking programs,
etc.
  clover: Refuse to compile source code to SPIR-V
  clover: Handle the case when linking SPIR-V binaries together
  clover: Accept SPIR-V binaries in clCreateProgramWithBinary
  clover: Implement clCreateProgramWithIL from OpenCL 2.1
  clover: Add a pointer property to return ILs
  clover: Handle CL_PROGRAM_IL in clGetProgramInfo

 configure.ac   |   16 +
 include/CL/cl.h|7 +
 include/CL/cl_platform.h   |1 +
 src/gallium/auxiliary

[Mesa-dev] [RFC PATCH 03/17] include/pipe: Define SPIRV as an IR

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/include/pipe/p_defines.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index ce2cfd1d88..71991383c2 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -850,6 +850,7 @@ enum pipe_shader_ir
PIPE_SHADER_IR_LLVM,
PIPE_SHADER_IR_NATIVE,
PIPE_SHADER_IR_NIR,
+   PIPE_SHADER_IR_SPIRV
 };
 
 /**
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 04/17] include/pipe: Store the byte-size of a SPIR-V binary

2017-05-03 Thread Pierre Moreau
Besides parsing all the opcodes until reaching the EOF character, there
is no way to compute the size of a SPIR-V binary. Therefore, it is
easier to pass it along the SPIR-V binary in pipe_compute_state.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/include/pipe/p_state.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index ce9ca34d29..1f8fdf530f 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -810,6 +810,7 @@ struct pipe_compute_state
 {
enum pipe_shader_ir ir_type; /**< IR type contained in prog. */
const void *prog; /**< Compute program to be executed. */
+   unsigned prog_num_bytes; /**< Program size in bytes, used by SPIR-V. */
unsigned req_local_mem; /**< Required size of the LOCAL resource. */
unsigned req_private_mem; /**< Required size of the PRIVATE resource. */
unsigned req_input_mem; /**< Required size of the INPUT resource. */
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] nv50/ir: Report wrong prog types using proper var

2017-05-12 Thread Pierre Moreau
Coverity caught the use of the uninitialised variable `type`.
However, it was `info->type`, which is initialised, which was meant to
be used.

CID: 1406000
Reported-by: Ilia Mirkin <imir...@alum.mit.edu>
Fixes: b490ca9a387d ("nv50/ir: Fail if encountering unknown shader type")
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index a000bcbd32..21641a4746 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1214,7 +1214,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
PROG_TYPE_CASE(COMPUTE, COMPUTE);
default:
-  INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", type);
+  INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", 
info->type);
   return -1;
}
INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 02/17] auxiliary: Implement a linker for SPIR-V binaries

2017-05-15 Thread Pierre Moreau
This version tries to link builtins, as they are marked as imported, but nothing
exports them, resulting in the linker failing and throwing an error. As
builtins are target dependent, it is best to let the driver handles those
cases. I changed the code locally to also track which IDs are builtins, and
remove from the import table all builtins, right before generating the linking
table.

Other comments are welcomed (on this patch, and on the others as well). :-)

Pierre

On 11:56 pm - May 03 2017, Pierre Moreau wrote:
> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> ---
>  src/gallium/auxiliary/Makefile.sources |4 +-
>  src/gallium/auxiliary/spirv/spirv_linker.c | 1324 
> 
>  src/gallium/auxiliary/spirv/spirv_linker.h |   67 ++
>  3 files changed, 1394 insertions(+), 1 deletion(-)
>  create mode 100644 src/gallium/auxiliary/spirv/spirv_linker.c
>  create mode 100644 src/gallium/auxiliary/spirv/spirv_linker.h
> 
> diff --git a/src/gallium/auxiliary/Makefile.sources 
> b/src/gallium/auxiliary/Makefile.sources
> index f4817742ff..91aac49dfb 100644
> --- a/src/gallium/auxiliary/Makefile.sources
> +++ b/src/gallium/auxiliary/Makefile.sources
> @@ -314,7 +314,9 @@ NIR_SOURCES := \
>  
>  SPIRV_SOURCES := \
>   spirv/spirv_utils.c \
> - spirv/spirv_utils.h
> + spirv/spirv_utils.h \
> + spirv/spirv_linker.c \
> + spirv/spirv_linker.h
>  
>  VL_SOURCES := \
>   vl/vl_bicubic_filter.c \
> diff --git a/src/gallium/auxiliary/spirv/spirv_linker.c 
> b/src/gallium/auxiliary/spirv/spirv_linker.c
> new file mode 100644
> index 00..9d060be0cc
> --- /dev/null
> +++ b/src/gallium/auxiliary/spirv/spirv_linker.c
> @@ -0,0 +1,1324 @@
> +/******
> + *
> + * Copyright 2017 Pierre Moreau
> + * All Rights Reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> + * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + **/
> +
> +#include "spirv_linker.h"
> +#include "spirv_utils.h"
> +
> +#include "compiler/spirv/spirv.h"
> +#include "util/u_debug.h"
> +#include "util/u_hash_table.h"
> +#include "util/u_pointer.h"
> +
> +#include 
> +#include 
> +#include 
> +
> +#define PTR_TO_UINT(x) ((unsigned)pointer_to_uintptr(x))
> +#define UINT_TO_PTR(x) (uintptr_to_pointer((uintptr_t)(x)))
> +
> +/**
> + * Extracts the opcode and the number of words making up this instruction.
> + *
> + * @param binary binary to extract the information from
> + * @param word_id index of the word to extract
> + * @param word_count if not null, will be set to the number of words making 
> up
> + *   the instruction, otherwise will be left untouched
> + * @return the opcode
> + */
> +static SpvOp
> +spirv_get_opcode(const char *binary, size_t word_offset, unsigned 
> *word_count)
> +{
> +   const unsigned desc_word = spirv_get_word(binary, word_offset);
> +   if (word_count)
> +  *word_count = desc_word >> SpvWordCountShift;
> +   return (SpvOp) (desc_word & SpvOpCodeMask);
> +}
> +
> +static unsigned
> +spirv_spvid_hash(void *id)
> +{
> +   return PTR_TO_UINT(id);
> +}
> +
> +static int
> +spirv_spvid_compare(void *id1, void *id2)
> +{
> +   return PTR_TO_UINT(id1) != PTR_TO_UINT(id2);
> +}
> +
> +/**
> + * Adds a specified base ID to the ID found at a specified position in the
> + * binary.
> + */
> +static void
> +spirv_bump_id(cha

[Mesa-dev] [PATCH] nv50/ir: Properly fold constants in SPLIT operation

2017-06-12 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index e032255178..57223d311c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -975,8 +975,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue , 
int s)
   bld.setPosition(i, false);
 
   uint8_t size = i->getDef(0)->reg.size;
-  uint32_t mask = (1ULL << size) - 1;
-  assert(size <= 32);
+  uint8_t bitsize = size * 8;
+  uint32_t mask = (1ULL << bitsize) - 1;
+  assert(bitsize <= 32);
 
   uint64_t val = imm0.reg.data.u64;
   for (int8_t d = 0; i->defExists(d); ++d) {
@@ -984,7 +985,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue , 
int s)
  assert(def->reg.size == size);
 
  newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);
- val >>= size;
+ val >>= bitsize;
   }
   delete_Instruction(prog, i);
   break;
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nv50/ir: Properly fold constants in SPLIT operation

2017-06-22 Thread Pierre Moreau
I think we never split values that are < 64-bit wide, as the RA pass does not
support < 32-bit wide values. And 64-bit values aren’t used that often, I would
guess.

Does adding the Fixes: tag here work, or should I send a new version?

Fixes: b7d9677d ("nv50/ir: constant fold OP_SPLIT")

On 02:46 pm - Jun 21 2017, Ilia Mirkin wrote:
> Wait, this is actively buggy! How did this ever work :( I guess we
> don't split immediates too frequently, and I was testing it with
> zero's or something.
> 
> Can you figure out the commit where I added this idiotic code and add
> a Fixes: tag?
> 
> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu>
> Cc: mesa-sta...@lists.freedesktop.org
> 
> On Mon, Jun 12, 2017 at 4:53 PM, Pierre Moreau <pierre.mor...@free.fr> wrote:
> > Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
> > ---
> >  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 7 ---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > index e032255178..57223d311c 100644
> > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> > @@ -975,8 +975,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >bld.setPosition(i, false);
> >
> >uint8_t size = i->getDef(0)->reg.size;
> > -  uint32_t mask = (1ULL << size) - 1;
> > -  assert(size <= 32);
> > +  uint8_t bitsize = size * 8;
> > +  uint32_t mask = (1ULL << bitsize) - 1;
> > +  assert(bitsize <= 32);
> >
> >uint64_t val = imm0.reg.data.u64;
> >for (int8_t d = 0; i->defExists(d); ++d) {
> > @@ -984,7 +985,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
> > , int s)
> >   assert(def->reg.size == size);
> >
> >   newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), 
> > TYPE_U32);
> 
> Is that what you want here? Should this be typeOfSize(size) ?
> 
> I guess you want to split this into 32-bit values anyways since that's
> what everything processes... eventually we might want to play around
> with the SIMD opcodes but probably not soon.
> 
> > - val >>= size;
> > + val >>= bitsize;
> >}
> >delete_Instruction(prog, i);
> >break;
> > --
> > 2.13.1
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] nv50/ir: move POW lowering into BuildUtil

2017-06-24 Thread Pierre Moreau
Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>

On 09:28 pm - Jun 22 2017, Karol Herbst wrote:
> It's the same for all supported chipsets.
> 
> Signed-off-by: Karol Herbst <karolher...@gmail.com>
> ---
>  .../drivers/nouveau/codegen/nv50_ir_build_util.cpp| 16 
>  .../drivers/nouveau/codegen/nv50_ir_build_util.h  |  2 ++
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp | 19 
> +--
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 18 +-
>  .../drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h   |  1 -
>  5 files changed, 20 insertions(+), 36 deletions(-)
> 
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> index 84ebfdb1cb..5756e1b4d4 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
> @@ -636,4 +636,20 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction 
> *i,
> return hi;
>  }
>  
> +bool
> +BuildUtil::lowerPOW(Instruction *i)
> +{
> +   LValue *val = getScratch();
> +
> +   mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
> +   mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
> +   mkOp1(OP_PREEX2, TYPE_F32, val, val);
> +
> +   i->op = OP_EX2;
> +   i->setSrc(0, val);
> +   i->setSrc(1, NULL);
> +
> +   return true;
> +}
> +
>  } // namespace nv50_ir
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
> index d171f64d9a..aac5b609e2 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h
> @@ -103,6 +103,8 @@ public:
>  
> Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); }
>  
> +   bool lowerPOW(Instruction *);
> +
> // returns high part of the operation
> static Instruction *split64BitOpPostRA(Function *, Instruction *,
>Value *zero, Value *carry);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> index 36ab837f6e..2b187086cf 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
> @@ -626,7 +626,6 @@ private:
>  
> bool handleDIV(Instruction *);
> bool handleSQRT(Instruction *);
> -   bool handlePOW(Instruction *);
>  
> bool handleSET(Instruction *);
> bool handleSLCT(CmpInstruction *);
> @@ -1245,22 +1244,6 @@ NV50LoweringPreSSA::handleSQRT(Instruction *i)
>  }
>  
>  bool
> -NV50LoweringPreSSA::handlePOW(Instruction *i)
> -{
> -   LValue *val = bld.getScratch();
> -
> -   bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
> -   bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
> -   bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
> -
> -   i->op = OP_EX2;
> -   i->setSrc(0, val);
> -   i->setSrc(1, NULL);
> -
> -   return true;
> -}
> -
> -bool
>  NV50LoweringPreSSA::handleEXPORT(Instruction *i)
>  {
> if (prog->getType() == Program::TYPE_FRAGMENT) {
> @@ -1416,7 +1399,7 @@ NV50LoweringPreSSA::visit(Instruction *i)
> case OP_SELP:
>return handleSELP(i);
> case OP_POW:
> -  return handlePOW(i);
> +  return bld.lowerPOW(i);
> case OP_DIV:
>return handleDIV(i);
> case OP_SQRT:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 64d743708a..bb9ef7a468 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -2700,22 +2700,6 @@ NVC0LoweringPass::handleSQRT(Instruction *i)
>  }
>  
>  bool
> -NVC0LoweringPass::handlePOW(Instruction *i)
> -{
> -   LValue *val = bld.getScratch();
> -
> -   bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0));
> -   bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1;
> -   bld.mkOp1(OP_PREEX2, TYPE_F32, val, val);
> -
> -   i->op = OP_EX2;
> -   i->setSrc(0, val);
> -   i->setSrc(1, NULL);
> -
> -   return true;
> -}
> -
> -bool
>  NVC0LoweringPass::handleEXPORT(Instruction *i)
>  {
> if (prog->getType() == Program::TYPE_FRAGMENT) {
> @@ -2813,7 +2797,7 @@ NVC0LoweringPass::visit(Instruction *i)
>i->setSrc(0, i->getDef(0));
>break;
> case OP_

Re: [Mesa-dev] [RFC PATCH 00/17] Introducing SPIR-V support to clover

2017-06-16 Thread Pierre Moreau
Hello,

I am working on an updated version of this series, which will include changes
that have been suggested, as well as support for structures as a kernel
argument. Along those, I was also thinking of doing some other changes to the
SPIR-V linker:

a) Store some metadata about the SPIR-V module, such as the word offset of
   each of the different sections: if one wants to run an operation on a single
   section of the module, it is no longer needed to iterate over everything in
   front of that section.
   It could also contain the width of the literals used in the switch
   statements, avoiding having to track those every time one iterate over the
   module.
   Maybe tables for imported and exported symbols, as well?

b) Change the code from having a single loop iterating over the whole module,
   into having one loop per section. This will avoid testing for entry points,
   decorations, types, etc. when we are already iterating over the function
   bodies, possibly improving performance.

c) Remove duplicate decorations, capabilities, names? and types found in the
   linked module. Types is the most important one, as it currently prevents us
   from validating the linked module.

I am still thinking of leaving out of this series, the linking options support
for the linker.

As for clover, it should check that the devices actually support the
capabilities present in the module, so I will add that as well.

Do you think this would make sense, and should I be implementing them in this
series, or should I leave them be for later? (Point b), if deemed useful,
should be part of this series as it would rewrite a decent chunk of the code.)
If you have any other comments, feel free to share it. :-)

Best regards,
Pierre


On 11:56 pm - May 03 2017, Pierre Moreau wrote:
> Hello everyone,
> 
> I have been working on converting SPIR-V to NVIR in order to run OpenCL 
> kernels
> on Nouveau, and I would like to submit the first part of that work for review.
> Pieces from the SPIR-V to NVIR conversion work will be submitted once I have
> cleaned it up and this series has progressed through the reviewing process.
> 
> 
> What’s in this Series?
> --
> 
> The focus of this series is to let clover accept SPIR-V binaries, either
> through `clCreateProgramWithBinary()`, or through `clCreateProgramWithIL()`.
> The latter function is the proper way to feed SPIR-V binaries using the OpenCL
> API, however it was only introduced in OpenCL 2.1 (more on “why supporting
> SPIR-V through `clCreateProgramWithBinary()` can be interesting” further 
> down).
> 
> As several SPIR-V binaries can be linked together using the OpenCL API, I
> implemented a SPIR-V linker, which is not perfect, but does the job. I tested
> linking against a variable, a function, a library, and a function containing a
> switch statement; switch-statements require you to keep some extra stuff 
> around
> to be properly parsed.
> I also added a few “utilities” functions for retrieving and setting a word /
> retrieving a string from a SPIR-V binary, and converting a SPIR-V binary to 
> the
> same endianness as the host CPU.
> 
> For validating SPIR-V binaries, I use an external tool, SPIRV-Tools [1]. It
> could also be used in anv, and possibly radv if there is no validation done
> already, but I haven’t looked into that.
> 
> A few modifications have been made to the pipe interface, to add a define for
> the SPIR-V IR, and store the program’s byte-size along the program in
> `struct pipe_compute_state`. The latter will only be needed by the consumer of
> the SPIR-V, which is not part of this series. However, since clover needs to
> fill that information in and I was modifying clover already, I decided to add
> the new attribute in this series.
> 
> 
> Missing
> ---
> 
> * As there is no upstream version of LLVM which can produce SPIR-V out of
>   OpenCL code, clCreateProgramWithSource will refuse to work if the target IR
>   is SPIR-V, for now.
> 
> * Optimisation linking options are parsed by the SPIR-V code in clover but
>   are not passed along to the linker as it does not support them.
> 
> 
> To Improve
> --
> 
> The SPIR-V binary resulting from the linking of multiple SPIR-V binaries could
> be cleaned up:
> 
> * As capabilities are simply copied from all the involved binaries, you can 
> end
>   up with multiple times the same capabilities in the resulting binary; this
>   shouldn’t have any impact though.
> 
> * Similarly, types can end up being duplicated under different IDs, which
>   should have no other impact than making SPIR-V validators unhappy.
> 
> 
> Misc.
> -
> 
> Being able to feed SPIR-V binaries through `clCreateProgramWithBinary()` is 
> not
> really useful at the moment: the

Re: [Mesa-dev] [RFC PATCH 00/17] Introducing SPIR-V support to clover

2017-05-04 Thread Pierre Moreau
> hopefully this[1] will eventually happen, which would make this less
> of an issue :-)
> 
> [1] http://lists.llvm.org/pipermail/llvm-dev/2017-May/112538.html

Indeed! I have seen that thread and made sure to subscribe to the ML not to
miss anything there.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 04/17] include/pipe: Store the byte-size of a SPIR-V binary

2017-05-06 Thread Pierre Moreau
This seems like a good idea. I changed the code locally to use
`pipe_llvm_program_header` instead; it does not seem worth it to create a
`pipe_spirv_program_header`, since it would only contain a `num_bytes`
attributes for now.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] nv50/ir: Free target if we failed to create a program

2017-05-06 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 1f640a348a..a4b46eb13f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1224,8 +1224,10 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
   return -1;
 
nv50_ir::Program *prog = new nv50_ir::Program(type, targ);
-   if (!prog)
+   if (!prog) {
+  nv50_ir::Target::destroy(targ);
   return -1;
+   }
prog->driver = info;
prog->dbgFlags = info->dbgFlags;
prog->optLevel = info->optLevel;
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] nv50/ir: Replace NV50_PROGRAM_IR_* by PIPE_SHADER_IR_*

2017-05-06 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp  | 2 +-
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 7 +--
 src/gallium/drivers/nouveau/nouveau_compiler.c   | 2 +-
 src/gallium/drivers/nouveau/nv50/nv50_program.c  | 4 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c  | 2 +-
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index f811781756..a000bcbd32 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1233,7 +1233,7 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
-   case NV50_PROGRAM_IR_TGSI:
+   case PIPE_SHADER_IR_TGSI:
   ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index e7d840df00..1962ead35a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -54,11 +54,6 @@ struct nv50_ir_varying
ubyte si; /* TGSI semantic index */
 };
 
-#define NV50_PROGRAM_IR_TGSI 0
-#define NV50_PROGRAM_IR_SM4  1
-#define NV50_PROGRAM_IR_GLSL 2
-#define NV50_PROGRAM_IR_LLVM 3
-
 #ifdef DEBUG
 # define NV50_IR_DEBUG_BASIC (1 << 0)
 # define NV50_IR_DEBUG_VERBOSE   (2 << 0)
@@ -95,7 +90,7 @@ struct nv50_ir_prog_info
   uint32_t *code;
   uint32_t codeSize;
   uint32_t instructions;
-  uint8_t sourceRep;  /* NV50_PROGRAM_IR */
+  uint8_t sourceRep;  /* PIPE_SHADER_IR_* */
   const void *source;
   void *relocData;
   void *fixupData;
diff --git a/src/gallium/drivers/nouveau/nouveau_compiler.c 
b/src/gallium/drivers/nouveau/nouveau_compiler.c
index d8009f5bfe..3151a6f420 100644
--- a/src/gallium/drivers/nouveau/nouveau_compiler.c
+++ b/src/gallium/drivers/nouveau/nouveau_compiler.c
@@ -109,7 +109,7 @@ nouveau_codegen(int chipset, int type, struct tgsi_token 
tokens[],
 
info.type = type;
info.target = chipset;
-   info.bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+   info.bin.sourceRep = PIPE_SHADER_IR_TGSI;
info.bin.source = tokens;
 
info.io.auxCBSlot = 15;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_program.c 
b/src/gallium/drivers/nouveau/nv50/nv50_program.c
index 76d06aeddf..92e73f8c12 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -20,6 +20,8 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "pipe/p_defines.h"
+
 #include "nv50/nv50_program.h"
 #include "nv50/nv50_context.h"
 
@@ -331,7 +333,7 @@ nv50_program_translate(struct nv50_program *prog, uint16_t 
chipset,
 
info->type = prog->type;
info->target = chipset;
-   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+   info->bin.sourceRep = PIPE_SHADER_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
 
info->io.auxCBSlot = 15;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 6cc518309c..27740bc87f 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -567,7 +567,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t 
chipset,
 
info->type = prog->type;
info->target = chipset;
-   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+   info->bin.sourceRep = PIPE_SHADER_IR_TGSI;
info->bin.source = (void *)prog->pipe.tokens;
 
 #ifdef DEBUG
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] nv50/ir: Fail if encountering unknown shader type

2017-05-06 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index b67a1ddbd5..1f640a348a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1214,8 +1214,8 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
PROG_TYPE_CASE(FRAGMENT, FRAGMENT);
PROG_TYPE_CASE(COMPUTE, COMPUTE);
default:
-  type = nv50_ir::Program::TYPE_COMPUTE;
-  break;
+  INFO_DBG(info->dbgFlags, VERBOSE, "unsupported program type %u\n", type);
+  return -1;
}
INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type);
 
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] nv50/ir: Remove unused translation methods

2017-05-06 Thread Pierre Moreau
This code was merged commented out, and has stayed that way ever since.

Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 12 +++-
 src/gallium/drivers/nouveau/codegen/nv50_ir.h   |  1 -
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index a4b46eb13f..f811781756 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1233,17 +1233,11 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->optLevel = info->optLevel;
 
switch (info->bin.sourceRep) {
-#if 0
-   case PIPE_IR_LLVM:
-   case PIPE_IR_GLSL:
-  return -1;
-   case PIPE_IR_SM4:
-  ret = prog->makeFromSM4(info) ? 0 : -2;
+   case NV50_PROGRAM_IR_TGSI:
+  ret = prog->makeFromTGSI(info) ? 0 : -2;
   break;
-   case PIPE_IR_TGSI:
-#endif
default:
-  ret = prog->makeFromTGSI(info) ? 0 : -2;
+  ret = -1;
   break;
}
if (ret < 0)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index de6c110536..5c09fed05c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1253,7 +1253,6 @@ public:
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); }
 
bool makeFromTGSI(struct nv50_ir_prog_info *);
-   bool makeFromSM4(struct nv50_ir_prog_info *);
bool convertToSSA();
bool optimizeSSA(int level);
bool optimizePostRA(int level);
-- 
2.12.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [RFC PATCH 11/17] clover/spirv: Add functions for parsing arguments, linking programs, etc.

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 src/gallium/state_trackers/clover/Makefile.am  |  10 +-
 src/gallium/state_trackers/clover/Makefile.sources |   4 +
 .../state_trackers/clover/spirv/invocation.cpp | 481 +
 .../state_trackers/clover/spirv/invocation.hpp |  40 ++
 4 files changed, 533 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/state_trackers/clover/spirv/invocation.cpp
 create mode 100644 src/gallium/state_trackers/clover/spirv/invocation.hpp

diff --git a/src/gallium/state_trackers/clover/Makefile.am 
b/src/gallium/state_trackers/clover/Makefile.am
index 321393536d..e29457e948 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -28,7 +28,7 @@ cl_HEADERS = \
$(top_srcdir)/include/CL/opencl.h
 endif
 
-noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la
+noinst_LTLIBRARIES = libclover.la libcltgsi.la libclllvm.la libspirv.la
 
 libcltgsi_la_CXXFLAGS = \
-std=c++11 \
@@ -50,13 +50,19 @@ libclllvm_la_CXXFLAGS = \
 
 libclllvm_la_SOURCES = $(LLVM_SOURCES)
 
+libspirv_la_CXXFLAGS = \
+   -std=c++11 \
+   $(VISIBILITY_CXXFLAGS)
+
+libspirv_la_SOURCES = $(SPIRV_SOURCES)
+
 libclover_la_CXXFLAGS = \
-std=c++11 \
$(CLOVER_STD_OVERRIDE) \
$(VISIBILITY_CXXFLAGS)
 
 libclover_la_LIBADD = \
-   libcltgsi.la libclllvm.la
+   libcltgsi.la libclllvm.la libspirv.la
 
 libclover_la_SOURCES = $(CPP_SOURCES)
 
diff --git a/src/gallium/state_trackers/clover/Makefile.sources 
b/src/gallium/state_trackers/clover/Makefile.sources
index e9828b107b..f223bebcd3 100644
--- a/src/gallium/state_trackers/clover/Makefile.sources
+++ b/src/gallium/state_trackers/clover/Makefile.sources
@@ -66,3 +66,7 @@ LLVM_SOURCES := \
 TGSI_SOURCES := \
tgsi/compiler.cpp \
tgsi/invocation.hpp
+
+SPIRV_SOURCES := \
+   spirv/invocation.cpp \
+   spirv/invocation.hpp
diff --git a/src/gallium/state_trackers/clover/spirv/invocation.cpp 
b/src/gallium/state_trackers/clover/spirv/invocation.cpp
new file mode 100644
index 00..3e740eb998
--- /dev/null
+++ b/src/gallium/state_trackers/clover/spirv/invocation.cpp
@@ -0,0 +1,481 @@
+//
+// Copyright 2017 Pierre Moreau
+//
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+//
+
+#include 
+#include 
+
+#include 
+
+#include "core/error.hpp"
+#include "invocation.hpp"
+#include "llvm/util.hpp"
+#include "spirv/spirv_linker.h"
+#include "spirv/spirv_utils.h"
+#include "util/algorithm.hpp"
+#include "util/functional.hpp"
+#include "util/u_debug.h"
+
+#include "spirv.hpp11"
+
+using namespace clover;
+
+namespace {
+
+   template
+   T get(const std::vector& source, size_t index) {
+  if (index * sizeof(spirv_word) + 3u > source.size())
+ return static_cast(0);
+  return static_cast(spirv_get_word(source.data(), index));
+   }
+
+   enum module::argument::type
+   convertStorageClass(spv::StorageClass storage_class) {
+  switch (storage_class) {
+  case spv::StorageClass::UniformConstant:
+ return module::argument::constant;
+  case spv::StorageClass::Workgroup:
+ return module::argument::local;
+  case spv::StorageClass::CrossWorkgroup:
+ return module::argument::global;
+  default:
+ throw build_error();
+  }
+   }
+
+   enum module::argument::type
+   convertImageType(spv::Id id, spv::Dim dim, spv::AccessQualifier access,
+std::string ) {
+#define APPEND_DIM(d) \
+  switch(access) { \
+  case spv::AccessQualifier::ReadOnly: \
+ return module::argument::image##d##_rd; \
+  case spv::AccessQualifier::WriteOnly: \
+ return module::argument::image##d##_wr; \
+  default: \
+ err += "Invalid access qualifier " #d

[Mesa-dev] [RFC PATCH 10/17] clover/spirv: Import spirv.hpp11 version 1.0 (rev 10)

2017-05-03 Thread Pierre Moreau
Signed-off-by: Pierre Moreau <pierre.mor...@free.fr>
---
 .../state_trackers/clover/spirv/spirv.hpp11| 952 +
 1 file changed, 952 insertions(+)
 create mode 100644 src/gallium/state_trackers/clover/spirv/spirv.hpp11

diff --git a/src/gallium/state_trackers/clover/spirv/spirv.hpp11 
b/src/gallium/state_trackers/clover/spirv/spirv.hpp11
new file mode 100644
index 00..62bb127a8a
--- /dev/null
+++ b/src/gallium/state_trackers/clover/spirv/spirv.hpp11
@@ -0,0 +1,952 @@
+// Copyright (c) 2014-2017 The Khronos Group Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and/or associated documentation files (the "Materials"),
+// to deal in the Materials without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Materials, and to permit persons to whom the
+// Materials are furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Materials.
+//
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
+// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
+// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
+//
+// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
+// IN THE MATERIALS.
+
+// This header is automatically generated by the same tool that creates
+// the Binary Section of the SPIR-V specification.
+
+// Enumeration tokens for SPIR-V, in various styles:
+//   C, C++, C++11, JSON, Lua, Python
+//
+// - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL
+// - C++ will have tokens in the "spv" name space, e.g.: 
spv::SourceLanguageGLSL
+// - C++11 will use enum classes in the spv namespace, e.g.: 
spv::SourceLanguage::GLSL
+// - Lua will use tables, e.g.: spv.SourceLanguage.GLSL
+// - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL']
+//
+// Some tokens act like mask values, which can be OR'd together,
+// while others are mutually exclusive.  The mask-like ones have
+// "Mask" in their name, and a parallel enum that has the shift
+// amount (1 << x) for each corresponding enumerant.
+
+#ifndef spirv_HPP
+#define spirv_HPP
+
+namespace spv {
+
+typedef unsigned int Id;
+
+#define SPV_VERSION 0x1
+#define SPV_REVISION 10
+
+static const unsigned int MagicNumber = 0x07230203;
+static const unsigned int Version = 0x0001;
+static const unsigned int Revision = 10;
+static const unsigned int OpCodeMask = 0x;
+static const unsigned int WordCountShift = 16;
+
+enum class SourceLanguage : unsigned {
+Unknown = 0,
+ESSL = 1,
+GLSL = 2,
+OpenCL_C = 3,
+OpenCL_CPP = 4,
+Max = 0x7fff,
+};
+
+enum class ExecutionModel : unsigned {
+Vertex = 0,
+TessellationControl = 1,
+TessellationEvaluation = 2,
+Geometry = 3,
+Fragment = 4,
+GLCompute = 5,
+Kernel = 6,
+Max = 0x7fff,
+};
+
+enum class AddressingModel : unsigned {
+Logical = 0,
+Physical32 = 1,
+Physical64 = 2,
+Max = 0x7fff,
+};
+
+enum class MemoryModel : unsigned {
+Simple = 0,
+GLSL450 = 1,
+OpenCL = 2,
+Max = 0x7fff,
+};
+
+enum class ExecutionMode : unsigned {
+Invocations = 0,
+SpacingEqual = 1,
+SpacingFractionalEven = 2,
+SpacingFractionalOdd = 3,
+VertexOrderCw = 4,
+VertexOrderCcw = 5,
+PixelCenterInteger = 6,
+OriginUpperLeft = 7,
+OriginLowerLeft = 8,
+EarlyFragmentTests = 9,
+PointMode = 10,
+Xfb = 11,
+DepthReplacing = 12,
+DepthGreater = 14,
+DepthLess = 15,
+DepthUnchanged = 16,
+LocalSize = 17,
+LocalSizeHint = 18,
+InputPoints = 19,
+InputLines = 20,
+InputLinesAdjacency = 21,
+Triangles = 22,
+InputTrianglesAdjacency = 23,
+Quads = 24,
+Isolines = 25,
+OutputVertices = 26,
+OutputPoints = 27,
+OutputLineStrip = 28,
+OutputTriangleStrip = 29,
+VecTypeHint = 30,
+ContractionOff = 31,
+Max = 0x7fff,
+};
+
+enum class StorageClass : unsigned {
+UniformConstant = 0,
+Input = 1,
+Uniform = 2,
+Output = 3,
+Workgroup = 4,
+CrossWorkgroup = 5,
+Private = 6,
+Function = 7,
+Generic = 8,
+PushConstant = 9,
+AtomicCounter = 10,
+Image = 11,
+Max = 0x7fff,
+};
+
+enum class D

  1   2   3   4   >