Re: [Mesa-dev] [PATCH 3/3] u_dynarray: turn util_dynarray_{grow, resize} into element-oriented macros

2019-05-04 Thread Gustaw Smolarczyk
ruct util_dynarray *buf, 
> unsigned newcap)
>} else {
>   buf->data = realloc(buf->data, buf->capacity);
>}
>if (!buf->data)
>   return 0;
> }
>
> return (void *)((char *)buf->data + buf->size);
>  }
>
> -static inline void *
> -util_dynarray_grow_cap(struct util_dynarray *buf, int diff)
> -{
> -   return util_dynarray_ensure_cap(buf, buf->size + diff);
> -}
> -
>  /* use util_dynarray_trim to reduce the allocated storage */
>  static inline void *
> -util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
> +util_dynarray_resize_bytes(struct util_dynarray *buf, unsigned nelts, size_t 
> eltsize)
>  {
> +   if (unlikely(nelts > UINT_MAX / eltsize)) {
> +  util_dynarray_fini(buf);
> +  return 0;
> +   }
> +
> +   unsigned newsize = nelts * eltsize;
> void *p = util_dynarray_ensure_cap(buf, newsize);
> buf->size = newsize;
>
> return p;
>  }
>
>  static inline void
>  util_dynarray_clone(struct util_dynarray *buf, void *mem_ctx,
>  struct util_dynarray *from_buf)
>  {
> util_dynarray_init(buf, mem_ctx);
> -   util_dynarray_resize(buf, from_buf->size);
> +   util_dynarray_resize_bytes(buf, 1, from_buf->size);

Just a nit: couldn't you swap the arguments to resize_bytes? The
compiler will probably figure it out, but you are performing the
following operation in the resize_bytes function:

  if (unlikely(1 > UINT_MAX / from_buf->size))

which is a division of a constant by a variable. With the arguments
swapped it would be:

  if (unlikely(from_buf->size > UINT_MAX / 1))

which should be easier to optimize away (also in non-optimized
version). This way, the eltsize parameter will probably always be a
constant too.

Regards,
Gustaw Smolarczyk

> memcpy(buf->data, from_buf->data, from_buf->size);
>  }
>
>  static inline void *
> -util_dynarray_grow(struct util_dynarray *buf, int diff)
> +util_dynarray_grow_bytes(struct util_dynarray *buf, unsigned ngrow, size_t 
> eltsize)
>  {
> -   return util_dynarray_resize(buf, buf->size + diff);
> +   unsigned growbytes = ngrow * eltsize;
> +
> +   if (unlikely(ngrow > (UINT_MAX / eltsize) ||
> +growbytes > UINT_MAX - buf->size)) {
> +  util_dynarray_fini(buf);
> +  return 0;
> +   }
> +
> +   unsigned newsize = buf->size + growbytes;
> +   void *p = util_dynarray_ensure_cap(buf, newsize);
> +   buf->size = newsize;
> +
> +   return p;
>  }
>
>  static inline void
>  util_dynarray_trim(struct util_dynarray *buf)
>  {
> if (buf->size != buf->capacity) {
>if (buf->size) {
>   if (buf->mem_ctx) {
>  buf->data = reralloc_size(buf->mem_ctx, buf->data, buf->size);
>   } else {
> @@ -146,21 +159,24 @@ util_dynarray_trim(struct util_dynarray *buf)
>  ralloc_free(buf->data);
>   } else {
>  free(buf->data);
>   }
>   buf->data = NULL;
>   buf->capacity = 0;
>}
> }
>  }
>
> -#define util_dynarray_append(buf, type, v) do {type __v = (v); 
> memcpy(util_dynarray_grow((buf), sizeof(type)), &__v, sizeof(type));} while(0)
> +#define util_dynarray_append(buf, type, v) do {type __v = (v); 
> memcpy(util_dynarray_grow_bytes((buf), 1, sizeof(type)), &__v, 
> sizeof(type));} while(0)
> +/* Returns a pointer to the space of the first new element (in case of 
> growth) or NULL on failure. */
> +#define util_dynarray_resize(buf, type, nelts) 
> util_dynarray_resize_bytes(buf, (nelts), sizeof(type))
> +#define util_dynarray_grow(buf, type, ngrow) util_dynarray_grow_bytes(buf, 
> (ngrow), sizeof(type))
>  #define util_dynarray_top_ptr(buf, type) (type*)((char*)(buf)->data + 
> (buf)->size - sizeof(type))
>  #define util_dynarray_top(buf, type) *util_dynarray_top_ptr(buf, type)
>  #define util_dynarray_pop_ptr(buf, type) (type*)((char*)(buf)->data + 
> ((buf)->size -= sizeof(type)))
>  #define util_dynarray_pop(buf, type) *util_dynarray_pop_ptr(buf, type)
>  #define util_dynarray_contains(buf, type) ((buf)->size >= sizeof(type))
>  #define util_dynarray_element(buf, type, idx) ((type*)(buf)->data + (idx))
>  #define util_dynarray_begin(buf) ((buf)->data)
>  #define util_dynarray_end(buf) ((void*)util_dynarray_element((buf), char, 
> (buf)->size))
>  #define util_dynarray_num_elements(buf, type) ((buf)->size / sizeof(type))
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-25 Thread Gustaw Smolarczyk
czw., 25 kwi 2019 o 20:11 Gustaw Smolarczyk  napisał(a):
>
> czw., 25 kwi 2019 o 19:42 Emil Velikov  napisał(a):
> >
> > The function is analogous to lp_fence_wait() while taking at timeout
> > (ns) parameter, as needed for EGL fence/sync.
> >
> > v2:
> >  - use absolute UTC time, as per spec (Gustaw)
> >  - bail out on cnd_timedwait() failure (Gustaw)
> >
> > Cc: Gustaw Smolarczyk 
> > Cc: Roland Scheidegger 
> > Signed-off-by: Emil Velikov 
> > Reviewed-by: Roland Scheidegger  (v1)
> > ---
> >  src/gallium/drivers/llvmpipe/lp_fence.c | 30 +
> >  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
> >  2 files changed, 33 insertions(+)
> >
> > diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> > b/src/gallium/drivers/llvmpipe/lp_fence.c
> > index 20cd91cd63d..b79d773bf6c 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> > +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> > @@ -125,3 +125,33 @@ lp_fence_wait(struct lp_fence *f)
> >  }
> >
> >
> > +boolean
> > +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> > +{
> > +   struct timespec ts;
> > +   int ret;
> > +
> > +   timespec_get(, TIME_UTC);
> > +
> > +   ts.tv_nsec += timeout % 10L;
> > +   ts.tv_sec += timeout / 10L;
> > +   if (ts.tv_nsec >= 10L) {
> > +  ts.tv_sec++;
> > +  ts.tv_nsec -= 10L;
> > +   }
> > +
> > +   if (LP_DEBUG & DEBUG_FENCE)
> > +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> > +
> > +   mtx_lock(>mutex);
> > +   assert(f->issued);
> > +   while (f->count < f->rank) {
> > +  ret = cnd_timedwait(>signalled, >mutex, );
> > +  if (ret != thrd_success)
> > + break;
> > +   }
> > +   mtx_unlock(>mutex);
> > +   return (f->count >= f->rank && ret == thrd_success);

Hmm, you are reading from the fence object outside of the critical
section, which doesn't sound safe. Maybe compute the return value
before the mutex is unlocked?

   const boolean result = (f->count >= f->rank);
   mtx_unlock(>mutex);
   return result;

Since f->rank is immutable and f->count never decreases, it might
still be ok without this change, though it is racy.

>
> Is checking for ret == thrd_success here really necessary? If the
> first part is true we already know that the fence has been signalled.
>
> With this changed or not:
>
> Reviewed-by: Gustaw Smolarczyk 
>
> > +}
> > +
> > +
> > diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h 
> > b/src/gallium/drivers/llvmpipe/lp_fence.h
> > index b72026492c6..5ba746d22d1 100644
> > --- a/src/gallium/drivers/llvmpipe/lp_fence.h
> > +++ b/src/gallium/drivers/llvmpipe/lp_fence.h
> > @@ -65,6 +65,9 @@ lp_fence_signalled(struct lp_fence *fence);
> >  void
> >  lp_fence_wait(struct lp_fence *fence);
> >
> > +boolean
> > +lp_fence_timedwait(struct lp_fence *fence, uint64_t timeout);
> > +
> >  void
> >  llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
> >
> > --
> > 2.20.1
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-25 Thread Gustaw Smolarczyk
czw., 25 kwi 2019 o 19:42 Emil Velikov  napisał(a):
>
> The function is analogous to lp_fence_wait() while taking at timeout
> (ns) parameter, as needed for EGL fence/sync.
>
> v2:
>  - use absolute UTC time, as per spec (Gustaw)
>  - bail out on cnd_timedwait() failure (Gustaw)
>
> Cc: Gustaw Smolarczyk 
> Cc: Roland Scheidegger 
> Signed-off-by: Emil Velikov 
> Reviewed-by: Roland Scheidegger  (v1)
> ---
>  src/gallium/drivers/llvmpipe/lp_fence.c | 30 +
>  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
>  2 files changed, 33 insertions(+)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> b/src/gallium/drivers/llvmpipe/lp_fence.c
> index 20cd91cd63d..b79d773bf6c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> @@ -125,3 +125,33 @@ lp_fence_wait(struct lp_fence *f)
>  }
>
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> +{
> +   struct timespec ts;
> +   int ret;
> +
> +   timespec_get(, TIME_UTC);
> +
> +   ts.tv_nsec += timeout % 10L;
> +   ts.tv_sec += timeout / 10L;
> +   if (ts.tv_nsec >= 10L) {
> +  ts.tv_sec++;
> +  ts.tv_nsec -= 10L;
> +   }
> +
> +   if (LP_DEBUG & DEBUG_FENCE)
> +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> +
> +   mtx_lock(>mutex);
> +   assert(f->issued);
> +   while (f->count < f->rank) {
> +  ret = cnd_timedwait(>signalled, >mutex, );
> +  if (ret != thrd_success)
> + break;
> +   }
> +   mtx_unlock(>mutex);
> +   return (f->count >= f->rank && ret == thrd_success);

Is checking for ret == thrd_success here really necessary? If the
first part is true we already know that the fence has been signalled.

With this changed or not:

Reviewed-by: Gustaw Smolarczyk 

> +}
> +
> +
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h 
> b/src/gallium/drivers/llvmpipe/lp_fence.h
> index b72026492c6..5ba746d22d1 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.h
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.h
> @@ -65,6 +65,9 @@ lp_fence_signalled(struct lp_fence *fence);
>  void
>  lp_fence_wait(struct lp_fence *fence);
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *fence, uint64_t timeout);
> +
>  void
>  llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
>
> --
> 2.20.1
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-16 Thread Gustaw Smolarczyk
wt., 16 kwi 2019 o 12:11 Emil Velikov  napisał(a):
>
> On Thu, 11 Apr 2019 at 17:55, Gustaw Smolarczyk  wrote:
> >
> > czw., 11 kwi 2019 o 18:06 Emil Velikov  
> > napisał(a):
> > >
> > > The function is analogous to lp_fence_wait() while taking at timeout
> > > (ns) parameter, as needed for EGL fence/sync.
> > >
> > > Cc: Roland Scheidegger 
> > > Signed-off-by: Emil Velikov 
> > > ---
> > >  src/gallium/drivers/llvmpipe/lp_fence.c | 22 ++
> > >  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
> > >  2 files changed, 25 insertions(+)
> > >
> > > diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> > > b/src/gallium/drivers/llvmpipe/lp_fence.c
> > > index 20cd91cd63d..f8b31a9d6a5 100644
> > > --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> > > +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> > > @@ -125,3 +125,25 @@ lp_fence_wait(struct lp_fence *f)
> > >  }
> > >
> > >
> > > +boolean
> > > +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> > > +{
> > > +   struct timespec ts = {
> > > +  .tv_nsec = timeout % 10L,
> > > +  .tv_sec = timeout / 10L,
> > > +   };
> >
> > According to the documentation [1] and looking at the implementation
> > in mesa [2], cnd_timedwait accepts an absolute time in UTC, not
> > duration. It seems that the fence_finish callback accepts duration.
> >
> Agreed, not sure how I missed that one.
>
> > [1] https://en.cppreference.com/w/c/thread/cnd_timedwait
> > [2] 
> > https://gitlab.freedesktop.org/mesa/mesa/blob/master/include/c11/threads_posix.h#L135
> >
> > > +   int ret;
> > > +
> > > +   if (LP_DEBUG & DEBUG_FENCE)
> > > +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> > > +
> > > +   mtx_lock(>mutex);
> > > +   assert(f->issued);
> > > +   while (f->count < f->rank) {
> > > +  ret = cnd_timedwait(>signalled, >mutex, );
> >
> > Shouldn't ret be checked for thrd_busy here as well? Otherwise, the
> > function will busy-wait after the timeout is reached instead of
> > returning.
> >
> Actually this should be tweaked to:
>  - only wait for the requested timeout
>  - _regardless_ of how meany threads (and hence ranks) llvmpipe has
>
> Something like the following (warning pre-coffee code) should work.
>
>mtx_lock(>mutex);
>assert(f->issued);
>if (f->count < f->rank)
>   ret = cnd_timedwait(>signalled, >mutex, );
>
>mtx_unlock(>mutex);
>return (f->count >= f->rank && ret == thrd_success);
>

Is it a good idea not to perform cnd_timedwait in a loop? A spurious
wake up could could shorten the wait time.

> Thanks for the review :-)
> -Emil

Regards,
Gustaw Smolarczyk
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] llvmpipe: add lp_fence_timedwait() helper

2019-04-11 Thread Gustaw Smolarczyk
czw., 11 kwi 2019 o 18:06 Emil Velikov  napisał(a):
>
> The function is analogous to lp_fence_wait() while taking at timeout
> (ns) parameter, as needed for EGL fence/sync.
>
> Cc: Roland Scheidegger 
> Signed-off-by: Emil Velikov 
> ---
>  src/gallium/drivers/llvmpipe/lp_fence.c | 22 ++
>  src/gallium/drivers/llvmpipe/lp_fence.h |  3 +++
>  2 files changed, 25 insertions(+)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.c 
> b/src/gallium/drivers/llvmpipe/lp_fence.c
> index 20cd91cd63d..f8b31a9d6a5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.c
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.c
> @@ -125,3 +125,25 @@ lp_fence_wait(struct lp_fence *f)
>  }
>
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *f, uint64_t timeout)
> +{
> +   struct timespec ts = {
> +  .tv_nsec = timeout % 10L,
> +  .tv_sec = timeout / 10L,
> +   };

According to the documentation [1] and looking at the implementation
in mesa [2], cnd_timedwait accepts an absolute time in UTC, not
duration. It seems that the fence_finish callback accepts duration.

[1] https://en.cppreference.com/w/c/thread/cnd_timedwait
[2] 
https://gitlab.freedesktop.org/mesa/mesa/blob/master/include/c11/threads_posix.h#L135

> +   int ret;
> +
> +   if (LP_DEBUG & DEBUG_FENCE)
> +  debug_printf("%s %d\n", __FUNCTION__, f->id);
> +
> +   mtx_lock(>mutex);
> +   assert(f->issued);
> +   while (f->count < f->rank) {
> +  ret = cnd_timedwait(>signalled, >mutex, );

Shouldn't ret be checked for thrd_busy here as well? Otherwise, the
function will busy-wait after the timeout is reached instead of
returning.

Regards,
Gustaw Smolarczyk


> +   }
> +   mtx_unlock(>mutex);
> +   return ret == thrd_success;
> +}
> +
> +
> diff --git a/src/gallium/drivers/llvmpipe/lp_fence.h 
> b/src/gallium/drivers/llvmpipe/lp_fence.h
> index b72026492c6..5ba746d22d1 100644
> --- a/src/gallium/drivers/llvmpipe/lp_fence.h
> +++ b/src/gallium/drivers/llvmpipe/lp_fence.h
> @@ -65,6 +65,9 @@ lp_fence_signalled(struct lp_fence *fence);
>  void
>  lp_fence_wait(struct lp_fence *fence);
>
> +boolean
> +lp_fence_timedwait(struct lp_fence *fence, uint64_t timeout);
> +
>  void
>  llvmpipe_init_screen_fence_funcs(struct pipe_screen *screen);
>
> --
> 2.20.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fix compiler issues with GCC 9

2019-02-11 Thread Gustaw Smolarczyk
FWIW,

Reviewed-by: Gustaw Smolarczyk 

pon., 11 lut 2019 o 10:15 Samuel Pitoiset 
napisał(a):
>
> "The C standard says that compound literals which occur inside of
> the body of a function have automatic storage duration associated
> with the enclosing block. Older GCC releases were putting such
> compound literals into the scope of the whole function, so their
> lifetime actually ended at the end of containing function. This
> has been fixed in GCC 9. Code that relied on this extended lifetime
> needs to be fixed, move the compound literals to whatever scope
> they need to accessible in."
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109543
> Cc: 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_meta_blit.c | 90 ++---
>  1 file changed, 48 insertions(+), 42 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c
> index a2ba7e45022..5af9c4a303f 100644
> --- a/src/amd/vulkan/radv_meta_blit.c
> +++ b/src/amd/vulkan/radv_meta_blit.c
> @@ -849,54 +849,60 @@ build_pipeline(struct radv_device *device,
> .subpass = 0,
> };
>
> -   switch(aspect) {
> -   case VK_IMAGE_ASPECT_COLOR_BIT:
> -   vk_pipeline_info.pColorBlendState = 
> &(VkPipelineColorBlendStateCreateInfo) {
> -   .sType = 
> VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
> -   .attachmentCount = 1,
> -   .pAttachments = (VkPipelineColorBlendAttachmentState 
> []) {
> -   { .colorWriteMask =
> -   VK_COLOR_COMPONENT_A_BIT |
> -   VK_COLOR_COMPONENT_R_BIT |
> -   VK_COLOR_COMPONENT_G_BIT |
> -   VK_COLOR_COMPONENT_B_BIT },
> +   VkPipelineColorBlendStateCreateInfo color_blend_info = {
> +   .sType = 
> VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
> +   .attachmentCount = 1,
> +   .pAttachments = (VkPipelineColorBlendAttachmentState []) {
> +   {
> +   .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
> + VK_COLOR_COMPONENT_R_BIT |
> + VK_COLOR_COMPONENT_G_BIT |
> + VK_COLOR_COMPONENT_B_BIT },
> }
> };
> +
> +   VkPipelineDepthStencilStateCreateInfo depth_info = {
> +   .sType = 
> VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
> +   .depthTestEnable = true,
> +   .depthWriteEnable = true,
> +   .depthCompareOp = VK_COMPARE_OP_ALWAYS,
> +   };
> +
> +   VkPipelineDepthStencilStateCreateInfo stencil_info = {
> +   .sType = 
> VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
> +   .depthTestEnable = false,
> +   .depthWriteEnable = false,
> +   .stencilTestEnable = true,
> +   .front = {
> +   .failOp = VK_STENCIL_OP_REPLACE,
> +   .passOp = VK_STENCIL_OP_REPLACE,
> +   .depthFailOp = VK_STENCIL_OP_REPLACE,
> +   .compareOp = VK_COMPARE_OP_ALWAYS,
> +   .compareMask = 0xff,
> +   .writeMask = 0xff,
> +   .reference = 0
> +   },
> +   .back = {
> +   .failOp = VK_STENCIL_OP_REPLACE,
> +   .passOp = VK_STENCIL_OP_REPLACE,
> +   .depthFailOp = VK_STENCIL_OP_REPLACE,
> +   .compareOp = VK_COMPARE_OP_ALWAYS,
> +   .compareMask = 0xff,
> +   .writeMask = 0xff,
> +   .reference = 0
> +   },
> +   .depthCompareOp = VK_COMPARE_OP_ALWAYS,
> +   };
> +
> +   switch(aspect) {
> +   case VK_IMAGE_ASPECT_COLOR_BIT:
> +   vk_pipeline_info.pColorBlendState = _blend_info;
> break;
> case VK_IMAGE_ASPECT_DEPTH_BIT:
> -   vk_pipeline_info.pDepthStencilState = 
> &(VkPipelineDepthStencilStateCreateInfo) {
> -   .sType = 
> VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
> -   .depthTestEnable = true,
> -   .depthWriteEnable = true,
> -   .depthCompareOp = VK_COMPARE_OP_ALWAYS,
> -   };
> +  

Re: [Mesa-dev] [PATCH 1.5/2] ac/surface/gfx9: let addrlib choose the preferred swizzle kind

2018-11-21 Thread Gustaw Smolarczyk
śr., 21 lis 2018 o 16:21 Nicolai Hähnle  napisał(a):
>
> From: Nicolai Hähnle 
>
> Our choices here are simply redundant as long as sin.flags is set
> correctly.
> --
> This is the change I was talking about.
> ---
>  src/amd/common/ac_surface.c | 10 --
>  1 file changed, 10 deletions(-)
>
> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> index edd710a968c..ad2cb585c9d 100644
> --- a/src/amd/common/ac_surface.c
> +++ b/src/amd/common/ac_surface.c
> @@ -1057,30 +1057,20 @@ gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
> sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle 
> modes */
> sin.forbiddenBlock.linear = 1; /* don't allow linear swizzle modes */
> sin.bpp = in->bpp;
> sin.width = in->width;
> sin.height = in->height;
> sin.numSlices = in->numSlices;
> sin.numMipLevels = in->numMipLevels;
> sin.numSamples = in->numSamples;
> sin.numFrags = in->numFrags;
>
> -   if (flags & RADEON_SURF_SCANOUT) {

Isn't the `flags' function argument now unused?

Regards,
Gustaw Smolarczyk

> -   sin.preferredSwSet.sw_D = 1;
> -   /* Raven only allows S for displayable surfaces with < 64 
> bpp, so
> -* allow it as fallback */
> -   sin.preferredSwSet.sw_S = 1;
> -   } else if (in->flags.depth || in->flags.stencil || is_fmask)
> -   sin.preferredSwSet.sw_Z = 1;
> -   else
> -   sin.preferredSwSet.sw_S = 1;
> -
> if (is_fmask) {
> sin.flags.display = 0;
> sin.flags.color = 0;
> sin.flags.fmask = 1;
> }
>
> ret = Addr2GetPreferredSurfaceSetting(addrlib, , );
> if (ret != ADDR_OK)
> return ret;
>
> --
> 2.19.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] util/ralloc: Make sizeof(linear_header) a multiple of 8

2018-11-13 Thread Gustaw Smolarczyk
Wt., 13 lis 2018, 06:03: Matt Turner  napisał(a):

> On Mon, Nov 12, 2018 at 3:07 PM Eric Anholt  wrote:
> >
> > Matt Turner  writes:
> >
> > > Prior to this patch sizeof(linear_header) was 20 bytes in a
> > > non-debug build on 32-bit platforms. We do some pointer arithmetic to
> > > calculate the next available location with
> > >
> > >ptr = (linear_size_chunk *)((char *)[1] + latest->offset);
> > >
> > > in linear_alloc_child(). The [1] adds 20 bytes, so an allocation
> > > would only be 4-byte aligned.
> > >
> > > On 32-bit SPARC a 'sttw' instruction (which stores a consecutive pair
> of
> > > 4-byte registers to memory) requires an 8-byte aligned address. Such an
> > > instruction is used to store to an 8-byte integer type, like intmax_t
> > > which is used in glcpp's expression_value_t struct.
> > >
> > > As a result of the 4-byte alignment returned by linear_alloc_child() we
> > > would generate a SIGBUS (unaligned exception) on SPARC.
> > >
> > > According to the GNU libc manual malloc() always returns memory that
> has
> > > at least an alignment of 8-bytes [1]. I think our allocator should do
> > > the same.
> > >
> > > So, simple fix with two parts:
> > >
> > >(1) Increase SUBALLOC_ALIGNMENT to 8 unconditionally.
> > >(2) Mark linear_header with an aligned attribute, which will cause
> > >its sizeof to be rounded up to that alignment. (We already do
> > >this for ralloc_header)
> > >
> > > With this done, all Mesa's unit tests now pass on SPARC.
> > >
> > > [1]
> https://www.gnu.org/software/libc/manual/html_node/Aligned-Memory-Blocks.html
> > >
> > > Fixes: 47e17586924f ("glcpp: use the linear allocator for most
> objects")
> > > Bug: https://bugs.gentoo.org/636326
> > > ---
> > >  src/util/ralloc.c | 14 --
> > >  1 file changed, 12 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/src/util/ralloc.c b/src/util/ralloc.c
> > > index 745b4cf1226..fc35661996d 100644
> > > --- a/src/util/ralloc.c
> > > +++ b/src/util/ralloc.c
> > > @@ -552,10 +552,18 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t
> *start, const char *fmt,
> > >   */
> > >
> > >  #define MIN_LINEAR_BUFSIZE 2048
> > > -#define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
> > > +#define SUBALLOC_ALIGNMENT 8
> > >  #define LMAGIC 0x87b9c7d3
> > >
> > > -struct linear_header {
> > > +struct
> > > +#ifdef _MSC_VER
> > > + __declspec(align(8))
> > > +#elif defined(__LP64__)
> > > + __attribute__((aligned(16)))
> > > +#else
> > > + __attribute__((aligned(8)))
> > > +#endif
> > > +   linear_header {
> > >  #ifndef NDEBUG
> > > unsigned magic;   /* for debugging */
> > >  #endif
> > > @@ -647,6 +655,8 @@ linear_alloc_child(void *parent, unsigned size)
> > > ptr = (linear_size_chunk *)((char*)[1] + latest->offset);
> > > ptr->size = size;
> > > latest->offset += full_size;
> > > +
> > > +   assert((uintptr_t)[1] % SUBALLOC_ALIGNMENT == 0);
> > > return [1];
> > >  }
> >
> > These patches are:
> >
> > Reviewed-by: Eric Anholt 
>
> Thanks a bunch! I hope this is useful for you on arm as well.
>
> > However, shouldn't we also bump SUBALLOC_ALIGNMENT to 16 on LP64, too,
> > if that's what glibc is doing for malloc?
>
> 16-byte alignment is necessary for SSE aligned vector load/store
> instructions. I suppose we're not getting any vectorized SSE
> load/store instructions to memory allocated by linear_alloc_* and
> that's why we haven't seen problems?
>

FWIW, at least clang on x86 assumes malloc/new return pointers aligned to
16 bytes, though it probably doesn't detect linear_alloc_* as such.

Regards,
Gustaw Smolarczyk


> Seems reasonable to bump it to 16-bytes.
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-10-30 Thread Gustaw Smolarczyk
śr., 31 paź 2018 o 00:23 Marek Olšák  napisał(a):
>
> On Tue, Oct 30, 2018 at 7:11 PM Gustaw Smolarczyk  
> wrote:
>>
>> wt., 30 paź 2018 o 23:55 Marek Olšák  napisał(a):
>> >
>> > On Tue, Oct 30, 2018 at 6:32 PM Gustaw Smolarczyk  
>> > wrote:
>> >>
>> >> wt., 30 paź 2018, 23:01 Marek Olšák :
>> >>>
>> >>> On Mon, Oct 29, 2018 at 12:43 PM Michel Dänzer  
>> >>> wrote:
>> >>>>
>> >>>> On 2018-10-28 11:27 a.m., Gustaw Smolarczyk wrote:
>> >>>> > pon., 17 wrz 2018 o 18:24 Michel Dänzer  
>> >>>> > napisał(a):
>> >>>> >>
>> >>>> >> On 2018-09-15 3:04 a.m., Marek Olšák wrote:
>> >>>> >>> On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer  
>> >>>> >>> wrote:
>> >>>> >>>>
>> >>>> >>>> Last but not least, this doesn't solve the issue of apps such as
>> >>>> >>>> blender, which spawn their own worker threads after initializing 
>> >>>> >>>> OpenGL
>> >>>> >>>> (possibly not themselves directly, but via the toolkit or another
>> >>>> >>>> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread 
>> >>>> >>>> affinity.
>> >>>> >>>>
>> >>>> >>>>
>> >>>> >>>> Due to these issues, setting the thread affinity needs to be 
>> >>>> >>>> disabled by
>> >>>> >>>> default, and only white-listed for applications where it's known 
>> >>>> >>>> safe
>> >>>> >>>> and beneficial. This sucks, but I'm afraid that's the reality until
>> >>>> >>>> there's better API available which allows solving these issues.
>> >>>> >>>
>> >>>> >>> We don't have the bandwidth to maintain whitelists. This will either
>> >>>> >>> have to be always on or always off.
>> >>>> >>>
>> >>>> >>> On the positive side, only Ryzens with multiple CCXs get all the
>> >>>> >>> benefits and disadvantages.
>> >>>> >>
>> >>>> >> In other words, only people who spent relatively large amounts of 
>> >>>> >> money
>> >>>> >> for relatively high-end CPUs will be affected (I'm sure they'll be 
>> >>>> >> glad
>> >>>> >> to know that "common people" aren't affected. ;). Affected 
>> >>>> >> applications
>> >>>> >> will see their performance decreased by a factor of 2-8 (the number 
>> >>>> >> of
>> >>>> >> CCXs in the CPU).
>> >>>> >>
>> >>>> >> OTOH, only a relatively small number of games will get a significant
>> >>>> >> benefit from the thread affinity, and the benefit will be smaller 
>> >>>> >> than a
>> >>>> >> factor of 2. This cannot justify risking a performance drop of up to 
>> >>>> >> a
>> >>>> >> factor of 8, no matter how small the risk.
>> >>>> >>
>> >>>> >> Therefore, the appropriate mechanism is a whitelist.
>> >>>> >
>> >>>> > Hi,
>> >>>> >
>> >>>> > What was the conclusion of this discussion? I don't see any
>> >>>> > whitelist/blacklist for this feature.
>> >>>> >
>> >>>> > I have just tested blender and it still renders on only a single CCX
>> >>>> > on mesa from git master. Also, there is a bug report that suggests
>> >>>> > this regressed performance in at least one game [1].
>> >>>>
>> >>>> I hooked up that bug report to the 18.3 blocker bug.
>> >>>>
>> >>>>
>> >>>> > If you think enabling it by default is the way to go, we should also
>> >>>> > implement a blacklist so that it can be turned off in such cases.
>> >>>>
>> >>>> I stand by my opinion that a white-list is appropriate, not a
>> >>>> black-list. It's pretty much the same as mesa_g

Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-10-30 Thread Gustaw Smolarczyk
wt., 30 paź 2018 o 23:55 Marek Olšák  napisał(a):
>
> On Tue, Oct 30, 2018 at 6:32 PM Gustaw Smolarczyk  
> wrote:
>>
>> wt., 30 paź 2018, 23:01 Marek Olšák :
>>>
>>> On Mon, Oct 29, 2018 at 12:43 PM Michel Dänzer  wrote:
>>>>
>>>> On 2018-10-28 11:27 a.m., Gustaw Smolarczyk wrote:
>>>> > pon., 17 wrz 2018 o 18:24 Michel Dänzer  napisał(a):
>>>> >>
>>>> >> On 2018-09-15 3:04 a.m., Marek Olšák wrote:
>>>> >>> On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer  
>>>> >>> wrote:
>>>> >>>>
>>>> >>>> Last but not least, this doesn't solve the issue of apps such as
>>>> >>>> blender, which spawn their own worker threads after initializing 
>>>> >>>> OpenGL
>>>> >>>> (possibly not themselves directly, but via the toolkit or another
>>>> >>>> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread 
>>>> >>>> affinity.
>>>> >>>>
>>>> >>>>
>>>> >>>> Due to these issues, setting the thread affinity needs to be disabled 
>>>> >>>> by
>>>> >>>> default, and only white-listed for applications where it's known safe
>>>> >>>> and beneficial. This sucks, but I'm afraid that's the reality until
>>>> >>>> there's better API available which allows solving these issues.
>>>> >>>
>>>> >>> We don't have the bandwidth to maintain whitelists. This will either
>>>> >>> have to be always on or always off.
>>>> >>>
>>>> >>> On the positive side, only Ryzens with multiple CCXs get all the
>>>> >>> benefits and disadvantages.
>>>> >>
>>>> >> In other words, only people who spent relatively large amounts of money
>>>> >> for relatively high-end CPUs will be affected (I'm sure they'll be glad
>>>> >> to know that "common people" aren't affected. ;). Affected applications
>>>> >> will see their performance decreased by a factor of 2-8 (the number of
>>>> >> CCXs in the CPU).
>>>> >>
>>>> >> OTOH, only a relatively small number of games will get a significant
>>>> >> benefit from the thread affinity, and the benefit will be smaller than a
>>>> >> factor of 2. This cannot justify risking a performance drop of up to a
>>>> >> factor of 8, no matter how small the risk.
>>>> >>
>>>> >> Therefore, the appropriate mechanism is a whitelist.
>>>> >
>>>> > Hi,
>>>> >
>>>> > What was the conclusion of this discussion? I don't see any
>>>> > whitelist/blacklist for this feature.
>>>> >
>>>> > I have just tested blender and it still renders on only a single CCX
>>>> > on mesa from git master. Also, there is a bug report that suggests
>>>> > this regressed performance in at least one game [1].
>>>>
>>>> I hooked up that bug report to the 18.3 blocker bug.
>>>>
>>>>
>>>> > If you think enabling it by default is the way to go, we should also
>>>> > implement a blacklist so that it can be turned off in such cases.
>>>>
>>>> I stand by my opinion that a white-list is appropriate, not a
>>>> black-list. It's pretty much the same as mesa_glthread.
>>>
>>>
>>> So you are saying that gallium multithreading show be slower than 
>>> singlethreading by default.
>>>
>>> Marek
>>
>>
>> Hi Marek,
>>
>> The Ryzen optimization helps a lot of applications (mostly games) and 
>> improves their performance, mostly because of the reduced cost of 
>> communication between application's GL API thread and driver's pipe/winsys 
>> threads.
>>
>> However, not all of the applications respond in the same way. The thread 
>> affinity management is hacky, by which I mean that this mechanism was not 
>> meant to mess with application threads from within library's threads. As an 
>> example, blender's threads, which use OpenGL "by accident", are forced to 
>> use the same CCX as the main gallium/winsys thread, even if they are many 
>> and want to work on as many CCXs as are possible. The thread that starts 
>> using GL sp

Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-10-30 Thread Gustaw Smolarczyk
wt., 30 paź 2018, 23:01 Marek Olšák :

> On Mon, Oct 29, 2018 at 12:43 PM Michel Dänzer  wrote:
>
>> On 2018-10-28 11:27 a.m., Gustaw Smolarczyk wrote:
>> > pon., 17 wrz 2018 o 18:24 Michel Dänzer 
>> napisał(a):
>> >>
>> >> On 2018-09-15 3:04 a.m., Marek Olšák wrote:
>> >>> On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer 
>> wrote:
>> >>>>
>> >>>> Last but not least, this doesn't solve the issue of apps such as
>> >>>> blender, which spawn their own worker threads after initializing
>> OpenGL
>> >>>> (possibly not themselves directly, but via the toolkit or another
>> >>>> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread
>> affinity.
>> >>>>
>> >>>>
>> >>>> Due to these issues, setting the thread affinity needs to be
>> disabled by
>> >>>> default, and only white-listed for applications where it's known safe
>> >>>> and beneficial. This sucks, but I'm afraid that's the reality until
>> >>>> there's better API available which allows solving these issues.
>> >>>
>> >>> We don't have the bandwidth to maintain whitelists. This will either
>> >>> have to be always on or always off.
>> >>>
>> >>> On the positive side, only Ryzens with multiple CCXs get all the
>> >>> benefits and disadvantages.
>> >>
>> >> In other words, only people who spent relatively large amounts of money
>> >> for relatively high-end CPUs will be affected (I'm sure they'll be glad
>> >> to know that "common people" aren't affected. ;). Affected applications
>> >> will see their performance decreased by a factor of 2-8 (the number of
>> >> CCXs in the CPU).
>> >>
>> >> OTOH, only a relatively small number of games will get a significant
>> >> benefit from the thread affinity, and the benefit will be smaller than
>> a
>> >> factor of 2. This cannot justify risking a performance drop of up to a
>> >> factor of 8, no matter how small the risk.
>> >>
>> >> Therefore, the appropriate mechanism is a whitelist.
>> >
>> > Hi,
>> >
>> > What was the conclusion of this discussion? I don't see any
>> > whitelist/blacklist for this feature.
>> >
>> > I have just tested blender and it still renders on only a single CCX
>> > on mesa from git master. Also, there is a bug report that suggests
>> > this regressed performance in at least one game [1].
>>
>> I hooked up that bug report to the 18.3 blocker bug.
>>
>>
>> > If you think enabling it by default is the way to go, we should also
>> > implement a blacklist so that it can be turned off in such cases.
>>
>> I stand by my opinion that a white-list is appropriate, not a
>> black-list. It's pretty much the same as mesa_glthread.
>>
>
> So you are saying that gallium multithreading show be slower than
> singlethreading by default.
>
> Marek
>

Hi Marek,

The Ryzen optimization helps a lot of applications (mostly games) and
improves their performance, mostly because of the reduced cost of
communication between application's GL API thread and driver's pipe/winsys
threads.

However, not all of the applications respond in the same way. The thread
affinity management is hacky, by which I mean that this mechanism was not
meant to mess with application threads from within library's threads. As an
example, blender's threads, which use OpenGL "by accident", are forced to
use the same CCX as the main gallium/winsys thread, even if they are many
and want to work on as many CCXs as are possible. The thread that starts
using GL spawns many more threads that don't use GL at all, and the current
atfork mechanism doesn't help.

The current mechanism of tweaking thread affinities doesn't work
universally with all Linux applications. We need a mechanism of tweaking
this behavior, either through a whitelist or through a blacklist. As any
application using OpenGL can be affected, I would opt towards disabling
this by default and providing a whitelist for applications we know it would
help.

Regards,
Gustaw Smolarczyk
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/14] vbo: Remove the VBO_SAFE_FALLBACK flag.

2018-10-30 Thread Gustaw Smolarczyk
wt., 30 paź 2018, 06:08 :

> From: Mathias Fröhlich 
>
> On finishing a display list playback the VBO_SAFE_FALLBACK bit
>

s/SAFE/SAVE/g (here and in the title)

Regards,
Gustaw Smolarczyk

is still kept in vbo_save_context::replay_flags. But examining
> replay_flags and the display list flags that feed this value
> the corresponding bit is never set these days anymore.
> So, since it is nowhere set or checked, we can safely remove it.
>
> Signed-off-by: Mathias Fröhlich 
> ---
>  src/mesa/vbo/vbo_save.h | 2 --
>  src/mesa/vbo/vbo_save_api.c | 8 ++--
>  2 files changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/src/mesa/vbo/vbo_save.h b/src/mesa/vbo/vbo_save.h
> index 37f883e9de..d00700166e 100644
> --- a/src/mesa/vbo/vbo_save.h
> +++ b/src/mesa/vbo/vbo_save.h
> @@ -140,8 +140,6 @@ _vbo_save_get_vertex_count(const struct
> vbo_save_vertex_list *node)
>  #define VBO_SAVE_PRIM_WEAK  0x40
>  #define VBO_SAVE_PRIM_NO_CURRENT_UPDATE 0x80
>
> -#define VBO_SAVE_FALLBACK0x1000
> -
>  struct vbo_save_vertex_store {
> struct gl_buffer_object *bufferobj;
> fi_type *buffer_map;
> diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
> index d5b43d0684..975ba46c8e 100644
> --- a/src/mesa/vbo/vbo_save_api.c
> +++ b/src/mesa/vbo/vbo_save_api.c
> @@ -1804,12 +1804,8 @@ vbo_save_EndCallList(struct gl_context *ctx)
>  {
> struct vbo_save_context *save = _context(ctx)->save;
>
> -   if (ctx->ListState.CallDepth == 1) {
> -  /* This is correct: want to keep only the VBO_SAVE_FALLBACK
> -   * flag, if it is set:
> -   */
> -  save->replay_flags &= VBO_SAVE_FALLBACK;
> -   }
> +   if (ctx->ListState.CallDepth == 1)
> +  save->replay_flags = 0;
>  }
>
>
> --
> 2.17.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: don't let children of fork & exec inherit our thread affinity

2018-10-28 Thread Gustaw Smolarczyk
pon., 17 wrz 2018 o 18:24 Michel Dänzer  napisał(a):
>
> On 2018-09-15 3:04 a.m., Marek Olšák wrote:
> > On Fri, Sep 14, 2018 at 4:53 AM, Michel Dänzer  wrote:
> >> On 2018-09-13 8:56 p.m., Marek Olšák wrote:
> >>
> >>> +* What happens if a driver is unloaded and the app creates a thread?
> >>
> >> I suppose the child process will likely crash, because the memory
> >> address where util_set_full_cpu_affinity was located will either be
> >> unmapped or have random other contents?
> >>
> >> At least in theory, there could also be an issue where the application
> >> might have set its own thread affinity before calling fork, which would
> >> be clobbered by util_set_full_cpu_affinity in the child process.
> >>
> >>
> >> Last but not least, this doesn't solve the issue of apps such as
> >> blender, which spawn their own worker threads after initializing OpenGL
> >> (possibly not themselves directly, but via the toolkit or another
> >> library; e.g. GTK+4 uses OpenGL by default), inheriting the thread 
> >> affinity.
> >>
> >>
> >> Due to these issues, setting the thread affinity needs to be disabled by
> >> default, and only white-listed for applications where it's known safe
> >> and beneficial. This sucks, but I'm afraid that's the reality until
> >> there's better API available which allows solving these issues.
> >
> > We don't have the bandwidth to maintain whitelists. This will either
> > have to be always on or always off.
> >
> > On the positive side, only Ryzens with multiple CCXs get all the
> > benefits and disadvantages.
>
> In other words, only people who spent relatively large amounts of money
> for relatively high-end CPUs will be affected (I'm sure they'll be glad
> to know that "common people" aren't affected. ;). Affected applications
> will see their performance decreased by a factor of 2-8 (the number of
> CCXs in the CPU).
>
> OTOH, only a relatively small number of games will get a significant
> benefit from the thread affinity, and the benefit will be smaller than a
> factor of 2. This cannot justify risking a performance drop of up to a
> factor of 8, no matter how small the risk.
>
> Therefore, the appropriate mechanism is a whitelist.
>
>
> --
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Hi,

What was the conclusion of this discussion? I don't see any
whitelist/blacklist for this feature.

I have just tested blender and it still renders on only a single CCX
on mesa from git master. Also, there is a bug report that suggests
this regressed performance in at least one game [1].

If you think enabling it by default is the way to go, we should also
implement a blacklist so that it can be turned off in such cases.

Regards,
Gustaw Smolarczyk

[1] https://bugs.freedesktop.org/show_bug.cgi?id=108330
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: add support for Raven2

2018-10-27 Thread Gustaw Smolarczyk
) << 0)
>  #define   G_02842C_STENCILFAIL(x) (((x) 
> >> 0) & 0x0F)
>  #define   C_02842C_STENCILFAIL
> 0xFFF0
>  #define   S_02842C_STENCILZPASS(x)
> (((unsigned)(x) & 0x0F) << 4)
>  #define   G_02842C_STENCILZPASS(x)(((x) 
> >> 4) & 0x0F)
>  #define   C_02842C_STENCILZPASS   
> 0xFF0F
>  #define   S_02842C_STENCILZFAIL(x)
> (((unsigned)(x) & 0x0F) << 8)
>  #define   G_02842C_STENCILZFAIL(x)(((x) 
> >> 8) & 0x0F)
>  #define   C_02842C_STENCILZFAIL   
> 0xF0FF
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 6118b8076f1..6e268ed4a7c 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -1026,23 +1026,24 @@ struct pipe_screen *radeonsi_screen_create(struct 
> radeon_winsys *ws,
> sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= 
> CHIP_POLARIS10 &&
> sscreen->info.family <= 
> CHIP_POLARIS12) ||
>sscreen->info.family == 
> CHIP_VEGA10 ||
>sscreen->info.family == CHIP_RAVEN;
> sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
> sscreen->info.family == CHIP_RAVEN;
>
> if (sscreen->debug_flags & DBG(DPBB)) {
> sscreen->dpbb_allowed = true;
> } else {
> -   /* Only enable primitive binning on Raven by default. */
> +   /* Only enable primitive binning on APUs by default. */
> /* TODO: Investigate if binning is profitable on Vega12. */
> sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
> +   sscreen->info.family == CHIP_RAVEN2 &&

I think you meant to do || here, as family cannot be CHIP_RAVEN and
CHIP_RAVEN2 at the same time.

Regards,
Gustaw Smolarczyk

> !(sscreen->debug_flags & 
> DBG(NO_DPBB));
> }
>
> if (sscreen->debug_flags & DBG(DFSM)) {
> sscreen->dfsm_allowed = sscreen->dpbb_allowed;
> } else {
> sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
> !(sscreen->debug_flags & 
> DBG(NO_DFSM));
> }
>
> @@ -1056,21 +1057,22 @@ struct pipe_screen *radeonsi_screen_create(struct 
> radeon_winsys *ws,
>  * always disable it.
>  */
> if (sscreen->info.family == CHIP_STONEY ||
> sscreen->info.chip_class >= GFX9) {
> sscreen->has_rbplus = true;
>
> sscreen->rbplus_allowed =
> !(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
> (sscreen->info.family == CHIP_STONEY ||
>  sscreen->info.family == CHIP_VEGA12 ||
> -sscreen->info.family == CHIP_RAVEN);
> +sscreen->info.family == CHIP_RAVEN ||
> +sscreen->info.family == CHIP_RAVEN2);
> }
>
> sscreen->dcc_msaa_allowed =
> !(sscreen->debug_flags & DBG(NO_DCC_MSAA));
>
> sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= 
> VI;
>
> (void) mtx_init(>shader_parts_mutex, mtx_plain);
> sscreen->use_monolithic_shaders =
> (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> b/src/gallium/drivers/radeonsi/si_state.c
> index 43d76d19916..0293bdfa791 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -113,21 +113,22 @@ static void si_emit_cb_render_state(struct si_context 
> *sctx)
>   blend &&
>   blend->blend_enable_4bit & cb_target_mask &&
>   sctx->framebuffer.nr_samples >= 2;
> unsigned watermark = 
> sctx->framebuffer.dcc_overwrite_combiner_watermark;
>
> radeon_opt_set_context_reg(
>   

Re: [Mesa-dev] [PATCH] radv: check return from mkdir

2018-10-05 Thread Gustaw Smolarczyk
pt., 5 paź 2018 o 13:10 Grazvydas Ignotas  napisał(a):
>
> On Fri, Oct 5, 2018 at 3:38 AM Dave Airlie  wrote:
> >
> > From: Dave Airlie 
> >
> > There may be some security or sandbox reason this might fail, so
> > check and fail appropriately.
> > ---
> >  src/amd/vulkan/radv_meta.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
> > index 1ec8896afa2..6616b1da65a 100644
> > --- a/src/amd/vulkan/radv_meta.c
> > +++ b/src/amd/vulkan/radv_meta.c
> > @@ -248,7 +248,9 @@ radv_builtin_cache_path(char *path)
> >
> > strcpy(path, pwd.pw_dir);
> > strcat(path, "/.cache");
> > -   mkdir(path, 0755);
> > +   ret = mkdir(path, 0755);
> > +   if (ret == -1)
>
> if (ret == -1 && errno != EEXIST) ?

Won't EEXIST be returned even in case the path already exists but is
not a directory? [1]

Regards,
Gustaw Smolarczyk

[1] http://man7.org/linux/man-pages/man2/mkdir.2.html

>
> > +  return false;
> >
> > ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
> >pwd.pw_dir, suffix2, sizeof(void *) * 8);
> > --
> > 2.17.1
>
> Gražvydas
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv: fix descriptor pool allocation size

2018-09-18 Thread Gustaw Smolarczyk
pt., 14 wrz 2018 o 15:00 Bas Nieuwenhuizen  
napisał(a):
>
> Reviewed-by: Bas Nieuwenhuizen 
> On Fri, Sep 14, 2018 at 2:55 PM Samuel Pitoiset
>  wrote:
> >
> > The size has to be multiplied by the number of sets.
> >
> > This gets rid of the OUT_OF_POOL_KHR error and fixes
> > the Tangrams demo.
> >
> > CC: 18.1 18.2 
> > Signed-off-by: Samuel Pitoiset 
> > ---
> >  src/amd/vulkan/radv_descriptor_set.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/amd/vulkan/radv_descriptor_set.c 
> > b/src/amd/vulkan/radv_descriptor_set.c
> > index c4341f6ac5..49d0811bb0 100644
> > --- a/src/amd/vulkan/radv_descriptor_set.c
> > +++ b/src/amd/vulkan/radv_descriptor_set.c
> > @@ -569,9 +569,10 @@ VkResult radv_CreateDescriptorPool(
> > }
> >
> > if (!(pCreateInfo->flags & 
> > VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
> > -   uint64_t host_size = pCreateInfo->maxSets * sizeof(struct 
> > radv_descriptor_set);
> > +   uint64_t host_size = sizeof(struct radv_descriptor_set);
> > host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
> > host_size += sizeof(struct radv_descriptor_range) * 
> > range_count;
> > +   host_size *= pCreateInfo->maxSets;
> > size += host_size;
> > } else {
> > size += sizeof(struct radv_descriptor_pool_entry) * 
> > pCreateInfo->maxSets;
> > --
> > 2.19.0
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

I don't think this change is correct.

From the vkAllocateDescriptorSets documentation [1]:


If a call to vkAllocateDescriptorSets would cause the total number of
descriptor sets allocated from the pool to exceed the value of
VkDescriptorPoolCreateInfo::maxSets used to create
pAllocateInfo→descriptorPool, then the allocation may fail due to lack
of space in the descriptor pool. Similarly, the allocation may fail
due to lack of space if the call to vkAllocateDescriptorSets would
cause the number of any given descriptor type to exceed the sum of all
the descriptorCount members of each element of
VkDescriptorPoolCreateInfo::pPoolSizes with a member equal to that
type.


What it implies (I think), is that VkDescriptorPoolCreateInfo::maxSets
and descriptorCount of each VkDescriptorPoolCreateInfo::pPoolSizes are
treated separately. I don't think you should multiply one by another.
Each pool should be able to allocate at least maxSets sets and
descriptorCount descriptors.

Please, correct me if I'm wrong.

Regards,

Gustaw Smolarczyk

[1] 
https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/vkAllocateDescriptorSets.html
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] mesa: rotation of 0-vector

2018-09-18 Thread Gustaw Smolarczyk
wt., 18 wrz 2018 o 15:59 Sergii Romantsov 
napisał(a):
>
> Specification doesn't define behaviour for rotation of 0-vector.
> Windows and Nvidia drivers have a workaround for that.
> For compatibility proposed that for 0-vector a rotation will be
> done around x-axis.
>
> -v2: logic moved to _math_matrix_rotate
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100960
> Signed-off-by: Sergii Romantsov 
> ---
>  src/mesa/math/m_matrix.c | 19 +++
>  1 file changed, 19 insertions(+)
>
> diff --git a/src/mesa/math/m_matrix.c b/src/mesa/math/m_matrix.c
> index 57a4953..2b3adb3 100644
> --- a/src/mesa/math/m_matrix.c
> +++ b/src/mesa/math/m_matrix.c
> @@ -824,6 +824,25 @@ _math_matrix_rotate( GLmatrix *mat,
> M(1,0) = s;
>  }
>   }
> + else {
> +/* https://bugs.freedesktop.org/show_bug.cgi?id=100960
> + * https://github.com/KhronosGroup/OpenGL-API/issues/41
> + * So that is kind of workaround for empty-vectors to have
> + * compatibility with Windows and Nvidia drivers.
> + */
> +optimized = GL_TRUE;
> +/* rotate only around x-axis */
> +M(1,1) = c;
> +M(2,2) = c;
> +if (x < 0.0F) {

Isn't x guaranteed to be 0.0F here? And I think you wanted to treat it
as 1.0F in that case, so that (0, 0, 0) becomes (1, 0, 0).

Regards,
Gustaw Smolarczyk

> +   M(1,2) = s;
> +   M(2,1) = -s;
> +}
> +else {
> +   M(1,2) = -s;
> +   M(2,1) = s;
> +}
> + }
>}
>else if (z == 0.0F) {
>   optimized = GL_TRUE;
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: add ff fragment shader support for geom and tess shaders

2018-06-18 Thread Gustaw Smolarczyk
2018-06-18 11:19 GMT+02:00 Iago Toral :

> On Mon, 2018-06-18 at 10:45 +0200, Gustaw Smolarczyk wrote:
> > 2018-06-18 10:39 GMT+02:00 Iago Toral :
> > > On Mon, 2018-06-18 at 09:43 +0200, Gustaw Smolarczyk wrote:
> > > > 2018-06-18 4:39 GMT+02:00 Timothy Arceri :
> > > > > This is required for compatibility profile support.
> > > > > ---
> > > > >  src/mesa/main/ff_fragment_shader.cpp | 6 +-
> > > > >  1 file changed, 5 insertions(+), 1 deletion(-)
> > > > >
> > > > > diff --git a/src/mesa/main/ff_fragment_shader.cpp
> > > > > b/src/mesa/main/ff_fragment_shader.cpp
> > > > > index a698931d99e..935a21624af 100644
> > > > > --- a/src/mesa/main/ff_fragment_shader.cpp
> > > > > +++ b/src/mesa/main/ff_fragment_shader.cpp
> > > > > @@ -229,7 +229,11 @@ static GLbitfield filter_fp_input_mask(
> > > > > GLbitfield fp_inputs,
> > > > >  * since vertex shader state validation comes after
> > > > > fragment state
> > > > >  * validation (see additional comments in state.c).
> > > > >  */
> > > > > -   if (vertexShader)
> > > > > +   if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] !=
> > > > > NULL)
> > > > > +  vprog = ctx->_Shader-
> > > > > >CurrentProgram[MESA_SHADER_GEOMETRY];
> > > > > +   else if (ctx->_Shader-
> > > > > >CurrentProgram[MESA_SHADER_TESS_EVAL] != NULL)
> > > > > +  vprog = ctx->_Shader-
> > > > > >CurrentProgram[MESA_SHADER_TESS_EVAL];
> > > > > +   else if (vertexShader)
> > > >
> > > >
> > > > Shouldn't you also update the if condition on line 178?
> > > > Otherwise, you won't reach the if tree you change when the vertex
> > > > shader is missing (unless that was intended - I am not really
> > > > familiar with how fixed function shaders work alongside new
> > > > features).
> > >
> > > You don't have Tesselation / Geometry with fixed function GL, so I
> > > think this should be fine.
> > >
> >
> > Well, this whole file implements fixed function fragment shader, so
> > it will only be reached when the fragment shader is missing. Unless
> > you meant that tessellation/geometry shaders cannot be combined with
> > fixed function vertex shader but fixed function fragment shader is
> > fine.
>
> Yes, that is what I was thinking.  The OpenGL 4.5 spec with
> compatibility profile states in 'chapter 12: fixed function vertex
> processing":
>
> "When programmable vertex processing (see chapter 11) is not being
> performed, the fixed-function operations described in this chapter are
> performed instead. Vertices are first transformed as described in section
> 12.1, followed by lighting and coloring described described in section
> 12.2. The resulting transformed vertices are then processed as described in
> chapter 13."
>
> And then 'Chapter 13: Fixed function vertex post-processing', doesn't
> include geometry or tessellation shading, and seems to include the
> fixed function stages that start after the geometry stage.
>
> Iago
>

That makes sense then, thanks! Though it might be a good idea to add/update
the comment so that there are less confused people in the future.

Regards,
Gustaw Smolarczyk


>
>
> > Regards,
> > Gustaw Smolarczyk
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: add ff fragment shader support for geom and tess shaders

2018-06-18 Thread Gustaw Smolarczyk
2018-06-18 10:39 GMT+02:00 Iago Toral :

> On Mon, 2018-06-18 at 09:43 +0200, Gustaw Smolarczyk wrote:
>
> 2018-06-18 4:39 GMT+02:00 Timothy Arceri :
>
> This is required for compatibility profile support.
> ---
>  src/mesa/main/ff_fragment_shader.cpp | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/ff_fragment_shader.cpp
> b/src/mesa/main/ff_fragment_shader.cpp
> index a698931d99e..935a21624af 100644
> --- a/src/mesa/main/ff_fragment_shader.cpp
> +++ b/src/mesa/main/ff_fragment_shader.cpp
> @@ -229,7 +229,11 @@ static GLbitfield filter_fp_input_mask( GLbitfield
> fp_inputs,
>  * since vertex shader state validation comes after fragment state
>  * validation (see additional comments in state.c).
>  */
> -   if (vertexShader)
> +   if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] != NULL)
> +  vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
> +   else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] != NULL)
> +  vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
> +   else if (vertexShader)
>
>
>
> Shouldn't you also update the if condition on line 178? Otherwise, you
> won't reach the if tree you change when the vertex shader is missing
> (unless that was intended - I am not really familiar with how fixed
> function shaders work alongside new features).
>
>
> You don't have Tesselation / Geometry with fixed function GL, so I think
> this should be fine.
>

Well, this whole file implements fixed function fragment shader, so it will
only be reached when the fragment shader is missing. Unless you meant
that tessellation/geometry
shaders cannot be combined with fixed function vertex shader but fixed
function fragment shader is fine.

Regards,
Gustaw Smolarczyk
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: add ff fragment shader support for geom and tess shaders

2018-06-18 Thread Gustaw Smolarczyk
2018-06-18 4:39 GMT+02:00 Timothy Arceri :

> This is required for compatibility profile support.

---
>  src/mesa/main/ff_fragment_shader.cpp | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/ff_fragment_shader.cpp
> b/src/mesa/main/ff_fragment_shader.cpp
> index a698931d99e..935a21624af 100644
> --- a/src/mesa/main/ff_fragment_shader.cpp
> +++ b/src/mesa/main/ff_fragment_shader.cpp
> @@ -229,7 +229,11 @@ static GLbitfield filter_fp_input_mask( GLbitfield
> fp_inputs,
>  * since vertex shader state validation comes after fragment state
>  * validation (see additional comments in state.c).
>  */
> -   if (vertexShader)
> +   if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY] != NULL)
> +  vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
> +   else if (ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL] != NULL)
> +  vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_TESS_EVAL];
> +   else if (vertexShader)
>


Shouldn't you also update the if condition on line 178? Otherwise, you
won't reach the if tree you change when the vertex shader is missing
(unless that was intended - I am not really familiar with how fixed
function shaders work alongside new features).

You could also move or update the comment that is just above your change.

Regards,
Gustaw Smolarczyk


>vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
> else
>vprog = ctx->VertexProgram.Current;
> --
> 2.17.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/16] radv: Implement VK_KHR_maintenance3.

2018-03-07 Thread Gustaw Smolarczyk
_size = 96;
> +   }
> +   descriptor_alignment = 32;
> +   case VK_DESCRIPTOR_TYPE_SAMPLER:
> +   if 
> (!has_equal_immutable_samplers(binding->pImmutableSamplers,
> binding->descriptorCount)) {
> +   descriptor_size = 16;
> +   descriptor_alignment = 16;
> +   }
> +   break;
> +   default:
> +   unreachable("unknown descriptor type\n");
> +   break;
> +   }
> +
> +   if (size && !align_u64(size, descriptor_alignment)) {
> +   supported = false;
> +   }
> +   size = align_u64(size, descriptor_alignment);
> +   if (descriptor_size && (UINT64_MAX - size) /
> descriptor_size < binding->descriptorCount) {
> +   supported = false;
> +   }
> +   size += binding->descriptorCount * descriptor_size;
> +   }
> +
> +   pSupport->supported = supported;
> +}
> +
>  /*
>   * Pipeline layouts.  These have nothing to do with the pipeline.  They
> are
>   * just muttiple descriptor set layouts pasted together
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 00bb70612e..593cfc9a36 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -875,6 +875,16 @@ void radv_GetPhysicalDeviceProperties2(
> properties->quadOperationsInAllStages = false;
> break;
> }
> +   case 
> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES:
> {
> +   VkPhysicalDeviceMaintenance3Properties
> *properties =
> +   (VkPhysicalDeviceMaintenance3Properties*)ext;
> +   /* Make sure evrything is addressable by a signed
> 32-bit int, and our
>
> Typo: everything
>
> Regards,
> Gustaw Smolarczyk
>
> +* largest descriptors are 96 bytes. */
> +   properties->maxPerSetDescriptors = (1ull << 31) /
> 96;
> +   /* Our buffer size fields allow only this much */
> +   properties->maxMemoryAllocationSize =
> 0xull;
> +   break;
> +   }
> default:
> break;
> }
> diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_
> extensions.py
> index 6fa553e589..3b4f75bff6 100644
> --- a/src/amd/vulkan/radv_extensions.py
> +++ b/src/amd/vulkan/radv_extensions.py
> @@ -70,6 +70,7 @@ EXTENSIONS = [
>  Extension('VK_KHR_incremental_present',   1, True),
>  Extension('VK_KHR_maintenance1',  1, True),
>  Extension('VK_KHR_maintenance2',  1, True),
> +Extension('VK_KHR_maintenance3',  1, True),
>  Extension('VK_KHR_push_descriptor',   1, True),
>  Extension('VK_KHR_relaxed_block_layout',  1, True),
>  Extension('VK_KHR_sampler_mirror_clamp_to_edge',  1, True),
> --
> 2.16.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/egd_tables.py: make the script python 2+3 compatible

2018-03-02 Thread Gustaw Smolarczyk
2018-03-02 12:59 GMT+01:00 Stefan Dirsch <sndir...@suse.de>:

> On Fri, Mar 02, 2018 at 11:47:57AM +, Eric Engestrom wrote:
> > On Friday, 2018-03-02 12:25:11 +0100, Stefan Dirsch wrote:
> > > On Fri, Mar 02, 2018 at 11:03:53AM +, Eric Engestrom wrote:
> > > > On Friday, 2018-03-02 11:41:00 +0100, Stefan Dirsch wrote:
> > > > > Patch by "Tomas Chvatal" <tchva...@suse.com> with modifications
> > > > > by "Michal Srb" <m...@suse.com> to not break python 2.
> > > > >
> > > > > https://bugzilla.suse.com/show_bug.cgi?id=1082303
> > > > >
> > > > > v2:
> > > > > - no longer try to encode a unicode
> > > > > - make use of 'from __future__ import print_function', so semantics
> > > > >   of print statements in python2 are closer to print functions in
> python3
> > > > >
> > > > > https://lists.freedesktop.org/archives/mesa-dev/2018-
> February/187056.html
> > > > >
> > > > > Signed-off-by: Stefan Dirsch <sndir...@suse.de>
> > > > > Reviewed-by: Tomas Chvatal <tchva...@suse.com>
> > > > > ---
> > > > >  src/gallium/drivers/r600/egd_tables.py | 53
> +-
> > > > >  1 file changed, 27 insertions(+), 26 deletions(-)
> > > > >
> > > > > diff --git a/src/gallium/drivers/r600/egd_tables.py
> b/src/gallium/drivers/r600/egd_tables.py
> > > > > index d7b78c7fb1..4796456330 100644
> > > > > --- a/src/gallium/drivers/r600/egd_tables.py
> > > > > +++ b/src/gallium/drivers/r600/egd_tables.py
> > > > > @@ -1,3 +1,4 @@
> > > > > +from __future__ import print_function
> > > > >
> > > > >  CopyRight = '''
> > > > >  /*
> > > > > @@ -60,7 +61,7 @@ class StringTable:
> > > > >  """
> > > > >  fragments = [
> > > > >  '"%s\\0" /* %s */' % (
> > > > > -te[0].encode('string_escape'),
> > > > > +te[0],
>

Hi,

I am not an expert in Python 3, but I have found the following answer
experimentally.

I think the original version is semi-correct, but the encode() method
returns a bytes object which we need to convert to unicode. I think the
following would be fine:

te[0].encode('unicode_escape').decode('utf-8')

Regards,
Gustaw Smolarczyk


> > > >
> > > > I think you still need to escape the string here.
> > >
> > > I don't know how to address this. :-( At least the output of
> > >
> > >   python2 egd_tables.py evergreend.h
> > >   python3 egd_tables.py evergreend.h
> > >
> > > is now identical. Surely this may change with changes in content of
> > > evergreend.h. :-( Ok. I've tried my best.
> >
> > I think you should already land the print() changes, leaving this line
> > as is for now. Won't be valid python3 yet, but this will be the last
> > thing to fix, which someone else might pick up :)
>
> Leaving the line as is won't run with python3.
>
> > With the string escape hunk left out, this patch is
> > Reviewed-by: Eric Engestrom <eric.engest...@imgtec.com>
>
> Thanks, but I'm not really interested in submitting a patch, which doesn't
> fix
> the build for python3.
>
> Stefan
>
> Public Key available
> --
> Stefan Dirsch (Res. & Dev.)   SUSE LINUX GmbH
> Tel: 0911-740 53 0Maxfeldstraße 5
> FAX: 0911-740 53 479  D-90409 Nürnberg
> http://www.suse.deGermany
> ---
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham
> Norton, HRB 21284 (AG Nürnberg)
> ---
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir: remove the abs call in is_neg_power_of_two

2018-02-09 Thread Gustaw Smolarczyk
2018-02-09 0:16 GMT+01:00 Matt Turner <matts...@gmail.com>:

> On Mon, Feb 5, 2018 at 7:16 PM, Vlad Golovkin
> <vlad.golovkin.m...@gmail.com> wrote:
> > val->i32[swizzle[i]] is guaranteed to have non-positive value before the
> > __is_power_of_two call, so unary minus is equivalent to abs in this case.
> > ---
> >  src/compiler/nir/nir_search_helpers.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/src/compiler/nir/nir_search_helpers.h
> b/src/compiler/nir/nir_search_helpers.h
> > index 2e3bd137d6..66e1546ae6 100644
> > --- a/src/compiler/nir/nir_search_helpers.h
> > +++ b/src/compiler/nir/nir_search_helpers.h
> > @@ -80,7 +80,7 @@ is_neg_power_of_two(nir_alu_instr *instr, unsigned
> src, unsigned num_components,
> >case nir_type_int:
> >   if (val->i32[swizzle[i]] > 0)
>

Hi,
I would change it to >= 0, as when the value is zero it cannot be a power
of two. Either way, -0 == 0, so it doesn't matter from correctness
perspective.


> >  return false;
> > - if (!__is_power_of_two(abs(val->i32[swizzle[i]])))
> > + if (!__is_power_of_two(-val->i32[swizzle[i]]))
> >  return false;
>
> I think your transformation is correct, but unnecessary and confusing.
>
> You're right that val->i32[swizzle[i] must be 0 or negative.
> __is_power_of_two() takes an unsigned value, so we need to remove the
> sign bit, which can be done with a negation or an abs().
>
> It takes more effort for me to understand why the negation is correct,
> while the abs() is obvious.
>

abs(x) can be defined as:
#define abs(x) ((x) > 0 ? (x) : -(x))

(other than evaluating (x) twice; also, for 2s-complement arithmetic, > and
be replaced with >=)

Since we already know (x) > 0 doesn't hold, abs(x) can be simplified to
-(x).


>
> Anyone else have a different opinion?


One issue with the previous code is that abs(INT_MIN) is undefined [1], as
you cannot represent -INT_MIN in int since (int)-(unsigned)INT_MIN ==
INT_MIN [2]. Similarly (I think - I lack a quote here other than SO [3])
taking a negative of INT_MIN as-is is undefined, as it cannot be
represented in the int type without wrapping too - you first have to
convert it to unsigned.

However, when you take the negative of an int converted to unsigned, all
will be good. For 2s-complement arithmetic it doesn't matter whether you
take a negative from a signed number as-is or an signed number converted to
unsigned (the only thing that changes is the interpretation of the
resulting bits). And this also holds for INT_MIN, for example for 32-bit
int, INT_MIN == -2^31 and -(unsigned)INT_MIN == (unsigned)INT_MIN == 2^31.
In other words - by first converting to unsigned, we make any overflow
happening during computation defined as wrapping, and we want the result to
be unsigned anyway.

As such, I would recommend changing it to:

__is_power_of_two(-(unsigned)val->i32[swizzle[i]])

Regards,
Gustaw Smolarczyk

[1] http://man7.org/linux/man-pages/man3/abs.3.html
[2] https://wandbox.org/permlink/bqWG5gvNz4WUWFna
[3] https://stackoverflow.com/a/8917528
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] glsl/lower_64bit: extract non-64bit sources from vectors.

2018-02-03 Thread Gustaw Smolarczyk
2018-02-03 11:26 GMT+01:00 Erik Faye-Lund <kusmab...@gmail.com>:

>
>
> On Feb 1, 2018 04:35, "Dave Airlie" <airl...@gmail.com> wrote:
>
> From: Dave Airlie <airl...@redhat.com>
>
> In order to deal with conversions properly we need to extract
> non-64bit sources from vectors instead of expanding them as
> the 64-bit code does.
>
> We need non-64bit sources for the 32->64 conversion functions.
>
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/compiler/glsl/lower_64bit.cpp | 38 ++
> 
>  1 file changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/src/compiler/glsl/lower_64bit.cpp
> b/src/compiler/glsl/lower_64bit.cpp
> index ac62d1db1e..c7c6d1cb31 100644
> --- a/src/compiler/glsl/lower_64bit.cpp
> +++ b/src/compiler/glsl/lower_64bit.cpp
> @@ -52,6 +52,7 @@ using namespace ir_builder;
>
>  namespace lower_64bit {
>  void expand_source(ir_factory &, ir_rvalue *val, ir_variable
> **expanded_src);
> +void extract_source(ir_factory &, ir_rvalue *val, ir_variable
> **extracted_src);
>
>  ir_dereference_variable *compact_destination(ir_factory &,
>   const glsl_type *type,
> @@ -226,6 +227,25 @@ lower_64bit::expand_source(ir_factory ,
>expanded_src[i] = expanded_src[0];
>  }
>
> +void
> +lower_64bit::extract_source(ir_factory ,
> +ir_rvalue *val,
> +ir_variable **extracted_src)
> +{
> +   ir_variable *const temp = body.make_temp(val->type, "tmp");
> +
> +   body.emit(assign(temp, val));
> +   unsigned i;
> +   for (i = 0; i < val->type->vector_elements; i++) {
> +  extracted_src[i] = body.make_temp(val->type->get_scalar_type(),
> "extracted_source");
> +
> +  body.emit(assign(extracted_src[i], swizzle(temp, i, 1)));
> +   }
> +
> +   for (/* empty */; i < 4; i++)
> +  extracted_src[i] = extracted_src[0];
> +}
> +
>  /**
>   * Convert a series of uvec2 results into a single 64-bit integer vector
>   */
> @@ -262,14 +282,24 @@ lower_64bit::lower_op_to_function_call(ir_instruction
> *base_ir,
> void *const mem_ctx = ralloc_parent(ir);
> exec_list instructions;
> unsigned source_components = 0;
> -   const glsl_type *const result_type =
> -  ir->type->base_type == GLSL_TYPE_UINT64
> -  ? glsl_type::uvec2_type : glsl_type::ivec2_type;
> +   const glsl_type *result_type;
> +
> +   if (ir->type->is_64bit()) {
> +  if (ir->type->base_type == GLSL_TYPE_UINT64 ||
> +  ir->type->base_type == GLSL_TYPE_DOUBLE)
> + result_type = glsl_type::uvec2_type;
> +  else
> + result_type = glsl_type::ivec2_type;
> +   } else
> +  result_type = ir->type->get_scalar_type();
>
> ir_factory body(, mem_ctx);
>
> for (unsigned i = 0; i < num_operands; i++) {
> -  expand_source(body, ir->operands[i], src[i]);
> +  if (ir->operands[i]->type->is_64bit())
> + expand_source(body, ir->operands[i], src[i]);
> +  else
> + extract_source(body, ir->operands[i], src[i]);
>
>
> This change looks like a nop to me. Was the different sides of the else
> supposed to do different things?
>

Hi, just passing by.

Though they look very similarly, one is ex-pand-_source and the other is
ex-tract-_source.

Regards,
Gustaw Smolarczyk
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: expand glDrawPixels cache to handle multiple images

2018-01-25 Thread Gustaw Smolarczyk
2018-01-25 18:02 GMT+01:00 Roland Scheidegger <srol...@vmware.com>:

> Am 25.01.2018 um 16:55 schrieb Brian Paul:
> > The newest version of WSI Fusion makes several glDrawPixels calls
> > per frame.  By caching more than one image, we get better performance
> > when panning/zomming the map.
> Still zooming :-)
>
>
>
>
> >
> > v2: move pixel unpack param checking out of cache search loop, per Roland
> > ---
> >  src/mesa/state_tracker/st_cb_drawpixels.c | 196
> +-
> >  src/mesa/state_tracker/st_context.c   |   4 -
> >  src/mesa/state_tracker/st_context.h   |  22 +++-
> >  3 files changed, 154 insertions(+), 68 deletions(-)
> >
> > diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c
> b/src/mesa/state_tracker/st_cb_drawpixels.c
> > index 1d88976..e63f6f7 100644
> > --- a/src/mesa/state_tracker/st_cb_drawpixels.c
> > +++ b/src/mesa/state_tracker/st_cb_drawpixels.c
> > @@ -375,6 +375,131 @@ alloc_texture(struct st_context *st, GLsizei
> width, GLsizei height,
> >
> >
> >  /**
> > + * Search the cache for an image which matches the given parameters.
> > + * \return  pipe_resource pointer if found, NULL if not found.
> > + */
> > +static struct pipe_resource *
> > +search_drawpixels_cache(struct st_context *st,
> > +GLsizei width, GLsizei height,
> > +GLenum format, GLenum type,
> > +const struct gl_pixelstore_attrib *unpack,
> > +const void *pixels)
> > +{
> > +   struct pipe_resource *pt = NULL;
> > +   const GLint bpp = _mesa_bytes_per_pixel(format, type);
> > +   unsigned i;
> > +
> > +   if ((unpack->RowLength != 0 && unpack->RowLength != width) ||
> > +   unpack->SkipPixels != 0 ||
> > +   unpack->SkipRows != 0 ||
> > +   unpack->SwapBytes) {
> > +  /* we don't allow non-default pixel unpacking values */
> > +  return NULL;
> > +   }
> > +
> > +   /* Search cache entries for a match */
> > +   for (i = 0; i < ARRAY_SIZE(st->drawpix_cache.entries); i++) {
> > +  struct drawpix_cache_entry *entry = >drawpix_cache.entries[i];
> > +
> > +  if (width == entry->width &&
> > +  height == entry->height &&
> > +  format == entry->format &&
> > +  type == entry->type &&
> > +  pixels == entry->user_pointer &&
> > +  !_mesa_is_bufferobj(unpack->BufferObj) &&
> Move this line as well?
>
>
>
> > +  entry->image) {
> > + assert(entry->texture);
> > +
> > + /* check if the pixel data is the same */
> > + if (memcmp(pixels, entry->image, width * height * bpp) == 0) {
> > +/* Success - found a cache match */
> > +pipe_resource_reference(, entry->texture);
> > +/* refcount of returned texture should be at least two
> here.  One
> > + * reference for the cache to hold on to, one for the
> caller (which
> > + * it will release), and possibly more held by the driver.
> > + */
> > +assert(pt->reference.count >= 2);
> > +
> > +/* update the age of this entry */
> > +entry->age = ++st->drawpix_cache.age;
> > +
> > +return pt;
> > + }
> > +  }
> > +   }
> > +
> > +   /* no cache match found */
> > +   return NULL;
> > +}
> > +
> > +
> > +/**
> > + * Find the oldest entry in the glDrawPixels cache.  We'll replace this
> > + * one when we need to store a new image.
> > + */
> > +static struct drawpix_cache_entry *
> > +find_oldest_drawpixels_cache_entry(struct st_context *st)
> > +{
> > +   unsigned oldest_age = ~0u, oldest_index = ~0u;
> > +   unsigned i;
> > +
> > +   /* Find entry with oldest (lowest) age */
> > +   for (i = 0; i < ARRAY_SIZE(st->drawpix_cache.entries); i++) {
> > +  const struct drawpix_cache_entry *entry =
> >drawpix_cache.entries[i];
> > +  if (entry->age < oldest_age) {
> > + oldest_age = entry->age;
> > + oldest_index = i;
> > +  }
> > +   }
> > +
> > +   assert(oldest_age != ~0u);
> Ok, if it takes 2 years to hit it, that's probably ok...
>
> Reviewed-by: Roland Scheidegger <srol...@vmware.com>
>

Note that at 13000fps (maximum I c

Re: [Mesa-dev] DRI Configurator replacement announcement

2018-01-16 Thread Gustaw Smolarczyk
2018-01-16 8:33 GMT+01:00 Gert Wollny :

> Hello Jean,
>
> Am Montag, den 15.01.2018, 20:15 + schrieb Jean Hertel:
> > I have written a simply application like DRI Conf tool.
> > It is written using GTKmm and C++.
> Great!
>
> Unfortunately, it didn't link properly, so I send you a pull request to
> correct the Boost_LOCALE dependency.
>
>   https://github.com/jlHertel/adriconf/pull/1
>
> Then it crashed with std::bas_alloc that also printed a Python (?!)
> ValueError (opened an issue for that):
>
> https://github.com/jlHertel/adriconf/issues/2


FYI, the python exception is internal to gdb that probably couldn't use the
pretty printer for the mentioned type.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] ac: change legacy_surf_level::slice_size to dword units

2017-11-27 Thread Gustaw Smolarczyk
2017-11-27 14:43 GMT+01:00 Marek Olšák :

> On Mon, Nov 27, 2017 at 12:54 PM, Nicolai Hähnle 
> wrote:
> > On 23.11.2017 20:35, Marek Olšák wrote:
> >>
> >> From: Marek Olšák 
> >>
> >> The next commit will reduce the size even more.
> >>
> >> v2: typecast to uint64_t manually
> >> ---
> >>   src/amd/common/ac_surface.c|  2 +-
> >>   src/amd/common/ac_surface.h|  3 ++-
> >>   src/amd/vulkan/radv_image.c|  8 
> >>   src/gallium/drivers/r600/evergreen_state.c |  8 
> >>   src/gallium/drivers/r600/r600_state.c  |  8 
> >>   src/gallium/drivers/r600/r600_texture.c| 14
> +++---
> >>   src/gallium/drivers/r600/radeon_uvd.c  |  2 +-
> >>   src/gallium/drivers/radeon/r600_texture.c  | 10 +-
> >>   src/gallium/drivers/radeon/radeon_uvd.c|  2 +-
> >>   src/gallium/drivers/radeonsi/cik_sdma.c|  4 ++--
> >>   src/gallium/drivers/radeonsi/si_dma.c  |  8 
> >>   src/gallium/winsys/radeon/drm/radeon_drm_surface.c |  4 ++--
> >>   12 files changed, 37 insertions(+), 36 deletions(-)
> >>
> >> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> >> index f7600a3..2b6c3fb 100644
> >> --- a/src/amd/common/ac_surface.c
> >> +++ b/src/amd/common/ac_surface.c
> >> @@ -297,21 +297,21 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
> >> ret = AddrComputeSurfaceInfo(addrlib,
> >>  AddrSurfInfoIn,
> >>  AddrSurfInfoOut);
> >> if (ret != ADDR_OK) {
> >> return ret;
> >> }
> >> surf_level = is_stencil ? >u.legacy.stencil_level[level]
> :
> >> >u.legacy.level[level];
> >> surf_level->offset = align64(surf->surf_size,
> >> AddrSurfInfoOut->baseAlign);
> >> -   surf_level->slice_size = AddrSurfInfoOut->sliceSize;
> >> +   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
> >> surf_level->nblk_x = AddrSurfInfoOut->pitch;
> >> surf_level->nblk_y = AddrSurfInfoOut->height;
> >> switch (AddrSurfInfoOut->tileMode) {
> >> case ADDR_TM_LINEAR_ALIGNED:
> >> surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
> >> break;
> >> case ADDR_TM_1D_TILED_THIN1:
> >> surf_level->mode = RADEON_SURF_MODE_1D;
> >> break;
> >> diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
> >> index 1dc95cd..a50aec2 100644
> >> --- a/src/amd/common/ac_surface.h
> >> +++ b/src/amd/common/ac_surface.h
> >> @@ -64,21 +64,22 @@ enum radeon_micro_mode {
> >>   /* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
> >>   #define RADEON_SURF_FMASK   (1 << 21)
> >>   #define RADEON_SURF_DISABLE_DCC (1 << 22)
> >>   #define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
> >>   #define RADEON_SURF_IMPORTED(1 << 24)
> >>   #define RADEON_SURF_OPTIMIZE_FOR_SPACE  (1 << 25)
> >>   #define RADEON_SURF_SHAREABLE   (1 << 26)
> >> struct legacy_surf_level {
> >>   uint64_toffset;
> >> -uint64_tslice_size;
> >> +/* Declare 32 bits of uint64_t, so that multiplication results in
> 64
> >> bits. */
> >> +uint32_tslice_size_dw; /* in dwords; max = 4GB
> /
> >> 4. */
> >
> >
> > The comment is outdated now.
> >
> >
> >>   uint32_tdcc_offset; /* relative offset within
> >> DCC mip tree */
> >>   uint32_tdcc_fast_clear_size;
> >>   uint16_tnblk_x;
> >>   uint16_tnblk_y;
> >>   enum radeon_surf_mode   mode;
> >>   };
> >> struct legacy_surf_layout {
> >>   unsignedbankw:4;  /* max 8 */
> >>   unsignedbankh:4;  /* max 8 */
> >
> > [snip]
> >>
> >> diff --git a/src/gallium/drivers/r600/r600_texture.c
> >> b/src/gallium/drivers/r600/r600_texture.c
> >> index f7c9b63..cc15e53 100644
> >> --- a/src/gallium/drivers/r600/r600_texture.c
> >> +++ b/src/gallium/drivers/r600/r600_texture.c
> >> @@ -171,29 +171,29 @@ static void r600_copy_from_staging_texture(struct
> >> pipe_context *ctx, struct r600
> >>   }
> >> static unsigned r600_texture_get_offset(struct r600_common_screen
> >> *rscreen,
> >> struct r600_texture *rtex,
> >> unsigned level,
> >> const struct pipe_box *box,
> >> unsigned *stride,
> >> unsigned *layer_stride)
> >>   {
> >> *stride = rtex->surface.u.legacy.level[level].nblk_x *
> >> rtex->surface.bpe;
> >> -   

Re: [Mesa-dev] [PATCH] mesa: replace GLenum with GLenum16 in common structures

2017-11-14 Thread Gustaw Smolarczyk
2017-11-14 15:04 GMT+01:00 Marek Olšák :
> On Mon, Nov 13, 2017 at 10:19 PM, Ian Romanick  wrote:
>> On 11/08/2017 07:16 PM, Marek Olšák wrote:
>>> From: Marek Olšák 
>>>
>>> For lower CPU cache usage. All enums fit within 2 bytes.
>>
>> Have you benchmarked this on anything?  My recollection is that for many
>> things loads and stores of 16-bit values is more expensive than 8- or
>> 32-bit values on 64-bit architectures.  More recent CPUs may have
>> changed in this respect... I think that was back in the Core2 kind of
>> time frame, but I thought it also applied to AMD CPUs.

According to Agner [1] (looking at the instruction tables), there was
a penalty for 8 and 16 bit reads (+1 cycle of latency) at least on AMD
K8, K10 and Jaguar processors. The MOVZX and MOVSX instructions (often
used when dealing with sub-32bit loads) are also slower than direct
loads (with the same +1 cycle of latency penalty). On Bulldozer based
CPUs the penalty is only left when doing MOVSX (i.e. zero-extending
and loading to a smaller register is as fast as a 32-bit load). Zen
based CPUs has removed even this, so all modern AMD CPUs shouldn't be
slower (other than potential increase in code size due to usage of
0x66 prefixes when dealing with 16-bit quantities; 8-bit load/stores
have no such penalty).

I don't see any problem on any modern Intel CPUs since at least Core2.

[1] http://www.agner.org/optimize/
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] util/queue: fix a race condition in the fence code

2017-09-28 Thread Gustaw Smolarczyk
2017-09-28 20:21 GMT+02:00 Nicolai Hähnle <nhaeh...@gmail.com>:
> On 28.09.2017 19:18, Gustaw Smolarczyk wrote:
>>
>> 2017-09-28 18:52 GMT+02:00 Marek Olšák <mar...@gmail.com>:
>>>
>>> A clearer comment would be: "Don't destroy the fence when it's in the
>>> middle of util_queue_fence_signal (signalled but not unlocked yet
>>> because util_queue_fence_is_signalled doesn't lock). Instead, wait
>>> until util_queue_fence_signal returns and then destroy it."
>
>
> Sure, I can change that.
>
> [snip]
>
>> What about the ones that wait in util_queue_fence_wait? I think it's
>> possible for the following:
>>
>> ---
>>
>>   util_queue_fence_wait (begin)
>>
>>   ...
>>
>>   cnd_wait(>cond, >mutex);
>> util_queue_fence_signal (begin)
>> mtx_lock(>mutex);
>> fence->signalled = true;
>> cnd_broadcast(>cond);
>> mtx_unlock(>mutex);
>> util_queue_fence_signal (end)
>>  util_queue_fence_is_signalled
>>  util_queue_fence_destroy (begin)
>>  mtx_lock(>mutex);
>>  mtx_unlock(>mutex);
>>  cnd_destroy(>cond);
>>  mtx_destroy(>mutex);
>>  util_queue_fence_destroy (end)
>>
>>   mtx_unlock(>mutex); <--- use after free
>>
>>   util_queue_fence_wait (end)
>> ---
>>
>> Unless it is defined that the race between mtx_unlock and mtx_destroy
>> is automagically fixed (like mtx_destroy performs a lock operation
>> inside of itself; pthread_mutex_* don't do that IIUC).
>
>
> If I'm understanding you correctly, there are three threads in play here.
> The question is: In your example, what is the contract between the different
> threads?
>
> The thread which calls util_queue_fence_destroy is responsible for ensuring
> that no other thread accesses the fence anymore.
>
> The contract between the util_queue and its users is that after signaling
> the fence of a job, the queue will not touch that job anymore. So the user
> of util_queue can destroy the job (and hence its fence) after the fence is
> signaled.
>
> I'm pretty sure your example cannot possibly make sense. You have to have
> some synchronization external to u_queue.c between the thread running
> util_queue_fence_wait and the thread running util_queue_fence_destroy to
> ensure that this doesn't happen.

Right. I was just taking a look without considering the usage. If the
thread that destroys the fence takes responsibility for it to never be
waited upon ever again (for example by being the only thread that ever
waits for it and stopping doing that after the destroy) then I believe
no problematic schedule could happen that would still exercise the
use-after-free issue. I see now that only the race between
wait+destroy and signal should be considered and this patch fixes it.

FWIW,
Reviewed-by: Gustaw Smolarczyk <wielkie...@gmail.com>

Regards,
Gustaw

> Cheers,
> Nicolai
>
>>
>> Regards,
>> Gustaw
>>
>
>
> --
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] util/queue: fix a race condition in the fence code

2017-09-28 Thread Gustaw Smolarczyk
2017-09-28 18:52 GMT+02:00 Marek Olšák :
> A clearer comment would be: "Don't destroy the fence when it's in the
> middle of util_queue_fence_signal (signalled but not unlocked yet
> because util_queue_fence_is_signalled doesn't lock). Instead, wait
> until util_queue_fence_signal returns and then destroy it."
>
> Reviewed-by: Marek Olšák 
>
> Marek
>
> On Thu, Sep 28, 2017 at 6:10 PM, Nicolai Hähnle  wrote:
>> From: Nicolai Hähnle 
>>
>> A tempting alternative fix would be adding a lock/unlock pair in
>> util_queue_fence_is_signalled. However, that wouldn't actually
>> improve anything in the semantics of util_queue_fence_is_signalled,
>> while making that test much more heavy-weight. So this lock/unlock
>> pair in util_queue_fence_destroy for "flushing out" other threads
>> that may still be in util_queue_fence_signal looks like the better
>> fix.
>>
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/util/u_queue.c | 13 +
>>  1 file changed, 13 insertions(+)
>>
>> diff --git a/src/util/u_queue.c b/src/util/u_queue.c
>> index 449da7dc9ab..82a761e8420 100644
>> --- a/src/util/u_queue.c
>> +++ b/src/util/u_queue.c
>> @@ -113,20 +113,33 @@ util_queue_fence_init(struct util_queue_fence *fence)
>> memset(fence, 0, sizeof(*fence));
>> (void) mtx_init(>mutex, mtx_plain);
>> cnd_init(>cond);
>> fence->signalled = true;
>>  }
>>
>>  void
>>  util_queue_fence_destroy(struct util_queue_fence *fence)
>>  {
>> assert(fence->signalled);
>> +
>> +   /* Protect against the following race:
>> +*
>> +* util_queue_fence_signal (begin)
>> +*   fence->signalled = true;
>> +* 
>> util_queue_fence_is_signalled
>> +* util_queue_fence_destroy
>> +*   cnd_broadcast(>cond); <-- use-after-free
>> +* util_queue_fence_signal (end)
>> +*/
>> +   mtx_lock(>mutex);
>> +   mtx_unlock(>mutex);

What about the ones that wait in util_queue_fence_wait? I think it's
possible for the following:

---

 util_queue_fence_wait (begin)

 ...

 cnd_wait(>cond, >mutex);
util_queue_fence_signal (begin)
mtx_lock(>mutex);
fence->signalled = true;
cnd_broadcast(>cond);
mtx_unlock(>mutex);
util_queue_fence_signal (end)
util_queue_fence_is_signalled
util_queue_fence_destroy (begin)
mtx_lock(>mutex);
mtx_unlock(>mutex);
cnd_destroy(>cond);
mtx_destroy(>mutex);
util_queue_fence_destroy (end)

 mtx_unlock(>mutex); <--- use after free

 util_queue_fence_wait (end)
---

Unless it is defined that the race between mtx_unlock and mtx_destroy
is automagically fixed (like mtx_destroy performs a lock operation
inside of itself; pthread_mutex_* don't do that IIUC).

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] radeonsi: fix border color translation for integer textures

2017-09-26 Thread Gustaw Smolarczyk
2017-09-26 16:46 GMT+02:00 Nicolai Hähnle <nhaeh...@gmail.com>:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> This fixes the extremely unlikely case that an application uses
> 0x8000 or 0x3f80 as border color for an integer texture and
> helps in the also, but perhaps slightly less, unlikely case that 1 is
> used as a border color.

Hi,

I see that it fixes the wrong optimization in si_translate_border_color.

However, I also see that for floating point textures this will change
-0.0 into 0.0 (if I understand how
V_008F3C_SQ_TEX_BORDER_COLOR_*_BLACK work). I don't know GL rules
enough to judge that (whether 0.0 and -0.0 should be
indistinguishable), but is that ok?

Regards,
Gustaw Smolarczyk
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radv: do not use a bitfield when dirtying the vertex buffers

2017-09-06 Thread Gustaw Smolarczyk
2017-09-06 15:53 GMT+02:00 Samuel Pitoiset :
> Useless to track which one has been updated because we
> re-upload all the vertex buffers in one shot.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c | 5 +++--
>  src/amd/vulkan/radv_private.h| 2 +-
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index ed2984eb5a..cc9a758d36 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -1629,7 +1629,7 @@ radv_cmd_buffer_update_vertex_descriptors(struct 
> radv_cmd_buffer *cmd_buffer)
> radv_emit_userdata_address(cmd_buffer, 
> cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
>AC_UD_VS_VERTEX_BUFFERS, va);
> }
> -   cmd_buffer->state.vb_dirty = 0;
> +   cmd_buffer->state.vb_dirty = false;
>  }
>
>  static void
> @@ -2049,8 +2049,9 @@ void radv_CmdBindVertexBuffers(
> for (uint32_t i = 0; i < bindingCount; i++) {
> vb[firstBinding + i].buffer = 
> radv_buffer_from_handle(pBuffers[i]);
> vb[firstBinding + i].offset = pOffsets[i];
> -   cmd_buffer->state.vb_dirty |= 1 << (firstBinding + i);
> }
> +
> +   cmd_buffer->state.vb_dirty = true;
>  }
>
>  void radv_CmdBindIndexBuffer(
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 65ec712707..7c5dac3240 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -757,7 +757,7 @@ struct radv_attachment_state {
>  };
>
>  struct radv_cmd_state {
> -   uint32_t  vb_dirty;
> +   bool  vb_dirty;

Maybe move it below the dirty field in order to save 8 bytes? You
might also move the predicating field in order to save another 8.

That might be a candidate for a different commit though...

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: update the extensions that are enabled for 460

2017-08-02 Thread Gustaw Smolarczyk
2017-08-02 20:57 GMT+02:00 Samuel Pitoiset :
> Other ones are either unsupported or don't have any helper
> function checks.
>
> v2: - fix ARB_shader_draw_parameters system value names
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/compiler/glsl/builtin_functions.cpp |  6 --
>  src/compiler/glsl/builtin_variables.cpp | 12 +---
>  2 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/src/compiler/glsl/builtin_functions.cpp 
> b/src/compiler/glsl/builtin_functions.cpp
> index 84833bdd7d..5a0c78ec4a 100644
> --- a/src/compiler/glsl/builtin_functions.cpp
> +++ b/src/compiler/glsl/builtin_functions.cpp
> @@ -483,7 +483,8 @@ shader_atomic_counters(const _mesa_glsl_parse_state 
> *state)
>  static bool
>  shader_atomic_counter_ops(const _mesa_glsl_parse_state *state)
>  {
> -   return state->ARB_shader_atomic_counter_ops_enable;
> +   return (state->is_version(460, 0) ||
> +   state->ARB_shader_atomic_counter_ops_enable);
>  }
>
>  static bool
> @@ -606,7 +607,8 @@ barrier_supported(const _mesa_glsl_parse_state *state)
>  static bool
>  vote(const _mesa_glsl_parse_state *state)
>  {
> -   return state->ARB_shader_group_vote_enable;
> +   return (state->is_version(460, 0) ||
> +   state->ARB_shader_group_vote_enable);
>  }
>
>  static bool
> diff --git a/src/compiler/glsl/builtin_variables.cpp 
> b/src/compiler/glsl/builtin_variables.cpp
> index 19d427e4bc..56163e543f 100644
> --- a/src/compiler/glsl/builtin_variables.cpp
> +++ b/src/compiler/glsl/builtin_variables.cpp
> @@ -1022,9 +1022,15 @@ builtin_variable_generator::generate_vs_special_vars()
> if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
>add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID");
> if (state->ARB_shader_draw_parameters_enable) {
> -  add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB");
> -  add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, 
> "gl_BaseInstanceARB");
> -  add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
> +  if (state->is_version(460, 0)) {
> + add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertex");
> + add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, 
> "gl_BaseInstance");
> + add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawID");
> +  } else {
> + add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, 
> "gl_BaseVertexARB");
> + add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, 
> "gl_BaseInstanceARB");
> + add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB");
> +  }

I am not a GLSL expert, but doesn't that mean that, with GLSL 4.60,
the gl_BaseVertex et. al. system values will only be defined when
ARB_shader_draw_parameters extension is enabled? I think you meant:

if (state->is_version(460, 0)) {
  // add ARB-less system values
}
if (state->ARB_shader_draw_parameters_enable) {
  // add ARB-ful system values
}

(See for example gl_FragDepth and gl_FragDepthEXT)

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] AMD Tahiti: A question about PFP firmware

2017-07-24 Thread Gustaw Smolarczyk
$ ./piglit run gpu -t arb_draw_indirect results/indirect --dmesg
[12/12] pass: 11, fail: 1
Thank you for running Piglit!
Results have been written to /home/bigos/src/piglit/results/indirect

The only one failing is
spec/arb_draw_indirect/arb_draw_indirect-draw-arrays-prim-restart.
However, it is also failing with unmodified mesa (i.e. it's not a
regression).

But...

I see no reference to MDI and draw_parameters in these piglit tests. I
have thus tested -t arb_indirect_parameters -t
arb_shader_draw_parameters and all 12 of them pass now (they were
skipped before). So at least piglit seems to work with my firmware.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] AMD Tahiti: A question about PFP firmware

2017-07-22 Thread Gustaw Smolarczyk
Hello,

While looking at the extension list on my Tahiti GPU (SI) I have found
that ARB_draw_parameters is missing. Looking at the code, it requires
ME firmware version >= 87 and PFP firmware version >= 121. While the
first is satisfied, the second is not [1]. I believe I use the newest
TAHITI firmware from the linux-firmware repository [2] (I use the
lower-case tahiti_{me,pfp}.bin files). I use the amdgpu kernel driver
instead of radeon (not sure if that matters since both of them should
use the same firmware files); well, if I didn't the versions would be
0 since only amdgpu winsys queries them.

Is there maybe some unreleased firmware file with newer version for my
hardware? Or is the check a nice way of saying "if you want hardware
accelerated MDI, buy newer hardware"?

[1] https://pastebin.com/BpYiJj0Z
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/radeon

Regards,
Gustaw

P.S. I tried to ask on IRC, but I was unable to write a message on
#dri-devel channel as it was rejected (using irc.freenode.net). May
there be something I was missing?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v6 1/3] util: Add util_strlcpy

2017-07-06 Thread Gustaw Smolarczyk
2017-07-06 20:52 GMT+02:00 Robert Foss <robert.f...@collabora.com>:
> On Wed, 2017-07-05 at 08:46 -0600, Brian Paul wrote:
>> On 07/05/2017 12:57 AM, Robert Foss wrote:
>> > Add local strlcpy implementation.
>> >
>> > Signed-off-by: Robert Foss <robert.f...@collabora.com>
>> > ---
>> > Changes since v5:
>> >Actually include changes from v5 in patch
>> >
>> > Changes since v4:
>> >Gustaw Smolarczyk <wielkie...@gmail.com>
>> > - Make util_strlcpy have the same behaviour as strlcpy
>> >
>> > Changes since v3:
>> >Matt Turner <matts...@gmail.com>
>> > - Change name of util_strncpy to util_strlcpy
>> >
>> > Changes since v2:
>> >Brian Paul <bri...@vmware.com>
>> >  - Patch added
>> >
>> >
>> >   src/util/u_string.h | 9 +
>> >   1 file changed, 9 insertions(+)
>> >
>> > diff --git a/src/util/u_string.h b/src/util/u_string.h
>> > index e88e13f42c..bbabcbc7cb 100644
>> > --- a/src/util/u_string.h
>> > +++ b/src/util/u_string.h
>> > @@ -48,6 +48,15 @@
>> >   extern "C" {
>> >   #endif
>> >
>> > +static inline size_t
>> > +util_strlcpy(char *dst, const char *src, size_t n)
>> > +{
>> > +   strncpy(dst, src, n);
>> > +   dst[n-1] = '\0';
>> > +
>> > +   return strnlen(src, n);
>> > +}
>>
>> This effectively walks over the source string twice.  I'd suggest
>> just
>> using your own loop.
>>
>
> I don't think a solution can be built using strlen if we want to only
> iterate once. So one iteration will have to do both counting and
> copying, which rules out using memcpy too.

I would say that iterating twice should be fine since we also get to
use the optimized strlen and memcpy routines. Your open-coded version
doesn't vectorize nor unroll on gcc-7 -Ofast (as far as my simple
testing showed).

That said, performance of this function doesn't matter much - what
matters the most is ease of use and correctness.

>
> So how about something like this:
>
> static inline size_t
> util_strlcpy(char *dst, const char *src, size_t n)
> {
>size_t src_len = 0;
>
>/* Copy chars for as long as there is space in dst */
>while (src_len < (n - 1) && *src[src_len] != "\0") {

If n == 0, (n - 1) is the same as SIZE_MAX. I think you should check
for n == 0 and then just return strlen(src) before this loop is
entered. Or, alternatively, wrap the first loop and dst[src_len] =
'\0' assignment in if (n != 0) { ... }.
Also, use '\0' instead of "\0". And remove * before src[src_len].

>   dst[src_len] = src[src_len];
>   src_len++;
>}
>
>/* The last char of either src or if dst is filled
> * should always be \0.
> */
>dst[src_len] = '\0';
>
>/* Continue on finding the end of src */
>while (src[src_len] != "\0") {

Use '\0' instead of "\0".

>   src_len++;
>}
>
>return src_len;
> }
>
> Rob.


Other than these, it looks correct.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v6 1/3] util: Add util_strlcpy

2017-07-05 Thread Gustaw Smolarczyk
2017-07-05 8:57 GMT+02:00 Robert Foss <robert.f...@collabora.com>:
> Add local strlcpy implementation.
>
> Signed-off-by: Robert Foss <robert.f...@collabora.com>
> ---
> Changes since v5:
>   Actually include changes from v5 in patch
>
> Changes since v4:
>   Gustaw Smolarczyk <wielkie...@gmail.com>
>- Make util_strlcpy have the same behaviour as strlcpy
>
> Changes since v3:
>   Matt Turner <matts...@gmail.com>
>- Change name of util_strncpy to util_strlcpy
>
> Changes since v2:
>   Brian Paul <bri...@vmware.com>
> - Patch added
>
>
>  src/util/u_string.h | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/src/util/u_string.h b/src/util/u_string.h
> index e88e13f42c..bbabcbc7cb 100644
> --- a/src/util/u_string.h
> +++ b/src/util/u_string.h
> @@ -48,6 +48,15 @@
>  extern "C" {
>  #endif
>
> +static inline size_t
> +util_strlcpy(char *dst, const char *src, size_t n)
> +{
> +   strncpy(dst, src, n);
> +   dst[n-1] = '\0';
> +
> +   return strnlen(src, n);

I don't think you can use strnlen here. strlcpy returns the number of
characters in src without any restrictions. If src happens not to be
null-terminated, it is already undefined behavior (and strnlen won't
necessarily help if src is just before an unmapped page and n is
sufficiently large). In general, you shouldn't use strn* functions
when working with null-terminated strings.

You might also use strlen+memcpy instead of strncpy+strlen to
potentially achieve higher performance (strlen and memcpy are probably
ones of the most optimized functions in any runtime). Maybe something
like this:

static inline size_t
util_strlcpy(char *dst, const char *src, size_t n)
{
   const size_t src_len = strlen(src);

   if (src_len < n) {
  memcpy(dst, src, src_len + 1);
   } else {
  memcpy(dst, src, n - 1);
  dst[n - 1] = '\0';
   }

   return src_len;
}

> +}
> +
>  #ifdef _GNU_SOURCE
>
>  #define util_strchrnul strchrnul
> --
> 2.11.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 1/3] util: Add util_strlcpy

2017-07-04 Thread Gustaw Smolarczyk
4 lip 2017 14:25 "Robert Foss"  napisał(a):

Add local strlcpy implementation.

Signed-off-by: Robert Foss 
---
Changes since v3:
  Matt Turner 
   - Change name of util_strncpy to util_strlcpy

Changes since v2:
  Brian Paul 
- Patch added

 src/util/u_string.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/util/u_string.h b/src/util/u_string.h
index e88e13f42c..77014bc744 100644
--- a/src/util/u_string.h
+++ b/src/util/u_string.h
@@ -48,6 +48,15 @@
 extern "C" {
 #endif

+static inline char*
+util_strlcpy(char *dst, const char *src, size_t n)
+{
+   strncpy(dst, src, n);
+   dst[n-1] = '\0';
+
+   return dst;
+}


The original strlcpy returns the amount of characters of the source string
which can be used in order to know whether the truncation happened or not.

It's probably unnecessary right know but might be useful in the future.

Gustaw

+
 #ifdef _GNU_SOURCE

 #define util_strchrnul strchrnul
--
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 1/3] util: Add util_strncpy

2017-07-03 Thread Gustaw Smolarczyk
2017-07-03 6:10 GMT+02:00 Robert Foss :
> On Sun, 2017-07-02 at 21:06 -0700, Matt Turner wrote:
>> Otherwise known as strlcpy()?
>
> I didn't realize this, a wrapper would be needed for all platformsou
> that don't support it natively.
>
> Do you know which platforms support strlcpy?
>
>
> Rob.

I think various BSD variants support strlcpy ([1] as an example).

Gustaw

[1] https://www.freebsd.org/cgi/man.cgi?query=strlcpy=3
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v1 3/3] gallium/hud: Prevent buffer overflow in hud_thread_busy_install

2017-06-29 Thread Gustaw Smolarczyk
2017-06-29 4:41 GMT+02:00 Robert Foss :
> Switch to using strncopy to avoid potential overflow of
> name array in struct hud_graph.
>
> Coverity-id: 1413760
>
> Signed-off-by: Robert Foss 
> ---
>  src/gallium/auxiliary/hud/hud_cpu.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/auxiliary/hud/hud_cpu.c 
> b/src/gallium/auxiliary/hud/hud_cpu.c
> index 468c36207b..ceadccb377 100644
> --- a/src/gallium/auxiliary/hud/hud_cpu.c
> +++ b/src/gallium/auxiliary/hud/hud_cpu.c
> @@ -288,7 +288,7 @@ hud_thread_busy_install(struct hud_pane *pane, const char 
> *name, bool main)
> if (!gr)
>return;
>
> -   strcpy(gr->name, name);
> +   strcpy(gr->name, name, HUD_GRAPH_NAME_LEN);

strncpy?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallium/util: Allow pipe_resource_reference to be inlined again

2017-06-13 Thread Gustaw Smolarczyk
2017-06-13 12:07 GMT+02:00 Michel Dänzer :
> On 13/06/17 06:51 PM, Timothy Arceri wrote:
>> On 13/06/17 19:22, Michel Dänzer wrote:
>>> From: Michel Dänzer 
>>>
>>> It calling itself recursively prevented it from being inlined, resulting
>>> in a copy being generated in every compilation unit referencing it. This
>>> bloated the text segment of the Gallium mega-driver *_dri.so by ~5%,
>>> and might also have impacted performance.
>>>
>>> Fixes: ecd6fce2611e ("mesa/st: support lowering multi-planar YUV")
>>> Signed-off-by: Michel Dänzer 
>>> ---
>>>   src/gallium/auxiliary/util/u_inlines.h | 13 -
>>>   1 file changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/auxiliary/util/u_inlines.h
>>> b/src/gallium/auxiliary/util/u_inlines.h
>>> index 6a3d5043cf..9a08779f2a 100644
>>> --- a/src/gallium/auxiliary/util/u_inlines.h
>>> +++ b/src/gallium/auxiliary/util/u_inlines.h
>>> @@ -131,13 +131,24 @@ pipe_surface_release(struct pipe_context *pipe,
>>> struct pipe_surface **ptr)
>>>   static inline void
>>> +pipe_resource_next_reference(struct pipe_resource **ptr, struct
>>> pipe_resource *tex)
>>> +{
>>> +   struct pipe_resource *old_tex = *ptr;
>>> +
>>> +   if (pipe_reference_described(&(*ptr)->reference, >reference,
>>> +
>>> (debug_reference_descriptor)debug_describe_resource))
>>
>> Why don't we need to call pipe_resource_next_reference(_tex->next,
>> NULL) again here?
>
> Good catch, thanks. I'll work on a v3 patch fixing this tomorrow.
>

Hello,

If pipe_resource_next_reference is not meant to be inlined, maybe put
it in a source file?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/21] mesa: add validate_depth_buffer() helper

2017-06-01 Thread Gustaw Smolarczyk
Same problem as in the previous patch.

Regards,
Gustaw


1 cze 2017 15:07 "Samuel Pitoiset"  napisał(a):

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/main/blit.c | 105 +++---
-
 1 file changed, 56 insertions(+), 49 deletions(-)

diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c
index 207ce7d501..455a9a647f 100644
--- a/src/mesa/main/blit.c
+++ b/src/mesa/main/blit.c
@@ -233,6 +233,60 @@ validate_stencil_buffer(struct gl_context *ctx, struct
gl_framebuffer *readFb,
}
 }

+
+static void
+validate_depth_buffer(struct gl_context *ctx, struct gl_framebuffer
*readFb,
+  struct gl_framebuffer *drawFb, GLbitfield *mask,
+  bool no_error, const char *func)
+{
+   struct gl_renderbuffer *readRb =
+  readFb->Attachment[BUFFER_DEPTH].Renderbuffer;
+   struct gl_renderbuffer *drawRb =
+  drawFb->Attachment[BUFFER_DEPTH].Renderbuffer;
+
+   /* From the EXT_framebuffer_object spec:
+*
+* "If a buffer is specified in  and does not exist in both
+* the read and draw framebuffers, the corresponding bit is silently
+* ignored."
+*/
+   if (readRb == NULL || drawRb == NULL) {
+  *mask &= ~GL_DEPTH_BUFFER_BIT;
+   } else if (!no_error) {
+  int read_s_bit, draw_s_bit;
+
+  if (_mesa_is_gles3(ctx) && (drawRb == readRb)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(source and destination depth buffer cannot be the
"
+ "same)", func);
+ return;
+  }
+
+  if ((_mesa_get_format_bits(readRb->Format, GL_DEPTH_BITS) !=
+   _mesa_get_format_bits(drawRb->Format, GL_DEPTH_BITS)) ||
+  (_mesa_get_format_datatype(readRb->Format) !=
+   _mesa_get_format_datatype(drawRb->Format))) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(depth attachment format mismatch)", func);
+ return;
+  }
+
+  read_s_bit = _mesa_get_format_bits(readRb->Format, GL_STENCIL_BITS);
+  draw_s_bit = _mesa_get_format_bits(drawRb->Format, GL_STENCIL_BITS);
+
+  /* If both buffers also have stencil data, the stencil formats must
+   * match as well.  If one doesn't have stencil, it's not blitted, so
+   * we should ignore the stencil format check.
+   */
+  if (read_s_bit > 0 && draw_s_bit > 0 && read_s_bit != draw_s_bit) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(depth attachment stencil bits mismatch)", func);
+ return;
+  }
+   }
+}
+
+
 static void
 blit_framebuffer(struct gl_context *ctx,
  struct gl_framebuffer *readFb, struct gl_framebuffer
*drawFb,
@@ -376,55 +430,8 @@ blit_framebuffer(struct gl_context *ctx,
if (mask & GL_STENCIL_BUFFER_BIT)
   validate_stencil_buffer(ctx, readFb, drawFb, , false, func);

-   if (mask & GL_DEPTH_BUFFER_BIT) {
-  struct gl_renderbuffer *readRb =
- readFb->Attachment[BUFFER_DEPTH].Renderbuffer;
-  struct gl_renderbuffer *drawRb =
- drawFb->Attachment[BUFFER_DEPTH].Renderbuffer;
-
-  /* From the EXT_framebuffer_object spec:
-   *
-   * "If a buffer is specified in  and does not exist in both
-   * the read and draw framebuffers, the corresponding bit is
silently
-   * ignored."
-   */
-  if ((readRb == NULL) || (drawRb == NULL)) {
- mask &= ~GL_DEPTH_BUFFER_BIT;
-  }
-  else {
- int read_s_bit, draw_s_bit;
-
- if (_mesa_is_gles3(ctx) && (drawRb == readRb)) {
-_mesa_error(ctx, GL_INVALID_OPERATION,
-"%s(source and destination depth "
-"buffer cannot be the same)", func);
-return;
- }
-
- if ((_mesa_get_format_bits(readRb->Format, GL_DEPTH_BITS) !=
-  _mesa_get_format_bits(drawRb->Format, GL_DEPTH_BITS)) ||
- (_mesa_get_format_datatype(readRb->Format) !=
-  _mesa_get_format_datatype(drawRb->Format))) {
-_mesa_error(ctx, GL_INVALID_OPERATION,
-"%s(depth attachment format mismatch)", func);
-return;
- }
-
- read_s_bit = _mesa_get_format_bits(readRb->Format,
GL_STENCIL_BITS);
- draw_s_bit = _mesa_get_format_bits(drawRb->Format,
GL_STENCIL_BITS);
-
- /* If both buffers also have stencil data, the stencil formats
must
-  * match as well.  If one doesn't have stencil, it's not blitted,
so
-  * we should ignore the stencil format check.
-  */
- if (read_s_bit > 0 && draw_s_bit > 0 && read_s_bit != draw_s_bit)
{
-_mesa_error(ctx, GL_INVALID_OPERATION,
-"%s(depth attachment stencil bits mismatch)",
func);
-return;
- }
-  }
-   }
-
+   if (mask & GL_DEPTH_BUFFER_BIT)
+  validate_depth_buffer(ctx, readFb, drawFb, , false, func);

Re: [Mesa-dev] [PATCH 09/21] mesa: add validate_stencil_buffer() helper

2017-06-01 Thread Gustaw Smolarczyk
This time, send it to the list too.
Gustaw


1 cze 2017 15:45 "Gustaw Smolarczyk" <wielkie...@gmail.com> napisał(a):

1 cze 2017 15:07 "Samuel Pitoiset" <samuel.pitoi...@gmail.com> napisał(a):

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/mesa/main/blit.c | 111 +++---
-
 1 file changed, 58 insertions(+), 53 deletions(-)

diff --git a/src/mesa/main/blit.c b/src/mesa/main/blit.c
index 2c0300eab3..207ce7d501 100644
--- a/src/mesa/main/blit.c
+++ b/src/mesa/main/blit.c
@@ -178,6 +178,62 @@ is_valid_blit_filter(const struct gl_context *ctx,
GLenum filter)


 static void


Shouldn't it return a bool so that the caller will know whether the
validation passed or not?

+validate_stencil_buffer(struct gl_context *ctx, struct gl_framebuffer
*readFb,
+struct gl_framebuffer *drawFb, GLbitfield *mask,
+bool no_error, const char *func)
+{
+   struct gl_renderbuffer *readRb =
+  readFb->Attachment[BUFFER_STENCIL].Renderbuffer;
+   struct gl_renderbuffer *drawRb =
+  drawFb->Attachment[BUFFER_STENCIL].Renderbuffer;
+
+   /* From the EXT_framebuffer_object spec:
+*
+* "If a buffer is specified in  and does not exist in both
+* the read and draw framebuffers, the corresponding bit is silently
+* ignored."
+*/
+   if (readRb == NULL || drawRb == NULL) {
+  *mask &= ~GL_STENCIL_BUFFER_BIT;
+   } else if (!no_error) {
+  int read_z_bits, draw_z_bits;
+
+  if (_mesa_is_gles3(ctx) && (drawRb == readRb)) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(source and destination stencil buffer cannot be
the "
+ "same)", func);
+ return;
+  }
+
+  if (_mesa_get_format_bits(readRb->Format, GL_STENCIL_BITS) !=
+  _mesa_get_format_bits(drawRb->Format, GL_STENCIL_BITS)) {
+ /* There is no need to check the stencil datatype here, because
+  * there is only one: GL_UNSIGNED_INT.
+  */
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(stencil attachment format mismatch)", func);
+ return;
+  }
+
+  read_z_bits = _mesa_get_format_bits(readRb->Format, GL_DEPTH_BITS);
+  draw_z_bits = _mesa_get_format_bits(drawRb->Format, GL_DEPTH_BITS);
+
+  /* If both buffers also have depth data, the depth formats must match
+   * as well.  If one doesn't have depth, it's not blitted, so we
should
+   * ignore the depth format check.
+   */
+  if (read_z_bits > 0 && draw_z_bits > 0 &&
+  (read_z_bits != draw_z_bits ||
+   _mesa_get_format_datatype(readRb->Format) !=
+   _mesa_get_format_datatype(drawRb->Format))) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(stencil attachment depth format mismatch)", func);
+ return;
+  }
+   }
+}
+
+static void
 blit_framebuffer(struct gl_context *ctx,
  struct gl_framebuffer *readFb, struct gl_framebuffer
*drawFb,
  GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
@@ -317,59 +373,8 @@ blit_framebuffer(struct gl_context *ctx,
   }
}

-   if (mask & GL_STENCIL_BUFFER_BIT) {
-  struct gl_renderbuffer *readRb =
- readFb->Attachment[BUFFER_STENCIL].Renderbuffer;
-  struct gl_renderbuffer *drawRb =
- drawFb->Attachment[BUFFER_STENCIL].Renderbuffer;
-
-  /* From the EXT_framebuffer_object spec:
-   *
-   * "If a buffer is specified in  and does not exist in both
-   * the read and draw framebuffers, the corresponding bit is
silently
-   * ignored."
-   */
-  if ((readRb == NULL) || (drawRb == NULL)) {
- mask &= ~GL_STENCIL_BUFFER_BIT;
-  }
-  else {
- int read_z_bits, draw_z_bits;
-
- if (_mesa_is_gles3(ctx) && (drawRb == readRb)) {
-_mesa_error(ctx, GL_INVALID_OPERATION,
-"%s(source and destination stencil "
-"buffer cannot be the same)", func);
-return;
- }
-
- if (_mesa_get_format_bits(readRb->Format, GL_STENCIL_BITS) !=
- _mesa_get_format_bits(drawRb->Format, GL_STENCIL_BITS)) {
-/* There is no need to check the stencil datatype here, because
- * there is only one: GL_UNSIGNED_INT.
- */
-_mesa_error(ctx, GL_INVALID_OPERATION,
-"%s(stencil attachment format mismatch)", func);
-return;
- }
-
- read_z_bits = _mesa_get_format_bits(readRb->Format,
GL_DEPTH_BITS);
- draw_z_bits = _mesa_get_format_bits(drawRb->Format,
GL_DEPTH_BITS);
-
- /* If both buffers also have depth da

Re: [Mesa-dev] [PATCH 14/16] anv: Advertise both 32-bit and 48-bit heaps when we have enough memory

2017-05-19 Thread Gustaw Smolarczyk
2017-05-18 23:01 GMT+02:00 Jason Ekstrand :
> ---
>  src/intel/vulkan/anv_device.c | 42 --
>  1 file changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 6ea8dfe..8eed7f3 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -112,12 +112,42 @@ anv_physical_device_init_heaps(struct 
> anv_physical_device *device, int fd)
> if (result != VK_SUCCESS)
>return result;
>
> -   device->memory.heap_count = 1;
> -   device->memory.heaps[0] = (struct anv_memory_heap) {
> -  .size = heap_size,
> -  .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
> -  .supports_48bit_addresses = device->supports_48bit_addresses,
> -   };
> +   if (heap_size <= 3ull * (1ull << 30)) {
> +  /* In this case, everything fits nicely into the 32-bit address space,
> +   * so there's no need for supporting 48bit addresses on 
> clinet-allocated

Probably a typo: s/clinet/client/

> +   * memory objects.
> +   */
> +  device->memory.heap_count = 1;
> +  device->memory.heaps[0] = (struct anv_memory_heap) {
> + .size = heap_size,
> + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
> + .supports_48bit_addresses = false,
> +  };
> +   } else {
> +  /* Not everything will fit nicely into a 32-bit address space.  In this
> +   * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
> +   * larger 48-bit heap.  If we're in this case, then we have a total 
> heap
> +   * size larger than 3GiB which most likely means they have 8 GiB of
> +   * video memory and so carving off 2 GiB for the 32-bit heap should be
> +   * reasonable.
> +   */
> +  const uint32_t heap_size_32bit = 2ull * (1ull << 30);
> +  const uint32_t heap_size_48bit = heap_size - heap_size_32bit;

Is it really a good idea for these variables to be only 32-bit?
Especially the second one.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/14] mesa/main/ff_frag: Don't bother with VARYING_BIT_FOGC.

2017-05-16 Thread Gustaw Smolarczyk
2017-05-16 18:48 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
> On 05/15/2017 11:38 AM, Gustaw Smolarczyk wrote:
>> 2017-05-15 20:28 GMT+02:00 Ian Romanick <i...@freedesktop.org>:
>>> With this patch I applied, I still see this bit used in several
>>> places, including the i915 driver.  Did you test that?
>>>
>>> rc/compiler/shader_enums.h:#define VARYING_BIT_FOGC 
>>> BITFIELD64_BIT(VARYING_SLOT_FOGC)
>>> src/mesa/swrast/s_context.c:VARYING_BIT_FOGC | 
>>> VARYING_BITS_TEX_ANY;
>>> src/mesa/swrast/s_context.c: attribsMask |= VARYING_BIT_FOGC;
>>> src/mesa/swrast/s_fog.c:if (span->arrayAttribs & VARYING_BIT_FOGC) {
>>> \
>>> src/mesa/program/programopt.c: * This function sets \c VARYING_BIT_FOGC in 
>>> \c fprog->info.inputs_read.
>>> src/mesa/program/programopt.c:   fprog->info.inputs_read |= 
>>> VARYING_BIT_FOGC;
>>> src/mesa/main/ffvertex_prog.c:   if (p->state->fragprog_inputs_read & 
>>> VARYING_BIT_FOGC)
>>> src/mesa/drivers/dri/i915/i915_fragprog.c:   if ((inputsRead & 
>>> VARYING_BIT_FOGC)) {
>>> src/mesa/tnl/t_context.c:   || (fp != NULL && (fp->info.inputs_read & 
>>> VARYING_BIT_FOGC) != 0)) {
>>
>> The bit would eventually end in the state_key::inputs_available bitset
>> which is only used locally (i.e. in the same source file). It was
>> never tested against.
>
> Okay.  As long as this inputs_referenced isn't used to populate the
> shader_info::inputs_read field in the generated fragment program it
> should be fine.
>
>> Regards,
>> Gustaw
>

That should not be a problem since only COL{0,1} and TEX* bits are
used. The bitmask is never used as a whole (other than copying it to
the state_key structure), so it shouldn't be accessed from outside of
ff_fragment_shader.cpp file.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/14] mesa/main/ff_frag: Don't bother with VARYING_BIT_FOGC.

2017-05-15 Thread Gustaw Smolarczyk
2017-05-15 20:28 GMT+02:00 Ian Romanick :
> With this patch I applied, I still see this bit used in several
> places, including the i915 driver.  Did you test that?
>
> rc/compiler/shader_enums.h:#define VARYING_BIT_FOGC 
> BITFIELD64_BIT(VARYING_SLOT_FOGC)
> src/mesa/swrast/s_context.c:VARYING_BIT_FOGC | 
> VARYING_BITS_TEX_ANY;
> src/mesa/swrast/s_context.c: attribsMask |= VARYING_BIT_FOGC;
> src/mesa/swrast/s_fog.c:if (span->arrayAttribs & VARYING_BIT_FOGC) {  
>   \
> src/mesa/program/programopt.c: * This function sets \c VARYING_BIT_FOGC in \c 
> fprog->info.inputs_read.
> src/mesa/program/programopt.c:   fprog->info.inputs_read |= VARYING_BIT_FOGC;
> src/mesa/main/ffvertex_prog.c:   if (p->state->fragprog_inputs_read & 
> VARYING_BIT_FOGC)
> src/mesa/drivers/dri/i915/i915_fragprog.c:   if ((inputsRead & 
> VARYING_BIT_FOGC)) {
> src/mesa/tnl/t_context.c:   || (fp != NULL && (fp->info.inputs_read & 
> VARYING_BIT_FOGC) != 0)) {

The bit would eventually end in the state_key::inputs_available bitset
which is only used locally (i.e. in the same source file). It was
never tested against.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/17] st/mesa: unify TCS, TES, GS st_*_program structures

2017-05-02 Thread Gustaw Smolarczyk
2017-05-01 14:52 GMT+02:00 Marek Olšák :
> From: Marek Olšák 

SNIP

>
>  /**
>   * Derived from Mesa gl_program:
>   */
> -struct st_geometry_program
> +struct st_common_program
>  {
> struct gl_program Base;  /**< The Mesa geometry program */

I think you should remove the word "geometry" from the comment.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 3/4] ff_fragment_shader: mark impossible switch values with unreachable

2017-04-22 Thread Gustaw Smolarczyk
2017-04-22 11:45 GMT+02:00 Nicolai Hähnle <nhaeh...@gmail.com>:
> On 22.04.2017 09:52, Giuseppe Bilotta wrote:
>>
>> Signed-off-by: Giuseppe Bilotta <giuseppe.bilo...@gmail.com>
>> ---
>>  src/mesa/main/ff_fragment_shader.cpp | 18 +++---
>>  1 file changed, 7 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/mesa/main/ff_fragment_shader.cpp
>> b/src/mesa/main/ff_fragment_shader.cpp
>> index aac9de78ca..4f2d6b07a2 100644
>> --- a/src/mesa/main/ff_fragment_shader.cpp
>> +++ b/src/mesa/main/ff_fragment_shader.cpp
>> @@ -138,8 +138,7 @@ need_saturate( GLuint mode )
>> case TEXENV_MODE_ADD_PRODUCTS_SIGNED_NV:
>>return GL_TRUE;
>> default:
>> -  assert(0);
>> -  return GL_FALSE;
>> +  unreachable("Invalid TexEnv Combine mode");
>> }
>>  }
>>
>> @@ -426,8 +425,7 @@ get_source(texenv_fragment_program *p,
>>}
>>
>> default:
>> -  assert(0);
>> -  return NULL;
>> +  unreachable("Invalid TexEnv source");
>> }
>>  }
>>
>> @@ -458,8 +456,7 @@ emit_combine_source(texenv_fragment_program *p,
>>return src;
>>
>> default:
>> -  assert(0);
>> -  return src;
>> +  unreachable("Invalid TexEnv Combine operand");
>> }
>>  }
>>
>> @@ -495,8 +492,8 @@ static GLboolean args_match( const struct state_key
>> *key, GLuint unit )
>> return GL_FALSE;
>>  }
>>  break;
>> -  default:
>> -return GL_FALSE;   /* impossible */
>> +  default:
>> +unreachable("Invalid TexEnv Combine operand");
>
>
> For this one, I think despite the comment it'd be best to change that to an
> assert + return first, and re-visit it after some time.

I think the comment was quite accurate - alpha argument cannot use
color values, so handling only alpha operand enums is perfectly safe.
It is being validated in texenv.c:set_combiner_operand (the second
switch); later on, the GL_* constants are converted to TEXENV_OPR ones
in texstate.c:pack_tex_combine. Though first adding an assert is safer
in case I missed something.

>
> Cheers,
> Nicolai
>
>
>>}
>> }
>>
>> @@ -579,9 +576,8 @@ emit_combine(texenv_fragment_program *p,
>> case TEXENV_MODE_ADD_PRODUCTS_SIGNED_NV:
>>return add(add(mul(src[0], src[1]), mul(src[2], src[3])),
>>  new(p->mem_ctx) ir_constant(-0.5f));
>> -   default:
>> -  assert(0);
>> -  return src[0];
>> +   default:
>> +  unreachable("Invalid TexEnv Combine mode");
>> }
>>  }

With Nicolai's suggestion or not:

Reviewed-by: Gustaw Smolarczyk <wielkie...@gmail.com>

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/14] Better Travis-CI integration, take 2

2017-04-21 Thread Gustaw Smolarczyk
2017-04-21 14:08 GMT+02:00 Emil Velikov :
> Note: GCC 5/6 crashes with internal compiler error at seemingly random
> stages of the build. Unless any of the SWR devs sort it out, we might
> want to disable it for now.
>
> A sample report can be seen at
> https://travis-ci.org/evelikov/Mesa/builds/223590028


Hello,

The ICEs look like an external factor is involved.

> g++-5: internal compiler error: Killed (program cc1plus)

Looks like someone killed the cc1plus process. Maybe too much memory
was used and kernel OOM killer was unleashed?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/14] mesa/main: Maintain compressed fog mode.

2017-04-08 Thread Gustaw Smolarczyk
2017-04-08 18:37 GMT+02:00 Marek Olšák <mar...@gmail.com>:
> On Sat, Apr 8, 2017 at 12:53 AM, Gustaw Smolarczyk <wielkie...@gmail.com> 
> wrote:
>> 2017-04-07 23:56 GMT+02:00 Marek Olšák <mar...@gmail.com>:
>>> On Fri, Apr 7, 2017 at 6:54 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
>>> wrote:
>>>> 2017-04-07 16:31 GMT+02:00 Marek Olšák <mar...@gmail.com>:
>>>>> On Thu, Mar 30, 2017 at 8:09 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
>>>>> wrote:
>>>>>> Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
>>>>>> ---
>>>>>>  src/mesa/main/enable.c |  1 +
>>>>>>  src/mesa/main/fog.c|  9 +
>>>>>>  src/mesa/main/mtypes.h | 14 ++
>>>>>>  3 files changed, 24 insertions(+)
>>>>>>
>>>>>> diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
>>>>>> index d9d63a6b4b..ef278a318a 100644
>>>>>> --- a/src/mesa/main/enable.c
>>>>>> +++ b/src/mesa/main/enable.c
>>>>>> @@ -385,6 +385,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
>>>>>> GLboolean state)
>>>>>>  return;
>>>>>>   FLUSH_VERTICES(ctx, _NEW_FOG);
>>>>>>   ctx->Fog.Enabled = state;
>>>>>> + ctx->Fog._PackedEnabledMode = state ? ctx->Fog._PackedMode : 
>>>>>> FOG_NONE;
>>>>>>   break;
>>>>>>case GL_LIGHT0:
>>>>>>case GL_LIGHT1:
>>>>>> diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
>>>>>> index 1ad939cfde..76e65080b7 100644
>>>>>> --- a/src/mesa/main/fog.c
>>>>>> +++ b/src/mesa/main/fog.c
>>>>>> @@ -102,8 +102,13 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
>>>>>>   m = (GLenum) (GLint) *params;
>>>>>>  switch (m) {
>>>>>>  case GL_LINEAR:
>>>>>> +   ctx->Fog._PackedMode = FOG_LINEAR;
>>>>>> +   break;
>>>>>>  case GL_EXP:
>>>>>> +   ctx->Fog._PackedMode = FOG_EXP;
>>>>>> +   break;
>>>>>>  case GL_EXP2:
>>>>>> +   ctx->Fog._PackedMode = FOG_EXP2;
>>>>>
>>>>> Perhaps these should be set before FLUSH_VERTICES?
>>>>>
>>>>> Marek
>>>>
>>>> That would make us need two switch() statements instead of one. Also,
>>>> if ctx->Fog.Mode == m then we are essentially writing the same values
>>>> so nothing changes - doing it after the check shouldn't affect
>>>> correctness in any way. I might be wrong, though. _PackedMode is only
>>>> ever used to manage _PackedEnabledMode (here and in _mesa_set_enable),
>>>> so I think we should be safe.
>>>>
>>>> Since this simplification is pretty minor, I can change the code if you 
>>>> want.
>>>
>>> It's OK if _PackedMode is only a temporary variable used after
>>> FLUSH_VERTICES. It can't be used directly in places that don't call
>>> FLUSH_VERTICES though (e.g. ff_fragment_program.cpp).
>>
>> Since current _PackedMode usage is restricted to mesa/main just after
>> FLUSH_VERTICES is called (and will probably stay this way), I think
>> the prerequisites are fulfilled.
>
> The prerequisites are fulfilled only if _PackedMode isn't used by
> FLUSH_VERTICES itself.
>
> Mesa doesn't send all draw calls to drivers. Some draw calls are
> queued in the vbo module. FLUSH_VERTICES flushes the queue. All state
> changes should happen after FLUSH_VERTICES. If they happen before
> FLUSH_VERTICES, they could affect previous draw calls. It's basically
> the same as changing the order of GL calls in an incorrect way. We
> should make sure that it never happens.

Yes, I understand how it's important not to modify some state before
the driver (or vbo module or st/mesa) is done using it for things that
were being buffered (like the vertices). Since currently _PackedMode
is not used there, it shouldn't be a problem. And I think it's always
better to use _PackedEnabledMode instead of _PackedMode since it also
tells you whether the whole fog processing is enabled or not. Maybe I
should put a comment saying that _PackedMode is for internal use and
shouldn't be used in any other way than helping _PackedEnabledMode
initialization?

>
>>
>> If you are ok with the patches, could please push the whole series? I
>> don't have commit access.
>
> If you send me a git link to the whole series, I'll push it.

I am not sure what you mean by "git link". Did I forget about
something while using git-format-patch/git-send-email before sending
the series? Or do you want a git repository you can pull the series
from?

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/14] mesa/main: Maintain compressed fog mode.

2017-04-07 Thread Gustaw Smolarczyk
2017-04-07 23:56 GMT+02:00 Marek Olšák <mar...@gmail.com>:
> On Fri, Apr 7, 2017 at 6:54 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
> wrote:
>> 2017-04-07 16:31 GMT+02:00 Marek Olšák <mar...@gmail.com>:
>>> On Thu, Mar 30, 2017 at 8:09 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
>>> wrote:
>>>> Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
>>>> ---
>>>>  src/mesa/main/enable.c |  1 +
>>>>  src/mesa/main/fog.c|  9 +
>>>>  src/mesa/main/mtypes.h | 14 ++
>>>>  3 files changed, 24 insertions(+)
>>>>
>>>> diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
>>>> index d9d63a6b4b..ef278a318a 100644
>>>> --- a/src/mesa/main/enable.c
>>>> +++ b/src/mesa/main/enable.c
>>>> @@ -385,6 +385,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
>>>> GLboolean state)
>>>>  return;
>>>>   FLUSH_VERTICES(ctx, _NEW_FOG);
>>>>   ctx->Fog.Enabled = state;
>>>> + ctx->Fog._PackedEnabledMode = state ? ctx->Fog._PackedMode : 
>>>> FOG_NONE;
>>>>   break;
>>>>case GL_LIGHT0:
>>>>case GL_LIGHT1:
>>>> diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
>>>> index 1ad939cfde..76e65080b7 100644
>>>> --- a/src/mesa/main/fog.c
>>>> +++ b/src/mesa/main/fog.c
>>>> @@ -102,8 +102,13 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
>>>>   m = (GLenum) (GLint) *params;
>>>>  switch (m) {
>>>>  case GL_LINEAR:
>>>> +   ctx->Fog._PackedMode = FOG_LINEAR;
>>>> +   break;
>>>>  case GL_EXP:
>>>> +   ctx->Fog._PackedMode = FOG_EXP;
>>>> +   break;
>>>>  case GL_EXP2:
>>>> +   ctx->Fog._PackedMode = FOG_EXP2;
>>>
>>> Perhaps these should be set before FLUSH_VERTICES?
>>>
>>> Marek
>>
>> That would make us need two switch() statements instead of one. Also,
>> if ctx->Fog.Mode == m then we are essentially writing the same values
>> so nothing changes - doing it after the check shouldn't affect
>> correctness in any way. I might be wrong, though. _PackedMode is only
>> ever used to manage _PackedEnabledMode (here and in _mesa_set_enable),
>> so I think we should be safe.
>>
>> Since this simplification is pretty minor, I can change the code if you want.
>
> It's OK if _PackedMode is only a temporary variable used after
> FLUSH_VERTICES. It can't be used directly in places that don't call
> FLUSH_VERTICES though (e.g. ff_fragment_program.cpp).

Since current _PackedMode usage is restricted to mesa/main just after
FLUSH_VERTICES is called (and will probably stay this way), I think
the prerequisites are fulfilled.

If you are ok with the patches, could please push the whole series? I
don't have commit access.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 11/14] mesa/main: Maintain compressed TexEnv Combine state.

2017-04-07 Thread Gustaw Smolarczyk
2017-04-07 16:40 GMT+02:00 Marek Olšák <mar...@gmail.com>:
> On Fri, Apr 7, 2017 at 4:35 PM, Marek Olšák <mar...@gmail.com> wrote:
>> On Thu, Mar 30, 2017 at 8:09 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
>> wrote:
>>> Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
>>> ---
>>>  src/mesa/main/mtypes.h   |  83 ++
>>>  src/mesa/main/texstate.c | 103 
>>> +++
>>>  2 files changed, 186 insertions(+)
>>>
>>> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
>>> index 186d79928c..369d2327f2 100644
>>> --- a/src/mesa/main/mtypes.h
>>> +++ b/src/mesa/main/mtypes.h
>>> @@ -1078,6 +1078,87 @@ struct gl_tex_env_combine_state
>>>  };
>>>
>>>
>>> +/** Compressed TexEnv effective Combine mode */
>>> +enum gl_tex_env_mode
>>> +{
>>> +   TEXENV_MODE_REPLACE, /* r = a0 */
>>> +   TEXENV_MODE_MODULATE,/* r = a0 * a1 */
>>> +   TEXENV_MODE_ADD, /* r = a0 + a1 */
>>> +   TEXENV_MODE_ADD_SIGNED,  /* r = a0 + a1 - 0.5 */
>>> +   TEXENV_MODE_INTERPOLATE, /* r = a0 * a2 + a1 * (1 - a2) */
>>> +   TEXENV_MODE_SUBTRACT,/* r = a0 - a1 */
>>> +   TEXENV_MODE_DOT3_RGB,/* r = a0 . a1 */
>>> +   TEXENV_MODE_DOT3_RGB_EXT,/* r = a0 . a1 */
>>> +   TEXENV_MODE_DOT3_RGBA,   /* r = a0 . a1 */
>>> +   TEXENV_MODE_DOT3_RGBA_EXT,   /* r = a0 . a1 */
>>> +   TEXENV_MODE_MODULATE_ADD_ATI,/* r = a0 * a2 + a1 */
>>> +   TEXENV_MODE_MODULATE_SIGNED_ADD_ATI, /* r = a0 * a2 + a1 - 0.5 */
>>> +   TEXENV_MODE_MODULATE_SUBTRACT_ATI,   /* r = a0 * a2 - a1 */
>>> +   TEXENV_MODE_ADD_PRODUCTS_NV, /* r = a0 * a1 + a2 * a3 */
>>> +   TEXENV_MODE_ADD_PRODUCTS_SIGNED_NV,  /* r = a0 * a1 + a2 * a3 - 0.5 */
>>> +};
>>> +
>>> +
>>> +/** Compressed TexEnv Combine source */
>>> +enum gl_tex_env_source
>>> +{
>>> +   TEXENV_SRC_TEXTURE0,
>>> +   TEXENV_SRC_TEXTURE1,
>>> +   TEXENV_SRC_TEXTURE2,
>>> +   TEXENV_SRC_TEXTURE3,
>>> +   TEXENV_SRC_TEXTURE4,
>>> +   TEXENV_SRC_TEXTURE5,
>>> +   TEXENV_SRC_TEXTURE6,
>>> +   TEXENV_SRC_TEXTURE7,
>>> +   TEXENV_SRC_TEXTURE,
>>> +   TEXENV_SRC_PREVIOUS,
>>> +   TEXENV_SRC_PRIMARY_COLOR,
>>> +   TEXENV_SRC_CONSTANT,
>>> +   TEXENV_SRC_ZERO,
>>> +   TEXENV_SRC_ONE,
>>> +};
>>> +
>>> +
>>> +/** Compressed TexEnv Combine operand */
>>> +enum gl_tex_env_operand
>>> +{
>>> +   TEXENV_OPR_COLOR,
>>> +   TEXENV_OPR_ONE_MINUS_COLOR,
>>> +   TEXENV_OPR_ALPHA,
>>> +   TEXENV_OPR_ONE_MINUS_ALPHA,
>>> +};
>>> +
>>> +
>>> +/** Compressed TexEnv Combine argument */
>>> +struct gl_tex_env_argument
>>> +{
>>> +#ifdef __GNUC__
>>> +   __extension__ uint8_t Source:4;  /**< TEXENV_SRC_x */
>>> +   __extension__ uint8_t Operand:2; /**< TEXENV_OPR_x */
>>> +#else
>>> +   uint8_t Source;  /**< SRC_x */
>>> +   uint8_t Operand; /**< OPR_x */

Oops, I left the old comments here. Should be the same as above.

>>> +#endif
>>
>> Why is this #ifdef here?

I have copied it from ff_fragment_shader.cpp.

>
> You can just use "unsigned Source:4;" There is no reason to do it any other 
> way.

There is one: using unsigned instead of uint8_t makes sizeof(struct
gl_tex_env_argument) be 4 instead of 1. I assume that is also the
reason it was done this way in ff_fragment_shader.cpp (though in C++
using uint8_t for a bitfield is ok and isn't an extension, IIRC).

I can try to pursue some other alternative if you don't like
special-casing for gcc here. I can just use uint8_t and make inline
functions/macros that pack/unpack Source and Operand from it.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/14] mesa/main: Maintain compressed fog mode.

2017-04-07 Thread Gustaw Smolarczyk
2017-04-07 16:31 GMT+02:00 Marek Olšák <mar...@gmail.com>:
> On Thu, Mar 30, 2017 at 8:09 PM, Gustaw Smolarczyk <wielkie...@gmail.com> 
> wrote:
>> Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
>> ---
>>  src/mesa/main/enable.c |  1 +
>>  src/mesa/main/fog.c|  9 +
>>  src/mesa/main/mtypes.h | 14 ++
>>  3 files changed, 24 insertions(+)
>>
>> diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
>> index d9d63a6b4b..ef278a318a 100644
>> --- a/src/mesa/main/enable.c
>> +++ b/src/mesa/main/enable.c
>> @@ -385,6 +385,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
>> GLboolean state)
>>  return;
>>   FLUSH_VERTICES(ctx, _NEW_FOG);
>>   ctx->Fog.Enabled = state;
>> + ctx->Fog._PackedEnabledMode = state ? ctx->Fog._PackedMode : 
>> FOG_NONE;
>>   break;
>>case GL_LIGHT0:
>>case GL_LIGHT1:
>> diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
>> index 1ad939cfde..76e65080b7 100644
>> --- a/src/mesa/main/fog.c
>> +++ b/src/mesa/main/fog.c
>> @@ -102,8 +102,13 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
>>   m = (GLenum) (GLint) *params;
>>  switch (m) {
>>  case GL_LINEAR:
>> +   ctx->Fog._PackedMode = FOG_LINEAR;
>> +   break;
>>  case GL_EXP:
>> +   ctx->Fog._PackedMode = FOG_EXP;
>> +   break;
>>  case GL_EXP2:
>> +   ctx->Fog._PackedMode = FOG_EXP2;
>
> Perhaps these should be set before FLUSH_VERTICES?
>
> Marek

That would make us need two switch() statements instead of one. Also,
if ctx->Fog.Mode == m then we are essentially writing the same values
so nothing changes - doing it after the check shouldn't affect
correctness in any way. I might be wrong, though. _PackedMode is only
ever used to manage _PackedEnabledMode (here and in _mesa_set_enable),
so I think we should be safe.

Since this simplification is pretty minor, I can change the code if you want.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/14] st/mesa: Use compressed fog mode for atifs.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/state_tracker/st_atifs_to_tgsi.c |  6 +++---
 src/mesa/state_tracker/st_atom_shader.c   | 17 +
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/src/mesa/state_tracker/st_atifs_to_tgsi.c 
b/src/mesa/state_tracker/st_atifs_to_tgsi.c
index 64879f1b27..90286a1115 100644
--- a/src/mesa/state_tracker/st_atifs_to_tgsi.c
+++ b/src/mesa/state_tracker/st_atifs_to_tgsi.c
@@ -705,7 +705,7 @@ transform_inst:
   }
 
   /* compute the 1 component fog factor f */
-  if (ctx->key->fog == 1) {
+  if (ctx->key->fog == FOG_LINEAR) {
  /* LINEAR formula: f = (end - z) / (end - start)
   * with optimized parameters:
   *f = MAD(fogcoord, oparams.x, oparams.y)
@@ -721,7 +721,7 @@ transform_inst:
  SET_SRC(, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, 
X, X, X, X);
  SET_SRC(, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, 
Y, Y, Y, Y);
  tctx->emit_instruction(tctx, );
-  } else if (ctx->key->fog == 2) {
+  } else if (ctx->key->fog == FOG_EXP) {
  /* EXP formula: f = exp(-dens * z)
   * with optimized parameters:
   *f = MUL(fogcoord, oparams.z); f= EX2(-f)
@@ -747,7 +747,7 @@ transform_inst:
  SET_SRC(, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, 
W);
  inst.Src[0].Register.Negate = 1;
  tctx->emit_instruction(tctx, );
-  } else if (ctx->key->fog == 3) {
+  } else if (ctx->key->fog == FOG_EXP2) {
  /* EXP2 formula: f = exp(-(dens * z)^2)
   * with optimized parameters:
   *f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
diff --git a/src/mesa/state_tracker/st_atom_shader.c 
b/src/mesa/state_tracker/st_atom_shader.c
index f79afe0b1c..ee97c69df3 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -54,19 +54,6 @@
 #include "st_texture.h"
 
 
-/** Compress the fog function enums into a 2-bit value */
-static GLuint
-translate_fog_mode(GLenum mode)
-{
-   switch (mode) {
-   case GL_LINEAR: return 1;
-   case GL_EXP:return 2;
-   case GL_EXP2:   return 3;
-   default:
-  return 0;
-   }
-}
-
 static unsigned
 get_texture_target(struct gl_context *ctx, const unsigned unit)
 {
@@ -132,9 +119,7 @@ update_fp( struct st_context *st )
   _mesa_geometric_samples(st->ctx->DrawBuffer) > 1;
 
if (stfp->ati_fs) {
-  if (st->ctx->Fog.Enabled) {
- key.fog = translate_fog_mode(st->ctx->Fog.Mode);
-  }
+  key.fog = st->ctx->Fog._PackedEnabledMode;
 
   for (unsigned u = 0; u < MAX_NUM_FRAGMENT_REGISTERS_ATI; u++) {
  key.texture_targets[u] = get_texture_target(st->ctx, u);
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/14] mesa/main: Maintain compressed TexEnv Combine state.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/mtypes.h   |  83 ++
 src/mesa/main/texstate.c | 103 +++
 2 files changed, 186 insertions(+)

diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 186d79928c..369d2327f2 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1078,6 +1078,87 @@ struct gl_tex_env_combine_state
 };
 
 
+/** Compressed TexEnv effective Combine mode */
+enum gl_tex_env_mode
+{
+   TEXENV_MODE_REPLACE, /* r = a0 */
+   TEXENV_MODE_MODULATE,/* r = a0 * a1 */
+   TEXENV_MODE_ADD, /* r = a0 + a1 */
+   TEXENV_MODE_ADD_SIGNED,  /* r = a0 + a1 - 0.5 */
+   TEXENV_MODE_INTERPOLATE, /* r = a0 * a2 + a1 * (1 - a2) */
+   TEXENV_MODE_SUBTRACT,/* r = a0 - a1 */
+   TEXENV_MODE_DOT3_RGB,/* r = a0 . a1 */
+   TEXENV_MODE_DOT3_RGB_EXT,/* r = a0 . a1 */
+   TEXENV_MODE_DOT3_RGBA,   /* r = a0 . a1 */
+   TEXENV_MODE_DOT3_RGBA_EXT,   /* r = a0 . a1 */
+   TEXENV_MODE_MODULATE_ADD_ATI,/* r = a0 * a2 + a1 */
+   TEXENV_MODE_MODULATE_SIGNED_ADD_ATI, /* r = a0 * a2 + a1 - 0.5 */
+   TEXENV_MODE_MODULATE_SUBTRACT_ATI,   /* r = a0 * a2 - a1 */
+   TEXENV_MODE_ADD_PRODUCTS_NV, /* r = a0 * a1 + a2 * a3 */
+   TEXENV_MODE_ADD_PRODUCTS_SIGNED_NV,  /* r = a0 * a1 + a2 * a3 - 0.5 */
+};
+
+
+/** Compressed TexEnv Combine source */
+enum gl_tex_env_source
+{
+   TEXENV_SRC_TEXTURE0,
+   TEXENV_SRC_TEXTURE1,
+   TEXENV_SRC_TEXTURE2,
+   TEXENV_SRC_TEXTURE3,
+   TEXENV_SRC_TEXTURE4,
+   TEXENV_SRC_TEXTURE5,
+   TEXENV_SRC_TEXTURE6,
+   TEXENV_SRC_TEXTURE7,
+   TEXENV_SRC_TEXTURE,
+   TEXENV_SRC_PREVIOUS,
+   TEXENV_SRC_PRIMARY_COLOR,
+   TEXENV_SRC_CONSTANT,
+   TEXENV_SRC_ZERO,
+   TEXENV_SRC_ONE,
+};
+
+
+/** Compressed TexEnv Combine operand */
+enum gl_tex_env_operand
+{
+   TEXENV_OPR_COLOR,
+   TEXENV_OPR_ONE_MINUS_COLOR,
+   TEXENV_OPR_ALPHA,
+   TEXENV_OPR_ONE_MINUS_ALPHA,
+};
+
+
+/** Compressed TexEnv Combine argument */
+struct gl_tex_env_argument
+{
+#ifdef __GNUC__
+   __extension__ uint8_t Source:4;  /**< TEXENV_SRC_x */
+   __extension__ uint8_t Operand:2; /**< TEXENV_OPR_x */
+#else
+   uint8_t Source;  /**< SRC_x */
+   uint8_t Operand; /**< OPR_x */
+#endif
+};
+
+
+/***
+ * Compressed TexEnv Combine state.
+ */
+struct gl_tex_env_combine_packed
+{
+   uint32_t ModeRGB:4;  /**< Effective mode for RGB as 4 bits */
+   uint32_t ModeA:4;/**< Effective mode for RGB as 4 bits */
+   uint32_t ScaleShiftRGB:2;/**< 0, 1 or 2 */
+   uint32_t ScaleShiftA:2;  /**< 0, 1 or 2 */
+   uint32_t NumArgsRGB:3;   /**< Number of inputs used for the RGB 
combiner */
+   uint32_t NumArgsA:3; /**< Number of inputs used for the A combiner 
*/
+   /** Source arguments in a packed manner */
+   struct gl_tex_env_argument ArgsRGB[MAX_COMBINER_TERMS];
+   struct gl_tex_env_argument ArgsA[MAX_COMBINER_TERMS];
+};
+
+
 /**
  * TexGenEnabled flags.
  */
@@ -1180,6 +1261,8 @@ struct gl_texture_unit
/** Points to highest priority, complete and enabled texture object */
struct gl_texture_object *_Current;
 
+   /** Current compressed TexEnv & Combine state */
+   struct gl_tex_env_combine_packed _CurrentCombinePacked;
 };
 
 
diff --git a/src/mesa/main/texstate.c b/src/mesa/main/texstate.c
index ada0dfdb66..71757dc154 100644
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -376,6 +376,107 @@ _mesa_update_texture_matrices(struct gl_context *ctx)
 
 
 /**
+ * Translate GL combiner state into a MODE_x value
+ */
+static uint32_t
+tex_combine_translate_mode(GLenum envMode, GLenum mode)
+{
+   switch (mode) {
+   case GL_REPLACE: return TEXENV_MODE_REPLACE;
+   case GL_MODULATE: return TEXENV_MODE_MODULATE;
+   case GL_ADD:
+  if (envMode == GL_COMBINE4_NV)
+return TEXENV_MODE_ADD_PRODUCTS_NV;
+  else
+return TEXENV_MODE_ADD;
+   case GL_ADD_SIGNED:
+  if (envMode == GL_COMBINE4_NV)
+return TEXENV_MODE_ADD_PRODUCTS_SIGNED_NV;
+  else
+return TEXENV_MODE_ADD_SIGNED;
+   case GL_INTERPOLATE: return TEXENV_MODE_INTERPOLATE;
+   case GL_SUBTRACT: return TEXENV_MODE_SUBTRACT;
+   case GL_DOT3_RGB: return TEXENV_MODE_DOT3_RGB;
+   case GL_DOT3_RGB_EXT: return TEXENV_MODE_DOT3_RGB_EXT;
+   case GL_DOT3_RGBA: return TEXENV_MODE_DOT3_RGBA;
+   case GL_DOT3_RGBA_EXT: return TEXENV_MODE_DOT3_RGBA_EXT;
+   case GL_MODULATE_ADD_ATI: return TEXENV_MODE_MODULATE_ADD_ATI;
+   case GL_MODULATE_SIGNED_ADD_ATI: return TEXENV_MODE_MODULATE_SIGNED_ADD_ATI;
+   case GL_MODULATE_SUBTRACT_ATI: return TEXENV_MODE_MODULATE_SUBTRACT_ATI;
+   default:
+  unreachable("Invalid TexEnv Combine mode");
+   }
+}
+
+
+static uint8_t
+tex_combine_translate_source(GLenum src)
+{
+   switch (src) {
+   case GL_TE

[Mesa-dev] [PATCH 13/14] mesa/main/ff_frag: Use compressed TexEnv Combine state.

2017-03-30 Thread Gustaw Smolarczyk
Along the way, add missing GL_ONE source support and drop non-existing
GL_ZERO and GL_ONE operand support.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 335 +++
 1 file changed, 104 insertions(+), 231 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index bdbefc7880..aac9de78ca 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -81,16 +81,6 @@ texenv_doing_secondary_color(struct gl_context *ctx)
return GL_FALSE;
 }
 
-struct mode_opt {
-#ifdef __GNUC__
-   __extension__ GLubyte Source:4;  /**< SRC_x */
-   __extension__ GLubyte Operand:3; /**< OPR_x */
-#else
-   GLubyte Source;  /**< SRC_x */
-   GLubyte Operand; /**< OPR_x */
-#endif
-};
-
 struct state_key {
GLuint nr_enabled_units:4;
GLuint separate_specular:1;
@@ -103,131 +93,23 @@ struct state_key {
   GLuint enabled:1;
   GLuint source_index:4;   /**< TEXTURE_x_INDEX */
   GLuint shadow:1;
+
+  /***
+   * These are taken from struct gl_tex_env_combine_packed
+   * @{
+   */
+  GLuint ModeRGB:4;
+  GLuint ModeA:4;
   GLuint ScaleShiftRGB:2;
   GLuint ScaleShiftA:2;
-
-  GLuint NumArgsRGB:3;  /**< up to MAX_COMBINER_TERMS */
-  GLuint ModeRGB:5; /**< MODE_x */
-
-  GLuint NumArgsA:3;  /**< up to MAX_COMBINER_TERMS */
-  GLuint ModeA:5; /**< MODE_x */
-
-  struct mode_opt OptRGB[MAX_COMBINER_TERMS];
-  struct mode_opt OptA[MAX_COMBINER_TERMS];
+  GLuint NumArgsRGB:3;
+  GLuint NumArgsA:3;
+  struct gl_tex_env_argument ArgsRGB[MAX_COMBINER_TERMS];
+  struct gl_tex_env_argument ArgsA[MAX_COMBINER_TERMS];
+  /** @} */
} unit[MAX_TEXTURE_COORD_UNITS];
 };
 
-#define OPR_SRC_COLOR   0
-#define OPR_ONE_MINUS_SRC_COLOR 1
-#define OPR_SRC_ALPHA   2
-#define OPR_ONE_MINUS_SRC_ALPHA3
-#define OPR_ZERO4
-#define OPR_ONE 5
-#define OPR_UNKNOWN 7
-
-static GLuint translate_operand( GLenum operand )
-{
-   switch (operand) {
-   case GL_SRC_COLOR: return OPR_SRC_COLOR;
-   case GL_ONE_MINUS_SRC_COLOR: return OPR_ONE_MINUS_SRC_COLOR;
-   case GL_SRC_ALPHA: return OPR_SRC_ALPHA;
-   case GL_ONE_MINUS_SRC_ALPHA: return OPR_ONE_MINUS_SRC_ALPHA;
-   case GL_ZERO: return OPR_ZERO;
-   case GL_ONE: return OPR_ONE;
-   default:
-  assert(0);
-  return OPR_UNKNOWN;
-   }
-}
-
-#define SRC_TEXTURE  0
-#define SRC_TEXTURE0 1
-#define SRC_TEXTURE1 2
-#define SRC_TEXTURE2 3
-#define SRC_TEXTURE3 4
-#define SRC_TEXTURE4 5
-#define SRC_TEXTURE5 6
-#define SRC_TEXTURE6 7
-#define SRC_TEXTURE7 8
-#define SRC_CONSTANT 9
-#define SRC_PRIMARY_COLOR 10
-#define SRC_PREVIOUS 11
-#define SRC_ZERO 12
-#define SRC_UNKNOWN  15
-
-static GLuint translate_source( GLenum src )
-{
-   switch (src) {
-   case GL_TEXTURE: return SRC_TEXTURE;
-   case GL_TEXTURE0:
-   case GL_TEXTURE1:
-   case GL_TEXTURE2:
-   case GL_TEXTURE3:
-   case GL_TEXTURE4:
-   case GL_TEXTURE5:
-   case GL_TEXTURE6:
-   case GL_TEXTURE7: return SRC_TEXTURE0 + (src - GL_TEXTURE0);
-   case GL_CONSTANT: return SRC_CONSTANT;
-   case GL_PRIMARY_COLOR: return SRC_PRIMARY_COLOR;
-   case GL_PREVIOUS: return SRC_PREVIOUS;
-   case GL_ZERO:
-  return SRC_ZERO;
-   default:
-  assert(0);
-  return SRC_UNKNOWN;
-   }
-}
-
-#define MODE_REPLACE 0  /* r = a0 */
-#define MODE_MODULATE1  /* r = a0 * a1 */
-#define MODE_ADD 2  /* r = a0 + a1 */
-#define MODE_ADD_SIGNED  3  /* r = a0 + a1 - 0.5 */
-#define MODE_INTERPOLATE 4  /* r = a0 * a2 + a1 * (1 - a2) */
-#define MODE_SUBTRACT5  /* r = a0 - a1 */
-#define MODE_DOT3_RGB6  /* r = a0 . a1 */
-#define MODE_DOT3_RGB_EXT7  /* r = a0 . a1 */
-#define MODE_DOT3_RGBA   8  /* r = a0 . a1 */
-#define MODE_DOT3_RGBA_EXT   9  /* r = a0 . a1 */
-#define MODE_MODULATE_ADD_ATI   10  /* r = a0 * a2 + a1 */
-#define MODE_MODULATE_SIGNED_ADD_ATI11  /* r = a0 * a2 + a1 - 0.5 */
-#define MODE_MODULATE_SUBTRACT_ATI  12  /* r = a0 * a2 - a1 */
-#define MODE_ADD_PRODUCTS   13  /* r = a0 * a1 + a2 * a3 */
-#define MODE_ADD_PRODUCTS_SIGNED14  /* r = a0 * a1 + a2 * a3 - 0.5 */
-#define MODE_UNKNOWN16
-
-/**
- * Translate GL combiner state into a MODE_x value
- */
-static GLuint translate_mode( GLenum envMode, GLenum mode )
-{
-   switch (mode) {
-   case GL_REPLACE: return MODE_REPLACE;
-   case GL_MODULATE: return MODE_MODULATE;
-   case GL_ADD:
-  if (envMode == GL_COMBINE4_NV)
- return MODE_ADD_PRODUCTS;
-  else
- return MODE_ADD;
-   case GL_ADD_SIGNED:
-  if (envMode == GL_COMBINE4_NV)
- return MODE_ADD_PRODUCTS_SIG

[Mesa-dev] [PATCH 09/14] mesa/main/ff_frag: Don't retrieve format if not necessary.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 2b4d99c879..e1fe9b58c0 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -402,24 +402,21 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   const struct gl_texture_unit *texUnit = >Texture.Unit[i];
   const struct gl_texture_object *texObj = texUnit->_Current;
   const struct gl_tex_env_combine_state *comb = texUnit->_CurrentCombine;
-  const struct gl_sampler_object *samp;
-  GLenum format;
 
   if (!texObj)
  continue;
 
-  samp = _mesa_get_samplerobj(ctx, i);
-  format = _mesa_texture_base_format(texObj);
-
   key->unit[i].enabled = 1;
   inputs_referenced |= VARYING_BIT_TEX(i);
 
   key->unit[i].source_index = texObj->TargetIndex;
 
-  key->unit[i].shadow =
- ((samp->CompareMode == GL_COMPARE_R_TO_TEXTURE) &&
-  ((format == GL_DEPTH_COMPONENT) || 
-   (format == GL_DEPTH_STENCIL_EXT)));
+  const struct gl_sampler_object *samp = _mesa_get_samplerobj(ctx, i);
+  if (samp->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
+ const GLenum format = _mesa_texture_base_format(texObj);
+ key->unit[i].shadow = (format == GL_DEPTH_COMPONENT ||
+   format == GL_DEPTH_STENCIL_EXT);
+  }
 
   key->unit[i].NumArgsRGB = comb->_NumArgsRGB;
   key->unit[i].NumArgsA = comb->_NumArgsA;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/14] mesa/main/ff_frag: Don't bother with VARYING_BIT_FOGC.

2017-03-30 Thread Gustaw Smolarczyk
It's not used.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 95c74e2b92..05641997de 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -449,10 +449,8 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
}
 
/* _NEW_FOG */
-   if (ctx->Fog.Enabled) {
+   if (ctx->Fog.Enabled)
   key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
-  inputs_referenced |= VARYING_BIT_FOGC; /* maybe */
-   }
 
/* _NEW_BUFFERS */
key->num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/14] mesa/main/ff_frag: Use compressed fog mode.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index e1fe9b58c0..bdbefc7880 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -117,21 +117,6 @@ struct state_key {
} unit[MAX_TEXTURE_COORD_UNITS];
 };
 
-#define FOG_NONE0
-#define FOG_LINEAR  1
-#define FOG_EXP 2
-#define FOG_EXP23
-
-static GLuint translate_fog_mode( GLenum mode )
-{
-   switch (mode) {
-   case GL_LINEAR: return FOG_LINEAR;
-   case GL_EXP: return FOG_EXP;
-   case GL_EXP2: return FOG_EXP2;
-   default: return FOG_NONE;
-   }
-}
-
 #define OPR_SRC_COLOR   0
 #define OPR_ONE_MINUS_SRC_COLOR 1
 #define OPR_SRC_ALPHA   2
@@ -446,8 +431,7 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
}
 
/* _NEW_FOG */
-   if (ctx->Fog.Enabled)
-  key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
+   key->fog_mode = ctx->Fog._PackedEnabledMode;
 
/* _NEW_BUFFERS */
key->num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/14] mesa/main/ff_frag: Store nr_enabled_units only once.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 1b76f40e9a..717f39e9d3 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -396,8 +396,9 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
 
/* _NEW_TEXTURE_OBJECT */
mask = ctx->Texture._EnabledCoordUnits;
+   int i = -1;
while (mask) {
-  const int i = u_bit_scan();
+  i = u_bit_scan();
   const struct gl_texture_unit *texUnit = >Texture.Unit[i];
   const struct gl_texture_object *texObj = texUnit->_Current;
   const struct gl_tex_env_combine_state *comb = texUnit->_CurrentCombine;
@@ -411,7 +412,6 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   format = _mesa_texture_base_format(texObj);
 
   key->unit[i].enabled = 1;
-  key->nr_enabled_units = i + 1;
   inputs_referenced |= VARYING_BIT_TEX(i);
 
   key->unit[i].source_index = _mesa_tex_target_to_index(ctx,
@@ -441,6 +441,8 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   }
}
 
+   key->nr_enabled_units = i + 1;
+
/* _NEW_LIGHT | _NEW_FOG */
if (texenv_doing_secondary_color(ctx)) {
   key->separate_specular = 1;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/14] mesa/main: Maintain compressed fog mode.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/enable.c |  1 +
 src/mesa/main/fog.c|  9 +
 src/mesa/main/mtypes.h | 14 ++
 3 files changed, 24 insertions(+)

diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c
index d9d63a6b4b..ef278a318a 100644
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -385,6 +385,7 @@ _mesa_set_enable(struct gl_context *ctx, GLenum cap, 
GLboolean state)
 return;
  FLUSH_VERTICES(ctx, _NEW_FOG);
  ctx->Fog.Enabled = state;
+ ctx->Fog._PackedEnabledMode = state ? ctx->Fog._PackedMode : FOG_NONE;
  break;
   case GL_LIGHT0:
   case GL_LIGHT1:
diff --git a/src/mesa/main/fog.c b/src/mesa/main/fog.c
index 1ad939cfde..76e65080b7 100644
--- a/src/mesa/main/fog.c
+++ b/src/mesa/main/fog.c
@@ -102,8 +102,13 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
  m = (GLenum) (GLint) *params;
 switch (m) {
 case GL_LINEAR:
+   ctx->Fog._PackedMode = FOG_LINEAR;
+   break;
 case GL_EXP:
+   ctx->Fog._PackedMode = FOG_EXP;
+   break;
 case GL_EXP2:
+   ctx->Fog._PackedMode = FOG_EXP2;
break;
 default:
_mesa_error( ctx, GL_INVALID_ENUM, "glFog" );
@@ -113,6 +118,8 @@ _mesa_Fogfv( GLenum pname, const GLfloat *params )
return;
 FLUSH_VERTICES(ctx, _NEW_FOG);
 ctx->Fog.Mode = m;
+ctx->Fog._PackedEnabledMode = ctx->Fog.Enabled ?
+  ctx->Fog._PackedMode : FOG_NONE;
 break;
   case GL_FOG_DENSITY:
 if (*params<0.0F) {
@@ -210,6 +217,8 @@ void _mesa_init_fog( struct gl_context * ctx )
/* Fog group */
ctx->Fog.Enabled = GL_FALSE;
ctx->Fog.Mode = GL_EXP;
+   ctx->Fog._PackedMode = FOG_EXP;
+   ctx->Fog._PackedEnabledMode = FOG_NONE;
ASSIGN_4V( ctx->Fog.Color, 0.0, 0.0, 0.0, 0.0 );
ASSIGN_4V( ctx->Fog.ColorUnclamped, 0.0, 0.0, 0.0, 0.0 );
ctx->Fog.Index = 0.0;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 91e1948c23..186d79928c 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -575,12 +575,26 @@ struct gl_eval_attrib
 
 
 /**
+ * Compressed fog mode.
+ */
+enum gl_fog_mode
+{
+   FOG_NONE,
+   FOG_LINEAR,
+   FOG_EXP,
+   FOG_EXP2,
+};
+
+
+/**
  * Fog attribute group (GL_FOG_BIT).
  */
 struct gl_fog_attrib
 {
GLboolean Enabled;  /**< Fog enabled flag */
GLboolean ColorSumEnabled;
+   uint8_t _PackedMode;/**< Fog mode as 2 bits */
+   uint8_t _PackedEnabledMode; /**< Masked CompressedMode */
GLfloat ColorUnclamped[4];/**< Fog color */
GLfloat Color[4];   /**< Fog color */
GLfloat Density;/**< Density >= 0.0 */
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/14] mesa/main/ff_frag: Use gl_texture_object::TargetIndex.

2017-03-30 Thread Gustaw Smolarczyk
Instead of computing it once again using _mesa_tex_target_to_index.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 717f39e9d3..2b4d99c879 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -414,8 +414,7 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   key->unit[i].enabled = 1;
   inputs_referenced |= VARYING_BIT_TEX(i);
 
-  key->unit[i].source_index = _mesa_tex_target_to_index(ctx,
-texObj->Target);
+  key->unit[i].source_index = texObj->TargetIndex;
 
   key->unit[i].shadow =
  ((samp->CompareMode == GL_COMPARE_R_TO_TEXTURE) &&
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/14] mesa/main/ff_frag: Simplify get_fp_input_mask.

2017-03-30 Thread Gustaw Smolarczyk
Change it into filter_fp_input_mask transform function that instead of
returning a mask, transforms input.

Also, simplify the case of vertex program handling by assuming that
fp_inputs is always a combination of VARYING_BIT_COL* and VARYING_BIT_TEX*.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 111 +--
 1 file changed, 55 insertions(+), 56 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 05641997de..1b76f40e9a 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -284,30 +284,33 @@ need_saturate( GLuint mode )
  * constants instead.
  *
  * This function figures out all the inputs that the fragment program
- * has access to.  The bitmask is later reduced to just those which
- * are actually referenced.
+ * has access to and filters input bitmask.
  */
-static GLbitfield get_fp_input_mask( struct gl_context *ctx )
+static GLbitfield filter_fp_input_mask( GLbitfield fp_inputs,
+   struct gl_context *ctx )
 {
-   /* _NEW_PROGRAM */
-   const GLboolean vertexShader =
-  ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
-   const GLboolean vertexProgram = ctx->VertexProgram._Enabled;
-   GLbitfield fp_inputs = 0x0;
-
if (ctx->VertexProgram._Overriden) {
   /* Somebody's messing with the vertex program and we don't have
* a clue what's happening.  Assume that it could be producing
* all possible outputs.
*/
-  fp_inputs = ~0;
+  return fp_inputs;
}
-   else if (ctx->RenderMode == GL_FEEDBACK) {
+
+   if (ctx->RenderMode == GL_FEEDBACK) {
   /* _NEW_RENDERMODE */
-  fp_inputs = (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
+  return fp_inputs & (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
}
-   else if (!(vertexProgram || vertexShader)) {
+
+   /* _NEW_PROGRAM */
+   const GLboolean vertexShader =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
+   const GLboolean vertexProgram = ctx->VertexProgram._Enabled;
+
+   if (!(vertexProgram || vertexShader)) {
   /* Fixed function vertex logic */
+  GLbitfield possible_inputs = 0;
+
   /* _NEW_VARYING_VP_INPUTS */
   GLbitfield64 varying_inputs = ctx->varying_vp_inputs;
 
@@ -315,69 +318,66 @@ static GLbitfield get_fp_input_mask( struct gl_context 
*ctx )
* vertex program:
*/
   /* _NEW_POINT */
-  if (ctx->Point.PointSprite)
- varying_inputs |= VARYING_BITS_TEX_ANY;
+  if (ctx->Point.PointSprite) {
+ /* All texture varyings are possible to use */
+ possible_inputs = VARYING_BITS_TEX_ANY;
+  }
+  else {
+ /* _NEW_TEXTURE_STATE */
+ const GLbitfield possible_tex_inputs =
+   ctx->Texture._TexGenEnabled |
+   ctx->Texture._TexMatEnabled |
+   ((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0);
+
+ possible_inputs = (possible_tex_inputs << VARYING_SLOT_TEX0);
+  }
 
   /* First look at what values may be computed by the generated
* vertex program:
*/
   /* _NEW_LIGHT */
   if (ctx->Light.Enabled) {
- fp_inputs |= VARYING_BIT_COL0;
+ possible_inputs |= VARYING_BIT_COL0;
 
  if (texenv_doing_secondary_color(ctx))
-fp_inputs |= VARYING_BIT_COL1;
+possible_inputs |= VARYING_BIT_COL1;
   }
 
-  /* _NEW_TEXTURE_STATE */
-  fp_inputs |= (ctx->Texture._TexGenEnabled |
-ctx->Texture._TexMatEnabled) << VARYING_SLOT_TEX0;
-
   /* Then look at what might be varying as a result of enabled
* arrays, etc:
*/
   if (varying_inputs & VERT_BIT_COLOR0)
- fp_inputs |= VARYING_BIT_COL0;
+ possible_inputs |= VARYING_BIT_COL0;
   if (varying_inputs & VERT_BIT_COLOR1)
- fp_inputs |= VARYING_BIT_COL1;
-
-  fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) 
-<< VARYING_SLOT_TEX0);
+ possible_inputs |= VARYING_BIT_COL1;
 
+  return fp_inputs & possible_inputs;
}
-   else {
-  /* calculate from vp->outputs */
-  struct gl_program *vprog;
-  GLbitfield64 vp_outputs;
-
-  /* Choose GLSL vertex shader over ARB vertex program.  Need this
-   * since vertex shader state validation comes after fragment state
-   * validation (see additional comments in state.c).
-   */
-  if (vertexShader)
- vprog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];
-  else
- vprog = ctx->VertexProgram.Current;
 
-  vp_outputs = vprog->info.outputs_written;
+   /* calculate from vp->outputs */
+   struct gl_program *vprog;
 
-  /* These get generated in the setup routine regar

[Mesa-dev] [PATCH 01/14] mesa/main/ff_frag: Use correct constant.

2017-03-30 Thread Gustaw Smolarczyk
Since fixed-function shaders are restricted to MAX_TEXTURE_COORD_UNITS
texture units, use this constant instead of MAX_TEXTURE_UNITS. This
reduces the array size from 32 to 8.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
Reviewed-by: Eric Anholt <e...@anholt.net>
---
 src/mesa/main/ff_fragment_shader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index f007ac3b40..7679328d4c 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -123,7 +123,7 @@ struct state_key {
 
   struct mode_opt OptRGB[MAX_COMBINER_TERMS];
   struct mode_opt OptA[MAX_COMBINER_TERMS];
-   } unit[MAX_TEXTURE_UNITS];
+   } unit[MAX_TEXTURE_COORD_UNITS];
 };
 
 #define FOG_NONE0
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/14] mesa/main/ff_frag: Remove unused struct.

2017-03-30 Thread Gustaw Smolarczyk
Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 8 
 1 file changed, 8 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 9b00c36534..95c74e2b92 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -68,14 +68,6 @@ using namespace ir_builder;
  */
 
 
-struct texenvprog_cache_item
-{
-   GLuint hash;
-   void *key;
-   struct gl_shader_program *data;
-   struct texenvprog_cache_item *next;
-};
-
 static GLboolean
 texenv_doing_secondary_color(struct gl_context *ctx)
 {
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/14] mesa/main/ff_frag: Reduce the size of nr_enabled_units.

2017-03-30 Thread Gustaw Smolarczyk
Since it holds values from 0 to 8, 4 bits will suffice.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
Reviewed-by: Eric Anholt <e...@anholt.net>
---
 src/mesa/main/ff_fragment_shader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index d1e89abc08..9b00c36534 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -100,7 +100,7 @@ struct mode_opt {
 };
 
 struct state_key {
-   GLuint nr_enabled_units:8;
+   GLuint nr_enabled_units:4;
GLuint separate_specular:1;
GLuint fog_mode:2;  /**< FOG_x */
GLuint inputs_available:12;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/14] mesa/main/ff_frag: Remove enabled_units.

2017-03-30 Thread Gustaw Smolarczyk
Its only usage is easily replaced by nr_enabled_units. As for cache key
part, unit[i].enabled should be enough.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
Reviewed-by: Eric Anholt <e...@anholt.net>
---
 src/mesa/main/ff_fragment_shader.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 7679328d4c..d1e89abc08 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -101,7 +101,6 @@ struct mode_opt {
 
 struct state_key {
GLuint nr_enabled_units:8;
-   GLuint enabled_units:8;
GLuint separate_specular:1;
GLuint fog_mode:2;  /**< FOG_x */
GLuint inputs_available:12;
@@ -421,7 +420,6 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   format = _mesa_texture_base_format(texObj);
 
   key->unit[i].enabled = 1;
-  key->enabled_units |= (1<<i);
   key->nr_enabled_units = i + 1;
   inputs_referenced |= VARYING_BIT_TEX(i);
 
@@ -1136,7 +1134,7 @@ emit_instructions(texenv_fragment_program *p)
struct state_key *key = p->state;
GLuint unit;
 
-   if (key->enabled_units) {
+   if (key->nr_enabled_units) {
   /* First pass - to support texture_env_crossbar, first identify
* all referenced texture sources and emit texld instructions
* for each:
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/14] More substantial ff_fragment_shader cache key optimizations.

2017-03-30 Thread Gustaw Smolarczyk
Hello,

This is the continuation of my ff_fragment_shader cache key optimizations.
I have continued to try to reduce overhead of make_state_key function and it
seems that I have gained a little bit. As this is the first time I have
ventured into the mesa codebase so much, it's possible that I did something
wrong along the way. Please, point it out if you find anything incorrect.
For example, I was a little bit confused by the indentation used in some code
parts (like using only spaces or a mix of spaces and tabs). I tried to preserve
the indentation of the files I modified.

As before, the number of patches might be a little bit high since some of them
are very simple. I might squash some of them if you prefer that.

The first 3 patches are a rebased resend of a previous series. I have kept
Eric's r-by (I have changed the commit message of these a little bit, I hope
keeping r-by was ok).

Patches 4-9 contain simple self-contained improvements to the cache key and
its computation.

Patches 10 and 11 try to move some of the state computation to the point it is
changed. I have added a couple of compressed state fields into the context
object.

Patches 12 and 13 use these new fields inside make_state_key, simplifying it
a lot. Along the way, I have fixed an apparent bug (GL_ONE was not handled as
a combine source), though there was no difference for piglit quick run.

Finally, patch 14 uses the new compressed fog state for atifs state handling
in st/mesa, since it was quite simple to modify it. I didn't bother using
the new state for classic dri drivers.

I have run a piglit quick test on radeonsi before and after the series and
there were no differences apart from some unstable test results.

As for performance measurements, I have run a simple minecraft apitrace
through perf-record 5 times and have found that:

1. The apitrace replay fps measure is too variable to show any difference.
   It can be passed as "a wash".
2. perf-report shows something more encouraging. The time spent in
   _mesa_get_fixed_func_fragment_program has dropped from ~0.78% to ~0.37%.
   Standard deviation here is ~0.025% so the performance gain is statistically
   significant.

Regards,
Gustaw

Gustaw Smolarczyk (14):
  mesa/main/ff_frag: Use correct constant.
  mesa/main/ff_frag: Remove enabled_units.
  mesa/main/ff_frag: Reduce the size of nr_enabled_units.
  mesa/main/ff_frag: Remove unused struct.
  mesa/main/ff_frag: Don't bother with VARYING_BIT_FOGC.
  mesa/main/ff_frag: Simplify get_fp_input_mask.
  mesa/main/ff_frag: Store nr_enabled_units only once.
  mesa/main/ff_frag: Use gl_texture_object::TargetIndex.
  mesa/main/ff_frag: Don't retrieve format if not necessary.
  mesa/main: Maintain compressed fog mode.
  mesa/main: Maintain compressed TexEnv Combine state.
  mesa/main/ff_frag: Use compressed fog mode.
  mesa/main/ff_frag: Use compressed TexEnv Combine state.
  st/mesa: Use compressed fog mode for atifs.

 src/mesa/main/enable.c|   1 +
 src/mesa/main/ff_fragment_shader.cpp  | 506 ++
 src/mesa/main/fog.c   |   9 +
 src/mesa/main/mtypes.h|  97 ++
 src/mesa/main/texstate.c  | 103 ++
 src/mesa/state_tracker/st_atifs_to_tgsi.c |   6 +-
 src/mesa/state_tracker/st_atom_shader.c   |  17 +-
 7 files changed, 388 insertions(+), 351 deletions(-)

-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/3] mesa: always return GL_OUT_OF_MEMORY or GL_NO_ERROR when KHR_no_error enabled

2017-03-28 Thread Gustaw Smolarczyk
28 mar 2017 06:35 "Timothy Arceri"  napisał(a):

---
 src/mesa/main/getstring.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/main/getstring.c b/src/mesa/main/getstring.c
index 6e90511..50140cf 100644
--- a/src/mesa/main/getstring.c
+++ b/src/mesa/main/getstring.c
@@ -297,17 +297,29 @@ invalid_pname:
  *
  * Returns __struct gl_contextRec::ErrorValue.
  */
 GLenum GLAPIENTRY
 _mesa_GetError( void )
 {
GET_CURRENT_CONTEXT(ctx);
GLenum e = ctx->ErrorValue;
ASSERT_OUTSIDE_BEGIN_END_WITH_RETVAL(ctx, 0);

+   /* From Issue (3) of the KHR_no_error spec:
+*
+*"Should glGetError() always return NO_ERROR or have undefined
+*results?
+*
+*RESOLVED: It should for all errors except OUT_OF_MEMORY."
+*/
+   if (ctx->Const.ContextFlags == GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR &&


Shouldn't this be checked using & instead of ==?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] Minor fixed-function fragment shader cache key optimizations.

2017-03-27 Thread Gustaw Smolarczyk
2017-03-27 21:27 GMT+02:00 Gustaw Smolarczyk <wielkie...@gmail.com>:
> 2017-03-27 21:10 GMT+02:00 Jason Ekstrand <ja...@jlekstrand.net>:
>> On March 27, 2017 10:24:47 AM Gustaw Smolarczyk <wielkie...@gmail.com>
>> wrote:
>>
>>> Hello,
>>>
>>> I was playing with profiling Minecraft on radeonsi in perf and found that
>>> _mesa_get_fixed_func_fragment_program was a little bit too high in the
>>> profile
>>> log than it should. I assumed that most of it comes from make_state_key
>>> static
>>> function which I am trying to optimize now in the spare time.
>>>
>>> I started with a few pretty simple things. I don't think they will help
>>> much,
>>> but should be a good start.
>>
>>
>> Do you have any performance data for any of this?  It looks reasonable to me
>> but I'm not very familiar with this code.  I just know that others will ask.
>> :-)
>
> Unfortunately not. Testing minecraft is not an easy task since it's
> not simple to reproduce the same testing circumstances.
>
> Maybe I should record an apitrace trace and try to profile that? I
> will try that later, though I don't expect much of a difference with
> these patches - there are probably other things that could be done to
> make_state_key that would have more of a performance impact.

I have created a simple (~1min) apitrace trace of minecraft. I
measured the produced FPS using apitrace replay -b and at the same
time I profiled the apitrace using perf and looked at % spent in
_mesa_get_fixed_func_fragment_program. The results show that these 3
patches help only very slightly or even not at all (a.k.a. "a wash").

It seems that I also overestimated the time spent in the optimized
function (it's less than 1%), though it's still one of the hottest
mesa functions in the profile.

5 runs before and after the patches.

Before:
FPS: 65.12666 +/- 0.25995
_mesa_get_fixed_func_fragment_program: 0.734% +/- 0.023323

After:
FPS: 65.52748 +/- 0.53377
_mesa_get_fixed_func_fragment_program: 0.728% +/- 0.011661

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] mesa: load a user defined drirc file specified via an environment variable

2017-03-27 Thread Gustaw Smolarczyk
2017-03-27 21:59 GMT+02:00 Edmondo Tommasina :
> Introduce a new MESA_USER_DRIRC environment variable to load a customized
> drirc file.
>
> This can be used mostly for two things:
> 1. Force the load of a different user drirc configuration for an application
>without touching/creating a user global ${HOME}/.drirc file or passing
>their values through a list of env variables.
> 2. Avoid the load of the ${HOME}/.drirc file and ensure that only the
>system /etc/drirc file is used.
>
> Examples:
> 1. Set the variable to a file to load it instead of ${HOME}/.drirc:
>MESA_USER_DRIRC=~/glthread.drirc glxgears
>
> 2. Set the variable to an empty string to avoid the load of the user
>defined ${HOME}/.drirc file (if it exists):
>MESA_USER_DRIRC="" glxgears
>
> v2:
>   - extend the documentation of the envvar usage (Gustaw)
>   - describe usefulness of the envvar in commit message (Eric)
> ---
>  docs/envvars.html   |  7 +++
>  src/mesa/drivers/dri/common/xmlconfig.c | 26 +-
>  2 files changed, 24 insertions(+), 9 deletions(-)
>
> diff --git a/docs/envvars.html b/docs/envvars.html
> index 653736565e..44a5ef6c74 100644
> --- a/docs/envvars.html
> +++ b/docs/envvars.html
> @@ -130,6 +130,13 @@ that variable is set), or else within .cache/mesa within 
> the user's
>  home directory.
>  MESA_GLSL - shading language compiler 
> options
>  MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally 
> disabled.
> +MESA_USER_DRIRC - load a user defined drirc file, instead of the default
> +.drirc file in the user home directory. Setting the variable to an empty
> +string will also avoid to load the default user .drirc file. Examples:
> +
> +   MESA_USER_DRIRC="/tmp/special.drirc" loads a special config file
> +   MESA_USER_DRIRC="" skips the load of the default ${HOME}/.drirc
> +
>  
>
>
> diff --git a/src/mesa/drivers/dri/common/xmlconfig.c 
> b/src/mesa/drivers/dri/common/xmlconfig.c
> index fef007996e..dd4b46f3a4 100644
> --- a/src/mesa/drivers/dri/common/xmlconfig.c
> +++ b/src/mesa/drivers/dri/common/xmlconfig.c
> @@ -988,7 +988,6 @@ driParseConfigFiles(driOptionCache *cache, const 
> driOptionCache *info,
>  int screenNum, const char *driverName)
>  {
>  char *filenames[2] = { SYSCONFDIR "/drirc", NULL};
> -char *home;
>  uint32_t i;
>  struct OptConfData userData;
>
> @@ -999,14 +998,23 @@ driParseConfigFiles(driOptionCache *cache, const 
> driOptionCache *info,
>  userData.driverName = driverName;
>  userData.execName = GET_PROGRAM_NAME();
>
> -if ((home = getenv ("HOME"))) {
> -uint32_t len = strlen (home);
> -filenames[1] = malloc(len + 7+1);
> -if (filenames[1] == NULL)
> -__driUtilMessage ("Can't allocate memory for %s/.drirc.", home);
> -else {
> -memcpy (filenames[1], home, len);
> -memcpy (filenames[1] + len, "/.drirc", 7+1);
> +const char *userDrirc = getenv("MESA_USER_DRIRC");
> +
> +if (userDrirc) {
> +filenames[1] = malloc(strlen(userDrirc) + 1);
> +strcpy(filenames[1], userDrirc);

strdup?

And maybe skip this entirely when userDrirc[0] == '\0'? So that
filenames[1] stays as NULL. Of course this should skip also the else
branch below so that userDrirc != NULL && *userDrirc == '\0' would
skip the custom drirc.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/3] Minor fixed-function fragment shader cache key optimizations.

2017-03-27 Thread Gustaw Smolarczyk
2017-03-27 21:10 GMT+02:00 Jason Ekstrand <ja...@jlekstrand.net>:
> On March 27, 2017 10:24:47 AM Gustaw Smolarczyk <wielkie...@gmail.com>
> wrote:
>
>> Hello,
>>
>> I was playing with profiling Minecraft on radeonsi in perf and found that
>> _mesa_get_fixed_func_fragment_program was a little bit too high in the
>> profile
>> log than it should. I assumed that most of it comes from make_state_key
>> static
>> function which I am trying to optimize now in the spare time.
>>
>> I started with a few pretty simple things. I don't think they will help
>> much,
>> but should be a good start.
>
>
> Do you have any performance data for any of this?  It looks reasonable to me
> but I'm not very familiar with this code.  I just know that others will ask.
> :-)

Unfortunately not. Testing minecraft is not an easy task since it's
not simple to reproduce the same testing circumstances.

Maybe I should record an apitrace trace and try to profile that? I
will try that later, though I don't expect much of a difference with
these patches - there are probably other things that could be done to
make_state_key that would have more of a performance impact.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] mesa/main: Reduce the size of nr_enabled_units in ff cache key.

2017-03-27 Thread Gustaw Smolarczyk
Since it holds values from 0 to 8, 4 bits will suffice.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 75189733ba..c29d9bd64d 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -100,7 +100,7 @@ struct mode_opt {
 };
 
 struct state_key {
-   GLuint nr_enabled_units:8;
+   GLuint nr_enabled_units:4;
GLuint separate_specular:1;
GLuint fog_mode:2;  /**< FOG_x */
GLuint inputs_available:12;
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa/main: Remove enabled_units from ff cache key.

2017-03-27 Thread Gustaw Smolarczyk
Its only usage is easily replaced by nr_enabled_units. As for cache key
part, unit[i].enabled should be enough.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index 8af7d1cfb7..75189733ba 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -101,7 +101,6 @@ struct mode_opt {
 
 struct state_key {
GLuint nr_enabled_units:8;
-   GLuint enabled_units:8;
GLuint separate_specular:1;
GLuint fog_mode:2;  /**< FOG_x */
GLuint inputs_available:12;
@@ -421,7 +420,6 @@ static GLuint make_state_key( struct gl_context *ctx,  
struct state_key *key )
   format = _mesa_texture_base_format(texObj);
 
   key->unit[i].enabled = 1;
-  key->enabled_units |= (1<<i);
   key->nr_enabled_units = i + 1;
   inputs_referenced |= VARYING_BIT_TEX(i);
 
@@ -1136,7 +1134,7 @@ emit_instructions(texenv_fragment_program *p)
struct state_key *key = p->state;
GLuint unit;
 
-   if (key->enabled_units) {
+   if (key->nr_enabled_units) {
   /* First pass - to support texture_env_crossbar, first identify
* all referenced texture sources and emit texld instructions
* for each:
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] mesa/main: Use correct constant in ff cache key.

2017-03-27 Thread Gustaw Smolarczyk
Since fixed-function shaders are restricted to MAX_TEXTURE_COORD_UNITS
texture units, use this constant instead of MAX_TEXTURE_UNITS. This
reduces the array size from 32 to 8.

Signed-off-by: Gustaw Smolarczyk <wielkie...@gmail.com>
---
 src/mesa/main/ff_fragment_shader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/ff_fragment_shader.cpp 
b/src/mesa/main/ff_fragment_shader.cpp
index be382fa3ae..8af7d1cfb7 100644
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -123,7 +123,7 @@ struct state_key {
 
   struct mode_opt OptRGB[MAX_COMBINER_TERMS];
   struct mode_opt OptA[MAX_COMBINER_TERMS];
-   } unit[MAX_TEXTURE_UNITS];
+   } unit[MAX_TEXTURE_COORD_UNITS];
 };
 
 #define FOG_NONE0
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] Minor fixed-function fragment shader cache key optimizations.

2017-03-27 Thread Gustaw Smolarczyk
Hello,

I was playing with profiling Minecraft on radeonsi in perf and found that
_mesa_get_fixed_func_fragment_program was a little bit too high in the profile
log than it should. I assumed that most of it comes from make_state_key static
function which I am trying to optimize now in the spare time.

I started with a few pretty simple things. I don't think they will help much,
but should be a good start.

The code is divided into 3 patches, which might be an overkill. I can squash
them if you prefer that.

For more serious performance benefits, I was thinking about making the state
inside the gl context be more similar to ff cache key. This way make_state_key
would be more about copying stuff around than translating it from one
representation to another.

As I don't have commit access, please push the changes after you deem them
ready for that.

Regards,
Gustaw

Gustaw Smolarczyk (3):
  mesa/main: Use correct constant in ff cache key.
  mesa/main: Remove enabled_units from ff cache key.
  mesa/main: Reduce the size of nr_enabled_units in ff cache key.

 src/mesa/main/ff_fragment_shader.cpp | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] mesa: load a user defined drirc file specified via an environment variable

2017-03-27 Thread Gustaw Smolarczyk
26 mar 2017 22:32 "Edmondo Tommasina" 
napisał(a):

Define a new MESA_USER_DRIRC environment variable to load a customized
drirc file.

When the variable is not defined, nothing changes and the ${HOME}/.drirc
file will be loaded.

If the variable is set to a file, this file will be loaded instead of
the the ${HOME}/.drirc.

Example: MESA_USER_DRIRC=~/glthread.drirc glxgears

If the variable is set to nothing, it avoids to load the ${HOME}/.drirc
file.

Example: MESA_USER_DRIRC= glxgears


Isn't a different behavior of unset variable vs an empty one confusing? And
what about at least documenting it outside of the commit message?

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] mesa/main: Fix memset in formatquery.c

2017-03-27 Thread Gustaw Smolarczyk
27 mar 2017 08:47 "Alejandro Piñeiro"  napisał(a):

On 27/03/17 01:51, Edward O'Callaghan wrote:
> V.1:
> We memset number of elements without multiplication by the
> element size.

It is not usual to summarize v1. The idea is explain just the changes.
>
> V.2:
> We explicitly set each member to -1 over using a confusing
> memset().

Nitpick: usually, the info about version changes doesn't follow that
style (uppercase v, point, number => lowercase v, number).

>
> Signed-off-by: Edward O'Callaghan 
> ---
>  src/mesa/main/formatquery.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
> index 598d34d..6aa57ec 100644
> --- a/src/mesa/main/formatquery.c
> +++ b/src/mesa/main/formatquery.c
> @@ -1564,7 +1564,7 @@ _mesa_GetInternalformati64v(GLenum target, GLenum
internalformat,
>  * no pname can return a negative value, we fill params32 with
negative
>  * values as reference values, that can be used to know what
copy-back to
>  * params */
> -   memset(params32, -1, 16);
> +   for (i = 0; i < realSize; i++) params32[i] = -1;

If you keep a loop, then what Brian already said.

But out of curiosity, as mentioned, the other alternative is filling
params32 with -1 when defining it:
 GLint params32[16] = { [0 ... 15] = -1 };

Any disadvantage over the loop?


If realSize < 16, this form will write more bytes. However if the fill size
is known at compile time, the compiler should be able to optimize the code
more, like using 4 sse writes without any loop instead of up-to-16 dword
ones in a loop.

Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv: Advertise larger heap sizes

2017-03-18 Thread Gustaw Smolarczyk
18 mar 2017 05:24 "Jason Ekstrand"  napisał(a):

Instead of just advertising the aperture size, we do something more
intelligent.  On systems with a full 48-bit PPGTT, we can address 100%
of the available system RAM from the GPU.  In order to keep clients from
burning 100% of your available RAM for graphics resources, we have a
nice little heuristic (which has received exactly zero tuning) to keep
things under a reasonable level of control.

Cc: Alex Smith 
---
 src/intel/vulkan/anv_device.c  | 61 ++
+++-
 src/intel/vulkan/anv_gem.c | 16 +++
 src/intel/vulkan/anv_private.h | 13 -
 3 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 8994e70..fd71a23 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -53,6 +54,48 @@ compiler_perf_log(void *data, const char *fmt, ...)
va_end(args);
 }

+static VkResult
+anv_compute_heap_size(int fd, uint64_t *heap_size)
+{
+   uint64_t gtt_size;
+   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
+ _size) == -1) {
+  /* If, for whatever reason, we can't actually get the GTT size from
the
+   * kernel (too old?) fall back to the aperture size.
+   */
+  anv_perf_warn("Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");
+
+  if (anv_gem_get_aperture(fd, _size) == -1) {
+ return vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
+  "failed to get aperture size: %m");
+  }
+   }
+
+   /* Query the total ram from the system */
+   struct sysinfo info;
+   sysinfo();
+
+   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
+
+   /* We don't want to burn too much ram with the GPU.  If the user has
4GiB
+* or less, we use at most half.  If they have more than 4GiB, we use
3/4.
+*/
+   uint64_t available_ram;
+   if (total_ram < 4ull * 1024ull * 1024ull * 1024ull)
+  available_ram = total_ram / 2;
+   else
+  available_ram = total_ram * 3 / 4;


This will result in 3/4 in case of exactly 4GB of RAM present. Did you mean
to use <= instead of ___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa/main: Fix memset in formatquery.c

2017-03-17 Thread Gustaw Smolarczyk
17 mar 2017 10:01 "Alejandro Piñeiro"  napisał(a):

On 17/03/17 06:26, Edward O'Callaghan wrote:
> We memset number of elements without multiplication by the
> element size.
>
> Signed-off-by: Edward O'Callaghan 
> ---
>  src/mesa/main/formatquery.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
> index 598d34d..50d7c31 100644
> --- a/src/mesa/main/formatquery.c
> +++ b/src/mesa/main/formatquery.c
> @@ -1564,7 +1564,7 @@ _mesa_GetInternalformati64v(GLenum target, GLenum
internalformat,
>  * no pname can return a negative value, we fill params32 with
negative
>  * values as reference values, that can be used to know what
copy-back to
>  * params */
> -   memset(params32, -1, 16);

Urgh. Yes, this code is wrong, sorry. When I wrote it the idea is
initialize params32 elements to -1. But memset initilizes byte-per-byte.
Im not even sure if the resulting GLint value is still negative as intended.


In 2s complement arithmetic, memsetting all bytes to -1 is the same as
setting all dwords to -1 since both have a representation of all 1 bits.
Though it's better to leave a memset transformation to the compiler.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] vulkan/wsi: move image count to shared structure.

2017-02-21 Thread Gustaw Smolarczyk
21 lut 2017 03:47 "Jason Ekstrand"  napisał(a):

Fine by me

Reviewed-by: Jason Ekstrand 

On Mon, Feb 20, 2017 at 6:26 PM, Dave Airlie  wrote:

> From: Dave Airlie 
>
> For prime support I need to access this, so move it in advance.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/vulkan/wsi/wsi_common.h |  1 +
>  src/vulkan/wsi/wsi_common_wayland.c | 20 +---
>  src/vulkan/wsi/wsi_common_x11.c | 29 ++---
>  3 files changed, 24 insertions(+), 26 deletions(-)
>
> diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
> index ae9e587..1a22935 100644
> --- a/src/vulkan/wsi/wsi_common.h
> +++ b/src/vulkan/wsi/wsi_common.h
> @@ -54,6 +54,7 @@ struct wsi_swapchain {
> const struct wsi_image_fns *image_fns;
> VkFence fences[3];
> VkPresentModeKHR present_mode;
> +   int image_count;
>
> VkResult (*destroy)(struct wsi_swapchain *swapchain,
> const VkAllocationCallbacks *pAllocator);
> diff --git a/src/vulkan/wsi/wsi_common_wayland.c
> b/src/vulkan/wsi/wsi_common_wayland.c
> index 4489736..e6490ee 100644
> --- a/src/vulkan/wsi/wsi_common_wayland.c
> +++ b/src/vulkan/wsi/wsi_common_wayland.c
> @@ -495,7 +495,6 @@ struct wsi_wl_swapchain {
> VkPresentModeKHR present_mode;
> bool fifo_ready;
>
> -   uint32_t image_count;
> struct wsi_wl_image  images[0];
>  };
>
> @@ -508,13 +507,13 @@ wsi_wl_swapchain_get_images(struct wsi_swapchain
> *wsi_chain,
> VkResult result;
>
> if (pSwapchainImages == NULL) {
> -  *pCount = chain->image_count;
> +  *pCount = chain->base.image_count;
>return VK_SUCCESS;
> }
>
> result = VK_SUCCESS;
> -   ret_count = chain->image_count;
> -   if (chain->image_count > *pCount) {
> +   ret_count = chain->base.image_count;
> +   if (chain->base.image_count > *pCount) {
>   ret_count = *pCount;
>   result = VK_INCOMPLETE;
> }
> @@ -543,7 +542,7 @@ wsi_wl_swapchain_acquire_next_image(struct
> wsi_swapchain *wsi_chain,
>return VK_ERROR_OUT_OF_DATE_KHR;
>
> while (1) {
> -  for (uint32_t i = 0; i < chain->image_count; i++) {
> +  for (uint32_t i = 0; i < chain->base.image_count; i++) {
>

Looks like a comparison between signed and unsigned. Not sure if you care
about this (it produces a warning at -Wall or -Wextra IIRC).

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] radeonsi, r600g: Alias 'R600_DEBUG' with 'RADEON_DEBUG'

2017-02-20 Thread Gustaw Smolarczyk
2017-02-20 11:19 GMT+01:00 Edward O'Callaghan :
>
> On 02/20/2017 09:15 PM, Edward O'Callaghan wrote:
>> The name has become a little misleading now that it applies
>> to both r600g and radeonsi.
>>
>> V.2: Michel Dänzer - R600_DEBUG must continue to work.
>>
>> Signed-off-by: Edward O'Callaghan 
>> ---
>>  src/gallium/drivers/r600/r600_pipe.c   | 1 +
>>  src/gallium/drivers/radeon/r600_pipe_common.c  | 2 ++
>>  src/gallium/drivers/radeonsi/glsl_tests/amdgcn_glslc.c | 1 +
>>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c  | 2 +-
>>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  | 4 +++-
>>  5 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_pipe.c 
>> b/src/gallium/drivers/r600/r600_pipe.c
>> index 1803c26..f4ab0ee 100644
>> --- a/src/gallium/drivers/r600/r600_pipe.c
>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>> @@ -641,6 +641,7 @@ struct pipe_screen *r600_screen_create(struct 
>> radeon_winsys *ws)
>>   }
>>
>>   rscreen->b.debug_flags |= debug_get_flags_option("R600_DEBUG", 
>> r600_debug_options, 0);
>> + rscreen->b.debug_flags |= debug_get_flags_option("RADEON_DEBUG", 
>> r600_debug_options, 0);
>>   if (debug_get_bool_option("R600_DEBUG_COMPUTE", FALSE))
>>   rscreen->b.debug_flags |= DBG_COMPUTE;
>>   if (debug_get_bool_option("R600_DUMP_SHADERS", FALSE))
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
>> b/src/gallium/drivers/radeon/r600_pipe_common.c
>> index 1781584..a372cd1 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>> @@ -1257,7 +1257,9 @@ bool r600_common_screen_init(struct r600_common_screen 
>> *rscreen,
>>   rscreen->ws = ws;
>>   rscreen->family = rscreen->info.family;
>>   rscreen->chip_class = rscreen->info.chip_class;
>> +
>>   rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", 
>> common_debug_options, 0);
>> + rscreen->debug_flags = debug_get_flags_option("RADEON_DEBUG", 
>> common_debug_options, 0);

Shouldn't this be |=?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] radv/ac: use shader umsb helper.

2017-02-16 Thread Gustaw Smolarczyk
I think you meant "shared" and not "shader" in the first line of the commit
message.

Gustaw

16 lut 2017 04:55 "Dave Airlie"  napisał(a):

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 18 +-
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_
llvm.c
index b28812e..c1e838e 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -937,23 +937,7 @@ static LLVMValueRef emit_ifind_msb(struct
nir_to_llvm_context *ctx,
 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
   LLVMValueRef src0)
 {
-   LLVMValueRef args[2] = {
-   src0,
-   ctx->i32one,
-   };
-   LLVMValueRef msb = ac_emit_llvm_intrinsic(>ac, "llvm.ctlz.i32",
-  ctx->i32, args,
ARRAY_SIZE(args),
-  AC_FUNC_ATTR_READNONE);
-
-   /* The HW returns the last bit index from MSB, but NIR wants
-* the index from LSB. Invert it by doing "31 - msb". */
-   msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
-  msb, "");
-
-   return LLVMBuildSelect(ctx->builder,
-  LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
-ctx->i32zero, ""),
-  LLVMConstInt(ctx->i32, -1, true), msb, "");
+   return ac_emit_umsb(>ac, src0, ctx->i32);
 }

 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC PATCH 1/1] st/dri: add a new driconf option override_glsl_version for ARK games

2017-02-07 Thread Gustaw Smolarczyk
2017-02-07 14:11 GMT+01:00 Eero Tamminen :
> Hi,
>
> On 06.02.2017 22:26, Samuel Pitoiset wrote:
>>
>> On 02/06/2017 04:45 PM, Eero Tamminen wrote:
>>>
>>> Results from quick try on Ubuntu 16.04 with today's version of the game
>>> (ARK: Survival Evolved)...
>>>
>>> With (Ubuntu 16.04 default) Mesa 11.2, game starts to the game main
>>> menu, but when one starts from that a single player campaign, it will
>>> crash before the real game begins, after few minutes of loading (when
>>> game RAM resident set size has grown to ~10 GB):
>>> 
>>> Thread 1 "ShooterGame" received signal SIGSEGV, Segmentation fault.
>>> 0x01369f9b in _start ()
>>> (gdb) bt
>>> #0  0x01369f9b in _start ()
>>> 
>>>
>>> With latest Mesa (with or without the patch), game will just show a
>>> small dialog with fedirectly fromw garbage characters and exit.  I have
>>> no idea what
>>> it wants to complain about.
>>
>>
>> That patch is actually wrong for intel (it doesn't override anything).
>>
>> Maybe you can try this one?
>>
>> https://cgit.freedesktop.org/~hakzsam/mesa/log/?h=override_glsl_version_v2
>
>
> Same results.  Patch doesn't help.
>
>
> On 06.02.2017 18:02, Bas Nieuwenhuizen wrote:
>> You might be able to find the message with a debugger, the GL4.3+
>> context error also manifested as such and it turned out that the error
>> string is UTF-16, while the function that shows the error dialog
>> expects something like ASCII or UTF-8.
>
> Thanks, setting breakpoint on SDL_ShowMessageBox() I can catch where it
> shows the dialog, but it appears to link SDL statically and not have debug
> symbols so that I could directly get the message.
>
> Unfortunately my gdb/assembly-foo is too weak to parse UTF-16 strings
> through pointers in stack (or is register passing using on Intel for subset
> of args?)...

On x86-64 [1], the first argument should be in rdi register. If SDL2
is used, it will point to SDL_MessageBoxData structure [2] which
should have the message pointer at offset 24.

[1] https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
[2] https://wiki.libsdl.org/SDL_MessageBoxData

Regards,
Gustaw

>
>
>
> - Eero
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 15/17] gallium/radeon: remove r600_common_context::max_db

2017-01-26 Thread Gustaw Smolarczyk
2017-01-26 17:04 GMT+01:00 Marek Olšák :
> From: Marek Olšák 
>
> this cleanup is based on the vulkan driver, which seems to do the same thing

Is this also ok for r600g? If I'm right, the amdgpu-pro Vulkan driver
doesn't have any support for pre-GCN hardware.

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/12] glcpp: Use strpbrk in the line continuations pass

2017-01-07 Thread Gustaw Smolarczyk
7 sty 2017 20:04 "Vladislav Egorov"  napisał(a):

To find newlines the line continuations removal pass uses strchr()
twice -- first time to find '\n', second time to find '\r'. Now,
if the shader uses Unix line separators '\n', the file could contain
no '\r' at all, so each time it will scan to the end of the shader.
Use strpbrk() standard function instead that was specifically created
to search many characters at once.
---
 src/compiler/glsl/glcpp/pp.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
index c196868..c4c0196 100644
--- a/src/compiler/glsl/glcpp/pp.c
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -231,7 +231,6 @@ remove_line_continuations(glcpp_parser_t *ctx, const
char *shader)
 {
struct string_buffer sb;
const char *backslash, *newline, *search_start;
-const char *cr, *lf;
 char newline_separator[3];
int collapsed_newlines = 0;
int separator_len;
@@ -266,23 +265,19 @@ remove_line_continuations(glcpp_parser_t *ctx, const
char *shader)
 * examining the first encountered newline terminator, and using the
 * same terminator for any newlines we insert.
 */
-   cr = strchr(search_start, '\r');
-   lf = strchr(search_start, '\n');
+   newline = strpbrk(search_start, "\r\n");

newline_separator[0] = '\n';
newline_separator[1] = '\0';
newline_separator[2] = '\0';

-   if (cr == NULL) {
+   if (newline == NULL) {
/* Nothing to do. */
-   } else if (lf == NULL) {
-   newline_separator[0] = '\r';
-   } else if (lf == cr + 1) {
-   newline_separator[0] = '\r';
-   newline_separator[1] = '\n';
-   } else if (cr == lf + 1) {
-   newline_separator[0] = '\n';
-   newline_separator[1] = '\r';
+   } else if (newline[1] == '\n' || newline[1] == '\r') {

What if we have \n\n ? Won't it match it as a single newline?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nir/algebraic: Add optimizations for "a == a && a CMP b"

2016-12-20 Thread Gustaw Smolarczyk
2016-12-20 6:32 GMT+01:00 Jason Ekstrand :
> This sequence shows up The Talos Principal, at least under Vulkan,
> and prevents loop analysis from properly computing trip counts in a
> few loops.
> ---
>  src/compiler/nir/nir_opt_algebraic.py | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/src/compiler/nir/nir_opt_algebraic.py 
> b/src/compiler/nir/nir_opt_algebraic.py
> index 698ac67..cc70ad5 100644
> --- a/src/compiler/nir/nir_opt_algebraic.py
> +++ b/src/compiler/nir/nir_opt_algebraic.py
> @@ -464,6 +464,14 @@ def bitfield_reverse(u):
>
>  optimizations += [(bitfield_reverse('x@32'), ('bitfield_reverse', 'x'))]
>
> +# For any comparison operation, "cmp", if you have "a != a && a cmp b" then
> +# the "a != a" is redundant because it's equivalent to "a is not NaN" and, if

Shouldn't the comment have a == a ?

Regards,
Gustaw
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] radv vs vulkan loader

2016-12-16 Thread Gustaw Smolarczyk
2016-12-16 18:12 GMT+01:00 Gustaw Smolarczyk <wielkie...@gmail.com>:
> 2016-12-16 17:57 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:
>> On 16 December 2016 at 15:27, Gustaw Smolarczyk <wielkie...@gmail.com> wrote:
>>> 2016-12-16 14:50 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:
>>>>
>>>> On 5 October 2016 at 23:12, Gustaw Smolarczyk <wielkie...@gmail.com>
>>>> wrote:
>>>> > 2016-10-06 0:05 GMT+02:00 Emil Velikov <emil.l.veli...@gmail.com>:
>>>> >> On 5 October 2016 at 21:45, Gustaw Smolarczyk <wielkie...@gmail.com>
>>>> >> wrote:
>>>> >>> Hello,
>>>> >>>
>>>> >>> I have encountered a following problem while trying to use radv
>>>> >>> through LunarG's vulkan loader.
>>>> >>>
>>>> >>> It seems that the loader dlopens() the ICD library twice. First, it
>>>> >>> looks up "vk_icdNegotiateLoaderICDInterfaceVersion" symbol, which
>>>> >>> seems to be the new mechanism used to determine the version of ICD
>>>> >>> interface. Since radv doesn't provide it, it fall backs to the older
>>>> >>> scheme. Unfortunately, it seems that the loader first unloads the ICD
>>>> >>> and then loads it again. That causes issues in LLVM libraries' command
>>>> >>> line registration which happens while initializing global objects with
>>>> >>> constructors. To be more specific, "asan-instrument-assembly"
>>>> >>> registered in libLLVMX86AsmPrinter.so registers for the second time
>>>> >>> and causes the following message:
>>>> >>>
>>>> >>>
>>>> >>> $ vulkaninfo
>>>> >>> ===
>>>> >>> VULKAN INFO
>>>> >>> ===
>>>> >>>
>>>> >>> Vulkan API Version: 1.0.26
>>>> >>>
>>>> >>>
>>>> >>> : CommandLine Error: Option 'asan-instrument-assembly' registered more
>>>> >>> than once!
>>>> >>> LLVM ERROR: inconsistency in registered CommandLine options
>>>> >>>
>>>> >>> I have "fixed" the problem by manually removing
>>>> >>> libLLVMX86AsmPrinter.so from the list of llvm dependencies to radv. It
>>>> >>> seems that it was the only library registering any command line
>>>> >>> option.
>>>> >>>
>>>> >>> I am not sure who is to blame for this situation. It's possible that
>>>> >>> advertising the new ICD entry point would fix it. LLVM is really
>>>> >>> fragile about its command line registration framework. Last, the
>>>> >>> vulkan loader could try not to unnecessarily dlclose and dlopen the
>>>> >>> ICD library.
>>>> >>>
>>>> >> From a quick read it sounds like something that should be fixed in
>>>> >> LLVM. Namely: if a library sets up a state it should cleanup after
>>>> >> itself.
>>>> >>
>>>> >> That aside, does the radv vulkan driver have unresolved/undefined
>>>> >> symbols (check via $ldd -r libvulkan_foo.so) with your workaround ? If
>>>> >> not we {c,sh}ould drop the library from the link chain. Alternatively
>>>> >> you can try static linking LLVM by using --disable-llvm-shared-libs at
>>>> >> mesa configure time.
>>>> >
>>>> > I see no relocation errors after doing ldd -r with my workaround.
>>>> >
>>>> > I think the problem lays with how llvm-config is called. We enable
>>>> > AMDGPU target and want the AsmPrinter module for it, so we enable
>>>> > asmprinter component. However, this enables asmprinter for all enabled
>>>> > targets. Since X86 target is enabled by default, this brings
>>>> > X86AsmPrinter into the list of libraries.
>>>> >
>>>> > llvmpipe/gallium need the X86 target, but radv could probably be built
>>>> > without it.
>>>> >
>>>> Pardon for missing your reply here.
>>>>
>>>> In general I agree that one shouldn't link with libraries which they don't
>>>> need.
>>>>
>>>> At the same time:

Re: [Mesa-dev] radv vs vulkan loader

2016-12-16 Thread Gustaw Smolarczyk
2016-12-16 17:57 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:
> On 16 December 2016 at 15:27, Gustaw Smolarczyk <wielkie...@gmail.com> wrote:
>> 2016-12-16 14:50 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:
>>>
>>> On 5 October 2016 at 23:12, Gustaw Smolarczyk <wielkie...@gmail.com>
>>> wrote:
>>> > 2016-10-06 0:05 GMT+02:00 Emil Velikov <emil.l.veli...@gmail.com>:
>>> >> On 5 October 2016 at 21:45, Gustaw Smolarczyk <wielkie...@gmail.com>
>>> >> wrote:
>>> >>> Hello,
>>> >>>
>>> >>> I have encountered a following problem while trying to use radv
>>> >>> through LunarG's vulkan loader.
>>> >>>
>>> >>> It seems that the loader dlopens() the ICD library twice. First, it
>>> >>> looks up "vk_icdNegotiateLoaderICDInterfaceVersion" symbol, which
>>> >>> seems to be the new mechanism used to determine the version of ICD
>>> >>> interface. Since radv doesn't provide it, it fall backs to the older
>>> >>> scheme. Unfortunately, it seems that the loader first unloads the ICD
>>> >>> and then loads it again. That causes issues in LLVM libraries' command
>>> >>> line registration which happens while initializing global objects with
>>> >>> constructors. To be more specific, "asan-instrument-assembly"
>>> >>> registered in libLLVMX86AsmPrinter.so registers for the second time
>>> >>> and causes the following message:
>>> >>>
>>> >>>
>>> >>> $ vulkaninfo
>>> >>> ===
>>> >>> VULKAN INFO
>>> >>> ===
>>> >>>
>>> >>> Vulkan API Version: 1.0.26
>>> >>>
>>> >>>
>>> >>> : CommandLine Error: Option 'asan-instrument-assembly' registered more
>>> >>> than once!
>>> >>> LLVM ERROR: inconsistency in registered CommandLine options
>>> >>>
>>> >>> I have "fixed" the problem by manually removing
>>> >>> libLLVMX86AsmPrinter.so from the list of llvm dependencies to radv. It
>>> >>> seems that it was the only library registering any command line
>>> >>> option.
>>> >>>
>>> >>> I am not sure who is to blame for this situation. It's possible that
>>> >>> advertising the new ICD entry point would fix it. LLVM is really
>>> >>> fragile about its command line registration framework. Last, the
>>> >>> vulkan loader could try not to unnecessarily dlclose and dlopen the
>>> >>> ICD library.
>>> >>>
>>> >> From a quick read it sounds like something that should be fixed in
>>> >> LLVM. Namely: if a library sets up a state it should cleanup after
>>> >> itself.
>>> >>
>>> >> That aside, does the radv vulkan driver have unresolved/undefined
>>> >> symbols (check via $ldd -r libvulkan_foo.so) with your workaround ? If
>>> >> not we {c,sh}ould drop the library from the link chain. Alternatively
>>> >> you can try static linking LLVM by using --disable-llvm-shared-libs at
>>> >> mesa configure time.
>>> >
>>> > I see no relocation errors after doing ldd -r with my workaround.
>>> >
>>> > I think the problem lays with how llvm-config is called. We enable
>>> > AMDGPU target and want the AsmPrinter module for it, so we enable
>>> > asmprinter component. However, this enables asmprinter for all enabled
>>> > targets. Since X86 target is enabled by default, this brings
>>> > X86AsmPrinter into the list of libraries.
>>> >
>>> > llvmpipe/gallium need the X86 target, but radv could probably be built
>>> > without it.
>>> >
>>> Pardon for missing your reply here.
>>>
>>> In general I agree that one shouldn't link with libraries which they don't
>>> need.
>>>
>>> At the same time:
>>>  - a library is should tear down all the state that it sets up.
>>> Afaict the LLVM module sets it up "asan-instrument-assembly" thus it
>>> is the one responsible to unregister it.
>>
>>
>> Yes, I also think this should be really fixed in LLVM. There is however an
>> easy work-around for mesa that I have posted 

Re: [Mesa-dev] radv vs vulkan loader

2016-12-16 Thread Gustaw Smolarczyk
2016-12-16 14:50 GMT+01:00 Emil Velikov <emil.l.veli...@gmail.com>:

> On 5 October 2016 at 23:12, Gustaw Smolarczyk <wielkie...@gmail.com>
> wrote:
> > 2016-10-06 0:05 GMT+02:00 Emil Velikov <emil.l.veli...@gmail.com>:
> >> On 5 October 2016 at 21:45, Gustaw Smolarczyk <wielkie...@gmail.com>
> wrote:
> >>> Hello,
> >>>
> >>> I have encountered a following problem while trying to use radv
> >>> through LunarG's vulkan loader.
> >>>
> >>> It seems that the loader dlopens() the ICD library twice. First, it
> >>> looks up "vk_icdNegotiateLoaderICDInterfaceVersion" symbol, which
> >>> seems to be the new mechanism used to determine the version of ICD
> >>> interface. Since radv doesn't provide it, it fall backs to the older
> >>> scheme. Unfortunately, it seems that the loader first unloads the ICD
> >>> and then loads it again. That causes issues in LLVM libraries' command
> >>> line registration which happens while initializing global objects with
> >>> constructors. To be more specific, "asan-instrument-assembly"
> >>> registered in libLLVMX86AsmPrinter.so registers for the second time
> >>> and causes the following message:
> >>>
> >>>
> >>> $ vulkaninfo
> >>> ===
> >>> VULKAN INFO
> >>> ===
> >>>
> >>> Vulkan API Version: 1.0.26
> >>>
> >>>
> >>> : CommandLine Error: Option 'asan-instrument-assembly' registered more
> >>> than once!
> >>> LLVM ERROR: inconsistency in registered CommandLine options
> >>>
> >>> I have "fixed" the problem by manually removing
> >>> libLLVMX86AsmPrinter.so from the list of llvm dependencies to radv. It
> >>> seems that it was the only library registering any command line
> >>> option.
> >>>
> >>> I am not sure who is to blame for this situation. It's possible that
> >>> advertising the new ICD entry point would fix it. LLVM is really
> >>> fragile about its command line registration framework. Last, the
> >>> vulkan loader could try not to unnecessarily dlclose and dlopen the
> >>> ICD library.
> >>>
> >> From a quick read it sounds like something that should be fixed in
> >> LLVM. Namely: if a library sets up a state it should cleanup after
> >> itself.
> >>
> >> That aside, does the radv vulkan driver have unresolved/undefined
> >> symbols (check via $ldd -r libvulkan_foo.so) with your workaround ? If
> >> not we {c,sh}ould drop the library from the link chain. Alternatively
> >> you can try static linking LLVM by using --disable-llvm-shared-libs at
> >> mesa configure time.
> >
> > I see no relocation errors after doing ldd -r with my workaround.
> >
> > I think the problem lays with how llvm-config is called. We enable
> > AMDGPU target and want the AsmPrinter module for it, so we enable
> > asmprinter component. However, this enables asmprinter for all enabled
> > targets. Since X86 target is enabled by default, this brings
> > X86AsmPrinter into the list of libraries.
> >
> > llvmpipe/gallium need the X86 target, but radv could probably be built
> > without it.
> >
> Pardon for missing your reply here.
>
> In general I agree that one shouldn't link with libraries which they don't
> need.
>
> At the same time:
>  - a library is should tear down all the state that it sets up.
> Afaict the LLVM module sets it up "asan-instrument-assembly" thus it
> is the one responsible to unregister it.
>

Yes, I also think this should be really fixed in LLVM. There is however an
easy work-around for mesa that I have posted a few days ago [1].


>
>  - Split shared LLVM wasn't a supported/recommended//good idea, last
> time I've heard.
>

This is how llvm is built on gentoo by default [2]. Because of that it
possibly affects all gentoo users.


> Please use single LLVM shared lib or [separate] static LLVM libs.
>

I will check what happens when I dlopen/dlclose/dlopen both separate and
monolithic shared LLVM libraries.

Regards,
Gustaw

[1]
https://lists.freedesktop.org/archives/mesa-dev/2016-December/137277.html
[2]
https://github.com/gentoo/gentoo/blob/master/sys-devel/llvm/llvm-3.9.0-r1.ebuild#L270


> Thanks
> Emil
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/4] nir: Rewrite lower_regs_to_ssa to use the phi builder

2016-12-15 Thread Gustaw Smolarczyk
15 gru 2016 18:19 "Jason Ekstrand"  napisał(a):

This keeps some of Connor's original code.  However, while I was at it,
I updated this very old pass to a bit more modern NIR.
---
 src/compiler/nir/nir_lower_regs_to_ssa.c | 582
---
 1 file changed, 155 insertions(+), 427 deletions(-)

diff --git a/src/compiler/nir/nir_lower_regs_to_ssa.c
b/src/compiler/nir/nir_lower_regs_to_ssa.c
index 74c1961..3c8fd876 100644
--- a/src/compiler/nir/nir_lower_regs_to_ssa.c
+++ b/src/compiler/nir/nir_lower_regs_to_ssa.c
@@ -26,513 +26,241 @@
  */

 #include "nir.h"
-#include 
+#include "nir_builder.h"
+#include "nir_phi_builder.h"
+#include "nir_vla.h"

-/*
- * Implements the classic to-SSA algorithm described by Cytron et. al. in
- * "Efficiently Computing Static Single Assignment Form and the Control
- * Dependence Graph."
- */
+struct regs_to_ssa_state {
+   nir_shader *shader;

-/* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
+   struct nir_phi_builder_value **values;
+};

-static void
-insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
-{
-   nir_phi_instr *instr = nir_phi_instr_create(mem_ctx);
-
-   instr->dest.reg.reg = reg;
-   struct set_entry *entry;
-   set_foreach(block->predecessors, entry) {
-  nir_block *pred = (nir_block *) entry->key;
-
-  nir_phi_src *src = ralloc(instr, nir_phi_src);
-  src->pred = pred;
-  src->src.is_ssa = false;
-  src->src.reg.base_offset = 0;
-  src->src.reg.indirect = NULL;
-  src->src.reg.reg = reg;
-  exec_list_push_tail(>srcs, >node);
-   }
-
-   nir_instr_insert_before_block(block, >instr);
-}
-
-static void
-insert_phi_nodes(nir_function_impl *impl)
+static bool
+rewrite_src(nir_src *src, void *_state)
 {
-   void *mem_ctx = ralloc_parent(impl);
-
-   unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
-   unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
-
-   /*
-* Since the work flags already prevent us from inserting a node that
has
-* ever been inserted into W, we don't need to use a set to represent W.
-* Also, since no block can ever be inserted into W more than once, we
know
-* that the maximum size of W is the number of basic blocks in the
-* function. So all we need to handle W is an array and a pointer to the
-* next element to be inserted and the next element to be removed.
-*/
-   nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *));
-   unsigned w_start, w_end;
-
-   unsigned iter_count = 0;
-
-   nir_index_blocks(impl);
-
-   foreach_list_typed(nir_register, reg, node, >registers) {
-  if (reg->num_array_elems != 0)
- continue;
-
-  w_start = w_end = 0;
-  iter_count++;
-
-  nir_foreach_def(dest, reg) {
- nir_instr *def = dest->reg.parent_instr;
- if (work[def->block->index] < iter_count)
-W[w_end++] = def->block;
- work[def->block->index] = iter_count;
-  }
+   struct regs_to_ssa_state *state = _state;

-  while (w_start != w_end) {
- nir_block *cur = W[w_start++];
- struct set_entry *entry;
- set_foreach(cur->dom_frontier, entry) {
-nir_block *next = (nir_block *) entry->key;
-
-/*
- * If there's more than one return statement, then the end
block
- * can be a join point for some definitions. However, there are
- * no instructions in the end block, so nothing would use those
- * phi nodes. Of course, we couldn't place those phi nodes
- * anyways due to the restriction of having no instructions in
the
- * end block...
- */
-if (next == impl->end_block)
-   continue;
-
-if (has_already[next->index] < iter_count) {
-   insert_trivial_phi(reg, next, mem_ctx);
-   has_already[next->index] = iter_count;
-   if (work[next->index] < iter_count) {
-  work[next->index] = iter_count;
-  W[w_end++] = next;
-   }
-}
- }
-  }
-   }
+   if (src->is_ssa)
+  return true;

-   free(work);
-   free(has_already);
-   free(W);
-}
+   nir_instr *instr = src->parent_instr;
+   nir_register *reg = src->reg.reg;
+   struct nir_phi_builder_value *value = state->values[reg->index];

-typedef struct {
-   nir_ssa_def **stack;
-   int index;
-   unsigned num_defs; /** < used to add indices to debug names */
-#ifndef NDEBUG
-   unsigned stack_size;
-#endif
-} reg_state;
-
-typedef struct {
-   reg_state *states;
-   void *mem_ctx;
-   nir_instr *parent_instr;
-   nir_if *parent_if;
-   nir_function_impl *impl;
-
-   /* map from SSA value -> original register */
-   struct hash_table *ssa_map;
-} rewrite_state;
-
-static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state)
-{
-   unsigned index = reg->index;
+   nir_ssa_def *def = 

Re: [Mesa-dev] [PATCH v2 2/4] nir: Add foreach_register helper macros

2016-12-15 Thread Gustaw Smolarczyk
15 gru 2016 18:19 "Jason Ekstrand" napisał(a):

---
 src/compiler/nir/nir.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index f1c99ce..4a9fe65 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -373,6 +373,11 @@ typedef struct nir_register {
struct list_head if_uses;
 } nir_register;

+#define nir_foreach_register(var, reg_list) \
+   foreach_list_typed(nir_register, reg, node, reg_list)

Shouldn't the macro definition use the var parameter? Below too.

Regards,
Gustaw

+#define nir_foreach_register_safe(var, reg_list) \
+   foreach_list_typed_safe(nir_register, reg, node, reg_list)
+
 typedef enum {
nir_instr_type_alu,
nir_instr_type_call,
--
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] configure: Strip LLVMX86Asm* dependencies.

2016-12-04 Thread Gustaw Smolarczyk
They are picked automatically by the provided llvm-config flags, but are
not needed.

Fixes loading radv through a vulkan loader.

Cc: 13.0 
---

It's work-around for:
https://lists.freedesktop.org/archives/mesa-dev/2016-October/130765.html

Since there are other people than me dealing with this issue, might
as well propose a patch that work-arounds it for me for these other
people to benefit from.

It's my first patch to the stable release, I hope I did it right.

 configure.ac | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index f62bc61e50..9d191edd97 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2493,6 +2493,13 @@ if test "x$HAVE_RADEON_VULKAN" = "xyes"; then
 radeon_llvm_check "radv" "3" "9" "0"
 fi
 
+strip_unwanted_llvm_libs() {
+# Use \> (marks the end of the word)
+echo `$1` | sed \
+   -e 's/-lLLVMX86AsmPrinter\>//g' \
+   -e 's/-lLLVMX86AsmParser\>//g'
+}
+
 dnl Set LLVM_LIBS - This is done after the driver configuration so
 dnl that drivers can add additional components to LLVM_COMPONENTS.
 dnl Previously, gallium drivers were updating LLVM_LIBS directly
@@ -2505,7 +2512,7 @@ if test "x$MESA_LLVM" != x0; then
 if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
 fi
-LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"
+LLVM_LIBS=`strip_unwanted_llvm_libs "$LLVM_CONFIG --libs 
${LLVM_COMPONENTS}"`
 
 dnl llvm-config may not give the right answer when llvm is a built as a
 dnl single shared library, so we must work the library name out for
-- 
2.11.0.rc2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] radeonsi: check for sampler state CSO corruption

2016-12-02 Thread Gustaw Smolarczyk
2016-12-02 21:39 GMT+01:00 Marek Olšák :

> From: Marek Olšák 
>
> It really happens.
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c | 1 +
>  src/gallium/drivers/radeonsi/si_pipe.h| 3 +++
>  src/gallium/drivers/radeonsi/si_state.c   | 5 +
>  3 files changed, 9 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 8b6e0bb..4f78b1a 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -796,20 +796,21 @@ static void si_bind_sampler_states(struct
> pipe_context *ctx,
> if (!count || shader >= SI_NUM_SHADERS)
> return;
>
> for (i = 0; i < count; i++) {
> unsigned slot = start + i;
>
> if (!sstates[i] ||
> sstates[i] == samplers->views.sampler_states[slot])
> continue;
>
> +   assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
> samplers->views.sampler_states[slot] = sstates[i];
>
> /* If FMASK is bound, don't overwrite it.
>  * The sampler state will be set after FMASK is unbound.
>  */
> if (samplers->views.views[slot] &&
> samplers->views.views[slot]->texture &&
> samplers->views.views[slot]->texture->target !=
> PIPE_BUFFER &&
> ((struct r600_texture*)samplers->views.
> views[slot]->texture)->fmask.size)
> continue;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/
> radeonsi/si_pipe.h
> index 42cbecb..a7985e7 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -130,21 +130,24 @@ struct si_sampler_view {
>  /* [0..7] = image descriptor
>   * [4..7] = buffer descriptor */
> uint32_tstate[8];
> uint32_tfmask_state[8];
> const struct radeon_surf_level  *base_level_info;
> unsignedbase_level;
> unsignedblock_width;
> bool is_stencil_sampler;
>  };
>
> +#define SI_SAMPLER_STATE_MAGIC 0x34f1c35a
> +
>  struct si_sampler_state {
> +   unsignedmagic;
>

How about wrapping it in #ifndef NDEBUG/#endif? Here and the other places.


> uint32_tval[4];
>  };
>
>  struct si_cs_shader_state {
> struct si_compute   *program;
> struct si_compute   *emitted_program;
> unsignedoffset;
> boolinitialized;
> booluses_scratch;
>  };
> diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c
> index 1ccf5b6..7ff9f8c 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3240,20 +3240,21 @@ static void *si_create_sampler_state(struct
> pipe_context *ctx,
> util_memcpy_cpu_to_le32(
> >border_color_map[i],
>
> >border_color,
>
> sizeof(state->border_color));
> sctx->border_color_count++;
> }
>
> border_color_index = i;
> }
> }
>
> +   rstate->magic = SI_SAMPLER_STATE_MAGIC;
> rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
>   S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
>   S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
>   S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
>   S_008F30_DEPTH_COMPARE_FUNC(
> si_tex_compare(state->compare_func)) |
>   
> S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords)
> |
>   S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
>   S_008F30_ANISO_BIAS(max_aniso_ratio) |
>   
> S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map)
> |
>   S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
> @@ -3296,20 +3297,24 @@ static void si_emit_sample_mask(struct si_context
> *sctx, struct r600_atom *atom)
> assert(mask == 0x || sctx->framebuffer.nr_samples > 1 ||
>(mask & 1 && sctx->blitter->running));
>
> radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0,
> 2);
> radeon_emit(cs, mask | (mask << 16));
> radeon_emit(cs, mask | (mask << 16));
>  }
>
>  static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
>  {
> +   struct si_sampler_state *s = state;
> +
> +   assert(s->magic == SI_SAMPLER_STATE_MAGIC);
> +   s->magic = 0;
>

Re: [Mesa-dev] Mesa 13.0.2 release candidate

2016-11-24 Thread Gustaw Smolarczyk
2016-11-24 18:25 GMT+01:00 Emil Velikov :

> Hello list,
>
> The candidate for the Mesa 13.0.2 is now available. Currently we have:
>  - 49 queued
>  - 4 nominated (outstanding)
>  - and 1 rejected patch(es)
>
>
> With this series we have - fixes for vc4, i965 and radeon drivers. In
> addition
> to that PCI IDs for Geminilake have been added to the i965 driver,
>
> The respective Vulkan drivers have seen multiple improvements some of which
> include improved smoketesting and addressed memory leaks.
>
> Races during _mesa_HashWalk() (while using glDeleteFramebuffers alongside
> glTexImage2D) and "#version 0" in GLSL programs have been addressed.
>
> BSD and Hurd users should be above to build the latest code as we no longer
> use PATH_MAX.
>
>
> Take a look at section "Mesa stable queue" for more information.
>
>
> Testing reports/general approval
> 
> Any testing reports (or general approval of the state of the branch) will
> be
> greatly appreciated.
>
> The plan is to have 13.0.2 this Saturday (25th of November), around or
>

Saturday is 26th of November.

Regards,
Gustaw


> shortly after 17:00 GMT.
>
> If you have any questions or suggestions - be that about the current patch
> queue or otherwise, please go ahead.
>
>
> Trivial merge conflicts
> ---
> commit 9d5c3fc12b05d944508ef4e3b1f2ddc4f23c0a82
> Author: Jason Ekstrand 
>
> i965/gs: Allow primitive id to be a system value
>
> (cherry picked from commit a5e88e66e633aaeb587b274d80e21cd46c8ee2cb)
>
>
> commit 8dbdbc21910a6d37c381535186f9e728fff8690d
> Author: Jason Ekstrand 
>
> anv: Handle null in all destructors
>
> (cherry picked from commit 49f08ad77f51cc344e4bfe60ba9f8d9fccfbd753)
>
>
> commit 1809f17bda56d4f9d6385f63a9c4a5df890e3cad
> Author: Kenneth Graunke 
>
> mesa: Drop PATH_MAX usage.
>
> (cherry picked from commit 9bfee7047b70cb0aa026ca9536465762f96cb2b1)
>
> Cheers,
> Emil
>
>
> Mesa stable queue
> -
>
> Nominated (4)
> =
>
> Dave Airlie (1):
>   2de85eb radv: fix texturesamples to handle single sample case
>
> Jason Ekstrand (2):
>   e73d136 vulkan/wsi/x11: Implement FIFO mode.
>
> Note: temporary on hold.
> Commit seems to be a feature and provides no clear indication about the
> bugs
> it addresses. Jason, is this really applicable for stable ?
>
>   054e48e anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push
> constants are dirty
>
> Note: temporary on hold.
> Depends on the refactoring d33e2ad67c3 (anv: Move INTERFACE_DESCRIPTOR_DATA
> setup to the pipeline) which in itself depends on another refactoring
> (cleanup)
> commit 623e1e06d8c and likely others. Jason, any input ?
>
> Kevin Strasser (1):
>   932bb3f vulkan/wsi: Add a thread-safe queue implementation
> Requirement for "Implement FIFO mode above" above.
>
>
> Queued (49)
> ===
>
> Ben Widawsky (3):
>   i965: Add some APL and KBL SKU strings
>   i965: Reorder PCI ID list to match release order
>   i965/glk: Add basic Geminilake support
>
> Dave Airlie (7):
>   radv: fix texturesamples to handle single sample case
>   wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR
>   radv: don't crash on null swapchain destroy.
>   ac/nir/llvm: fix channel in texture gather lowering code.
>   radv: make sure to flush input attachments correctly.
>   radv: fix image view creation for depth and stencil only
>   radv: spir-v allows texture size query with and without lod.
>
> Eduardo Lima Mitev (2):
>   vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR
>   vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePres
> entModesKHR
>
> Eduardo, I've picked these two since they are perfectly reasonable
> stable material.
> Let me know if you think that we should keep or drop them.
>
> Emil Velikov (3):
>   docs: add sha256 checksums for 13.0.1
>   cherry-ignore: add reverted LLVM_LIBDIR patch
>   anv: fix enumeration of properties
>
> Eric Anholt (3):
>   vc4: Don't abort when a shader compile fails.
>   vc4: Clamp the shadow comparison value.
>   vc4: Fix register class handling of DDX/DDY arguments.
>
> Gwan-gyeong Mun (2):
>   util/disk_cache: close a previously opened handle in disk_cache_put
> (v2)
>   anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL
>
> Iago Toral Quiroga (1):
>   anv/format: handle unsupported formats properly
>
> Ian Romanick (2):
>   glcpp: Handle '#version 0' and other invalid values
>   glsl: Parse 0 as a preprocessor INTCONSTANT
>
> Jason Ekstrand (14):
>   anv/gen8: Stall when needed in Cmd(Set|Reset)Event
>   anv/wsi: Set the fence to signaled in AcquireNextImageKHR
>   anv: Rework fences
>   vulkan/wsi/wayland: Include pthread.h
>   vulkan/wsi/wayland: Clean up some error handling 

Re: [Mesa-dev] [PATCH] [rfc] radv: add initial prime support.

2016-11-23 Thread Gustaw Smolarczyk
2016-11-23 6:28 GMT+01:00 Dave Airlie :

> From: Dave Airlie 
>
> This is kind of a gross hacks, but vulkan doesn't specify anything
> but it would be nice to let people with prime systems at least
> see some stuff rendering for now.
>
> This creates a linear shadow image in GART that gets blitted to at the
> image transition.
>
> Now ideally:
> this would use SDMA - but we want to use SDMA for transfer queues
> maybe we don't expose a transfer queue on prime cards who knows.
>

I might not be that much familiar with Vulkan, but I believe you can
specify the transfer queue as a present queue and then the Vulkan
applications would be required to perform the present operation on this
queue. Wouldn't that be enough for the prime to work?

Regards,
Gustaw


>
> we wouldn't have to add two pointers to every image, but my other
> attempts at this were ugly.
>
> Is the image transition the proper place to hack this in? not
> really sure anywhere else is appropriate.
>
> It also relies on DRI_PRIME=1 being set, I should be able
> to work this out somehow automatically I think, probably getting
> a DRI3 fd from the X server and doing drmGetDevice on it, and
> comparing where we end up.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c |  18 +++
>  src/amd/vulkan/radv_device.c |   3 ++
>  src/amd/vulkan/radv_meta.h   |   2 +
>  src/amd/vulkan/radv_meta_copy.c  |  31 +++
>  src/amd/vulkan/radv_private.h|   4 ++
>  src/amd/vulkan/radv_wsi.c| 111 ++
> -
>  6 files changed, 144 insertions(+), 25 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_
> buffer.c
> index a2d55833..4432afc 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -2296,6 +2296,20 @@ static void radv_handle_dcc_image_transition(struct
> radv_cmd_buffer *cmd_buffer,
> }
>  }
>
> +static void radv_handle_prime_image_transition(struct radv_cmd_buffer
> *cmd_buffer,
> +  struct radv_image *image,
> +  VkImageLayout src_layout,
> +  VkImageLayout dst_layout,
> +  VkImageSubresourceRange
> range,
> +  VkImageAspectFlags
> pending_clears)
> +{
> +   cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_
> FRAMEBUFFER;
> +   si_emit_cache_flush(cmd_buffer);
> +   radv_blit_to_prime_linear(cmd_buffer, image);
> +   cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_
> FRAMEBUFFER;
> +   si_emit_cache_flush(cmd_buffer);
> +}
> +
>  static void radv_handle_image_transition(struct radv_cmd_buffer
> *cmd_buffer,
>  struct radv_image *image,
>  VkImageLayout src_layout,
> @@ -2314,6 +2328,10 @@ static void radv_handle_image_transition(struct
> radv_cmd_buffer *cmd_buffer,
> if (image->surface.dcc_size)
> radv_handle_dcc_image_transition(cmd_buffer, image,
> src_layout,
>  dst_layout, range,
> pending_clears);
> +
> +   if (image->prime_image && dst_layout ==
> VK_IMAGE_LAYOUT_PRESENT_SRC_KHR)
> +   radv_handle_prime_image_transition(cmd_buffer, image,
> src_layout,
> +  dst_layout, range,
> pending_clears);
>  }
>
>  void radv_CmdPipelineBarrier(
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index c639d53..b21447f 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -105,6 +105,9 @@ radv_physical_device_init(struct radv_physical_device
> *device,
> }
> drmFreeVersion(version);
>
> +   if (getenv("DRI_PRIME"))
> +   device->is_different_gpu = true;
> +
> device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
> device->instance = instance;
> assert(strlen(path) < ARRAY_SIZE(device->path));
> diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
> index 97d020c..e43a0e7 100644
> --- a/src/amd/vulkan/radv_meta.h
> +++ b/src/amd/vulkan/radv_meta.h
> @@ -186,6 +186,8 @@ void radv_meta_resolve_compute_image(struct
> radv_cmd_buffer *cmd_buffer,
>  uint32_t region_count,
>  const VkImageResolve *regions);
>
> +void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
> +  struct radv_image *image);
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_
> copy.c
> index 4c01eb7..3fd8d0c 100644
> --- a/src/amd/vulkan/radv_meta_copy.c
> +++ b/src/amd/vulkan/radv_meta_copy.c
> @@ -397,3 +397,34 

Re: [Mesa-dev] [PATCH] mesa: if MESA_DEBUG=context, create a debug context

2016-11-17 Thread Gustaw Smolarczyk
2016-11-16 0:05 GMT+01:00 Brian Paul :

> A number of drivers report useful debug/perf information accessible
> through GL_ARB_debug_output and with debug contexts (i.e. setting the
> GLX_CONTEXT_DEBUG_BIT_ARB flag).  But few applications actually use
> the GL_ARB_debug_output extension.
>
> This change lets one set the MESA_DEBUG env var to "context" to force-set
> a debug context and report debug/perf messages to stderr (or whatever
> file MESA_LOG_FILE is set to).  This is a useful debugging tool.
>
> The small change in st_api_create_context() is needed so that
> st_update_debug_callback() gets called to hook up the driver debug
> callbacks when ST_CONTEXT_FLAG_DEBUG was not set, but MESA_DEBUG=context.
> ---
>  src/mesa/main/debug.c   |  3 ++-
>  src/mesa/main/debug_output.c| 28 
>  src/mesa/main/mtypes.h  |  3 ++-
>  src/mesa/state_tracker/st_manager.c |  2 ++
>  4 files changed, 34 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/main/debug.c b/src/mesa/main/debug.c
> index 5ca7d5c..3471b26 100644
> --- a/src/mesa/main/debug.c
> +++ b/src/mesa/main/debug.c
> @@ -189,7 +189,8 @@ set_debug_flags(const char *str)
>{ "silent", DEBUG_SILENT }, /* turn off debug messages */
>{ "flush", DEBUG_ALWAYS_FLUSH }, /* flush after each drawing
> command */
>{ "incomplete_tex", DEBUG_INCOMPLETE_TEXTURE },
> -  { "incomplete_fbo", DEBUG_INCOMPLETE_FBO }
> +  { "incomplete_fbo", DEBUG_INCOMPLETE_FBO },
> +  { "context", DEBUG_CONTEXT } /* force set GL_CONTEXT_FLAG_DEBUG_BIT
> flag */
> };
> GLuint i;
>
> diff --git a/src/mesa/main/debug_output.c b/src/mesa/main/debug_output.c
> index 85f64bd..290d626 100644
> --- a/src/mesa/main/debug_output.c
> +++ b/src/mesa/main/debug_output.c
> @@ -99,6 +99,7 @@ struct gl_debug_state
> const void *CallbackData;
> GLboolean SyncOutput;
> GLboolean DebugOutput;
> +   GLboolean LogToStderr;
>
> struct gl_debug_group *Groups[MAX_DEBUG_GROUP_STACK_DEPTH];
> struct gl_debug_message GroupMessages[MAX_DEBUG_GROUP_STACK_DEPTH];
> @@ -617,6 +618,17 @@ debug_log_message(struct gl_debug_state *debug,
> GLint nextEmpty;
> struct gl_debug_message *emptySlot;
>
> +   if (debug->LogToStderr) {
> +  /* since 'buf' is not null terminated, make a copy and add \0 */
> +  char *buf2 = malloc(len + 1);
> +  if (buf2) {
> + memcpy(buf2, buf, len);
> + buf2[len] = 0;
> + _mesa_log("Mesa debug output: %s\n", buf2);
> + free(buf2);
> +  }
> +   }
> +
> assert(len < MAX_DEBUG_MESSAGE_LENGTH);
>
> if (log->NumMessages == MAX_DEBUG_LOGGED_MESSAGES)
> @@ -845,6 +857,7 @@ log_msg_locked_and_unlock(struct gl_context *ctx,
> }
>
> if (ctx->Debug->Callback) {
> +  /* Call the user's callback function */
>GLenum gl_source = debug_source_enums[source];
>GLenum gl_type = debug_type_enums[type];
>GLenum gl_severity = debug_severity_enums[severity];
> @@ -860,6 +873,7 @@ log_msg_locked_and_unlock(struct gl_context *ctx,
>callback(gl_source, gl_type, id, gl_severity, len, buf, data);
> }
> else {
> +  /* add debug message to queue */
>debug_log_message(ctx->Debug, source, type, id, severity, len, buf);
>_mesa_unlock_debug_state(ctx);
> }
> @@ -1267,6 +1281,20 @@ void
>  _mesa_init_debug_output(struct gl_context *ctx)
>  {
> mtx_init(>DebugMutex, mtx_plain);
> +
> +   if (MESA_DEBUG_FLAGS & DEBUG_CONTEXT) {
> +  /* If the MESA_DEBUG env is set to "context", we'll turn on the
> +   * GL_CONTEXT_FLAG_DEBUG_BIT context flag and log debug output
> +   * messages to stderr (or whatever MESA_LOG_FILE points at).
> +   */
> +  struct gl_debug_state *debug = _mesa_lock_debug_state(ctx);
> +  if (!debug) {
> + return;
> +  }
> +  debug->DebugOutput = GL_TRUE;
> +  debug->LogToStderr = GL_TRUE;
> +  ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
>

Don't we want to perform an _mesa_unlock_debug_state(ctx); here?

Regards,
Gustaw


> +   }
>  }
>
>
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 5e98040..66bc07e 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -4679,7 +4679,8 @@ enum _debug
> DEBUG_SILENT = (1 << 0),
> DEBUG_ALWAYS_FLUSH  = (1 << 1),
> DEBUG_INCOMPLETE_TEXTURE = (1 << 2),
> -   DEBUG_INCOMPLETE_FBO = (1 << 3)
> +   DEBUG_INCOMPLETE_FBO = (1 << 3),
> +   DEBUG_CONTEXT= (1 << 4)
>  };
>
>  #ifdef __cplusplus
> diff --git a/src/mesa/state_tracker/st_manager.c
> b/src/mesa/state_tracker/st_manager.c
> index 0f71e63..c3d8286 100644
> --- a/src/mesa/state_tracker/st_manager.c
> +++ b/src/mesa/state_tracker/st_manager.c
> @@ -680,7 +680,9 @@ st_api_create_context(struct st_api *stapi, struct
> st_manager *smapi,
>}
>
>

Re: [Mesa-dev] [PATCH] radv: fix GetFenceStatus for signaled fences

2016-11-09 Thread Gustaw Smolarczyk
Looks like a follow-up to 24815bd7b3b50d2e634b56ca607856ecf08ec4ee

Note sure if my rb is helpful, but:

Reviewed-by: Gustaw Smolarczyk <wielkie...@gmail.com>

2016-11-09 8:43 GMT+01:00 Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>:

> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
>
> On Wed, Nov 9, 2016 at 2:22 AM, Dave Airlie <airl...@gmail.com> wrote:
> > From: Dave Airlie <airl...@redhat.com>
> >
> > if a fence is created pre-signaled we should return that
> > in GetFenceStatus even if it hasn't been submitted.
> >
> > Signed-off-by: Dave Airlie <airl...@redhat.com>
> > ---
> >  src/amd/vulkan/radv_device.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> > index fdb6db9..214af5f 100644
> > --- a/src/amd/vulkan/radv_device.c
> > +++ b/src/amd/vulkan/radv_device.c
> > @@ -1202,6 +1202,8 @@ VkResult radv_GetFenceStatus(VkDevice _device,
> VkFence _fence)
> > RADV_FROM_HANDLE(radv_device, device, _device);
> > RADV_FROM_HANDLE(radv_fence, fence, _fence);
> >
> > +   if (fence->signalled)
> > +   return VK_SUCCESS;
> > if (!fence->submitted)
> > return VK_NOT_READY;
> >
> > --
> > 2.7.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] nir: add conditional discard optimisation

2016-11-03 Thread Gustaw Smolarczyk
I am not really that much familiar with nir, so I apologize if what I write
below is wrong.

What if we had something like:

a = 0;
if (x) {
  discard_if(y);
  a = 1;
}

Then if (x && !y), we must not discard and both at the same time set a to
1. Your transformation would probably change it to:

a = 0;
discard_if(x && y);

Which is incorrect.

I think you have to ensure (if the intrinsic is discard_if) that discard_if
is the only instruction inside the then block. Or just remove the intrinsic
from the then block and leave the rest of the if alone.

Regards,
Gustaw

2016-11-03 4:28 GMT+01:00 Dave Airlie :

> From: Dave Airlie 
>
> This is ported from GLSL and converts
>
> if (cond)
> discard;
>
> into
> discard_if(cond);
>
> This removes a block, but also is needed by radv
> to workaround a bug in the LLVM backend.
>
> v2: handle if (a) discard_if(b) (nha)
> cleanup and drop pointless loop (Matt)
> make sure there are no dependent phis (Eric)
>
> Signed-off-by: Dave Airlie 
> ---
>  src/compiler/Makefile.sources  |   1 +
>  src/compiler/nir/nir.h |   2 +
>  src/compiler/nir/nir_opt_conditional_discard.c | 124
> +
>  3 files changed, 127 insertions(+)
>  create mode 100644 src/compiler/nir/nir_opt_conditional_discard.c
>
> diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
> index 669c499..b710cbd 100644
> --- a/src/compiler/Makefile.sources
> +++ b/src/compiler/Makefile.sources
> @@ -228,6 +228,7 @@ NIR_FILES = \
> nir/nir_metadata.c \
> nir/nir_move_vec_src_uses_to_dest.c \
> nir/nir_normalize_cubemap_coords.c \
> +   nir/nir_opt_conditional_discard.c \
> nir/nir_opt_constant_folding.c \
> nir/nir_opt_copy_propagate.c \
> nir/nir_opt_cse.c \
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 9264763..2a77139 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -2531,6 +2531,8 @@ bool nir_opt_remove_phis(nir_shader *shader);
>
>  bool nir_opt_undef(nir_shader *shader);
>
> +bool nir_opt_conditional_discard(nir_shader *shader);
> +
>  void nir_sweep(nir_shader *shader);
>
>  nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
> diff --git a/src/compiler/nir/nir_opt_conditional_discard.c
> b/src/compiler/nir/nir_opt_conditional_discard.c
> new file mode 100644
> index 000..2259a14
> --- /dev/null
> +++ b/src/compiler/nir/nir_opt_conditional_discard.c
> @@ -0,0 +1,124 @@
> +/*
> + * Copyright © 2016 Red Hat
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without
> limitation
> + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> next
> + * paragraph) shall be included in all copies or substantial portions of
> the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir.h"
> +#include "nir_builder.h"
> +
> +/** @file nir_opt_conditional_discard.c
> + *
> + * Handles optimization of lowering if (cond) discard to discard_if(cond).
> + */
> +
> +static bool
> +nir_opt_conditional_discard_block(nir_block *block, void *mem_ctx)
> +{
> +   nir_builder bld;
> +
> +   if (nir_cf_node_is_first(>cf_node))
> +  return false;
> +
> +   nir_cf_node *prev_node = nir_cf_node_prev(>cf_node);
> +   if (prev_node->type != nir_cf_node_if)
> +  return false;
> +
> +   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
> +   nir_block *then_block = nir_if_first_then_block(if_stmt);
> +   nir_block *else_block = nir_if_first_else_block(if_stmt);
> +
> +   /* check there is only one else block and it is empty */
> +   if (nir_if_last_else_block(if_stmt) != else_block)
> +  return false;
> +   if (!exec_list_is_empty(_block->instr_list))
> +  return false;
> +
> +   /* check there is only one then block and it has instructions in it */
> +   if (nir_if_last_then_block(if_stmt) != then_block)
> +  return false;
> +   if (exec_list_is_empty(_block->instr_list))
> +  return false;
> +
> +   /*
> +* make sure no 

[Mesa-dev] [PATCH] radv/winsys: Fail early on overgrown cs.

2016-10-13 Thread Gustaw Smolarczyk
When !use_ib_bos, we can't easily chain ibs one to another. If the
required cs size grows over 1Mi - 8 dwords just fail the cs so that we
won't assert-fail in radv_amdgpu_winsys_cs_submit later on.
---
Please, push the patch after it has been reviewed.

 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 41dfcd3..b8558fa 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -187,12 +187,22 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs 
*_cs, size_t min_size)
}
 
if (!cs->ws->use_ib_bos) {
-   uint64_t ib_size = MAX2((cs->base.cdw + min_size) * 4 + 16,
-   cs->base.max_dw * 4 * 2);
-   uint32_t *new_buf = realloc(cs->base.buf, ib_size);
+   const uint64_t limit_dws = 0x8;
+   uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
+  MIN2(cs->base.max_dw * 2, limit_dws));
+
+   /* The total ib size cannot exceed limit_dws dwords. */
+   if (ib_dws > limit_dws)
+   {
+   cs->failed = true;
+   cs->base.cdw = 0;
+   return;
+   }
+
+   uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
if (new_buf) {
cs->base.buf = new_buf;
-   cs->base.max_dw = ib_size / 4;
+   cs->base.max_dw = ib_dws;
} else {
cs->failed = true;
cs->base.cdw = 0;
-- 
2.10.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >