date:20170331

I've had these patches lying around for a while now waiting for the extension
to go public.  It went public, so here they are.  This series adds all of the
core window system bits for doing a trivial implementation on X11 and a real
implementation on Wayland.

I believe I've kept radv building with these patches but it would be nice if
Dave or Bas could double-check.  Most of the work is in the core, so it should
be trivial to hook up.

Cc: Dave Airlie 

Jason Ekstrand (5):
  vulkan/wsi: Fix some line wrapping
  vulkan: Bump the header and XML to the latest public version
  vulkan/wsi: Plumb present regions through the common code
  vulkan/wsi/wayland: Pass damage through to the compositor
  anv: Implement VK_KHR_incremental_present

 include/vulkan/vulkan.h |  171 +++--
 src/amd/vulkan/radv_wsi.c   |3 +-
 src/intel/vulkan/anv_device.c   |6 +-
 src/intel/vulkan/anv_entrypoints_gen.py |1 +
 src/intel/vulkan/anv_wsi.c  |   11 +-
 src/vulkan/registry/vk.xml  | 1098 ++-
 src/vulkan/wsi/wsi_common.h |8 +-
 src/vulkan/wsi/wsi_common_wayland.c |   19 +-
 src/vulkan/wsi/wsi_common_x11.c |3 +-
 9 files changed, 799 insertions(+), 521 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] vulkan/wsi: Fix some line wrapping

---
 src/vulkan/wsi/wsi_common.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 2482876..8227c1e 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -70,7 +70,10 @@ struct wsi_swapchain {
   uint32_t *image_index);
VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
  uint32_t image_index);
-   void (*get_image_and_linear)(struct wsi_swapchain *swapchain, int 
imageIndex, VkImage *image, VkImage *linear_image);
+   void (*get_image_and_linear)(struct wsi_swapchain *swapchain,
+int imageIndex,
+VkImage *image,
+VkImage *linear_image);
 };
 
 struct wsi_interface {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] vulkan/wsi/wayland: Pass damage through to the compositor

---
 src/vulkan/wsi/wsi_common_wayland.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/vulkan/wsi/wsi_common_wayland.c 
b/src/vulkan/wsi/wsi_common_wayland.c
index 26a137c..78ddea2 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -488,6 +488,7 @@ struct wsi_wl_swapchain {
struct wsi_wl_display *  display;
struct wl_event_queue *  queue;
struct wl_surface *  surface;
+   uint32_t surface_version;
 
VkExtent2D   extent;
VkFormat vk_format;
@@ -594,7 +595,19 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain 
*wsi_chain,
 
assert(image_index < chain->base.image_count);
wl_surface_attach(chain->surface, chain->images[image_index].buffer, 0, 0);
-   wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX);
+
+   if (chain->surface_version >= 4 && damage &&
+   damage->pRectangles && damage->rectangleCount > 0) {
+  for (unsigned i = 0; i < damage->rectangleCount; i++) {
+ const VkRectLayerKHR *rect = &damage->pRectangles[i];
+ assert(rect->layer == 0);
+ wl_surface_damage(chain->surface,
+   rect->offset.x, rect->offset.y,
+   rect->extent.width, rect->extent.height);
+  }
+   } else {
+  wl_surface_damage(chain->surface, 0, 0, INT32_MAX, INT32_MAX);
+   }
 
if (chain->base.present_mode == VK_PRESENT_MODE_FIFO_KHR) {
   struct wl_callback *frame = wl_surface_frame(chain->surface);
@@ -731,6 +744,7 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase 
*icd_surface,
chain->base.image_count = num_images;
chain->base.needs_linear_copy = false;
chain->surface = surface->surface;
+   chain->surface_version = wl_proxy_get_version((void *)surface->surface);
chain->extent = pCreateInfo->imageExtent;
chain->vk_format = pCreateInfo->imageFormat;
chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha);
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] vulkan/wsi: Plumb present regions through the common code

---
 src/amd/vulkan/radv_wsi.c   | 3 ++-
 src/intel/vulkan/anv_wsi.c  | 3 ++-
 src/vulkan/wsi/wsi_common.h | 3 ++-
 src/vulkan/wsi/wsi_common_wayland.c | 3 ++-
 src/vulkan/wsi/wsi_common_x11.c | 3 ++-
 5 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index d2b760e..8b66095 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -485,7 +485,8 @@ VkResult radv_QueuePresentKHR(
fence->submitted = true;
 
result = swapchain->queue_present(swapchain,
- 
pPresentInfo->pImageIndices[i]);
+ 
pPresentInfo->pImageIndices[i],
+ NULL);
/* TODO: What if one of them returns OUT_OF_DATE? */
if (result != VK_SUCCESS)
return result;
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 4236ee1..45bf896 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -380,7 +380,8 @@ VkResult anv_QueuePresentKHR(
   anv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
 
   item_result = swapchain->queue_present(swapchain,
- pPresentInfo->pImageIndices[i]);
+ pPresentInfo->pImageIndices[i],
+ NULL);
   /* TODO: What if one of them returns OUT_OF_DATE? */
   if (pPresentInfo->pResults != NULL)
  pPresentInfo->pResults[i] = item_result;
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 8227c1e..5e77518 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -69,7 +69,8 @@ struct wsi_swapchain {
   uint64_t timeout, VkSemaphore semaphore,
   uint32_t *image_index);
VkResult (*queue_present)(struct wsi_swapchain *swap_chain,
- uint32_t image_index);
+ uint32_t image_index,
+ const VkPresentRegionKHR *damage);
void (*get_image_and_linear)(struct wsi_swapchain *swapchain,
 int imageIndex,
 VkImage *image,
diff --git a/src/vulkan/wsi/wsi_common_wayland.c 
b/src/vulkan/wsi/wsi_common_wayland.c
index 2e47183..26a137c 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -578,7 +578,8 @@ static const struct wl_callback_listener frame_listener = {
 
 static VkResult
 wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
-   uint32_t image_index)
+   uint32_t image_index,
+   const VkPresentRegionKHR *damage)
 {
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
 
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 4afd606..c399aae 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -860,7 +860,8 @@ x11_acquire_next_image(struct wsi_swapchain *anv_chain,
 
 static VkResult
 x11_queue_present(struct wsi_swapchain *anv_chain,
-  uint32_t image_index)
+  uint32_t image_index,
+  const VkPresentRegionKHR *damage)
 {
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 5/5] anv: Implement VK_KHR_incremental_present

---
 src/intel/vulkan/anv_device.c   |  6 +-
 src/intel/vulkan/anv_entrypoints_gen.py |  1 +
 src/intel/vulkan/anv_wsi.c  | 10 +-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 4e4fa19..37b6f72 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -270,7 +270,11 @@ static const VkExtensionProperties device_extensions[] = {
{
   .extensionName = VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
   .specVersion = 1,
-   }
+   },
+   {
+  .extensionName = VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME,
+  .specVersion = 1,
+   },
 };
 
 static void *
diff --git a/src/intel/vulkan/anv_entrypoints_gen.py 
b/src/intel/vulkan/anv_entrypoints_gen.py
index 203df45..1a7d753 100644
--- a/src/intel/vulkan/anv_entrypoints_gen.py
+++ b/src/intel/vulkan/anv_entrypoints_gen.py
@@ -35,6 +35,7 @@ MAX_API_VERSION = 1.0
 SUPPORTED_EXTENSIONS = [
 'VK_KHR_descriptor_update_template',
 'VK_KHR_get_physical_device_properties2',
+'VK_KHR_incremental_present',
 'VK_KHR_maintenance1',
 'VK_KHR_push_descriptor',
 'VK_KHR_sampler_mirror_clamp_to_edge',
diff --git a/src/intel/vulkan/anv_wsi.c b/src/intel/vulkan/anv_wsi.c
index 45bf896..6a0203a 100644
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -24,6 +24,7 @@
 #include "anv_private.h"
 #include "wsi_common.h"
 #include "vk_format_info.h"
+#include "util/vk_util.h"
 
 static const struct wsi_callbacks wsi_cbs = {
.get_phys_device_format_properties = anv_GetPhysicalDeviceFormatProperties,
@@ -355,10 +356,17 @@ VkResult anv_QueuePresentKHR(
ANV_FROM_HANDLE(anv_queue, queue, _queue);
VkResult result = VK_SUCCESS;
 
+   const VkPresentRegionsKHR *regions =
+  vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
+
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
   ANV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
   VkResult item_result;
 
+  const VkPresentRegionKHR *region = NULL;
+  if (regions && regions->pRegions)
+ region = &regions->pRegions[i];
+
   assert(anv_device_from_handle(swapchain->device) == queue->device);
 
   if (swapchain->fences[0] == VK_NULL_HANDLE) {
@@ -381,7 +389,7 @@ VkResult anv_QueuePresentKHR(
 
   item_result = swapchain->queue_present(swapchain,
  pPresentInfo->pImageIndices[i],
- NULL);
+ region);
   /* TODO: What if one of them returns OUT_OF_DATE? */
   if (pPresentInfo->pResults != NULL)
  pPresentInfo->pResults[i] = item_result;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 6/6] nvc0: Add support for NV_fill_rectangle for the GM200+

2017-03-31 Thread Ilia Mirkin

I've pushed the amended series which fixed these trivial comments.
Thanks for your contribution!

On Thu, Mar 30, 2017 at 7:23 PM, Ilia Mirkin  wrote:
> On Thu, Mar 30, 2017 at 5:40 PM, Lyude  wrote:
>> This enables support for the GL_NV_fill_rectangle extension on the
>> GM200+ for Desktop OpenGL.
>>
>> Signed-off-by: Lyude 
>>
>> Changes since v1:
>> - Fix commit message
>> - Add note to reldocs
>>
>> Signed-off-by: Lyude 
>> ---
>>  docs/relnotes/17.1.0.html| 1 +
>>  src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h   | 3 +++
>>  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 3 ++-
>>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c| 4 
>>  src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h | 2 +-
>>  5 files changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
>> index ada1e38..e0014bb 100644
>> --- a/docs/relnotes/17.1.0.html
>> +++ b/docs/relnotes/17.1.0.html
>> @@ -48,6 +48,7 @@ Note: some of the new features are only available with 
>> certain drivers.
>>  GL_ARB_transform_feedback2 on i965/gen6
>>  GL_ARB_transform_feedback_overflow_query on i965/gen6+
>>  Geometry shaders enabled on swr
>> +GL_NV_fill_rectangle on nvc0
>
> Sort please.
>
>>  
>>
>>  Bug fixes
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h 
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
>> index 1be5952..accde94 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
>> @@ -772,6 +772,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
>> SOFTWARE.
>>  #define NVC0_3D_VTX_ATTR_MASK_UNK0DD0_ALT__ESIZE   0x0004
>>  #define NVC0_3D_VTX_ATTR_MASK_UNK0DD0_ALT__LEN 0x0004
>>
>> +#define NVC0_3D_FILL_RECTANGLE 0x113c
>> +#define NVC0_3D_FILL_RECTANGLE_ENABLE  0x0002
>> +
>>  #define NVC0_3D_UNK11400x1140
>>
>>  #define NVC0_3D_UNK11440x1144
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> index 945101b..f0e4e12 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> @@ -256,6 +256,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
>> pipe_cap param)
>>return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 1 : 0;
>> case PIPE_CAP_TGSI_FS_FBFETCH:
>>return class_3d >= NVE4_3D_CLASS; /* needs testing on fermi */
>> +   case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
>> +  return (class_3d >= GM200_3D_CLASS);
>
> Still unnecessary parens.
>
>>
>> /* unsupported caps */
>> case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
>> @@ -285,7 +287,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
>> pipe_cap param)
>> case PIPE_CAP_NATIVE_FENCE_FD:
>> case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
>> case PIPE_CAP_INT64_DIVMOD:
>> -   case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
>>return 0;
>>
>> case PIPE_CAP_VENDOR_ID:
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> index 32233a5..803843b 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> @@ -261,6 +261,10 @@ nvc0_rasterizer_state_create(struct pipe_context *pipe,
>>  SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
>>  SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
>>
>> +SB_IMMED_3D(so, FILL_RECTANGLE,
>> +cso->fill_front == PIPE_POLYGON_MODE_FILL_RECTANGLE ?
>> +NVC0_3D_FILL_RECTANGLE_ENABLE : 0);
>
> Oh, I forgot to mention this last time, but ... this will generate
> errors on pre-GM200 GPUs. Please stick this in a if (foo->class_3d >=
> GM204_3D_CLASS)
>
>> +
>>  SB_BEGIN_3D(so, MACRO_POLYGON_MODE_FRONT, 1);
>>  SB_DATA(so, nvgl_polygon_mode(cso->fill_front));
>>  SB_BEGIN_3D(so, MACRO_POLYGON_MODE_BACK, 1);
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h 
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
>> index 054b1e7..3006ed6 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
>> @@ -23,7 +23,7 @@ struct nvc0_blend_stateobj {
>>  struct nvc0_rasterizer_stateobj {
>> struct pipe_rasterizer_state pipe;
>> int size;
>> -   uint32_t state[42];
>> +   uint32_t state[43];
>>  };
>>
>>  struct nvc0_zsa_stateobj {
>> --
>> 2.9.3
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] intel: genxml: fix out of tree builds

2017-03-31 Thread Jonathan Gray

On Fri, Mar 31, 2017 at 04:53:21PM +0100, Emil Velikov wrote:
> On 31 March 2017 at 16:33, Lionel Landwerlin
>  wrote:
> > On 31/03/17 16:21, Emil Velikov wrote:
> >>
> >> On 31 March 2017 at 14:40, Lionel Landwerlin
> >>  wrote:
> >>>
> >>> v2: use Emil's recommendation
> >>>  change rule to closer to genxml/genX_bits.h
> >>>
> >>> Signed-off-by: Lionel Landwerlin 
> >>> ---
> >>>   src/intel/Makefile.genxml.am | 4 ++--
> >>>   1 file changed, 2 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
> >>> index 05a12f8f77..e34536d37c 100644
> >>> --- a/src/intel/Makefile.genxml.am
> >>> +++ b/src/intel/Makefile.genxml.am
> >>> @@ -34,9 +34,9 @@ $(GENXML_GENERATED_PACK_FILES):
> >>> genxml/gen_pack_header.py
> >>>  $(MKDIR_GEN)
> >>>  $(PYTHON_GEN) $(srcdir)/genxml/gen_pack_header.py $< > $@ ||
> >>> ($(RM) $@; false)
> >>>
> >>> -genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
> >>> +genxml/genX_xml.h: genxml/gen_zipped_file.py $(GENXML_XML_FILES)
> >>>  $(MKDIR_GEN)
> >>> -   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py
> >>> $(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
> >>> +   $(PYTHON_GEN) $< $(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM)
> >>> $@; false)
> >>>
> >> This is not what I recommended :-( If my suggestion is unclear or
> >> buggy please say so.
> >>
> >> -Emil
> >>
> > Replacing "$(srcdir)/genxml/gen_zipped_file.py" by "$<" isn't right?
> > I think that's the only difference from what was in your email.
> 
> Barring the genX_bits.h case, we expand the script name throughout mesa.
> This way you don't really care the way the dependencies are listed,
> whether a new one gets added, etc.
> 
> -Emil

genX_bits.h should not be used as an example.

It uses $< in a non-suffix rule, a GNU extension.
It uses addprefix a GNU builtin.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 07/18] anv/allocator: Add a BO cache

2017-03-31 Thread Chad Versace

On Wed 15 Mar 2017, Jason Ekstrand wrote:
> This cache allows us to easily ensure that we have a unique anv_bo for
> each gem handle.  We'll need this in order to support multiple-import of
> memory objects and semaphores.
> 
> v2 (Jason Ekstrand):
>  - Reject BO imports if the size doesn't match the prime fd size as
>reported by lseek().
> 
> v3 (Jason Ekstrand):
>  - Fix reference counting around cache_release (Chris Wilson)
>  - Move the mutex_unlock() later in cache_release
> ---
>  src/intel/vulkan/anv_allocator.c | 261 
> +++
>  src/intel/vulkan/anv_private.h   |  26 
>  2 files changed, 287 insertions(+)


> +static uint32_t
> +hash_uint32_t(const void *key)
> +{
> +   return (uint32_t)(uintptr_t)key;
> +}

This hash function does not appear hashy.

If I correctly understand the details of Mesa's struct hash_table,
choosing the identity function for the hash function causes unwanted
clustering when inserting consecutive gem handles.  Since the kernel does
allocate gem handles consecutively, the problem is real.

For proof, consider the following:

   - Suppose a long-running process (maybe the compositor) has thrashed on the
 hash table long enough that its bucket count
 is ht->size = hash_sizes[7].size = 283. Suppose a spike of
 compositor activity raises the hash table's density to about 0.5.
 And suppose the hash table buckets are filled with the consecutive gem
 handles
 
 {0, 0, 0, 0, 4, 5, 6, 7, 8, 9, ..., 127, 128, 0, 0, 0, ..., 0 }
 
 The exact density is (128 - 4 + 1) / 283 = 0.4417.

   - Next, some other in-process activity (maybe OpenGL) generated
 a lot of gem handles after Vulkan's most recently imported
 gem handle, 128.

   - Next, a new compositor client appears. When the compositor imports
 the new client's dma_buf, PRIME_FD_TO_HANDLE returns 287.

   - For an open-addressing, linear double-hashing table like Mesa's,
 with a load factor of 0.4417, and a perfectly random hash function,
 the expected number of probes when inserting a new key is 1.7862,
 according to my hacky Python script.

   - 
 When inserting 287, the actual number of probes is 1 + ceil((128
 - 4) / 7) = 19. (I used a hacky Python script to confirm this by
 simulation).
 Expected probes: 1.7862
 Actual probes: 19

   - Again, PRIME_FD_TO_HANDLE returns 288. Insert it into the table...
 Expected probes: 1.7975
 Actual probes: 17

   - Again, PRIME_FD_TO_HANDLE returns 289. Insert it into the table...
 Expected probes: 1.8089
 Actual probes: 15

   - One more time, PRIME_FD_TO_HANDLE returns 290.
 Expected probes: 1.8201
 Actual probes: 14

You see the problem... the performance is about 10x to 5x slower for
a long time.

Replacing the identity hash function with _mesa_hash_pointer() may fix
the linear degradation.

Or you could just say "Meh, in the common case, the hash table is
super fast; it's effectively a direct-addressed array. In Chad's
pathological case, the table is still fast enough. I want super-fast for
the common case". And keep the identity hash.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 97270] [softpipe] piglit ext_framebuffer_multisample-fast-clear GL_ARB_texture_rg single-sample regression

https://bugs.freedesktop.org/show_bug.cgi?id=97270

--- Comment #1 from Vinson Lee  ---
mesa: 150736b5c34fc07de296454b973ae72ede215ae2 (master 17.1.0-devel)

Regression is still present.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 96254] [softpipe] piglit unsized-array-not-in-last-position regression

https://bugs.freedesktop.org/show_bug.cgi?id=96254

Vinson Lee  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #2 from Vinson Lee  ---
commit 3368b4778153aa3bd042c74b25fb2f80065d9771
Author: Dave Airlie 
Date:   Wed May 25 13:26:42 2016 +1000

arb_shader_storage_buffer_object: test unsized vs implicit arrays.
This tests the difference between an unsized and an implicitly sized array

This removes a compiler test as this will be a linker error now.

The rules are you can have a [] array as long as the shader
later implicitly sizes it.

Reviewed-by: Jose Maria Casanova Crespo 
Signed-off-by: Dave Airlie 

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 91100] [softpipe] piglit egl-create-pbuffer-surface regression

https://bugs.freedesktop.org/show_bug.cgi?id=91100

--- Comment #3 from Vinson Lee  ---
mesa: 150736b5c34fc07de296454b973ae72ede215ae2 (master 17.1.0-devel)

Regression is still present.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 78318] [swrast] piglit glsl-kwin-blur-1 regression

https://bugs.freedesktop.org/show_bug.cgi?id=78318

--- Comment #7 from Vinson Lee  ---
mesa: 150736b5c34fc07de296454b973ae72ede215ae2 (master 17.1.0-devel)

piglit glsl-kwin-blur-1 regression is still present.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 77288] [swrast] piglit glean glsl1 regression

https://bugs.freedesktop.org/show_bug.cgi?id=77288

--- Comment #8 from Vinson Lee  ---
mesa: 150736b5c34fc07de296454b973ae72ede215ae2 (master 17.1.0-devel)

Regression is still present.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 97569] [swrast] piglit getteximage-formats regression

https://bugs.freedesktop.org/show_bug.cgi?id=97569

--- Comment #1 from Vinson Lee  ---
mesa: 150736b5c34fc07de296454b973ae72ede215ae2 (master 17.1.0-devel)

Regression is still present.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] st/mesa: Fix missing-braces warning.

2017-03-31 Thread Vinson Lee

  CXX  state_tracker/st_glsl_to_nir.lo
state_tracker/st_glsl_to_nir.cpp:250:57: warning: suggest braces around 
initialization of subobject [-Wmissing-braces]
  nir_lower_wpos_ytransform_options wpos_options = {0};
^
{}

Signed-off-by: Vinson Lee 
---
 src/mesa/state_tracker/st_glsl_to_nir.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp 
b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 674ccd07a4..e863bab4e7 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -247,7 +247,7 @@ st_glsl_to_nir(struct st_context *st, struct gl_program 
*prog,
   static const gl_state_index wposTransformState[STATE_LENGTH] = {
  STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
   };
-  nir_lower_wpos_ytransform_options wpos_options = {0};
+  nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
   struct pipe_screen *pscreen = st->pipe->screen;
 
   memcpy(wpos_options.state_tokens, wposTransformState,
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/4] anv/blorp: Align vertex buffers to 64B

This fixes issues seen when adding support for full 48-bit addresses.
The 48-bit addresses themselves have nothing to do with it other than
that it caused the kernel to place buffers slightly differently so they
interacted differently with the caches.

Cc: "13.0 17.0" 
---
 src/intel/vulkan/genX_blorp_exec.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/genX_blorp_exec.c 
b/src/intel/vulkan/genX_blorp_exec.c
index 1d07fd7..402f6d2 100644
--- a/src/intel/vulkan/genX_blorp_exec.c
+++ b/src/intel/vulkan/genX_blorp_exec.c
@@ -115,8 +115,20 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, 
uint32_t size,
   struct blorp_address *addr)
 {
struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
+
+   /* From the Sky Lake PRM, 3DSTATE_VERTEX_BUFFERS:
+*
+*"The VF cache needs to be invalidated before binding and then using
+*Vertex Buffers that overlap with any previously bound Vertex Buffer
+*(at a 64B granularity) since the last invalidation.  A VF cache
+*invalidate is performed by setting the "VF Cache Invalidation Enable"
+*bit in PIPE_CONTROL."
+*
+* In order to avoid this problem, we align all vertex buffer allocations
+* to 64 bytes.
+*/
struct anv_state vb_state =
-  anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 16);
+  anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 64);
 
*addr = (struct blorp_address) {
   .buffer = &cmd_buffer->device->dynamic_state_block_pool.bo,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/4] i965/blorp: Align vertex buffers to 64B

Cc: "13.0 17.0" 
---
 src/mesa/drivers/dri/i965/genX_blorp_exec.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index f9334ee..b6122a3 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -122,8 +122,19 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, 
uint32_t size,
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
 
+   /* From the Sky Lake PRM, 3DSTATE_VERTEX_BUFFERS:
+*
+*"The VF cache needs to be invalidated before binding and then using
+*Vertex Buffers that overlap with any previously bound Vertex Buffer
+*(at a 64B granularity) since the last invalidation.  A VF cache
+*invalidate is performed by setting the "VF Cache Invalidation Enable"
+*bit in PIPE_CONTROL."
+*
+* In order to avoid this problem, we align all vertex buffer allocations
+* to 64 bytes.
+*/
uint32_t offset;
-   void *data = brw_state_batch(brw, size, 32, &offset);
+   void *data = brw_state_batch(brw, size, 64, &offset);
 
*addr = (struct blorp_address) {
   .buffer = brw->batch.bo,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/4] anv/cmd_buffer: Flush the VF cache at the top of all primaries

Cc: "13.0 17.0" 
---
 src/intel/vulkan/genX_cmd_buffer.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 1ce549a..b5297f4 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -597,6 +597,18 @@ genX(BeginCommandBuffer)(
 
genX(cmd_buffer_emit_state_base_address)(cmd_buffer);
 
+   /* We sometimes store vertex data in the dynamic state buffer for blorp
+* operations and our dynamic state stream may re-use data from previous
+* command buffers.  In order to prevent stale cache data, we flush the VF
+* cache.  We could do this on every blorp call but that's not really
+* needed as all of the data will get written by the CPU prior to the GPU
+* executing anything.  The chances are fairly high that they will use
+* blorp at least once per primary command buffer so it shouldn't be
+* wasted.
+*/
+   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
+  cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
+
VkResult result = VK_SUCCESS;
if (cmd_buffer->usage_flags &
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/4] anv/blorp: Flush the texture cache in UpdateBuffer

Cc: "13.0 17.0" 
---
 src/intel/vulkan/anv_blorp.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 72a468a..f26f5e5 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -690,6 +690,11 @@ void anv_CmdUpdateBuffer(
 
assert(max_update_size < MAX_SURFACE_DIM * 4);
 
+   /* We're about to read data that was written from the CPU.  Flush the
+* texture cache so we don't get anything stale.
+*/
+   cmd_buffer->state.pending_pipe_bits |= 
ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
+
while (dataSize) {
   const uint32_t copy_size = MIN2(dataSize, max_update_size);
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium: remove support for predicates from TGSI (v2)

2017-03-31 Thread Roland Scheidegger

Am 30.03.2017 um 14:42 schrieb Marek Olšák:
> From: Marek Olšák 
> 
> Never used.
> 
> v2: gallivm: rename "pred" -> "exec_mask"
> etnaviv: remove the cap
> gallium: fix tgsi_instruction::Padding
> ---


Reviewed-by: Roland Scheidegger 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] amd/addrlib: second update for Vega10 + bug fixes

Hi,

The patch is too big for the mailing list. It's here:

https://cgit.freedesktop.org/~mareko/mesa/commit/?id=f32ab540216be1db54723f46a0838c04978b1479

If there are no comments, I'll push it on Tuesday.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: fixup geometry clip emission since using the geom pass

Reviewed-by: Bas Nieuwenhuizen 

On Fri, Mar 31, 2017 at 10:58 PM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> Fixes: 2b35b60d: radv: move to using nir clip/cull merge pass.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index d188ea6..d405e27 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -5842,7 +5842,8 @@ LLVMModuleRef 
> ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> ralloc_free(ctx.phis);
>
> if (nir->stage == MESA_SHADER_GEOMETRY) {
> -   shader_info->gs.gsvs_vertex_size = 
> util_bitcount64(ctx.output_mask) * 16;
> +   unsigned addclip = ctx.num_output_clips + 
> ctx.num_output_culls > 4;
> +   shader_info->gs.gsvs_vertex_size = 
> (util_bitcount64(ctx.output_mask) + addclip) * 16;
> shader_info->gs.max_gsvs_emit_size = 
> shader_info->gs.gsvs_vertex_size *
> nir->info->gs.vertices_out;
> } else if (nir->stage == MESA_SHADER_TESS_CTRL) {
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radv: fixup geometry clip emission since using the geom pass

2017-03-31 Thread Dave Airlie

From: Dave Airlie 

Fixes: 2b35b60d: radv: move to using nir clip/cull merge pass.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index d188ea6..d405e27 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -5842,7 +5842,8 @@ LLVMModuleRef 
ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
ralloc_free(ctx.phis);
 
if (nir->stage == MESA_SHADER_GEOMETRY) {
-   shader_info->gs.gsvs_vertex_size = 
util_bitcount64(ctx.output_mask) * 16;
+   unsigned addclip = ctx.num_output_clips + ctx.num_output_culls 
> 4;
+   shader_info->gs.gsvs_vertex_size = 
(util_bitcount64(ctx.output_mask) + addclip) * 16;
shader_info->gs.max_gsvs_emit_size = 
shader_info->gs.gsvs_vertex_size *
nir->info->gs.vertices_out;
} else if (nir->stage == MESA_SHADER_TESS_CTRL) {
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radeonsi: add load_image_desc()

For the series:

Reviewed-by: Marek Olšák 

Marek

On Thu, Mar 30, 2017 at 7:58 PM, Samuel Pitoiset
 wrote:
> Similar to load_sampler_desc(). Same deal for bindless.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 45 
> +---
>  1 file changed, 24 insertions(+), 21 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index cf725cebd8..6b02d61e17 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3230,6 +3230,24 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, 
> int num_elements)
>CONST_ADDR_SPACE);
>  }
>
> +static LLVMValueRef load_image_desc(struct si_shader_context *ctx,
> +   LLVMValueRef list, LLVMValueRef index,
> +   unsigned target)
> +{
> +   LLVMBuilderRef builder = ctx->gallivm.builder;
> +
> +   if (target == TGSI_TEXTURE_BUFFER) {
> +   index = LLVMBuildMul(builder, index,
> +LLVMConstInt(ctx->i32, 2, 0), "");
> +   index = LLVMBuildAdd(builder, index,
> +LLVMConstInt(ctx->i32, 1, 0), "");
> +   list = LLVMBuildPointerCast(builder, list,
> +   const_array(ctx->v4i32, 0), "");
> +   }
> +
> +   return ac_build_indexed_load_const(&ctx->ac, list, index);
> +}
> +
>  /**
>   * Load the resource descriptor for \p image.
>   */
> @@ -3243,8 +3261,8 @@ image_fetch_rsrc(
> struct si_shader_context *ctx = si_shader_context(bld_base);
> LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
>  SI_PARAM_IMAGES);
> -   LLVMValueRef index, tmp;
> -   bool dcc_off = target != TGSI_TEXTURE_BUFFER && is_store;
> +   LLVMValueRef index;
> +   bool dcc_off = is_store;
>
> assert(image->Register.File == TGSI_FILE_IMAGE);
>
> @@ -3255,8 +3273,7 @@ image_fetch_rsrc(
>
> index = LLVMConstInt(ctx->i32, image->Register.Index, 0);
>
> -   if (images_writemask & (1 << image->Register.Index) &&
> -   target != TGSI_TEXTURE_BUFFER)
> +   if (images_writemask & (1 << image->Register.Index))
> dcc_off = true;
> } else {
> /* From the GL_ARB_shader_image_load_store extension spec:
> @@ -3273,23 +3290,9 @@ image_fetch_rsrc(
>SI_NUM_IMAGES);
> }
>
> -   if (target == TGSI_TEXTURE_BUFFER) {
> -   LLVMBuilderRef builder = ctx->gallivm.builder;
> -
> -   rsrc_ptr = LLVMBuildPointerCast(builder, rsrc_ptr,
> -   const_array(ctx->v4i32, 0), 
> "");
> -   index = LLVMBuildMul(builder, index,
> -LLVMConstInt(ctx->i32, 2, 0), "");
> -   index = LLVMBuildAdd(builder, index,
> -LLVMConstInt(ctx->i32, 1, 0), "");
> -   *rsrc = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, 
> index);
> -   return;
> -   }
> -
> -   tmp = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
> -   if (dcc_off)
> -   tmp = force_dcc_off(ctx, tmp);
> -   *rsrc = tmp;
> +   *rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
> +   if (dcc_off && target != TGSI_TEXTURE_BUFFER)
> +   *rsrc = force_dcc_off(ctx, *rsrc);
>  }
>
>  static LLVMValueRef image_fetch_coords(
> --
> 2.12.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: always do tess ring size calculations.

)

On Fri, Mar 31, 2017 at 3:33 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> We could store these in the device, but it's probably
> not that much overhead to recalculate them, this is needed
> because we will emit the rings if the queue has them created
> so we need to emit the register values correctly as well.
>
> This fixes some tess tests failing when run after other tests
> inside CTS.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_device.c | 14 +-
>  1 file changed, 5 insertions(+), 9 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index af82926..5c48be1 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -1393,15 +1393,11 @@ radv_get_preamble_cs(struct radv_queue *queue,
> if (needs_tess_rings)
> add_tess_rings = true;
> }
> -
> -   if (add_tess_rings) {
> -   tess_factor_ring_size = 32768 * 
> queue->device->physical_device->rad_info.max_se;

I just realized we can alloc 16 KiB instead of 32 KiB for HAWAII
(based on tess_offchip_block_dw_size).

Either way:

Reviewed-by: Bas Nieuwenhuizen 

> -   hs_offchip_param = radv_get_hs_offchip_param(queue->device,
> -
> &max_offchip_buffers);
> -   tess_offchip_ring_size = max_offchip_buffers *
> -   queue->device->tess_offchip_block_dw_size * 4;
> -
> -   }
> +   tess_factor_ring_size = 32768 * 
> queue->device->physical_device->rad_info.max_se;
> +   hs_offchip_param = radv_get_hs_offchip_param(queue->device,
> +&max_offchip_buffers);
> +   tess_offchip_ring_size = max_offchip_buffers *
> +   queue->device->tess_offchip_block_dw_size * 4;
>
> if (scratch_size <= queue->scratch_size &&
> compute_scratch_size <= queue->compute_scratch_size &&
> --
> 2.7.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] radv: add ia_multi_vgt_param tessellation support.

On Fri, Mar 31, 2017 at 2:44 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This just ports the relevant radeonsi pieces.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/si_cmd_buffer.c | 31 ++-
>  1 file changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index 4673f28..6ee0f17 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -609,13 +609,42 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer 
> *cmd_buffer,
> uint32_t num_prims = 
> radv_prims_for_vertices(&cmd_buffer->state.pipeline->graphics.prim_vertex_count,
>  draw_vertex_count);
> bool multi_instances_smaller_than_primgroup;
>
> -   if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
> +   if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
> +   primgroup_size = 
> cmd_buffer->state.pipeline->graphics.tess.num_patches;
> +   else if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
> primgroup_size = 64;  /* recommended with a GS */
>
> multi_instances_smaller_than_primgroup = indirect_draw || 
> (instanced_draw &&
>num_prims 
> < primgroup_size);
> /* TODO TES */
With this TODO removed, these 2 patches are

Reviewed-by: Bas Nieuwenhuizen 

> +   if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
> +   /* SWITCH_ON_EOI must be set if PrimID is used. */
> +   if 
> (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id
>  ||
> +   
> cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.uses_prim_id)
> +   ia_switch_on_eoi = true;
> +
> +   /* Bug with tessellation and GS on Bonaire and older 2 SE 
> chips. */
> +   if ((family == CHIP_TAHITI ||
> +family == CHIP_PITCAIRN ||
> +family == CHIP_BONAIRE) &&
> +   radv_pipeline_has_gs(cmd_buffer->state.pipeline))
> +   partial_vs_wave = true;
> +
> +   /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
> +   if (cmd_buffer->device->has_distributed_tess) {
> +   if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) 
> {
> +   partial_es_wave = true;
>
> +   if (family == CHIP_TONGA ||
> +   family == CHIP_FIJI ||
> +   family == CHIP_POLARIS10 ||
> +   family == CHIP_POLARIS11)
> +   partial_vs_wave = true;
> +   } else {
> +   partial_vs_wave = true;
> +   }
> +   }
> +   }
> /* TODO linestipple */
>
> if (chip_class >= CIK) {
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/10] radeonsi/gfx9: fix linear mipmap CPU access


On 31.03.2017 10:18, Marek Olšák wrote:

On Fri, Mar 31, 2017 at 8:17 AM, Nicolai Hähnle  wrote:

On 30.03.2017 19:16, Marek Olšák wrote:


From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c  | 8 +++-
 src/gallium/drivers/radeon/radeon_winsys.h | 4 ++--
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 877f555..77e9bec 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -186,22 +186,22 @@ static unsigned r600_texture_get_offset(struct
r600_common_screen *rscreen,
if (rscreen->chip_class >= GFX9) {
*stride = rtex->surface.u.gfx9.surf_pitch *
rtex->surface.bpe;
*layer_stride = rtex->surface.u.gfx9.surf_slice_size;

if (!box)
return 0;

/* Each texture is an array of slices. Each slice is an
array
 * of mipmap levels. */
return box->z * rtex->surface.u.gfx9.surf_slice_size +
-  ((rtex->surface.u.gfx9.surf_ymip_offset[level] +
-box->y / rtex->surface.blk_h) *
+  rtex->surface.u.gfx9.offset[level] +
+  (box->y / rtex->surface.blk_h *
rtex->surface.u.gfx9.surf_pitch +
box->x / rtex->surface.blk_w) * rtex->surface.bpe;



Does this part of the formula really not depend on the mip level? It looks
like each mip level uses the same amount of memory and pitch?


Only the same pitch. All mip levels are placed in one 2D plane. The
linear layout puts them below each other. That's why the pitch is the
same. blk_w/blk_h are for compressed textures.


Thanks for the explanation.

Reviewed-by: Nicolai Hähnle 


Marek




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Fwd: [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread gregory hainaut

On Fri, 31 Mar 2017 12:53:47 -0400
Ilia Mirkin  wrote:

> On Fri, Mar 31, 2017 at 6:12 AM, Gregory Hainaut
>  wrote:
> >> Others have reported this crashing on Nouveau. I haven't seen the problem 
> >> on radeonsi or i965.
> >
> > Hello Timothy (sorry for the double mail, email is a complex tool:) )
> >
> > Hum, tbh. I was quite surprised to hit this bug. I guess you save a
> > > pre-optimized shader in the cache. So it could depend on optimization
> > passes.
> >
> > From the top of my head, I think the "offending" line is this one
> > const ivec2 offsets[4] = {ivec2(...), ivec2(...), ivec2(...), ivec2(...)};
> >
> > Strangely enough there are only 3 parameters without name in the
> > > parameter list (signature is int, size 2 and CONSTANT). Maybe one was
> > optimized away, I didn't look further.
> 
> Note that nouveau is unique in that it can process
> textureGatherOffsets() directly, without lowering it to 4x
> textureGatherOffset.
> 
> The relevant code is in st_glsl_to_tgsi.cpp
> 
>   if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
>  lower_offset_arrays(ir);
> 
> So I think with nouveau, you're seeing glsl ir that you wouldn't see 
> otherwise.
> 
>   -ilia

Hello ilia

You're right. The issue appears in the texture gather 4 opcode.

I can see this path (st_glsl_to_tgsi.cpp) in GDB.
case ir_tg4:
   opcode = TGSI_OPCODE_TG4;


Cheers,
Gregory
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2 06/18] anv: Implement VK_KHX_external_memory

2017-03-31 Thread Chad Versace

On Thu 30 Mar 2017, Jason Ekstrand wrote:
> On Thu, Mar 30, 2017 at 11:27 AM, Chad Versace 
> wrote:
> 
> > On Mon 13 Mar 2017, Jason Ekstrand wrote:
> > > There's really nothing for us to do here.  So long as the user doesn't
> > > set any crazy environment variables such as INTEL_VK_HIZ=false, all of
> > > the compression formats etc. should "just work" at least for opaque
> > > handle types.
> >
> > I think the commit message should go with the opaque fd commit. This
> > patch's commit message should say something like,
> >
> >   Turn it on. Trivially correct. Don't support any
> > VkExternalMemoryHandleTypes yet.
> >
> 
> Good call.  I wrote:
> 
> This is the trivial implementation that just exposes the extension
> string but exposes zero external handle types.
> 
> I moved the other comment to the external_memory_fd commit.
> 
> 
> > but in real sentences ;)

Cool. I see the updated commit on your branch.
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: add null pointer check in print_without_declaration()

2017-03-31 Thread Brian Paul


On 03/31/2017 12:01 AM, Nicolai Hähnle wrote:

On 31.03.2017 05:21, Brian Paul wrote:

To avoid/fix a segmentation fault when running the stand-alone GLSL
compiler utility for cases such as the Mesa demos toyball test:

glsl_compiler --dump-builder --version 120 CH11-toyball.vert
CH11-toyball.frag
---
 src/compiler/glsl/ir_builder_print_visitor.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_builder_print_visitor.cpp
b/src/compiler/glsl/ir_builder_print_visitor.cpp
index 825dbe1..164a237 100644
--- a/src/compiler/glsl/ir_builder_print_visitor.cpp
+++ b/src/compiler/glsl/ir_builder_print_visitor.cpp
@@ -581,7 +581,9 @@
ir_builder_print_visitor::print_without_declaration(const
ir_expression *ir)
  const struct hash_entry *const he =
 _mesa_hash_table_search(index_map, ir->operands[i]);

- print_without_indent("r%04X", (unsigned)(uintptr_t) he->data);
+ if (he) {
+print_without_indent("r%04X", (unsigned)(uintptr_t)
he->data);
+ }


Is the output still usable in this case? I don't quite understand the
use case and how this case can happen.


I didn't dig either and would appreciate any feedback.  I'm merely 
trying to avoid a segfault (found while debugging the MinGW optimization 
issue).


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/19] radeonsi: emit TGSI_OPCODE_BALLOT

From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 12eca9f..37346c6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5238,20 +5238,37 @@ static void vote_eq_emit(
vote_set = si_emit_ballot(ctx, emit_data->args[0]);
 
all = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, vote_set, active_set, 
"");
none = LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
 vote_set, LLVMConstInt(ctx->i64, 0, 0), "");
tmp = LLVMBuildOr(gallivm->builder, all, none, "");
emit_data->output[emit_data->chan] =
LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
 
+static void ballot_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct si_shader_context *ctx = si_shader_context(bld_base);
+   LLVMBuilderRef builder = ctx->gallivm.builder;
+   LLVMValueRef tmp;
+
+   tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+   tmp = si_emit_ballot(ctx, tmp);
+   tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, "");
+
+   emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, 
ctx->i32_0, "");
+   emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, 
ctx->i32_1, "");
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
   struct lp_build_emit_data *emit_data)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
LLVMValueRef imm;
unsigned stream;
 
assert(src0.File == TGSI_FILE_IMMEDIATE);
 
@@ -6690,20 +6707,21 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit;
 
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+   bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
 
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
 #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
 
 /* Return true if the PARAM export has been eliminated. */
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 17/19] radeonsi: strengthen emit_optimization_barrier

From: Nicolai Hähnle 

LLVM will lift inline assembly out of if-else-blocks if both paths have
the same inline assembly. Prevent this by adding an irrelevant unique
text to the assembly.

This requires the LLVM assembly parser to be initialized.

Furthermore, allow forcing subsequent computations to happen after the
optimization barrier by defining a data dependency.
---
 src/gallium/drivers/radeonsi/si_shader.c   | 39 +++---
 .../drivers/radeonsi/si_shader_tgsi_setup.c|  3 ++
 2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index da1db4e..a56e886 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -65,20 +65,22 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
   struct si_shader *shader,
   LLVMTargetMachineRef tm);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
   FILE *f);
 
+static unsigned llvm_get_type_size(LLVMTypeRef type);
+
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
 static void si_build_vs_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 union si_shader_part_key *key);
 static void si_build_ps_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
@@ -3140,28 +3142,57 @@ static LLVMValueRef get_buffer_size(
return size;
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
 
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
  */
 #if 0 /* unused currently */
-static void emit_optimization_barrier(struct si_shader_context *ctx)
+static void emit_optimization_barrier(struct si_shader_context *ctx,
+ LLVMValueRef *pvgpr)
 {
+   static int counter = 0;
+
LLVMBuilderRef builder = ctx->gallivm.builder;
-   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
-   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+   char code[16];
+
+   snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+   if (!pvgpr) {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", 
true, false);
+   LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+   } else {
+   LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, 
false);
+   LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, 
"=v,0", true, false);
+   LLVMValueRef vgpr = *pvgpr;
+   LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+   unsigned vgpr_size = llvm_get_type_size(vgpr_type);
+   LLVMValueRef vgpr0;
+
+   assert(vgpr_size % 4 == 0);
+
+   vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, 
vgpr_size / 4), "");
+   vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+   vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+   vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, 
"");
+   vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+   *pvgpr = vgpr;
+   }
 }
 #endif
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
 static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
 {
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index da463c2..a5725a3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++

[Mesa-dev] [PATCH 05/19] glsl: add gl_SubGroup*ARB builtins

From: Nicolai Hähnle 

---
 src/compiler/glsl/builtin_variables.cpp | 22 
 src/compiler/shader_enums.c |  7 
 src/compiler/shader_enums.h | 59 +
 3 files changed, 88 insertions(+)

diff --git a/src/compiler/glsl/builtin_variables.cpp 
b/src/compiler/glsl/builtin_variables.cpp
index fc0443e..c232571 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -358,20 +358,21 @@ per_vertex_accumulator::construct_interface_instance() 
const
 }
 
 
 class builtin_variable_generator
 {
 public:
builtin_variable_generator(exec_list *instructions,
   struct _mesa_glsl_parse_state *state);
void generate_constants();
void generate_uniforms();
+   void generate_special_vars();
void generate_vs_special_vars();
void generate_tcs_special_vars();
void generate_tes_special_vars();
void generate_gs_special_vars();
void generate_fs_special_vars();
void generate_cs_special_vars();
void generate_varyings();
 
 private:
const glsl_type *array(const glsl_type *base, unsigned elements)
@@ -421,39 +422,41 @@ private:
/**
 * True if compatibility-profile-only variables should be included.  (In
 * desktop GL, these are always included when the GLSL version is 1.30 and
 * or below).
 */
const bool compatibility;
 
const glsl_type * const bool_t;
const glsl_type * const int_t;
const glsl_type * const uint_t;
+   const glsl_type * const uint64_t;
const glsl_type * const float_t;
const glsl_type * const vec2_t;
const glsl_type * const vec3_t;
const glsl_type * const vec4_t;
const glsl_type * const uvec3_t;
const glsl_type * const mat3_t;
const glsl_type * const mat4_t;
 
per_vertex_accumulator per_vertex_in;
per_vertex_accumulator per_vertex_out;
 };
 
 
 builtin_variable_generator::builtin_variable_generator(
exec_list *instructions, struct _mesa_glsl_parse_state *state)
: instructions(instructions), state(state), symtab(state->symbols),
  compatibility(state->compat_shader || !state->is_version(140, 100)),
  bool_t(glsl_type::bool_type), int_t(glsl_type::int_type),
  uint_t(glsl_type::uint_type),
+ uint64_t(glsl_type::uint64_t_type),
  float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type),
  vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type),
  uvec3_t(glsl_type::uvec3_type),
  mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type)
 {
 }
 
 ir_variable *
 builtin_variable_generator::add_index_variable(const char *name,
  const glsl_type *type,
@@ -977,20 +980,38 @@ builtin_variable_generator::generate_uniforms()
   add_uniform(texcoords_vec4, "gl_ObjectPlaneT");
   add_uniform(texcoords_vec4, "gl_ObjectPlaneR");
   add_uniform(texcoords_vec4, "gl_ObjectPlaneQ");
 
   add_uniform(type("gl_FogParameters"), "gl_Fog");
}
 }
 
 
 /**
+ * Generate special variables which exist in all shaders.
+ */
+void
+builtin_variable_generator::generate_special_vars()
+{
+   if (state->ARB_shader_ballot_enable) {
+  add_system_value(SYSTEM_VALUE_SUBGROUP_SIZE, uint_t, 
"gl_SubGroupSizeARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_INVOCATION, uint_t, 
"gl_SubGroupInvocationARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_EQ_MASK, uint64_t, 
"gl_SubGroupEqMaskARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_GE_MASK, uint64_t, 
"gl_SubGroupGeMaskARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_GT_MASK, uint64_t, 
"gl_SubGroupGtMaskARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_LE_MASK, uint64_t, 
"gl_SubGroupLeMaskARB");
+  add_system_value(SYSTEM_VALUE_SUBGROUP_LT_MASK, uint64_t, 
"gl_SubGroupLtMaskARB");
+   }
+}
+
+
+/**
  * Generate variables which only exist in vertex shaders.
  */
 void
 builtin_variable_generator::generate_vs_special_vars()
 {
ir_variable *var;
 
if (state->is_version(130, 300))
   add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID");
if (state->ARB_draw_instanced_enable)
@@ -1409,20 +1430,21 @@ builtin_variable_generator::generate_varyings()
 
 
 void
 _mesa_glsl_initialize_variables(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
 {
builtin_variable_generator gen(instructions, state);
 
gen.generate_constants();
gen.generate_uniforms();
+   gen.generate_special_vars();
 
gen.generate_varyings();
 
switch (state->stage) {
case MESA_SHADER_VERTEX:
   gen.generate_vs_special_vars();
   break;
case MESA_SHADER_TESS_CTRL:
   gen.generate_tcs_special_vars();
   break;
diff --git a/src/compiler/shader_enums.c b/src/compiler/shader_enums.c
index e704c95..ca62cda 100644
--- a/src/compiler/shader_enums.c
+++ b/src/compiler/shader_enums.c
@@ -196,20 +196,27 @@ gl_varying_slot_name(gl_varying_slot

[Mesa-dev] [PATCH 18/19] radeonsi: optimization barriers to work around LLVM deficiencies

From: Nicolai Hähnle 

Notably, llvm.amdgcn.readfirstlane and llvm.amdgcn.icmp may be hoisted
out of loops or if/else branches in cases like

  if (cond) {
v = readFirstInvocationARB(x);
... use v ...
  } else {
v = readFirstInvocationARB(x);
... use v ...
  }
===>
  v = readFirstInvocationARB(x);
  if (cond) {
... use v ...
  } else {
... use v ...
  }

The optimization barrier is a heavy hammer to stop that until LLVM
is taught the semantics of the intrinsic properly.
---
 src/gallium/drivers/radeonsi/si_shader.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a56e886..a24db0d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3146,21 +3146,20 @@ static void build_tex_intrinsic(const struct 
lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
 
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
  *
  * Optionally, a value can be passed through the inline assembly to prevent
  * LLVM from hoisting calls to ReadNone functions.
  */
-#if 0 /* unused currently */
 static void emit_optimization_barrier(struct si_shader_context *ctx,
  LLVMValueRef *pvgpr)
 {
static int counter = 0;
 
LLVMBuilderRef builder = ctx->gallivm.builder;
char code[16];
 
snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
 
@@ -3180,21 +3179,20 @@ static void emit_optimization_barrier(struct 
si_shader_context *ctx,
 
vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, 
vgpr_size / 4), "");
vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, 
"");
vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
 
*pvgpr = vgpr;
}
 }
-#endif
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
 #define LGKM_CNT 0x07f
 #define VM_CNT 0xf70
 
 static void emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMBuilderRef builder = gallivm->builder;
@@ -5201,22 +5199,27 @@ static void build_interp_intrinsic(const struct 
lp_build_tgsi_action *action,
 static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
   LLVMValueRef value)
 {
struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef args[3] = {
value,
ctx->i32_0,
LLVMConstInt(ctx->i32, LLVMIntNE, 0)
};
 
-   if (LLVMTypeOf(value) != ctx->i32)
-   args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, 
"");
+   /* We currently have no other way to prevent LLVM from lifting the icmp
+* calls to a dominating basic block.
+*/
+   emit_optimization_barrier(ctx, &args[0]);
+
+   if (LLVMTypeOf(args[0]) != ctx->i32)
+   args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, 
"");
 
return lp_build_intrinsic(gallivm->builder,
  "llvm.amdgcn.icmp.i32",
  ctx->i64, args, 3,
  LP_FUNC_ATTR_NOUNWIND |
  LP_FUNC_ATTR_READNONE |
  LP_FUNC_ATTR_CONVERGENT);
 }
 
 static void vote_all_emit(
@@ -5307,20 +5310,25 @@ static void read_invoc_fetch_args(
 }
 
 static void read_lane_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMBuilderRef builder = ctx->gallivm.builder;
 
+   /* We currently have no other way to prevent LLVM from lifting the icmp
+* calls to a dominating basic block.
+*/
+   emit_optimization_barrier(ctx, &emit_data->args[0]);
+
for (unsigned i = 0; i < emit_data->arg_count; ++i) {
emit_data->args[i] = LLVMBuildBitCast(builder, 
emit_data->args[i],
  ctx->i32, "");
}
 
emit_data->output[emit_data->chan] =
ac_build_intrinsic(&ctx->ac, action->intr_name,
   ctx->i32, emit_data->args, 
emit_data->arg_count,
   AC_FUNC_ATTR_READNONE |
   AC_FUNC_ATTR_CONVERGENT);
-- 
2.9.3

[Mesa-dev] [PATCH 16/19] radeonsi: emit TGSI_OPCODE_READ_*

From: Nicolai Hähnle 

---
 src/gallium/drivers/radeonsi/si_shader.c | 38 
 1 file changed, 38 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 37346c6..da1db4e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5255,20 +5255,53 @@ static void ballot_emit(
LLVMValueRef tmp;
 
tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
tmp = si_emit_ballot(ctx, tmp);
tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, "");
 
emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, 
ctx->i32_0, "");
emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, 
ctx->i32_1, "");
 }
 
+static void read_invoc_fetch_args(
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+0, emit_data->src_chan);
+
+   /* Always read the source invocation (= lane) from the X channel. */
+   emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+1, TGSI_CHAN_X);
+   emit_data->arg_count = 2;
+}
+
+static void read_lane_emit(
+   const struct lp_build_tgsi_action *action,
+   struct lp_build_tgsi_context *bld_base,
+   struct lp_build_emit_data *emit_data)
+{
+   struct si_shader_context *ctx = si_shader_context(bld_base);
+   LLVMBuilderRef builder = ctx->gallivm.builder;
+
+   for (unsigned i = 0; i < emit_data->arg_count; ++i) {
+   emit_data->args[i] = LLVMBuildBitCast(builder, 
emit_data->args[i],
+ ctx->i32, "");
+   }
+
+   emit_data->output[emit_data->chan] =
+   ac_build_intrinsic(&ctx->ac, action->intr_name,
+  ctx->i32, emit_data->args, 
emit_data->arg_count,
+  AC_FUNC_ATTR_READNONE |
+  AC_FUNC_ATTR_CONVERGENT);
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
   struct lp_build_emit_data *emit_data)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
LLVMValueRef imm;
unsigned stream;
 
assert(src0.File == TGSI_FILE_IMMEDIATE);
 
@@ -6708,20 +6741,25 @@ static void si_init_shader_ctx(struct si_shader_context 
*ctx,
 
bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy;
bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy;
 
bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
+   bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = 
"llvm.amdgcn.readfirstlane";
+   bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
+   bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = 
"llvm.amdgcn.readlane";
+   bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = 
read_invoc_fetch_args;
+   bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
 
bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
 }
 
 #define EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3)
 #define EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5)
 
 /* Return true if the PARAM export has been eliminated. */
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/19] glsl: add ARB_shader_ballot builtin functions

From: Nicolai Hähnle 

---
 src/compiler/glsl/builtin_functions.cpp | 77 +
 1 file changed, 77 insertions(+)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 683217d..5351b98 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -460,20 +460,26 @@ shader_atomic_counters(const _mesa_glsl_parse_state 
*state)
return state->has_atomic_counters();
 }
 
 static bool
 shader_atomic_counter_ops(const _mesa_glsl_parse_state *state)
 {
return state->ARB_shader_atomic_counter_ops_enable;
 }
 
 static bool
+shader_ballot(const _mesa_glsl_parse_state *state)
+{
+   return state->ARB_shader_ballot_enable;
+}
+
+static bool
 shader_clock(const _mesa_glsl_parse_state *state)
 {
return state->ARB_shader_clock_enable;
 }
 
 static bool
 shader_clock_int64(const _mesa_glsl_parse_state *state)
 {
return state->ARB_shader_clock_enable &&
   state->ARB_gpu_shader_int64_enable;
@@ -915,20 +921,24 @@ private:
  unsigned num_arguments,
  unsigned flags,
  enum ir_intrinsic_id id);
 
ir_function_signature *_memory_barrier_intrinsic(
   builtin_available_predicate avail,
   enum ir_intrinsic_id id);
ir_function_signature *_memory_barrier(const char *intrinsic_name,
   builtin_available_predicate avail);
 
+   ir_function_signature *_ballot();
+   ir_function_signature *_read_first_invocation(const glsl_type *type);
+   ir_function_signature *_read_invocation(const glsl_type *type);
+
ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate 
avail,
   const glsl_type *type);
ir_function_signature *_shader_clock(builtin_available_predicate avail,
 const glsl_type *type);
 
ir_function_signature *_vote(enum ir_expression_operation opcode);
 
 #undef B0
 #undef B1
 #undef B2
@@ -3085,20 +3095,56 @@ builtin_builder::create_builtins()
 NULL);
add_function("memoryBarrierImage",
 _memory_barrier("__intrinsic_memory_barrier_image",
 compute_shader_supported),
 NULL);
add_function("memoryBarrierShared",
 _memory_barrier("__intrinsic_memory_barrier_shared",
 compute_shader),
 NULL);
 
+   add_function("ballotARB", _ballot(), NULL);
+
+   add_function("readInvocationARB",
+_read_invocation(glsl_type::float_type),
+_read_invocation(glsl_type::vec2_type),
+_read_invocation(glsl_type::vec3_type),
+_read_invocation(glsl_type::vec4_type),
+
+_read_invocation(glsl_type::int_type),
+_read_invocation(glsl_type::ivec2_type),
+_read_invocation(glsl_type::ivec3_type),
+_read_invocation(glsl_type::ivec4_type),
+
+_read_invocation(glsl_type::uint_type),
+_read_invocation(glsl_type::uvec2_type),
+_read_invocation(glsl_type::uvec3_type),
+_read_invocation(glsl_type::uvec4_type),
+NULL);
+
+   add_function("readFirstInvocationARB",
+_read_first_invocation(glsl_type::float_type),
+_read_first_invocation(glsl_type::vec2_type),
+_read_first_invocation(glsl_type::vec3_type),
+_read_first_invocation(glsl_type::vec4_type),
+
+_read_first_invocation(glsl_type::int_type),
+_read_first_invocation(glsl_type::ivec2_type),
+_read_first_invocation(glsl_type::ivec3_type),
+_read_first_invocation(glsl_type::ivec4_type),
+
+_read_first_invocation(glsl_type::uint_type),
+_read_first_invocation(glsl_type::uvec2_type),
+_read_first_invocation(glsl_type::uvec3_type),
+_read_first_invocation(glsl_type::uvec4_type),
+NULL);
+
add_function("clock2x32ARB",
 _shader_clock(shader_clock,
   glsl_type::uvec2_type),
 NULL);
 
add_function("clockARB",
 _shader_clock(shader_clock_int64,
   glsl_type::uint64_t_type),
 NULL);
 
@@ -5927,20 +5973,51 @@ ir_function_signature *
 builtin_builder::_memory_barrier(const char *intrinsic_name,
  builtin_available_predicate avail)
 {
MAKE_SIG(glsl_type::void_type, avail, 0);
body.emit(call(shader->symbols->get_function(intrinsic_name),
   NULL, sig->parameters));
return sig;
 }
 
 ir_function_signature *
+builtin_builder::_ballot()
+{
+   ir_variable *value =

[Mesa-dev] [PATCH 02/19] glsl: add ARB_shader_ballot enable

From: Nicolai Hähnle 

---
 src/compiler/glsl/glsl_parser_extras.cpp | 1 +
 src/compiler/glsl/glsl_parser_extras.h   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 8b5df3b..4629e78 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -620,20 +620,21 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT(ARB_fragment_coord_conventions),
EXT(ARB_fragment_layer_viewport),
EXT(ARB_gpu_shader5),
EXT(ARB_gpu_shader_fp64),
EXT(ARB_gpu_shader_int64),
EXT(ARB_post_depth_coverage),
EXT(ARB_sample_shading),
EXT(ARB_separate_shader_objects),
EXT(ARB_shader_atomic_counter_ops),
EXT(ARB_shader_atomic_counters),
+   EXT(ARB_shader_ballot),
EXT(ARB_shader_bit_encoding),
EXT(ARB_shader_clock),
EXT(ARB_shader_draw_parameters),
EXT(ARB_shader_group_vote),
EXT(ARB_shader_image_load_store),
EXT(ARB_shader_image_size),
EXT(ARB_shader_precision),
EXT(ARB_shader_stencil_export),
EXT(ARB_shader_storage_buffer_object),
EXT(ARB_shader_subroutine),
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index af08bb2..6c3bc8a 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -625,20 +625,22 @@ struct _mesa_glsl_parse_state {
bool ARB_post_depth_coverage_enable;
bool ARB_post_depth_coverage_warn;
bool ARB_sample_shading_enable;
bool ARB_sample_shading_warn;
bool ARB_separate_shader_objects_enable;
bool ARB_separate_shader_objects_warn;
bool ARB_shader_atomic_counter_ops_enable;
bool ARB_shader_atomic_counter_ops_warn;
bool ARB_shader_atomic_counters_enable;
bool ARB_shader_atomic_counters_warn;
+   bool ARB_shader_ballot_enable;
+   bool ARB_shader_ballot_warn;
bool ARB_shader_bit_encoding_enable;
bool ARB_shader_bit_encoding_warn;
bool ARB_shader_clock_enable;
bool ARB_shader_clock_warn;
bool ARB_shader_draw_parameters_enable;
bool ARB_shader_draw_parameters_warn;
bool ARB_shader_group_vote_enable;
bool ARB_shader_group_vote_warn;
bool ARB_shader_image_load_store_enable;
bool ARB_shader_image_load_store_warn;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/19] radeonsi: enable ARB_shader_ballot

From: Nicolai Hähnle 

Require LLVM 5.0 or later because LLVM 4.0 is easily fooled into
putting the lane select of llvm.amdgcn.readlane into a VGPR and then
fails to continue to compile.
---
 docs/features.txt  | 2 +-
 docs/relnotes/17.1.0.html  | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c | 4 +++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 1e145e1..662fbd2 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -285,21 +285,21 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_gl_spirv   not started
   GL_ARB_gpu_shader_int64   DONE (i965/gen8+, 
nvc0, radeonsi, softpipe, llvmpipe)
   GL_ARB_indirect_parametersDONE (nvc0, radeonsi)
   GL_ARB_parallel_shader_compilenot started, but 
Chia-I Wu did some related work in 2014
   GL_ARB_pipeline_statistics_query  DONE (i965, nvc0, 
radeonsi, softpipe, swr)
   GL_ARB_post_depth_coverageDONE (i965)
   GL_ARB_robustness_isolation   not started
   GL_ARB_sample_locations   not started
   GL_ARB_seamless_cubemap_per_texture   DONE (i965, nvc0, 
radeonsi, r600, softpipe, swr)
   GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
-  GL_ARB_shader_ballot  not started
+  GL_ARB_shader_ballot  DONE (radeonsi)
   GL_ARB_shader_clock   DONE (i965/gen7+, 
radeonsi)
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
   GL_ARB_shader_group_vote  DONE (nvc0, radeonsi)
   GL_ARB_shader_stencil_export  DONE (i965/gen9+, 
radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+)
   GL_ARB_sparse_buffer  not started
   GL_ARB_sparse_texture not started
   GL_ARB_sparse_texture2not started
   GL_ARB_sparse_texture_clamp   not started
   GL_ARB_texture_filter_minmax  not started
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index a11a37f..27595f5 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -38,20 +38,21 @@ TBD.
 
 
 New features
 
 
 Note: some of the new features are only available with certain drivers.
 
 
 
 GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
llvmpipe
+GL_ARB_shader_ballot on radeonsi
 GL_ARB_shader_clock on radeonsi
 GL_ARB_shader_group_vote on radeonsi
 GL_ARB_transform_feedback2 on i965/gen6
 GL_ARB_transform_feedback_overflow_query on i965/gen6+
 Geometry shaders enabled on swr
 
 
 Bug fixes
 
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 9388eb9..e647d3e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -420,20 +420,23 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
return 1;
 
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_CLOCK:
return HAVE_LLVM >= 0x0309;
 
case PIPE_CAP_TGSI_VOTE:
return HAVE_LLVM >= 0x0400;
 
+   case PIPE_CAP_TGSI_BALLOT:
+   return HAVE_LLVM >= 0x0500;
+
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
 
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
return (sscreen->b.info.drm_major == 2 &&
sscreen->b.info.drm_minor >= 43) ||
   sscreen->b.info.drm_major == 3;
 
case PIPE_CAP_TEXTURE_MULTISAMPLE:
/* 2D tiling on CIK is supported since DRM 2.35.0 */
@@ -481,21 +484,20 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
-   case PIPE_CAP_TGSI_BALLOT:
return 0;
 
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return si_have_tgsi_compute(sscreen);
 
case PIPE_CAP_DRAW_PARAMETERS:
case PIPE_CAP_MULTI_DRAW_INDIRECT:
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
return

[Mesa-dev] [PATCH 03/19] glsl: add ARB_shader_ballot operations

From: Nicolai Hähnle 

---
 src/compiler/glsl/ir.cpp | 12 
 src/compiler/glsl/ir_expression_operation.py |  7 +++
 src/compiler/glsl/ir_validate.cpp| 16 
 src/mesa/program/ir_to_mesa.cpp  |  3 +++
 4 files changed, 38 insertions(+)

diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index f80bd81..2bbc7a1 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -374,20 +374,28 @@ ir_expression::ir_expression(int op, ir_rvalue *op0)
case ir_unop_frexp_exp:
   this->type = glsl_type::get_instance(GLSL_TYPE_INT,
   op0->type->vector_elements, 1);
   break;
 
case ir_unop_get_buffer_size:
case ir_unop_ssbo_unsized_array_length:
   this->type = glsl_type::int_type;
   break;
 
+   case ir_unop_ballot:
+  this->type = glsl_type::uint64_t_type;
+  break;
+
+   case ir_unop_read_first_invocation:
+  this->type = op0->type;
+  break;
+
case ir_unop_vote_any:
case ir_unop_vote_all:
case ir_unop_vote_eq:
   this->type = glsl_type::bool_type;
   break;
 
case ir_unop_bitcast_i642d:
case ir_unop_bitcast_u642d:
   this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE,
op0->type->vector_elements, 1);
@@ -489,20 +497,24 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1)
case ir_binop_ldexp:
case ir_binop_interpolate_at_offset:
case ir_binop_interpolate_at_sample:
   this->type = op0->type;
   break;
 
case ir_binop_vector_extract:
   this->type = op0->type->get_scalar_type();
   break;
 
+   case ir_binop_read_invocation:
+  this->type = op0->type;
+  break;
+
default:
   assert(!"not reached: missing automatic type setup for ir_expression");
   this->type = glsl_type::float_type;
}
 }
 
 ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1,
  ir_rvalue *op2)
: ir_rvalue(ir_type_expression)
 {
diff --git a/src/compiler/glsl/ir_expression_operation.py 
b/src/compiler/glsl/ir_expression_operation.py
index bde9c01..1d29560 100644
--- a/src/compiler/glsl/ir_expression_operation.py
+++ b/src/compiler/glsl/ir_expression_operation.py
@@ -563,20 +563,24 @@ ir_expression_operation = [
operation("get_buffer_size", 1),
 
# Calculate length of an unsized array inside a buffer block.
# This opcode is going to be replaced in a lowering pass inside
# the linker.
#
# operand0 is the unsized array's ir_value for the calculation
# of its length.
operation("ssbo_unsized_array_length", 1),
 
+   # ARB_shader_ballot operations
+   operation("ballot", 1, source_types=(bool_type,), dest_type=uint64_type),
+   operation("read_first_invocation", 1),
+
# Vote among threads on the value of the boolean argument.
operation("vote_any", 1),
operation("vote_all", 1),
operation("vote_eq", 1),
 
# 64-bit integer packing ops.
operation("pack_int_2x32", 1, printable_name="packInt2x32", 
source_types=(int_type,), dest_type=int64_type, 
c_expression="memcpy([0], [0]->value.i[0], sizeof(int64_t))", 
flags=frozenset((horizontal_operation, non_assign_operation))),
operation("pack_uint_2x32", 1, printable_name="packUint2x32", 
source_types=(uint_type,), dest_type=uint64_type, 
c_expression="memcpy([0], [0]->value.u[0], sizeof(uint64_t))", 
flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", 
source_types=(int64_type,), dest_type=int_type, 
c_expression="memcpy([0], [0]->value.i64[0], sizeof(int64_t))", 
flags=frozenset((horizontal_operation, non_assign_operation))),
operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", 
source_types=(uint64_type,), dest_type=uint_type, 
c_expression="memcpy([0], [0]->value.u64[0], sizeof(uint64_t))", 
flags=frozenset((horizontal_operation, non_assign_operation))),
@@ -659,20 +663,23 @@ ir_expression_operation = [
# operand0 is the fs input
# operand1 is the offset from the pixel center
operation("interpolate_at_offset", 2),
 
# Interpolate fs input at sample position
#
# operand0 is the fs input
# operand1 is the sample ID
operation("interpolate_at_sample", 2),
 
+   # ARB_shader_ballot operation
+   operation("read_invocation", 2),
+
# Fused floating-point multiply-add, part of ARB_gpu_shader5.
operation("fma", 3, source_types=real_types, c_expression="{src0} * {src1} 
+ {src2}"),
 
operation("lrp", 3, source_types=real_types, c_expression={'f': "{src0} * 
(1.0f - {src2}) + ({src1} * {src2})", 'd': "{src0} * (1.0 - {src2}) + ({src1} * 
{src2})"}),
 
# Conditional Select
#
# A vector conditional select instruction (like ?:, but operating per-
# component on vectors).
#
diff --git

[Mesa-dev] [PATCH 11/19] st/mesa: enable ARB_shader_ballot

From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_extensions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index a48c22e..5a97c31 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -592,20 +592,21 @@ void st_init_extensions(struct pipe_screen *screen,
   { o(ARB_indirect_parameters),  
PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS   },
   { o(ARB_instanced_arrays), 
PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR  },
   { o(ARB_occlusion_query),  PIPE_CAP_OCCLUSION_QUERY  
},
   { o(ARB_occlusion_query2), PIPE_CAP_OCCLUSION_QUERY  
},
   { o(ARB_pipeline_statistics_query),
PIPE_CAP_QUERY_PIPELINE_STATISTICS},
   { o(ARB_point_sprite), PIPE_CAP_POINT_SPRITE 
},
   { o(ARB_query_buffer_object),  PIPE_CAP_QUERY_BUFFER_OBJECT  
},
   { o(ARB_robust_buffer_access_behavior), 
PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR   },
   { o(ARB_sample_shading),   PIPE_CAP_SAMPLE_SHADING   
},
   { o(ARB_seamless_cube_map),PIPE_CAP_SEAMLESS_CUBE_MAP
},
+  { o(ARB_shader_ballot),PIPE_CAP_TGSI_BALLOT  
},
   { o(ARB_shader_clock), PIPE_CAP_TGSI_CLOCK   
},
   { o(ARB_shader_draw_parameters),   PIPE_CAP_DRAW_PARAMETERS  
},
   { o(ARB_shader_group_vote),PIPE_CAP_TGSI_VOTE
},
   { o(ARB_shader_stencil_export),PIPE_CAP_SHADER_STENCIL_EXPORT
},
   { o(ARB_shader_texture_image_samples), PIPE_CAP_TGSI_TXQS
},
   { o(ARB_shader_texture_lod),   PIPE_CAP_SM3  
},
   { o(ARB_shadow),   PIPE_CAP_TEXTURE_SHADOW_MAP   
},
   { o(ARB_texture_buffer_object),PIPE_CAP_TEXTURE_BUFFER_OBJECTS   
},
   { o(ARB_texture_cube_map_array),   PIPE_CAP_CUBE_MAP_ARRAY   
},
   { o(ARB_texture_gather),   
PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS},
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/19] radeonsi: implement TGSI_SEMANTIC_SUBGROUP_*

From: Nicolai Hähnle 

64-bit system values are stored as v2i32 to simplify the fetch logic.
---
 src/gallium/drivers/radeonsi/si_shader.c | 40 
 1 file changed, 40 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4682a8e..12eca9f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1596,20 +1596,60 @@ static void declare_system_value(
   ctx->i1, NULL, 0,
   LP_FUNC_ATTR_READNONE);
value = LLVMBuildNot(gallivm->builder, value, "");
value = LLVMBuildSExt(gallivm->builder, value, 
ctx->i32, "");
} else {
assert(!"TGSI_SEMANTIC_HELPER_INVOCATION unsupported");
return;
}
break;
 
+   case TGSI_SEMANTIC_SUBGROUP_SIZE:
+   value = LLVMConstInt(ctx->i32, 64, 0);
+   break;
+
+   case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
+   value = ac_get_thread_id(&ctx->ac);
+   break;
+
+   case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
+   {
+   LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+   id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
+   value = LLVMBuildShl(gallivm->builder, LLVMConstInt(ctx->i64, 
1, 0), id, "");
+   value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, 
"");
+   break;
+   }
+
+   case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
+   case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
+   case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
+   case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
+   {
+   LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+   if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
+   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
+   /* All bits set except LSB */
+   value = LLVMConstInt(ctx->i64, -2, 0);
+   } else {
+   /* All bits set */
+   value = LLVMConstInt(ctx->i64, -1, 0);
+   }
+   id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
+   value = LLVMBuildShl(gallivm->builder, value, id, "");
+   if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
+   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
+   value = LLVMBuildNot(gallivm->builder, value, "");
+   value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, 
"");
+   break;
+   }
+
default:
assert(!"unknown system value");
return;
}
 
radeon_bld->system_values[index] = value;
 }
 
 static void declare_compute_memory(struct si_shader_context *radeon_bld,
const struct tgsi_full_declaration *decl)
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/19] st/glsl_to_tgsi: implement ARB_shader_ballot builtin functions

From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7da08da..e84c61a 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2360,20 +2360,29 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
ir, st_src_reg *op)
 
case ir_unop_vote_any:
   emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
   break;
case ir_unop_vote_all:
   emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
   break;
case ir_unop_vote_eq:
   emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
   break;
+   case ir_unop_ballot:
+  emit_asm(ir, TGSI_OPCODE_BALLOT, result_dst, op[0]);
+  break;
+   case ir_unop_read_first_invocation:
+  emit_asm(ir, TGSI_OPCODE_READ_FIRST, result_dst, op[0]);
+  break;
+   case ir_binop_read_invocation:
+  emit_asm(ir, TGSI_OPCODE_READ_INVOC, result_dst, op[0], op[1]);
+  break;
case ir_unop_u2i64:
case ir_unop_u2u64:
case ir_unop_b2i64: {
   st_src_reg temp = get_temp(glsl_type::uvec4_type);
   st_dst_reg temp_dst = st_dst_reg(temp);
   unsigned orig_swz = op[0].swizzle;
   /* 
* To convert unsigned to 64-bit:
* zero Y channel, copy X channel.
*/
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 13/19] radeonsi: support 64-bit system values

From: Nicolai Hähnle 

For simplicity, always store system values as 32-bit values or arrays
of 32-bit values. 64-bit values are unpacked and packed accordingly.
---
 .../drivers/radeonsi/si_shader_tgsi_setup.c| 24 ++
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index efc9452..da463c2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -757,27 +757,43 @@ LLVMValueRef si_llvm_emit_fetch(struct 
lp_build_tgsi_context *bld_base,
 
return bitcast(bld_base, type, result);
 }
 
 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_src_register *reg,
   enum tgsi_opcode_type type,
   unsigned swizzle)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-
+   LLVMBuilderRef builder = ctx->gallivm.builder;
LLVMValueRef cval = ctx->system_values[reg->Register.Index];
+
+   if (tgsi_type_is_64bit(type)) {
+   LLVMValueRef lo, hi;
+
+   assert(swizzle == 0 || swizzle == 2);
+
+   lo = LLVMBuildExtractElement(
+   builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
+   hi = LLVMBuildExtractElement(
+   builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), 
"");
+
+   return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+   }
+
if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
-   cval = LLVMBuildExtractElement(gallivm->builder, cval,
-  lp_build_const_int32(gallivm, 
swizzle), "");
+   cval = LLVMBuildExtractElement(
+   builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
+   } else {
+   assert(swizzle == 0);
}
+
return bitcast(bld_base, type, cval);
 }
 
 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 const struct tgsi_full_declaration *decl)
 {
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
unsigned first, last, i;
switch(decl->Declaration.File) {
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/19] st/glsl_to_tgsi: implement ARB_shader_ballot system variables

From: Nicolai Hähnle 

---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 16 
 1 file changed, 16 insertions(+)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index e84c61a..8c94f05 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -5440,20 +5440,36 @@ _mesa_sysval_to_semantic(unsigned sysval)
/* Compute shader */
case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
   return TGSI_SEMANTIC_THREAD_ID;
case SYSTEM_VALUE_WORK_GROUP_ID:
   return TGSI_SEMANTIC_BLOCK_ID;
case SYSTEM_VALUE_NUM_WORK_GROUPS:
   return TGSI_SEMANTIC_GRID_SIZE;
case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
   return TGSI_SEMANTIC_BLOCK_SIZE;
 
+   /* ARB_shader_ballot */
+   case SYSTEM_VALUE_SUBGROUP_SIZE:
+  return TGSI_SEMANTIC_SUBGROUP_SIZE;
+   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+  return TGSI_SEMANTIC_SUBGROUP_INVOCATION;
+   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+  return TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
+   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+  return TGSI_SEMANTIC_SUBGROUP_GE_MASK;
+   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+  return TGSI_SEMANTIC_SUBGROUP_GT_MASK;
+   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+  return TGSI_SEMANTIC_SUBGROUP_LE_MASK;
+   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+  return TGSI_SEMANTIC_SUBGROUP_LT_MASK;
+
/* Unhandled */
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
case SYSTEM_VALUE_VERTEX_CNT:
default:
   assert(!"Unexpected SYSTEM_VALUE_ enum");
   return TGSI_SEMANTIC_COUNT;
}
 }
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/19] tgsi: add BALLOT/READ_* opcodes

From: Ilia Mirkin 

v2 (Nicolai):
- BALLOT isn't per-channel
- expand the documentation (also for VOTE_*)

Signed-off-by: Ilia Mirkin 
Signed-off-by: Nicolai Hähnle 
---
 src/gallium/auxiliary/tgsi/tgsi_info.c |  6 +--
 src/gallium/docs/source/tgsi.rst   | 67 +-
 src/gallium/include/pipe/p_shader_tokens.h |  6 +--
 3 files changed, 62 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 5a6a9bc..30bad6d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -106,51 +106,51 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 3, 0, 0, 0, 0, 0, COMP, "CMP", TGSI_OPCODE_CMP },
{ 1, 1, 0, 0, 0, 0, 0, CHAN, "SCS", TGSI_OPCODE_SCS },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXB", TGSI_OPCODE_TXB },
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "FBFETCH", TGSI_OPCODE_FBFETCH },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "DIV", TGSI_OPCODE_DIV },
{ 1, 2, 0, 0, 0, 0, 0, REPL, "DP2", TGSI_OPCODE_DP2 },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXL", TGSI_OPCODE_TXL },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "BRK", TGSI_OPCODE_BRK },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "IF", TGSI_OPCODE_IF },
{ 0, 1, 0, 0, 1, 0, 1, NONE, "UIF", TGSI_OPCODE_UIF },
-   { 0, 1, 0, 0, 0, 0, 1, NONE, "", 76 },  /* removed */
+   { 1, 2, 0, 0, 0, 0, 0, COMP, "READ_INVOC", TGSI_OPCODE_READ_INVOC },
{ 0, 0, 0, 0, 1, 1, 1, NONE, "ELSE", TGSI_OPCODE_ELSE },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDIF", TGSI_OPCODE_ENDIF },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "PUSHA", TGSI_OPCODE_PUSHA },
{ 1, 0, 0, 0, 0, 0, 0, NONE, "POPA", TGSI_OPCODE_POPA },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "CEIL", TGSI_OPCODE_CEIL },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "I2F", TGSI_OPCODE_I2F },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "NOT", TGSI_OPCODE_NOT },
{ 1, 1, 0, 0, 0, 0, 0, COMP, "TRUNC", TGSI_OPCODE_TRUNC },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "SHL", TGSI_OPCODE_SHL },
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 88 },  /* removed */
+   { 1, 1, 0, 0, 0, 0, 0, OTHR, "BALLOT", TGSI_OPCODE_BALLOT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "AND", TGSI_OPCODE_AND },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "OR", TGSI_OPCODE_OR },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "MOD", TGSI_OPCODE_MOD },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "XOR", TGSI_OPCODE_XOR },
{ 1, 3, 0, 0, 0, 0, 0, COMP, "SAD", TGSI_OPCODE_SAD },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXF", TGSI_OPCODE_TXF },
{ 1, 2, 1, 0, 0, 0, 0, OTHR, "TXQ", TGSI_OPCODE_TXQ },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "CONT", TGSI_OPCODE_CONT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "EMIT", TGSI_OPCODE_EMIT },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
{ 0, 0, 0, 0, 1, 0, 1, NONE, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
{ 0, 0, 0, 0, 0, 0, 1, NONE, "BGNSUB", TGSI_OPCODE_BGNSUB },
{ 0, 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
{ 0, 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
{ 1, 1, 1, 0, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
{ 1, 1, 0, 0, 0, 0, 0, OTHR, "RESQ", TGSI_OPCODE_RESQ },
-   { 0, 0, 0, 0, 0, 0, 0, NONE, "", 106 }, /* removed */
+   { 1, 1, 0, 0, 0, 0, 0, COMP, "READ_FIRST", TGSI_OPCODE_READ_FIRST },
{ 0, 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSEQ", TGSI_OPCODE_FSEQ },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSGE", TGSI_OPCODE_FSGE },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSLT", TGSI_OPCODE_FSLT },
{ 1, 2, 0, 0, 0, 0, 0, COMP, "FSNE", TGSI_OPCODE_FSNE },
{ 0, 1, 0, 0, 0, 0, 0, OTHR, "MEMBAR", TGSI_OPCODE_MEMBAR },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "CALLNZ", TGSI_OPCODE_CALLNZ },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "", 114 }, /* removed */
{ 0, 1, 0, 0, 0, 0, 0, NONE, "BREAKC", TGSI_OPCODE_BREAKC },
{ 0, 1, 0, 0, 0, 0, 0, NONE, "KILL_IF", TGSI_OPCODE_KILL_IF },
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 05b06ce..7e9b47c 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -2852,36 +2852,81 @@ only be used with 32-bit integer image formats.
 
   The following operation is performed atomically:
 
 .. math::
 
   dst_x = resource[offset]
 
   resource[offset] = (dst_x > src_x ? dst_x : src_x)
 
 
-.. _voteopcodes:
+.. _interlaneopcodes:
+
+Inter-lane opcodes
+^^^^^^^^^^^^^^^^^^
+
+These opcodes reduce the given value across the shader invocations
+running in the current SIMD group. For all but the READ_INVOC operations,
+every thread in the subgroup will receive the same result. The VOTE_* and
+BALLOT operations accept a single-channel argument that is treated as a
+boolean and produce a 64-bit value.
+
+.. opcode:: VOTE_ANY -

[Mesa-dev] [PATCH 12/19] radeonsi: bump RADEON_LLVM_MAX_SYSTEM_VALUES

From: Nicolai Hähnle 

ARB_shader_ballot introduces 7 new system values that can be used
in all shader stages.
---
 src/gallium/drivers/radeonsi/si_shader.c  | 2 ++
 src/gallium/drivers/radeonsi/si_shader_internal.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 874535a..4682a8e 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1386,20 +1386,22 @@ static void declare_system_value(
struct si_shader_context *radeon_bld,
unsigned index,
const struct tgsi_full_declaration *decl)
 {
struct si_shader_context *ctx =
si_shader_context(&radeon_bld->bld_base);
struct lp_build_context *bld = &radeon_bld->bld_base.base;
struct gallivm_state *gallivm = &radeon_bld->gallivm;
LLVMValueRef value = 0;
 
+   assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
+
switch (decl->Semantic.Name) {
case TGSI_SEMANTIC_INSTANCEID:
value = LLVMGetParam(radeon_bld->main_fn,
 ctx->param_instance_id);
break;
 
case TGSI_SEMANTIC_VERTEXID:
value = LLVMBuildAdd(gallivm->builder,
 LLVMGetParam(radeon_bld->main_fn,
  ctx->param_vertex_id),
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 47aeb5d..fd7deec 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -36,21 +36,21 @@
 
 struct pipe_debug_callback;
 struct ac_shader_binary;
 
 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
 #define RADEON_LLVM_MAX_INPUTS 32 * 4
 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
 
 #define RADEON_LLVM_INITIAL_CF_DEPTH 4
 
-#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
+#define RADEON_LLVM_MAX_SYSTEM_VALUES 11
 #define RADEON_LLVM_MAX_ADDRS 16
 
 struct si_llvm_flow;
 
 struct si_shader_context {
struct lp_build_tgsi_context bld_base;
struct gallivm_state gallivm;
struct ac_llvm_context ac;
struct si_shader *shader;
struct si_screen *screen;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/19] mesa: add GL_ARB_shader_ballot boilerplate

From: Nicolai Hähnle 

---
 src/mapi/glapi/registry/gl.xml   | 2 +-
 src/mesa/main/extensions_table.h | 1 +
 src/mesa/main/mtypes.h   | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/registry/gl.xml b/src/mapi/glapi/registry/gl.xml
index 3fc8863..9e2ff98 100644
--- a/src/mapi/glapi/registry/gl.xml
+++ b/src/mapi/glapi/registry/gl.xml
@@ -38657,21 +38657,21 @@ typedef unsigned int GLhandleARB;
 
 
 
 
 
 
 
 
 
 
-
+
 
 
 
 
 
 
 
 
 
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index ec71791..80a9846 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -100,20 +100,21 @@ EXT(ARB_provoking_vertex, 
EXT_provoking_vertex
 EXT(ARB_query_buffer_object , ARB_query_buffer_object  
  , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_robust_buffer_access_behavior   , 
ARB_robust_buffer_access_behavior  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_robustness  , dummy_true   
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_sample_shading  , ARB_sample_shading   
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_sampler_objects , dummy_true   
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_seamless_cube_map   , ARB_seamless_cube_map
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_seamless_cubemap_per_texture, AMD_seamless_cubemap_per_texture 
  , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_separate_shader_objects , dummy_true   
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_shader_atomic_counter_ops   , ARB_shader_atomic_counter_ops
  , GLL, GLC,  x ,  x , 2015)
 EXT(ARB_shader_atomic_counters  , ARB_shader_atomic_counters   
  , GLL, GLC,  x ,  x , 2011)
+EXT(ARB_shader_ballot   , ARB_shader_ballot
  , GLL, GLC,  x ,  x , 2015)
 EXT(ARB_shader_bit_encoding , ARB_shader_bit_encoding  
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_shader_clock, ARB_shader_clock 
  , GLL, GLC,  x ,  x , 2015)
 EXT(ARB_shader_draw_parameters  , ARB_shader_draw_parameters   
  , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_shader_group_vote   , ARB_shader_group_vote
  , GLL, GLC,  x ,  x , 2013)
 EXT(ARB_shader_image_load_store , ARB_shader_image_load_store  
  , GLL, GLC,  x ,  x , 2011)
 EXT(ARB_shader_image_size   , ARB_shader_image_size
  , GLL, GLC,  x ,  x , 2012)
 EXT(ARB_shader_objects  , dummy_true   
  , GLL, GLC,  x ,  x , 2002)
 EXT(ARB_shader_precision, ARB_shader_precision 
  , GLL, GLC,  x ,  x , 2010)
 EXT(ARB_shader_stencil_export   , ARB_shader_stencil_export
  , GLL, GLC,  x ,  x , 2009)
 EXT(ARB_shader_storage_buffer_object, ARB_shader_storage_buffer_object 
  , GLL, GLC,  x ,  x , 2012)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 401c35a..73ce94c 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3883,20 +3883,21 @@ struct gl_extensions
GLboolean ARB_occlusion_query2;
GLboolean ARB_pipeline_statistics_query;
GLboolean ARB_point_sprite;
GLboolean ARB_post_depth_coverage;
GLboolean ARB_query_buffer_object;
GLboolean ARB_robust_buffer_access_behavior;
GLboolean ARB_sample_shading;
GLboolean ARB_seamless_cube_map;
GLboolean ARB_shader_atomic_counter_ops;
GLboolean ARB_shader_atomic_counters;
+   GLboolean ARB_shader_ballot;
GLboolean ARB_shader_bit_encoding;
GLboolean ARB_shader_clock;
GLboolean ARB_shader_draw_parameters;
GLboolean ARB_shader_group_vote;
GLboolean ARB_shader_image_load_store;
GLboolean ARB_shader_image_size;
GLboolean ARB_shader_precision;
GLboolean ARB_shader_stencil_export;
GLboolean ARB_shader_storage_buffer_object;
GLboolean ARB_shader_subroutine;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/19] tgsi: add SUBGROUP_* semantics

From: Ilia Mirkin 

v2: add documentation (Nicolai)

Signed-off-by: Ilia Mirkin 
Signed-off-by: Nicolai Hähnle 
---
 src/gallium/auxiliary/tgsi/tgsi_strings.c  |  7 
 src/gallium/docs/source/tgsi.rst   | 51 ++
 src/gallium/include/pipe/p_shader_tokens.h |  7 
 3 files changed, 65 insertions(+)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c 
b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index d7201fc..19e5cea 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -93,20 +93,27 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
"BASEVERTEX",
"PATCH",
"TESSCOORD",
"TESSOUTER",
"TESSINNER",
"VERTICESIN",
"HELPER_INVOCATION",
"BASEINSTANCE",
"DRAWID",
"WORK_DIM",
+   "SUBGROUP_SIZE",
+   "SUBGROUP_INVOCATION",
+   "SUBGROUP_EQ_MASK",
+   "SUBGROUP_GE_MASK",
+   "SUBGROUP_GT_MASK",
+   "SUBGROUP_LE_MASK",
+   "SUBGROUP_LT_MASK",
 };
 
 const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
 {
"BUFFER",
"1D",
"2D",
"3D",
"CUBE",
"RECT",
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 7e9b47c..2adac3b 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -3422,20 +3422,71 @@ For compute shaders, this semantic indicates the 
maximum (x, y, z) dimensions
 of a block in threads.
 
 
 TGSI_SEMANTIC_THREAD_ID
 """
 
 For compute shaders, this semantic indicates the (x, y, z) coordinates of the
 current thread inside of the block.
 
 
+TGSI_SEMANTIC_SUBGROUP_SIZE
+"""""""""""""""""""""""""""
+
+This semantic indicates the subgroup size for the current invocation. This is
+an integer of at most 64, as it indicates the width of lanemasks. It does not
+depend on the number of invocations that are active.
+
+
+TGSI_SEMANTIC_SUBGROUP_INVOCATION
+"""""""""""""""""""""""""""""""""
+
+The index of the current invocation within its subgroup.
+
+
+TGSI_SEMANTIC_SUBGROUP_EQ_MASK
+""""""""""""""""""""""""""""""
+
+A bit mask of ``bit index == TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+``1 << subgroup_invocation`` in arbitrary precision arithmetic.
+
+
+TGSI_SEMANTIC_SUBGROUP_GE_MASK
+""""""""""""""""""""""""""""""
+
+A bit mask of ``bit index >= TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+``((1 << (subgroup_size - subgroup_invocation)) - 1) << subgroup_invocation``
+in arbitrary precision arithmetic.
+
+
+TGSI_SEMANTIC_SUBGROUP_GT_MASK
+""""""""""""""""""""""""""""""
+
+A bit mask of ``bit index > TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+``((1 << (subgroup_size - subgroup_invocation - 1)) - 1) << (subgroup_invocation + 1)``
+in arbitrary precision arithmetic.
+
+
+TGSI_SEMANTIC_SUBGROUP_LE_MASK
+""""""""""""""""""""""""""""""
+
+A bit mask of ``bit index <= TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+``(1 << (subgroup_invocation + 1)) - 1`` in arbitrary precision arithmetic.
+
+
+TGSI_SEMANTIC_SUBGROUP_LT_MASK
+""""""""""""""""""""""""""""""
+
+A bit mask of ``bit index < TGSI_SEMANTIC_SUBGROUP_INVOCATION``, i.e.
+``(1 << subgroup_invocation) - 1`` in arbitrary precision arithmetic.
+
+
 Declaration Interpolate
 ^^^
 
 This token is only valid for fragment shader INPUT declarations.
 
 The Interpolate field specifes the way input is being interpolated by
 the rasteriser and is one of TGSI_INTERPOLATE_*.
 
 The Location field specifies the location inside the pixel that the
 interpolation should be done at, one of ``TGSI_INTERPOLATE_LOC_*``. Note that
diff --git a/src/gallium/include/pipe/p_shader_tokens.h 
b/src/gallium/include/pipe/p_shader_tokens.h
index d461f78..dbab23c 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -193,20 +193,27 @@ enum tgsi_semantic {
TGSI_SEMANTIC_BASEVERTEX,
TGSI_SEMANTIC_PATCH,   /**< generic per-patch semantic */
TGSI_SEMANTIC_TESSCOORD,   /**< coordinate being processed by tess */
TGSI_SEMANTIC_TESSOUTER,   /**< outer tessellation levels */
TGSI_SEMANTIC_TESSINNER,   /**< inner tessellation levels */
TGSI_SEMANTIC_VERTICESIN,  /**< number of input vertices */
TGSI_SEMANTIC_HELPER_INVOCATION,  /**< current invocation is helper */
TGSI_SEMANTIC_BASEINSTANCE,
TGSI_SEMANTIC_DRAWID,
TGSI_SEMANTIC_WORK_DIM,/**< opencl get_work_dim value */
+   TGSI_SEMANTIC_SUBGROUP_SIZE,
+   TGSI_SEMANTIC_SUBGROUP_INVOCATION,
+   TGSI_SEMANTIC_SUBGROUP_EQ_MASK,
+   TGSI_SEMANTIC_SUBGROUP_GE_MASK,
+   TGSI_SEMANTIC_SUBGROUP_GT_MASK,
+   TGSI_SEMANTIC_SUBGROUP_LE_MASK,
+   TGSI_SEMANTIC_SUBGROUP_LT_MASK,
TGSI_SEMANTIC_COUNT,   /**< number of semantic values */
 };
 
 struct tgsi_declaration_semantic
 {
unsigned Name   : 8;  /**< one of TGSI_SEMANTIC_x */
unsigned Index  : 16; /**< UINT */
unsigned StreamX: 2; /**< vertex stream

[Mesa-dev] [PATCH 06/19] gallium: add PIPE_CAP_TGSI_BALLOT

From: Nicolai Hähnle 

---
 src/gallium/docs/source/screen.rst   | 2 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 17 files changed, 18 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 947b843..cb49454 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -370,20 +370,22 @@ The integer capabilities:
 * ``PIPE_CAP_TGSI_MUL_ZERO_WINS``: Whether TGSI shaders support the
   ``TGSI_PROPERTY_MUL_ZERO_WINS`` shader property.
 * ``PIPE_CAP_DOUBLES``: Whether double precision floating-point operations
   are supported.
 * ``PIPE_CAP_INT64``: Whether 64-bit integer operations are supported.
 * ``PIPE_CAP_INT64_DIVMOD``: Whether 64-bit integer division/modulo
   operations are supported.
 * ``PIPE_CAP_TGSI_TEX_TXF_LZ``: Whether TEX_LZ and TXF_LZ opcodes are
   supported.
 * ``PIPE_CAP_TGSI_CLOCK``: Whether the CLOCK opcode is supported.
+* ``PIPE_CAP_TGSI_BALLOT``: Whether the BALLOT and READ_* opcodes as well as
+  the SUBGROUP_* semantics are supported.
 
 
 .. _pipe_capf:
 
 PIPE_CAPF_*
 
 
 The floating-point capabilities are:
 
 * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 11053c6..1e7fe91 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -239,20 +239,21 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_CLOCK:
+   case PIPE_CAP_TGSI_BALLOT:
   return 0;
 
/* Stream output. */
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
   return 0;
 
/* Geometry shader output, unsupported. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 2eaedc0..c1a41ba 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -296,20 +296,21 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_CLOCK:
+   case PIPE_CAP_TGSI_BALLOT:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
return 1;
 
case PIPE_CAP_SHAREABLE_SHADERS:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
/* manage the variants for these ourself, to avoid breaking precompile: 
*/
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 703c49a..5bcf03d 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -297,20 +297,21 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
case PIPE_CAP_NATIVE_FENCE_FD:
case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_CLOCK:
+   case PIPE_CAP_TGSI_BALLOT:
   return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:

[Mesa-dev] [PATCH 00/19] ARB_shader_ballot implementation for radeonsi

Hi all,

this series implements ARB_shader_ballot for radeonsi, tested against
a bunch of piglit tests I just sent out as well as an upcoming test
in the GLCTS.

There are a bunch of gotchas in LLVM, and I'll probably be sending
out those patches next week. The basic functionality is working
even with LLVM 4.0, but it's easy to run into trouble.

ARB_shader_ballot could be interesting for AZDO-style programming on
AMD hardware. By default, texture instructions can only be applied to
samplers that are dynamically uniform, i.e. the same texture needs to
be sampled by all shader invocations within a draw call. Even
ARB_bindless_texture doesn't relax this constraint.

However, using the readFirstInvocationARB builtin provided by
ARB_shader_ballot, one could write a loop like:

  samplerXX textures[N];

  int idx = ...;
  for (;;) {
int local_idx = readFirstInvocationARB(idx);
if (local_idx != idx)
  continue;

sample from textures[local_idx]
  }

or some equivalent using ARB_bindless_texture instead of indices,
and have it work correctly. There's a bit of overhead to the loop,
of course, but as long as _most_ shader waves have uniform values
it could be a useful tool for reducing CPU overhead by building
bigger batches and fewer draw calls.

Note that the spec language of ARB_shader_ballot doesn't guarantee
that this trick works, but it does work on all AMD GCN hardware.

Please review!
Nicolai
--
 docs/features.txt|   2 +-
 docs/relnotes/17.1.0.html|   1 +
 src/compiler/glsl/builtin_functions.cpp  |  77 +
 src/compiler/glsl/builtin_variables.cpp  |  22 +++
 src/compiler/glsl/glsl_parser_extras.cpp |   1 +
 src/compiler/glsl/glsl_parser_extras.h   |   2 +
 src/compiler/glsl/ir.cpp |  12 ++
 src/compiler/glsl/ir_expression_operation.py |   7 +
 src/compiler/glsl/ir_validate.cpp|  16 ++
 src/compiler/shader_enums.c  |   7 +
 src/compiler/shader_enums.h  |  59 +++
 src/gallium/auxiliary/tgsi/tgsi_info.c   |   6 +-
 src/gallium/auxiliary/tgsi/tgsi_strings.c|   7 +
 src/gallium/docs/source/screen.rst   |   2 +
 src/gallium/docs/source/tgsi.rst | 118 +++--
 src/gallium/drivers/etnaviv/etnaviv_screen.c |   1 +
 .../drivers/freedreno/freedreno_screen.c |   1 +
 src/gallium/drivers/i915/i915_screen.c   |   1 +
 src/gallium/drivers/llvmpipe/lp_screen.c |   1 +
 .../drivers/nouveau/nv30/nv30_screen.c   |   1 +
 .../drivers/nouveau/nv50/nv50_screen.c   |   1 +
 .../drivers/nouveau/nvc0/nvc0_screen.c   |   1 +
 src/gallium/drivers/r300/r300_screen.c   |   1 +
 src/gallium/drivers/r600/r600_pipe.c |   1 +
 src/gallium/drivers/radeonsi/si_pipe.c   |   3 +
 src/gallium/drivers/radeonsi/si_shader.c | 153 -
 .../drivers/radeonsi/si_shader_internal.h|   2 +-
 .../drivers/radeonsi/si_shader_tgsi_setup.c  |  27 ++-
 src/gallium/drivers/softpipe/sp_screen.c |   1 +
 src/gallium/drivers/svga/svga_screen.c   |   1 +
 src/gallium/drivers/swr/swr_screen.cpp   |   1 +
 src/gallium/drivers/vc4/vc4_screen.c |   1 +
 src/gallium/drivers/virgl/virgl_screen.c |   1 +
 src/gallium/include/pipe/p_defines.h |   1 +
 src/gallium/include/pipe/p_shader_tokens.h   |  13 +-
 src/mapi/glapi/registry/gl.xml   |   2 +-
 src/mesa/main/extensions_table.h |   1 +
 src/mesa/main/mtypes.h   |   1 +
 src/mesa/program/ir_to_mesa.cpp  |   3 +
 src/mesa/state_tracker/st_extensions.c   |   1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  25 +++
 41 files changed, 553 insertions(+), 32 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Fwd: [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread Ilia Mirkin

On Fri, Mar 31, 2017 at 6:12 AM, Gregory Hainaut
 wrote:
>> Others have reported this crashing on Nouveau. I haven't seen the problem on 
>> radeonsi or i965.
>
> Hello Timothy (sorry for the double mail, email is a complex tool:) )
>
> Hum, tbh. I was quite surprised to hit this bug. I guess you save a
> pre-optimized shader in the cache. So it could depends on optimization
> passes.
>
> From the top of my head, I think the "offending" line is this one
> const ivec2 offsets[4] = {ivec2(...), ivec2(...), ivec2(...), ivec2(...)};
>
> Strangely enough there are only 3 parameters without name in the
> parameter list (signature is int, size 2 and CONTANT). Maybe one was
> optimized away, I didn't look further.

Note that nouveau is unique in that it can process
textureGatherOffsets() directly, without lowering it to 4x
textureGatherOffset.

The relevant code is in st_glsl_to_tgsi.cpp

  if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
 lower_offset_arrays(ir);

So I think with nouveau, you're seeing glsl ir that you wouldn't see otherwise.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread gregory hainaut

On Fri, 31 Mar 2017 19:16:10 +1100
Timothy Arceri  wrote:

> 
> 
> On 31/03/17 18:00, gregory hainaut wrote:
> > On Fri, 31 Mar 2017 08:24:36 +0200
> > Nicolai Hähnle  wrote:
> >
> > Hello Nicolai
> >
> >> On 30.03.2017 21:55, Gregory Hainaut wrote:
> >>> Typically happen when we want to copy an unnamed shader parameter
> >>> in the shader cache.
> >>
> >> So this happens only when blob_write_string is called from nouveau?
> >
> > Sorry, I poorly explain myself. I should have written reproduce &
> > tested on Nouveau. I don't know for others drivers, they should be
> > impacted.
> >
> > _mesa_add_parameter seems to allow to store a NULL pointer in p->Name.
> > Which is later written by blob_write_string. I guess it could
> > depends on the shader cache state.
> >
> >
> > I got the crash with this piglit test:
> > textureGather fs offsets r 0 float 2D repeat -auto -fb
> 
> Others have reported this crashing on Nouveau. I haven't seen the 
> problem on radeonsi or i965.
> 
> >
> >
> >> By the way, please setup send-mail so that it threads your mails.
> >> That should be the default, so I'm not sure what happened here...
> > Oh. I edited my email in the mailer queue which moved the email from my
> > pop3 to my imap account. I guess it broke the threading link. I will
> > be more careful next time.
> >
> > Thanks
> >
> >
> >> Thanks,
> >> Nicolai
> >
> >>>
> >>> Note: it is safer to copy an empty string so we can read it back
> >>> safely.
> >>>
> >>> Fix piglit crashes of the 'texturegatheroffsets' tests
> >>>
> >>> Signed-off-by: Gregory Hainaut 
> >>> ---
> >>>  src/compiler/glsl/blob.c | 5 -
> >>>  1 file changed, 4 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c
> >>> index 769ebf1..f84d7f3 100644
> >>> --- a/src/compiler/glsl/blob.c
> >>> +++ b/src/compiler/glsl/blob.c
> >>> @@ -176,7 +176,10 @@ blob_write_intptr(struct blob *blob, intptr_t
> >>> value) bool
> >>>  blob_write_string(struct blob *blob, const char *str)
> >>>  {
> >>> -   return blob_write_bytes(blob, str, strlen(str) + 1);
> >>> +   if (str == NULL)
> >>> +  return blob_write_bytes(blob, "", 1);
> >>> +   else
> >>> +  return blob_write_bytes(blob, str, strlen(str) + 1);
> >>>  }
> >>>
> >>>  void
> >>>
> >>
> >>

Fwiw, I backtraced the origin of the NULL. As you can see 
_mesa_add_typed_unnamed_constant will set the name string to NULL instead of "".
So it seems Intel/AMD don't use unnamed constants when the shader is linked.

#0  _mesa_add_parameter (paramList=0x8126628, type=PROGRAM_CONSTANT, name=0x0, 
size=2, datatype=5124, values=0xc4d0, state=0x0) at 
mesa/program/prog_parameter.c:256
#1  0xf6d6224d in _mesa_add_typed_unnamed_constant (paramList=0x8126628, 
values=0xc4d0, size=2, datatype=5124, swizzleOut=0xc468) at 
mesa/program/prog_parameter.c:345
#2  0xf6d1d9ee in glsl_to_tgsi_visitor::add_constant (this=0x839b838, 
file=PROGRAM_CONSTANT, values=0xc4d0, size=2, datatype=5124, 
swizzle_out=0x83a0a98)
at state_tracker/st_glsl_to_tgsi.cpp:1126
#3  0xf6d296af in glsl_to_tgsi_visitor::visit (this=0x839b838, ir=0x8395b60) at 
state_tracker/st_glsl_to_tgsi.cpp:3410
#4  0xf6e184c7 in ir_constant::accept (this=0x8395b60, v=0x839b838) at 
glsl/ir.h:2133
#5  0xf6d28b5d in glsl_to_tgsi_visitor::visit (this=0x839b838, ir=0x8395a98) at 
state_tracker/st_glsl_to_tgsi.cpp:3278


So we can do 3 different fixes (and potentially all 3).
1/ update _mesa_add_typed_unnamed_constant to use an empty string
2/ update _mesa_add_parameter "p->Name = name ? strdup(name) : NULL;" to use an 
empty string when name is null
3/ or my previous patch :)

Cheers,
Gregory
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] intel: genxml: fix out of tree builds

On 31 March 2017 at 16:33, Lionel Landwerlin
 wrote:
> On 31/03/17 16:21, Emil Velikov wrote:
>>
>> On 31 March 2017 at 14:40, Lionel Landwerlin
>>  wrote:
>>>
>>> v2: use Emil's recommendation
>>>  change rule to closer to genxml/genX_bits.h
>>>
>>> Signed-off-by: Lionel Landwerlin 
>>> ---
>>>   src/intel/Makefile.genxml.am | 4 ++--
>>>   1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
>>> index 05a12f8f77..e34536d37c 100644
>>> --- a/src/intel/Makefile.genxml.am
>>> +++ b/src/intel/Makefile.genxml.am
>>> @@ -34,9 +34,9 @@ $(GENXML_GENERATED_PACK_FILES):
>>> genxml/gen_pack_header.py
>>>  $(MKDIR_GEN)
>>>  $(PYTHON_GEN) $(srcdir)/genxml/gen_pack_header.py $< > $@ ||
>>> ($(RM) $@; false)
>>>
>>> -genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
>>> +genxml/genX_xml.h: genxml/gen_zipped_file.py $(GENXML_XML_FILES)
>>>  $(MKDIR_GEN)
>>> -   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py
>>> $(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
>>> +   $(PYTHON_GEN) $< $(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM)
>>> $@; false)
>>>
>> This is not what I recommended :-( If my suggestion is unclear or
>> buggy please say so.
>>
>> -Emil
>>
> Replacing "$(srcdir)/genxml/gen_zipped_file.py" by "$<" isn't right?
> I think that's the only difference from what was in your email.

Barring the genX_bits.h case, we expand the script name throughout mesa.
This way you don't really care the way the dependencies are listed,
whether a new one gets added, etc.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] intel: genxml: fix out of tree builds


On 31/03/17 16:21, Emil Velikov wrote:

On 31 March 2017 at 14:40, Lionel Landwerlin
 wrote:

v2: use Emil's recommendation
 change rule to closer to genxml/genX_bits.h

Signed-off-by: Lionel Landwerlin 
---
  src/intel/Makefile.genxml.am | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index 05a12f8f77..e34536d37c 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -34,9 +34,9 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
 $(MKDIR_GEN)
 $(PYTHON_GEN) $(srcdir)/genxml/gen_pack_header.py $< > $@ || ($(RM) 
$@; false)

-genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
+genxml/genX_xml.h: genxml/gen_zipped_file.py $(GENXML_XML_FILES)
 $(MKDIR_GEN)
-   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
$(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
+   $(PYTHON_GEN) $< $(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; 
false)


This is not what I recommended :-( If my suggestion is unclear or
buggy please say so.

-Emil


Replacing "$(srcdir)/genxml/gen_zipped_file.py" by "$<" isn't right?
I think that's the only difference from what was in your email.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v2] intel: genxml: fix out of tree builds

On 31 March 2017 at 14:40, Lionel Landwerlin
 wrote:
> v2: use Emil's recommendation
> change rule to closer to genxml/genX_bits.h
>
> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/Makefile.genxml.am | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
> index 05a12f8f77..e34536d37c 100644
> --- a/src/intel/Makefile.genxml.am
> +++ b/src/intel/Makefile.genxml.am
> @@ -34,9 +34,9 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
> $(MKDIR_GEN)
> $(PYTHON_GEN) $(srcdir)/genxml/gen_pack_header.py $< > $@ || ($(RM) 
> $@; false)
>
> -genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
> +genxml/genX_xml.h: genxml/gen_zipped_file.py $(GENXML_XML_FILES)
> $(MKDIR_GEN)
> -   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
> $(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
> +   $(PYTHON_GEN) $< $(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; 
> false)
>
This is not what I recommended :-( If my suggestion is unclear or
buggy please say so.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] glsl: ir_explog_to_explog2 is no more

2017-03-31 Thread Kenneth Graunke

On Friday, March 31, 2017 3:58:58 AM PDT Erik Faye-Lund wrote:
> Since 63684a9a ("glsl: Combine many instruction lowering passes
> into one.", Thu Nov 18 2010), we no longer have anything called
> ir_explog_to_explog2. So it's only confusing to have those
> references there.
> 
> Update with the appropriate method, so people can grep for it the
> current tree if they encounter it.
> 
> Signed-off-by: Erik Faye-Lund 
> ---
>  src/mesa/program/ir_to_mesa.cpp| 4 +++-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +++-
>  2 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
> index a2476dd..9760f7b 100644
> --- a/src/mesa/program/ir_to_mesa.cpp
> +++ b/src/mesa/program/ir_to_mesa.cpp
> @@ -1069,8 +1069,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
>emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
>break;
> case ir_unop_exp:
> +  assert(!"not reached: should be handled by exp_to_exp2");
> +  break;
> case ir_unop_log:
> -  assert(!"not reached: should be handled by ir_explog_to_explog2");
> +  assert(!"not reached: should be handled by log_to_log2");
>break;
> case ir_unop_log2:
>emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 369dff7..8438e98 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -1661,8 +1661,10 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
> ir, st_src_reg *op)
>emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
>break;
> case ir_unop_exp:
> +  assert(!"not reached: should be handled by exp_to_exp2");
> +  break;
> case ir_unop_log:
> -  assert(!"not reached: should be handled by ir_explog_to_explog2");
> +  assert(!"not reached: should be handled by log_to_log2");
>break;
> case ir_unop_log2:
>emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/3] misc cleanups


Series is

Reviewed-by: Nicolai Hähnle 

On 31.03.2017 12:57, Erik Faye-Lund wrote:

Here's a few cleanup-patches I've had in my tree for a while, that
I thought I should send upstream soon.

Erik Faye-Lund (3):
  st/mesa: avoid aliasing violation in st_cb_perfmon.c
  gallium/docs: remove documentation of removed arg
  glsl: ir_explog_to_explog2 is no more

 src/gallium/docs/source/screen.rst | 2 --
 src/mesa/program/ir_to_mesa.cpp| 4 +++-
 src/mesa/state_tracker/st_cb_perfmon.c | 6 +++---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +++-
 4 files changed, 9 insertions(+), 7 deletions(-)




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: use -O0 optimization for builtin_functions.cpp with MinGW

2017-03-31 Thread Jose Fonseca


On 31/03/17 04:23, Brian Paul wrote:

Some versions of MinGW-w64 such as 5.3.1 and 6.2.0 produce bad code
with -O2 or -O3 causing a random driver crash when running programs
that use GLSL.  Most Mesa demos in the glsl/ directory trigger the
bug, but not the fragcoord.c test.

Use a #pragma to force -O0 for this file for later MinGW versions.
Luckily, this is basically one-time setup code.  I suspect the bug
is related to the sheer size of this file.

This should let us move to newer versions of MinGW-w64 for Mesa.
---
 src/compiler/glsl/builtin_functions.cpp | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index e30509a..e32b18c 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -53,6 +53,26 @@
  *name and parameters.
  */

+
+/**
+ * Unfortunately, some versions of MinGW produce bad code if this file
+ * is compiled with -O2 or -O3.  The resulting driver will crash in random
+ * places if the app uses GLSL.
+ * The work-around is to disable optimizations for just this file.  Luckily,
+ * this code is basically just executed once.
+ *
+ * MinGW 4.6.3 (in Ubuntu 13.10) does not have this bug.
+ * MinGW 5.3.1 (in Ubuntu 16.04) definitely has this bug.
+ * MinGW 6.2.0 (in Ubuntu 16.10) definitely has this bug.
+ * MinGW x.y.z - don't know.  Assume versions after 4.6.x are buggy
+ */
+
+#if defined(__MINGW32__) && ((__GNUC__ * 100) + __GNUC_MINOR >= 407)
+#warning "disabling optimizations for this file to work around compiler bug"
+#pragma GCC optimize("O0")
+#endif
+
+
 #include 
 #include 
 #include "main/core.h" /* for struct gl_shader */



Looks good to me.  Great find.


Reviewed-by: Jose Fonseca 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: add si_init_descriptor_list() helper

2017-03-31 Thread Samuel Pitoiset

This will be used by bindless to initialize the descriptor for
both samplers and images.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index d106351c85..84da830c11 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -96,6 +96,21 @@ static uint32_t null_image_descriptor[8] = {
 * descriptor */
 };
 
+static void si_init_descriptor_list(uint32_t *desc_list,
+   unsigned element_dw_size,
+   unsigned num_elements,
+   const uint32_t *null_descriptor)
+{
+   int i;
+
+   /* Initialize the array to NULL descriptors if the element size is 8. */
+   if (null_descriptor) {
+   assert(element_dw_size % 8 == 0);
+   for (i = 0; i < num_elements * element_dw_size / 8; i++)
+   memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
+   }
+}
+
 static void si_init_descriptors(struct si_descriptors *desc,
unsigned shader_userdata_index,
unsigned element_dw_size,
@@ -103,8 +118,6 @@ static void si_init_descriptors(struct si_descriptors *desc,
const uint32_t *null_descriptor,
unsigned *ce_offset)
 {
-   int i;
-
assert(num_elements <= sizeof(desc->dirty_mask)*8);
 
desc->list = CALLOC(num_elements, element_dw_size * 4);
@@ -121,13 +134,8 @@ static void si_init_descriptors(struct si_descriptors 
*desc,
*ce_offset += align(element_dw_size * num_elements * 4, 32);
}
 
-   /* Initialize the array to NULL descriptors if the element size is 8. */
-   if (null_descriptor) {
-   assert(element_dw_size % 8 == 0);
-   for (i = 0; i < num_elements * element_dw_size / 8; i++)
-   memcpy(desc->list + i * 8, null_descriptor,
-  8 * 4);
-   }
+   si_init_descriptor_list(desc->list, element_dw_size, num_elements,
+   null_descriptor);
 }
 
 static void si_release_descriptors(struct si_descriptors *desc)
-- 
2.12.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] intel: genxml: fix out of tree builds


On 31/03/17 14:23, Emil Velikov wrote:

On 31 March 2017 at 11:42, Lionel Landwerlin
 wrote:
The GENXML_XML_FILES variable was missing the "srcdir" prefix, thus
the files won't be found on OOT builds.

Side note: If you want to spare yourself such fun experiences, CC me
on build related patches. Or poke me on IRC of course.


Thanks, will do.
Just sent a v2 that makes it a bit closer to genX_bits.h




Signed-off-by: Lionel Landwerlin 
---
  src/intel/Makefile.genxml.am | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index 05a12f8f77..d780ca8470 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -36,7 +36,7 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py

  genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
 $(MKDIR_GEN)
-   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
$(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
+   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py $(addprefix 
$(srcdir)/,$(GENXML_XML_FILES)) > $@ || ($(RM) $@; false)

Or even better:

$(PYTHON_GEN) $(srcdir)/genxml/gen_zipped_file.py
$(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; false)

With the above
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2] intel: genxml: fix out of tree builds

v2: use Emil's recommendation
change rule to be closer to genxml/genX_bits.h

Signed-off-by: Lionel Landwerlin 
---
 src/intel/Makefile.genxml.am | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index 05a12f8f77..e34536d37c 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -34,9 +34,9 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
$(MKDIR_GEN)
$(PYTHON_GEN) $(srcdir)/genxml/gen_pack_header.py $< > $@ || ($(RM) $@; 
false)

-genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
+genxml/genX_xml.h: genxml/gen_zipped_file.py $(GENXML_XML_FILES)
$(MKDIR_GEN)
-   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
$(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
+   $(PYTHON_GEN) $< $(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; 
false)

 genxml/genX_bits.h: genxml/gen_bits_header.py $(GENXML_XML_FILES)
$(MKDIR_GEN)
--
2.11.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] intel: genxml: fix out of tree builds

On 31 March 2017 at 11:42, Lionel Landwerlin
 wrote:
The GENXML_XML_FILES variable was missing the "srcdir" prefix, thus
the files won't be found on OOT builds.

Side note: If you want to spare yourself such fun experiences, CC me
on build related patches. Or poke me on IRC of course.

> Signed-off-by: Lionel Landwerlin 
> ---
>  src/intel/Makefile.genxml.am | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
> index 05a12f8f77..d780ca8470 100644
> --- a/src/intel/Makefile.genxml.am
> +++ b/src/intel/Makefile.genxml.am
> @@ -36,7 +36,7 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
>
>  genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
> $(MKDIR_GEN)
> -   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
> $(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
> +   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
> $(addprefix $(srcdir)/,$(GENXML_XML_FILES)) > $@ || ($(RM) $@; false)
Or even better:

   $(PYTHON_GEN) $(srcdir)/genxml/gen_zipped_file.py
$(GENXML_XML_FILES:%=$(srcdir)/%) > $@ || ($(RM) $@; false)

With the above
Reviewed-by: Emil Velikov 

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] egl/android: Dequeue buffers inside EGL calls

2017-03-31 Thread Rob Clark

On Fri, Mar 31, 2017 at 2:06 AM, Tapani Pälli  wrote:
>
>
> On 03/31/2017 08:24 AM, Rob Clark wrote:
>>
>> On Fri, Mar 31, 2017 at 12:22 AM, Tapani Pälli 
>> wrote:
>>>
>>>
>>>
>>> On 03/30/2017 05:57 PM, Emil Velikov wrote:


 On 30 March 2017 at 15:30, Tomasz Figa  wrote:
>
>
> On Thu, Mar 30, 2017 at 11:17 PM, Emil Velikov
> 
> wrote:
>>
>>
>>
>> On 30 March 2017 at 11:55, Tomasz Figa  wrote:
>>>
>>>
>>> Android buffer queues can be abandoned, which results in failing to
>>> dequeue next buffer. Currently this would fail somewhere deep within
>>> the DRI stack calling loader's getBuffers*(), without any error
>>> reporting to the client app. However Android framework code relies on
>>> proper signaling of this event, so we move buffer dequeue to
>>> createWindowSurface() and swapBuffers() call, which can generate
>>> proper
>>> EGL errors. To keep the performance benefits of delayed buffer
>>> handling,
>>> if any, fence wait and DRI image creation is kept delayed until
>>> getBuffers*() is called by the DRI driver.
>>>
>> Thank you Tomasz.
>>
>> I'm fairly confident that this should resolve the crash [in
>> swap_buffers] that Mauro was seeing.
>> Mauro can you give it a test ?
>
>
>
> Ah, I actually noticed a problem with existing code, supposedly fixed
> by [1], but I'm afraid it's still wrong.
>
> Current swap_buffers calls get_back_bo(), but doesn't call
> update_buffers(), which is the function that should be called before
> to actually dequeue a buffer from Android's buffer queue. Given that,
> get_back_bo() would simply fail with !dri2_surf->buffer, because no
> buffer was dequeued.
>
 Right - I was wondering why we don't hit that on EGL/GBM or EGL/Wayland.
 From a quick look - may be because EGL/Android drops the dpy mutex in
 droid_window_enqueue_buffer().

> My patch removes update_buffers() and changes the buffer management so
> that there is always a buffer dequeued, starting from surface
> creation, unless there was an error somewhere.
>
 Of the top of your head - is there something stopping us from using
 the same method on $other platforms?

> [1]
>
> https://cgit.freedesktop.org/mesa/mesa/commit/src/egl/drivers/dri2/platform_android.c?id=4d4558411db166d2d66f8cec9cb581149dbe1597
>
>>
>>
>> Not that huge of an expert on the Android specifics, so just a humble
>> request:
>> Can we seek the code resuffle (droid_{alloc,free}_local_buffer,


 Oops silly typo - s/seek/split/.

>> other?) separate from the functionality changes ?
>
>
>
> Sure. Thanks for suggestion.
>
 Please give it a day or two for others to comment.
>>>
>>>
>>>
>>> I'm trying to debug why this causes our homescreen (wallpaper) to be
>>> black.
>>> Otherwise I haven't seen any issues with these changes.
>>>
>>
>> wallpaper seems to be a special sorta hell..  I wonder if there is
>> somehow some sort of interaction with what I fixed / worked-around in
>> a5e733c6b52e93de3000647d075f5ca2f55fcb71 ??
>>
>> Maybe at least try commenting out the temp-pbuffer thing to get max
>> texture size, and see if that "fixes" things
>
>
> Can you give more details, I still live in la la land and don't know about
> 'temp-pbuffer thing'?


Sorry, this is something in the wallpaper app java code.. I forget
exactly where it lives now, but there was some code matching what I
pasted in the commit msg which was creating and then destroying a
temporary pbuffer..

BR,
-R
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Fix GLX_MESA_query_renderer video memory on 32-bit.

Hi Ken,

On 31 March 2017 at 00:28, Kenneth Graunke  wrote:
> On modern systems with 4GB apertures, the size in bytes is 4294967296,
> or (1ull << 32).  The kernel gives us the aperture size as a __u64,
> which works out great.
>
> Unfortunately, libdrm "helpfully" returns the data as a size_t, which
> on 32-bit systems means it truncates the aperture size to 0 bytes.
> We've happily reported this value as 0 MB of video memory via
> GLX_MESA_query_renderer since it was originally exposed.
>
> This patch bypasses libdrm and calls the ioctl ourselves so we can
> use a proper uint64_t, avoiding the 32-bit integer overflow.  We now
> report a proper video memory size on 32-bit systems.
> ---
>  src/mesa/drivers/dri/i965/intel_screen.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
> b/src/mesa/drivers/dri/i965/intel_screen.c
> index 811a9c5a867..f94e8a77c10 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -950,6 +950,17 @@ static const __DRIimageExtension intelImageExtension = {
>  .createImageWithModifiers   = intel_create_image_with_modifiers,
>  };
>
> +static uint64_t
> +get_aperture_size(int fd)
> +{
> +   struct drm_i915_gem_get_aperture aperture;
> +
> +   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture) != 0)
> +  return 0;
> +
> +   return aperture.aper_size;
> +}
> +
Can we also use this for i915 + i915g ?

At the moment a simple glxinfo invocation wakes up all the pci devices
on the system. Props to libpciaccess, which is used by libdrm_intel.

I've fixed that in both the kernel and libpciaccess, but latter hasn't
seen any release yet. Will check over the weekend if I have access to do
a release myself.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965: Fix GLX_MESA_query_renderer video memory on 32-bit.

2017-03-31 Thread Chris Wilson

On Thu, Mar 30, 2017 at 06:48:38PM -0700, Kenneth Graunke wrote:
> Looking again...libdrm_intel sets bufmgr_gem->gtt_size to
> drm_i915_gem_get_aperture::aper_available_size - and uses that field
> to -ENOSPC your execbuffers.  drm_intel_get_aperture_sizes, and this
> query, use drm_i915_gem_get_aperture::aper_size - which is not quite
> the same.  Reading the kernel sources, it looks like aper_available_size
> subtracts any pinned memory.  At least in a PPGTT world, that's probably
> not materially different given how early we're calling it.

get_aperture is *meaningless* with full-ppgtt. But if libdrm is limiting
your batchbuffers using its result, then that is the maximum usable
memory for GL (whilst it remains using libdrm_intel batchbuffers). :(
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] egl/android: Dequeue buffers inside EGL calls

2017-03-31 Thread Tapani Pälli

On 03/31/2017 10:12 AM, Tapani Pälli wrote:

On 03/31/2017 09:06 AM, Tapani Pälli wrote:

On 03/31/2017 08:24 AM, Rob Clark wrote:

On Fri, Mar 31, 2017 at 12:22 AM, Tapani Pälli
wrote:

On 03/30/2017 05:57 PM, Emil Velikov wrote:

On 30 March 2017 at 15:30, Tomasz Figa wrote:

On Thu, Mar 30, 2017 at 11:17 PM, Emil Velikov

wrote:

On 30 March 2017 at 11:55, Tomasz Figa wrote:

Android buffer queues can be abandoned, which results in failing to
dequeue next buffer. Currently this would fail somewhere deep
within
the DRI stack calling loader's getBuffers*(), without any error
reporting to the client app. However Android framework code
relies on
proper signaling of this event, so we move buffer dequeue to
createWindowSurface() and swapBuffers() call, which can generate
proper
EGL errors. To keep the performance benefits of delayed buffer
handling,
if any, fence wait and DRI image creation is kept delayed until
getBuffers*() is called by the DRI driver.

Thank you Tomasz.

I'm fairly confident that this should resolve the crash [in
swap_buffers] that Mauro was seeing.
Mauro can you give it a test ?

Ah, I actually noticed a problem with existing code, supposedly fixed
by [1], but I'm afraid it's still wrong.

Current swap_buffers calls get_back_bo(), but doesn't call
update_buffers(), which is the function that should be called before
to actually dequeue a buffer from Android's buffer queue. Given that,
get_back_bo() would simply fail with !dri2_surf->buffer, because no
buffer was dequeued.

Right - I was wondering why we don't hit that on EGL/GBM or
EGL/Wayland.
From a quick look - may be because EGL/Android drops the dpy mutex in
droid_window_enqueue_buffer().

My patch removes update_buffers() and changes the buffer
management so
that there is always a buffer dequeued, starting from surface
creation, unless there was an error somewhere.

Off the top of your head - is there something stopping us from using
the same method on $other platforms?

[1]
https://cgit.freedesktop.org/mesa/mesa/commit/src/egl/drivers/dri2/platform_android.c?id=4d4558411db166d2d66f8cec9cb581149dbe1597

Not that huge of an expert on the Android specifics, so just a
humble
request:
Can we seek the code resuffle (droid_{alloc,free}_local_buffer,

Oops silly typo - s/seek/split/.

other?) separate from the functionality changes ?

Sure. Thanks for suggestion.

Please give it a day or two for others to comment.

I'm trying to debug why this causes our homescreen (wallpaper) to be
black.
Otherwise I haven't seen any issues with these changes.

wallpaper seems to be a special sorta hell.. I wonder if there is
somehow some sort of interaction with what I fixed / worked-around in
a5e733c6b52e93de3000647d075f5ca2f55fcb71 ??

Maybe at least try commenting out the temp-pbuffer thing to get max
texture size, and see if that "fixes" things

Can you give more details, I still live in la la land and don't know
about 'temp-pbuffer thing'?

aa I did not recall the problem, you mean the 'dummy pbuffer' in
SurfaceFlinger .. yes I will check if this is related.

If I take away that dummy pbuffer usage (which is useless anyway),
couple of errors disappear from the log. They are:

SurfaceFlinger: releasePendingBuffer failed: Unknown error -1 (1)
SurfaceFlinger: releasePendingBuffer failed: Unknown error -1 (1)

but otherwise the desktop still stays black, live wallpapers seem to
work so there is something special about this default wallpaper. Will
continue digging ..

// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] glsl: ir_explog_to_explog2 is no more

Since 63684a9a ("glsl: Combine many instruction lowering passes
into one.", Thu Nov 18 2010), we no longer have anything called
ir_explog_to_explog2. So it's only confusing to have those
references there.

Update with the appropriate method, so people can grep for it in the
current tree if they encounter it.

Signed-off-by: Erik Faye-Lund 
---
 src/mesa/program/ir_to_mesa.cpp| 4 +++-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index a2476dd..9760f7b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1069,8 +1069,10 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
   emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
   break;
case ir_unop_exp:
+  assert(!"not reached: should be handled by exp_to_exp2");
+  break;
case ir_unop_log:
-  assert(!"not reached: should be handled by ir_explog_to_explog2");
+  assert(!"not reached: should be handled by log_to_log2");
   break;
case ir_unop_log2:
   emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 369dff7..8438e98 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1661,8 +1661,10 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
ir, st_src_reg *op)
   emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
   break;
case ir_unop_exp:
+  assert(!"not reached: should be handled by exp_to_exp2");
+  break;
case ir_unop_log:
-  assert(!"not reached: should be handled by ir_explog_to_explog2");
+  assert(!"not reached: should be handled by log_to_log2");
   break;
case ir_unop_log2:
   emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] gallium/docs: remove documentation of removed arg

geom was removed in e968975 ("gallium: remove the geom_flags param
from is_format_supported", Tue Mar 8 00:01:58 2011 +0100), but the
documentation of it was left over. Let's bring the documentation up
to date.

Signed-off-by: Erik Faye-Lund 
---
 src/gallium/docs/source/screen.rst | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 947b843..6fb9ddc 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -657,8 +657,6 @@ the maximum allowed legal value is 32.
 
 **bindings** is a bitmask of :ref:`PIPE_BIND` flags.
 
-**geom_flags** is a bitmask of PIPE_TEXTURE_GEOM_x flags.
-
 Returns TRUE if all usages can be satisfied.
 
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 0/3] misc cleanups

Here's a few cleanup-patches I've had in my tree for a while, that
I thought I should send upstream soon.

Erik Faye-Lund (3):
  st/mesa: avoid aliasing violation in st_cb_perfmon.c
  gallium/docs: remove documentation of removed arg
  glsl: ir_explog_to_explog2 is no more

 src/gallium/docs/source/screen.rst | 2 --
 src/mesa/program/ir_to_mesa.cpp| 4 +++-
 src/mesa/state_tracker/st_cb_perfmon.c | 6 +++---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +++-
 4 files changed, 9 insertions(+), 7 deletions(-)

-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] st/mesa: avoid aliasing violation in st_cb_perfmon.c

Signed-off-by: Erik Faye-Lund 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index cd7fdc0..23d32dc 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -308,16 +308,16 @@ st_GetPerfMonitorResult(struct gl_context *ctx,
   data[offset++] = cid;
   switch (type) {
   case GL_UNSIGNED_INT64_AMD:
- *(uint64_t *)&data[offset] = result.u64;
+ memcpy(&data[offset], &result.u64, sizeof(uint64_t));
  offset += sizeof(uint64_t) / sizeof(GLuint);
  break;
   case GL_UNSIGNED_INT:
- *(uint32_t *)&data[offset] = result.u32;
+ memcpy(&data[offset], &result.u32, sizeof(uint32_t));
  offset += sizeof(uint32_t) / sizeof(GLuint);
  break;
   case GL_FLOAT:
   case GL_PERCENTAGE_AMD:
- *(GLfloat *)&data[offset] = result.f;
+ memcpy(&data[offset], &result.f, sizeof(GLfloat));
  offset += sizeof(GLfloat) / sizeof(GLuint);
  break;
   }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] st/mesa: add st_convert_image()

2017-03-31 Thread Samuel Pitoiset




On 03/31/2017 08:11 AM, Nicolai Hähnle wrote:

On 30.03.2017 18:55, Samuel Pitoiset wrote:

Should be used by the state tracker when glGetImageHandleARB()
is called in order to create a pipe_image_view template.

v2: - make 'st' const
- describe the function

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_atom_image.c | 103
++---
 src/mesa/state_tracker/st_texture.h|   6 ++
 2 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_image.c
b/src/mesa/state_tracker/st_atom_image.c
index 5dd2cd64f9..805014c44a 100644
--- a/src/mesa/state_tracker/st_atom_image.c
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -44,6 +44,61 @@
 #include "st_program.h"
 #include "st_format.h"

+void
+st_convert_image(const struct st_context *st, const struct
gl_image_unit *u,
+ struct pipe_image_view *img)
+{
+   struct st_texture_object *stObj = st_texture_object(u->TexObj);
+
+   img->resource = stObj->pt;
+   img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
+
+   switch (u->Access) {
+   case GL_READ_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_READ;
+  break;
+   case GL_WRITE_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_WRITE;
+  break;
+   case GL_READ_WRITE:
+  img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
+  break;
+   default:
+  unreachable("bad gl_image_unit::Access");
+   }
+
+   if (stObj->pt->target == PIPE_BUFFER) {
+  unsigned base, size;
+
+  base = stObj->base.BufferOffset;
+  assert(base < stObj->pt->width0);
+  size = MIN2(stObj->pt->width0 - base,
(unsigned)stObj->base.BufferSize);
+
+  img->u.buf.offset = base;
+  img->u.buf.size = size;
+   } else {
+  img->u.tex.level = u->Level + stObj->base.MinLevel;
+  if (stObj->pt->target == PIPE_TEXTURE_3D) {
+ if (u->Layered) {
+img->u.tex.first_layer = 0;
+img->u.tex.last_layer = u_minify(stObj->pt->depth0,
img->u.tex.level) - 1;
+ } else {
+img->u.tex.first_layer = u->_Layer;
+img->u.tex.last_layer = u->_Layer;
+ }
+  } else {
+ img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
+ img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
+ if (u->Layered && img->resource->array_size > 1) {
+if (stObj->base.Immutable)
+   img->u.tex.last_layer += stObj->base.NumLayers - 1;
+else
+   img->u.tex.last_layer += img->resource->array_size - 1;
+ }
+  }
+   }
+}
+
 static void
 st_bind_images(struct st_context *st, struct gl_program *prog,
enum pipe_shader_type shader_type)
@@ -70,53 +125,7 @@ st_bind_images(struct st_context *st, struct
gl_program *prog,
  continue;
   }

-  img->resource = stObj->pt;
-  img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
-
-  switch (u->Access) {
-  case GL_READ_ONLY:
- img->access = PIPE_IMAGE_ACCESS_READ;
- break;
-  case GL_WRITE_ONLY:
- img->access = PIPE_IMAGE_ACCESS_WRITE;
- break;
-  case GL_READ_WRITE:
- img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
- break;
-  default:
- unreachable("bad gl_image_unit::Access");
-  }
-
-  if (stObj->pt->target == PIPE_BUFFER) {
- unsigned base, size;
-
- base = stObj->base.BufferOffset;
- assert(base < stObj->pt->width0);
- size = MIN2(stObj->pt->width0 - base,
(unsigned)stObj->base.BufferSize);
-
- img->u.buf.offset = base;
- img->u.buf.size = size;
-  } else {
- img->u.tex.level = u->Level + stObj->base.MinLevel;
- if (stObj->pt->target == PIPE_TEXTURE_3D) {
-if (u->Layered) {
-   img->u.tex.first_layer = 0;
-   img->u.tex.last_layer = u_minify(stObj->pt->depth0,
img->u.tex.level) - 1;
-} else {
-   img->u.tex.first_layer = u->_Layer;
-   img->u.tex.last_layer = u->_Layer;
-}
- } else {
-img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
-img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
-if (u->Layered && img->resource->array_size > 1) {
-   if (stObj->base.Immutable)
-  img->u.tex.last_layer += stObj->base.NumLayers - 1;
-   else
-  img->u.tex.last_layer += img->resource->array_size
- 1;
-}
- }
-  }
+  st_convert_image(st, u, img);
}
cso_set_shader_images(st->cso_context, shader_type, 0,
  prog->info.num_images, images);
diff --git a/src/mesa/state_tracker/st_texture.h
b/src/mesa/state_tracker/st_texture.h
index 0ce7989562..d9584c9acd 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -254,4 +254,10 @@

[Mesa-dev] [PATCH v3 1/2] st/mesa: add st_convert_image()

2017-03-31 Thread Samuel Pitoiset

Should be used by the state tracker when glGetImageHandleARB()
is called in order to create a pipe_image_view template.

v3: - move the comment to *.c
v2: - make 'st' const
- describe the function

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_atom_image.c | 106 ++---
 src/mesa/state_tracker/st_texture.h|   4 ++
 2 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_image.c 
b/src/mesa/state_tracker/st_atom_image.c
index 5dd2cd64f9..6295e8d2cd 100644
--- a/src/mesa/state_tracker/st_atom_image.c
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -44,6 +44,64 @@
 #include "st_program.h"
 #include "st_format.h"
 
+/**
+ * Convert a gl_image_unit object to a pipe_image_view object.
+ */
+void
+st_convert_image(const struct st_context *st, const struct gl_image_unit *u,
+ struct pipe_image_view *img)
+{
+   struct st_texture_object *stObj = st_texture_object(u->TexObj);
+
+   img->resource = stObj->pt;
+   img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
+
+   switch (u->Access) {
+   case GL_READ_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_READ;
+  break;
+   case GL_WRITE_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_WRITE;
+  break;
+   case GL_READ_WRITE:
+  img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
+  break;
+   default:
+  unreachable("bad gl_image_unit::Access");
+   }
+
+   if (stObj->pt->target == PIPE_BUFFER) {
+  unsigned base, size;
+
+  base = stObj->base.BufferOffset;
+  assert(base < stObj->pt->width0);
+  size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
+
+  img->u.buf.offset = base;
+  img->u.buf.size = size;
+   } else {
+  img->u.tex.level = u->Level + stObj->base.MinLevel;
+  if (stObj->pt->target == PIPE_TEXTURE_3D) {
+ if (u->Layered) {
+img->u.tex.first_layer = 0;
+img->u.tex.last_layer = u_minify(stObj->pt->depth0, 
img->u.tex.level) - 1;
+ } else {
+img->u.tex.first_layer = u->_Layer;
+img->u.tex.last_layer = u->_Layer;
+ }
+  } else {
+ img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
+ img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
+ if (u->Layered && img->resource->array_size > 1) {
+if (stObj->base.Immutable)
+   img->u.tex.last_layer += stObj->base.NumLayers - 1;
+else
+   img->u.tex.last_layer += img->resource->array_size - 1;
+ }
+  }
+   }
+}
+
 static void
 st_bind_images(struct st_context *st, struct gl_program *prog,
enum pipe_shader_type shader_type)
@@ -70,53 +128,7 @@ st_bind_images(struct st_context *st, struct gl_program 
*prog,
  continue;
   }
 
-  img->resource = stObj->pt;
-  img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
-
-  switch (u->Access) {
-  case GL_READ_ONLY:
- img->access = PIPE_IMAGE_ACCESS_READ;
- break;
-  case GL_WRITE_ONLY:
- img->access = PIPE_IMAGE_ACCESS_WRITE;
- break;
-  case GL_READ_WRITE:
- img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
- break;
-  default:
- unreachable("bad gl_image_unit::Access");
-  }
-
-  if (stObj->pt->target == PIPE_BUFFER) {
- unsigned base, size;
-
- base = stObj->base.BufferOffset;
- assert(base < stObj->pt->width0);
- size = MIN2(stObj->pt->width0 - base, 
(unsigned)stObj->base.BufferSize);
-
- img->u.buf.offset = base;
- img->u.buf.size = size;
-  } else {
- img->u.tex.level = u->Level + stObj->base.MinLevel;
- if (stObj->pt->target == PIPE_TEXTURE_3D) {
-if (u->Layered) {
-   img->u.tex.first_layer = 0;
-   img->u.tex.last_layer = u_minify(stObj->pt->depth0, 
img->u.tex.level) - 1;
-} else {
-   img->u.tex.first_layer = u->_Layer;
-   img->u.tex.last_layer = u->_Layer;
-}
- } else {
-img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
-img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
-if (u->Layered && img->resource->array_size > 1) {
-   if (stObj->base.Immutable)
-  img->u.tex.last_layer += stObj->base.NumLayers - 1;
-   else
-  img->u.tex.last_layer += img->resource->array_size - 1;
-}
- }
-  }
+  st_convert_image(st, u, img);
}
cso_set_shader_images(st->cso_context, shader_type, 0,
  prog->info.num_images, images);
diff --git a/src/mesa/state_tracker/st_texture.h 
b/src/mesa/state_tracker/st_texture.h
index 0ce7989562..00c30f06cf 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -254,4 +254,8 @@

[Mesa-dev] [PATCH] intel: genxml: fix out of tree builds

Signed-off-by: Lionel Landwerlin 
---
 src/intel/Makefile.genxml.am | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/Makefile.genxml.am b/src/intel/Makefile.genxml.am
index 05a12f8f77..d780ca8470 100644
--- a/src/intel/Makefile.genxml.am
+++ b/src/intel/Makefile.genxml.am
@@ -36,7 +36,7 @@ $(GENXML_GENERATED_PACK_FILES): genxml/gen_pack_header.py
 
 genxml/genX_xml.h: $(GENXML_XML_FILES) genxml/gen_zipped_file.py
$(MKDIR_GEN)
-   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py 
$(GENXML_XML_FILES) > $@ || ($(RM) $@; false)
+   $(AM_V_GEN) $(PYTHON2) $(srcdir)/genxml/gen_zipped_file.py $(addprefix 
$(srcdir)/,$(GENXML_XML_FILES)) > $@ || ($(RM) $@; false)
 
 genxml/genX_bits.h: genxml/gen_bits_header.py $(GENXML_XML_FILES)
$(MKDIR_GEN)
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] i965/fs: Gracefully handle TXS on multisampled textures with no LOD


On 29/03/17 23:22, Jason Ekstrand wrote:

This can happen for multisampled textures since they are never mipmapped
and textureSize(gsampler2DMS*) does not take an LOD parameter.  This
fixes a shader validation error in the new Sascha deferredmultisampling
demo.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100391
Cc: "13.0 17.0" 
---

We could also easily enough handle this in spirv_to_nir like we do with
GLSL.  However, it seems perfectly reasonable that multisampled txs should
allow no LOD in NIR.

  src/intel/compiler/brw_fs_nir.cpp | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index bc1ccfb..60604e1 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4381,9 +4381,12 @@ fs_visitor::nir_emit_texture(const fs_builder , 
nir_tex_instr *instr)
 srcs[TEX_LOGICAL_SRC_GRAD_COMPONENTS] = brw_imm_d(lod_components);
  
 if (instr->op == nir_texop_query_levels ||

+   (instr->op == nir_texop_txs &&
+instr->sampler_dim == GLSL_SAMPLER_DIM_MS) ||
 (instr->op == nir_texop_tex && stage != MESA_SHADER_FRAGMENT)) {
-  /* textureQueryLevels() and texture() are implemented in terms of TXS
-   * and TXL respectively, so we need to pass a valid LOD argument.
+  /* textureQueryLevels(), textureSize(), and texture() are implemented in
+   * terms of TXS and TXL respectively, so we need to pass a valid LOD


Maybe replace with "TXS, TXS and TXL respectively"

Reviewed-by: Lionel Landwerlin 


+   * argument.
 */
assert(srcs[TEX_LOGICAL_SRC_LOD].file == BAD_FILE);
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_ud(0u);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] Fwd: [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread Gregory Hainaut

> Others have reported this crashing on Nouveau. I haven't seen the problem on 
> radeonsi or i965.

Hello Timothy (sorry for the double mail, email is a complex tool:) )

Hum, tbh. I was quite surprised to hit this bug. I guess you save a
pre-optimized shader in the cache. So it could depend on optimization
passes.

Off the top of my head, I think the "offending" line is this one
const ivec2 offsets[4] = {ivec2(...), ivec2(...), ivec2(...), ivec2(...)};

Strangely enough there are only 3 parameters without a name in the
parameter list (signature is int, size 2 and CONSTANT). Maybe one was
optimized away, I didn't look further.

Cheers
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] intel: genxml: compress all gen files into one


Hi,

Sorry I broke the out of tree build. I have a fix coming up.

Thanks,

-
Lionel

On 31/03/17 02:06, Mike Lothian wrote:

This prevents me building master

PYTHONPATH=/var/tmp/portage/media-libs/mesa-/work/mesa-/src/compiler/nir 
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/compiler/brw_nir_trig_workarounds.py 
> compiler/brw_nir_trig_workarounds.c || (rm -f compi

ler/brw_nir_trig_workarounds.c; false)
/bin/mkdir -p genxml
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen4.xml 
> genxml/gen4_pack.h || (rm -f genxml/gen4_pack.h; false)
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen45.xml 
> genxml/gen45_pack.h || (rm -f genxml/gen45_pack.h; false)

/bin/mkdir -p genxml
/bin/mkdir -p genxml
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen5.xml 
> genxml/gen5_pack.h || (rm -f genxml/gen5_pack.h; false)

/bin/mkdir -p genxml
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen6.xml 
> genxml/gen6_pack.h || (rm -f genxml/gen6_pack.h; false)
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen7.xml 
> genxml/gen7_pack.h || (rm -f genxml/gen7_pack.h; false)
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen75.xml 
> genxml/gen75_pack.h || (rm -f genxml/gen75_pack.h; false)

/bin/mkdir -p genxml
/bin/mkdir -p genxml
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen8.xml 
> genxml/gen8_pack.h || (rm -f genxml/gen8_pack.h; false)
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_pack_header.py 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen9.xml 
> genxml/gen9_pack.h || (rm -f genxml/gen9_pack.h; false)

/bin/mkdir -p genxml
/bin/mkdir -p genxml
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_bits_header.py 
-o genxml/genX_bits.h 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen4.xml 
/var/tmp/portage/media-libs/mesa-/work/m
esa-/src/intel/genxml/gen45.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen5.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen6.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/int
el/genxml/gen7.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen75.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen8.xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen9.x

ml
/usr/bin/python2.7 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_zipped_file.py 
genxml/gen4.xml genxml/gen45.xml genxml/gen5.xml genxml/gen6.xml 
genxml/gen7.xml genxml/gen75.xml genxml/gen8.xml genxml/gen9.xml > 
genxml/genX_xm

l.h || (rm -f genxml/genX_xml.h; false)
/bin/mkdir -p isl
/bin/mkdir -p vulkan
/usr/bin/python2.7 
 /var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/isl/gen_format_layout.py 
\
   --csv 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/isl/isl_format_layout.csv 
--out isl/isl_format_layout.c
/usr/bin/python2.7 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/vulkan/anv_entrypoints_gen.py 
\
   --xml 
/var/tmp/portage/media-libs/mesa-/work/mesa-/src/vulkan/registry/vk.xml 
--outdir ./vulkan

Traceback (most recent call last):
 File 
"/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_zipped_file.py", 
line 71, in 

   main()
 File 
"/var/tmp/portage/media-libs/mesa-/work/mesa-/src/intel/genxml/gen_zipped_file.py", 
line 48, in main

   xml = open(filename).read()
IOError: [Errno 2] No such file or directory: 'genxml/gen4.xml'
make[3]: *** [Makefile:4275: genxml/genX_xml.h] Error 1
make[3]: *** Waiting for unfinished jobs
make[3]: Leaving directory 
'/var/tmp/portage/media-libs/mesa-/work/mesa--abi_x86_32.x86/src/intel'

make[2]: *** [Makefile:852: all-recursive] Error 1
make[2]: Leaving directory 
'/var/tmp/portage/media-libs/mesa-/work/mesa--abi_x86_32.x86/src'

make[1]: *** [Makefile:643: all] Error 2

Re: [Mesa-dev] [PATCH v2 2/2] st/glsl_to_tgsi: fix 64-bit integer bit shifts

For the series:

Reviewed-by: Marek Olšák 

Marek

On Fri, Mar 31, 2017 at 10:03 AM, Nicolai Hähnle  wrote:
> From: Nicolai Hähnle 
>
> Fix a bug that was caused by a type mismatch in the shfit count between
> GLSL and TGSI. I briefly considered adjusting the TGSI semantics, but
> since both LLVM and AMD GCN require both arguments to be of the same type,
> it makes more sense to keep TGSI as-is -- it reflects the underlying
> implementation better.
>
> I'm also sending out piglit tests that expose this error.
>
> v2: use the right number of components for the temporary register
> ---
> There are vector-by-vector shifts, so a full vec4 may be needed, but
> let's do without when we can.
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++-
>  1 file changed, 15 insertions(+), 5 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 369dff7..7da08da 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -2095,27 +2095,37 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
> ir, st_src_reg *op)
>if (native_integers) {
>   emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
>   break;
>}
> case ir_unop_u2f:
>if (native_integers) {
>   emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
>   break;
>}
> case ir_binop_lshift:
> -  if (native_integers) {
> - emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
> - break;
> -  }
> case ir_binop_rshift:
>if (native_integers) {
> - emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
> + unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
> +: 
> TGSI_OPCODE_ISHR;
> + st_src_reg count;
> +
> + if (glsl_base_type_is_64bit(op[0].type)) {
> +/* GLSL shift operations have 32-bit shift counts, but TGSI uses
> + * 64 bits.
> + */
> +count = 
> get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
> +emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
> + } else {
> +count = op[1];
> + }
> +
> + emit_asm(ir, opcode, result_dst, op[0], count);
>   break;
>}
> case ir_binop_bit_and:
>if (native_integers) {
>   emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
>   break;
>}
> case ir_binop_bit_xor:
>if (native_integers) {
>   emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH V3] mesa: disable glthread when DEBUG_OUTPUT_SYNCHRONOUS is enabled

2017-03-31 Thread Timothy Arceri

We could re-enable it also but I haven't tested that yet, and I'm
not sure we care much anyway.

V2: don't disable it from within the call itself. We need a custom
marshalling function or we get stuck waiting for the thread to
finish.

V3: tidy up redundant code copied from the generated version.
---
 src/mapi/glapi/gen/gl_API.xml |  2 +-
 src/mesa/main/marshal.c   | 37 +
 src/mesa/main/marshal.h   |  8 
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index dfaeaaf..148387e 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -2354,21 +2354,21 @@
 
 
 
 
 
 
 
 
 
 
-
+

Re: [Mesa-dev] [PATCH 04/10] radeonsi/gfx9: fix linear mipmap CPU access

On Fri, Mar 31, 2017 at 8:17 AM, Nicolai Hähnle  wrote:
> On 30.03.2017 19:16, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>  src/gallium/drivers/radeon/r600_texture.c  | 8 +++-
>>  src/gallium/drivers/radeon/radeon_winsys.h | 4 ++--
>>  src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 2 +-
>>  3 files changed, 6 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/r600_texture.c
>> b/src/gallium/drivers/radeon/r600_texture.c
>> index 877f555..77e9bec 100644
>> --- a/src/gallium/drivers/radeon/r600_texture.c
>> +++ b/src/gallium/drivers/radeon/r600_texture.c
>> @@ -186,22 +186,22 @@ static unsigned r600_texture_get_offset(struct
>> r600_common_screen *rscreen,
>> if (rscreen->chip_class >= GFX9) {
>> *stride = rtex->surface.u.gfx9.surf_pitch *
>> rtex->surface.bpe;
>> *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
>>
>> if (!box)
>> return 0;
>>
>> /* Each texture is an array of slices. Each slice is an
>> array
>>  * of mipmap levels. */
>> return box->z * rtex->surface.u.gfx9.surf_slice_size +
>> -  ((rtex->surface.u.gfx9.surf_ymip_offset[level] +
>> -box->y / rtex->surface.blk_h) *
>> +  rtex->surface.u.gfx9.offset[level] +
>> +  (box->y / rtex->surface.blk_h *
>> rtex->surface.u.gfx9.surf_pitch +
>> box->x / rtex->surface.blk_w) * rtex->surface.bpe;
>
>
> Does this part of the formula really not depend on the mip level? It looks
> like each mip level uses the same amount of memory and pitch?

Only the same pitch. All mip levels are placed in one 2D plane. The
linear layout puts them below each other. That's why the pitch is the
same. blk_w/blk_h are for compressed textures.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread Timothy Arceri




On 31/03/17 18:00, gregory hainaut wrote:

On Fri, 31 Mar 2017 08:24:36 +0200
Nicolai Hähnle  wrote:

Hello Nicolai


On 30.03.2017 21:55, Gregory Hainaut wrote:

Typically happens when we want to copy an unnamed shader parameter
in the shader cache.


So this happens only when blob_write_string is called from nouveau?


Sorry, I explained myself poorly. I should have written "reproduced &
tested on Nouveau". I don't know about other drivers; they should be
impacted too.

_mesa_add_parameter seems to allow storing a NULL pointer in p->Name,
which is later written by blob_write_string. I guess it could
depend on the shader cache state.


I got the crash with this piglit test:
textureGather fs offsets r 0 float 2D repeat -auto -fb


Others have reported this crashing on Nouveau. I haven't seen the 
problem on radeonsi or i965.






By the way, please setup send-mail so that it threads your mails.
That should be the default, so I'm not sure what happened here...

Oh. I edited my email in the mailer queue which moved the email from my
pop3 to my imap account. I guess it broke the threading link. I will
be more careful next time.

Thanks



Thanks,
Nicolai




Note: it is safer to copy an empty string so we can read it back
safely.

Fix piglit crashes of the 'texturegatheroffsets' tests

Signed-off-by: Gregory Hainaut 
---
 src/compiler/glsl/blob.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c
index 769ebf1..f84d7f3 100644
--- a/src/compiler/glsl/blob.c
+++ b/src/compiler/glsl/blob.c
@@ -176,7 +176,10 @@ blob_write_intptr(struct blob *blob, intptr_t
value) bool
 blob_write_string(struct blob *blob, const char *str)
 {
-   return blob_write_bytes(blob, str, strlen(str) + 1);
+   if (str == NULL)
+  return blob_write_bytes(blob, "", 1);
+   else
+  return blob_write_bytes(blob, str, strlen(str) + 1);
 }

 void





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v2 2/2] st/glsl_to_tgsi: fix 64-bit integer bit shifts

From: Nicolai Hähnle 

Fix a bug that was caused by a type mismatch in the shift count between
GLSL and TGSI. I briefly considered adjusting the TGSI semantics, but
since both LLVM and AMD GCN require both arguments to be of the same type,
it makes more sense to keep TGSI as-is -- it reflects the underlying
implementation better.

I'm also sending out piglit tests that expose this error.

v2: use the right number of components for the temporary register
---
There are vector-by-vector shifts, so a full vec4 may be needed, but
let's do without when we can.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 369dff7..7da08da 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2095,27 +2095,37 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* 
ir, st_src_reg *op)
   if (native_integers) {
  emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
  break;
   }
case ir_unop_u2f:
   if (native_integers) {
  emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
  break;
   }
case ir_binop_lshift:
-  if (native_integers) {
- emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
- break;
-  }
case ir_binop_rshift:
   if (native_integers) {
- emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
+ unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
+: TGSI_OPCODE_ISHR;
+ st_src_reg count;
+
+ if (glsl_base_type_is_64bit(op[0].type)) {
+/* GLSL shift operations have 32-bit shift counts, but TGSI uses
+ * 64 bits.
+ */
+count = 
get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
+emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
+ } else {
+count = op[1];
+ }
+
+ emit_asm(ir, opcode, result_dst, op[0], count);
  break;
   }
case ir_binop_bit_and:
   if (native_integers) {
  emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
  break;
   }
case ir_binop_bit_xor:
   if (native_integers) {
  emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/25] radv: add tessellation ring allocation support.

2017-03-31 Thread Dave Airlie

On 31 March 2017 at 16:59, Bas Nieuwenhuizen  wrote:
> On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie  wrote:
>> From: Dave Airlie 
>>
>> This patch adds support for the offchip rings for storing
>> tessellation factors and attribute data.
>>
>> It includes the register setup for the TF ring
>>
>> Signed-off-by: Dave Airlie 
>> ---
>>  src/amd/vulkan/radv_cmd_buffer.c |   6 ++
>>  src/amd/vulkan/radv_device.c | 210 
>> ---
>>  src/amd/vulkan/radv_private.h|   4 +
>>  3 files changed, 207 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
>> b/src/amd/vulkan/radv_cmd_buffer.c
>> index dbd74de..70f6fad 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -221,6 +221,7 @@ static void  radv_reset_cmd_buffer(struct 
>> radv_cmd_buffer *cmd_buffer)
>> cmd_buffer->compute_scratch_size_needed = 0;
>> cmd_buffer->esgs_ring_size_needed = 0;
>> cmd_buffer->gsvs_ring_size_needed = 0;
>> +   cmd_buffer->tess_rings_needed = false;
>>
>> if (cmd_buffer->upload.upload_bo)
>> cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
>> @@ -1896,6 +1897,9 @@ void radv_CmdBindPipeline(
>> if (pipeline->graphics.gsvs_ring_size > 
>> cmd_buffer->gsvs_ring_size_needed)
>> cmd_buffer->gsvs_ring_size_needed = 
>> pipeline->graphics.gsvs_ring_size;
>>
>> +   if (radv_pipeline_has_tess(pipeline))
>> +   cmd_buffer->tess_rings_needed = true;
>> +
>> if (radv_pipeline_has_gs(pipeline)) {
>> struct ac_userdata_info *loc = 
>> radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
>>  
>> AC_UD_SCRATCH_RING_OFFSETS);
>> @@ -2063,6 +2067,8 @@ void radv_CmdExecuteCommands(
>> primary->esgs_ring_size_needed = 
>> secondary->esgs_ring_size_needed;
>> if (secondary->gsvs_ring_size_needed > 
>> primary->gsvs_ring_size_needed)
>> primary->gsvs_ring_size_needed = 
>> secondary->gsvs_ring_size_needed;
>> +   if (secondary->tess_rings_needed)
>> +   primary->tess_rings_needed = true;
>>
>> if (secondary->ring_offsets_idx != -1) {
>> if (primary->ring_offsets_idx == -1)
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index fe531e1..b75d76b 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -845,6 +845,10 @@ radv_queue_finish(struct radv_queue *queue)
>> queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
>> if (queue->gsvs_ring_bo)
>> queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
>> +   if (queue->tess_factor_ring_bo)
>> +   
>> queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
>> +   if (queue->tess_offchip_ring_bo)
>> +   
>> queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
>> if (queue->compute_scratch_bo)
>> queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
>>  }
>> @@ -1182,20 +1186,29 @@ static void radv_dump_trace(struct radv_device 
>> *device,
>>  }
>>
>>  static void
>> -fill_geom_rings(struct radv_queue *queue,
>> -   uint32_t *map,
>> -   uint32_t esgs_ring_size,
>> -   struct radeon_winsys_bo *esgs_ring_bo,
>> -   uint32_t gsvs_ring_size,
>> -   struct radeon_winsys_bo *gsvs_ring_bo)
>> +fill_geom_tess_rings(struct radv_queue *queue,
>> +uint32_t *map,
>> +uint32_t esgs_ring_size,
>> +struct radeon_winsys_bo *esgs_ring_bo,
>> +uint32_t gsvs_ring_size,
>> +struct radeon_winsys_bo *gsvs_ring_bo,
>> +uint32_t tess_factor_ring_size,
>> +struct radeon_winsys_bo *tess_factor_ring_bo,
>> +uint32_t tess_offchip_ring_size,
>> +struct radeon_winsys_bo *tess_offchip_ring_bo)
>>  {
>> uint64_t esgs_va = 0, gsvs_va = 0;
>> +   uint64_t tess_factor_va = 0, tess_offchip_va = 0;
>> uint32_t *desc = [4];
>>
>> if (esgs_ring_bo)
>> esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
>> if (gsvs_ring_bo)
>> gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
>> +   if (tess_factor_ring_bo)
>> +   tess_factor_va = 
>> queue->device->ws->buffer_get_va(tess_factor_ring_bo);
>> +   if (tess_offchip_ring_bo)
>> +   tess_offchip_va = 
>> queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
>>
>> /*

Re: [Mesa-dev] [RFC PATCH] egl/android: Dequeue buffers inside EGL calls

2017-03-31 Thread Tapani Pälli

On 03/31/2017 09:06 AM, Tapani Pälli wrote:

On 03/31/2017 08:24 AM, Rob Clark wrote:

On Fri, Mar 31, 2017 at 12:22 AM, Tapani Pälli
wrote:

On 03/30/2017 05:57 PM, Emil Velikov wrote:

On 30 March 2017 at 15:30, Tomasz Figa wrote:

On Thu, Mar 30, 2017 at 11:17 PM, Emil Velikov

wrote:

On 30 March 2017 at 11:55, Tomasz Figa wrote:

Android buffer queues can be abandoned, which results in failing to
dequeue next buffer. Currently this would fail somewhere deep within
the DRI stack calling loader's getBuffers*(), without any error
reporting to the client app. However Android framework code
relies on
proper signaling of this event, so we move buffer dequeue to
createWindowSurface() and swapBuffers() call, which can generate
proper
EGL errors. To keep the performance benefits of delayed buffer
handling,
if any, fence wait and DRI image creation is kept delayed until
getBuffers*() is called by the DRI driver.

Thank you Tomasz.

I'm fairly confident that this should resolve the crash [in
swap_buffers] that Mauro was seeing.
Mauro can you give it a test ?

Ah, I actually noticed a problem with existing code, supposedly fixed
by [1], but I'm afraid it's still wrong.

Right - I was wondering why we don't hit that on EGL/GBM or
EGL/Wayland.
From a quick look - may be because EGL/Android drops the dpy mutex in
droid_window_enqueue_buffer().

My patch removes update_buffers() and changes the buffer management so
that there is always a buffer dequeued, starting from surface
creation, unless there was an error somewhere.

Off the top of your head - is there something stopping us from using
the same method on $other platforms?

[1]
https://cgit.freedesktop.org/mesa/mesa/commit/src/egl/drivers/dri2/platform_android.c?id=4d4558411db166d2d66f8cec9cb581149dbe1597

Not that huge of an expert on the Android specifics, so just a humble
request:
Can we seek the code reshuffle (droid_{alloc,free}_local_buffer,

Oops silly typo - s/seek/split/.

other?) separate from the functionality changes ?

Sure. Thanks for suggestion.

Please give it a day or two for others to comment.

I'm trying to debug why this causes our homescreen (wallpaper) to be
black.
Otherwise I haven't seen any issues with these changes.

wallpaper seems to be a special sorta hell.. I wonder if there is
somehow some sort of interaction with what I fixed / worked-around in
a5e733c6b52e93de3000647d075f5ca2f55fcb71 ??

Maybe at least try commenting out the temp-pbuffer thing to get max
texture size, and see if that "fixes" things

Can you give more details, I still live in la la land and don't know
about 'temp-pbuffer thing'?

aa I did not recall the problem, you mean the 'dummy pbuffer' in
SurfaceFlinger .. yes I will check if this is related.

// Tapani

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string

2017-03-31 Thread gregory hainaut

On Fri, 31 Mar 2017 08:24:36 +0200
Nicolai Hähnle  wrote:

Hello Nicolai

> On 30.03.2017 21:55, Gregory Hainaut wrote:
> > Typically happen when we want to copy an unnamed shader parameter
> > in the shader cache.
> 
> So this happens only when blob_write_string is called from nouveau?

Sorry, I explained myself poorly. I should have written "reproduced &
tested on Nouveau". I don't know about other drivers; they should be
impacted too.

_mesa_add_parameter seems to allow storing a NULL pointer in p->Name,
which is later written by blob_write_string. I guess it could
depend on the shader cache state.


I got the crash with this piglit test:
textureGather fs offsets r 0 float 2D repeat -auto -fb


> By the way, please setup send-mail so that it threads your mails.
> That should be the default, so I'm not sure what happened here...
Oh. I edited my email in the mailer queue which moved the email from my
pop3 to my imap account. I guess it broke the threading link. I will
be more careful next time.

Thanks


> Thanks,
> Nicolai

> >
> > Note: it is safer to copy an empty string so we can read it back
> > safely.
> >
> > Fix piglit crashes of the 'texturegatheroffsets' tests
> >
> > Signed-off-by: Gregory Hainaut 
> > ---
> >  src/compiler/glsl/blob.c | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c
> > index 769ebf1..f84d7f3 100644
> > --- a/src/compiler/glsl/blob.c
> > +++ b/src/compiler/glsl/blob.c
> > @@ -176,7 +176,10 @@ blob_write_intptr(struct blob *blob, intptr_t
> > value) bool
> >  blob_write_string(struct blob *blob, const char *str)
> >  {
> > -   return blob_write_bytes(blob, str, strlen(str) + 1);
> > +   if (str == NULL)
> > +  return blob_write_bytes(blob, "", 1);
> > +   else
> > +  return blob_write_bytes(blob, str, strlen(str) + 1);
> >  }
> >
> >  void
> >
> 
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/25] radv: add tessellation ring allocation support.

On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This patch adds support for the offchip rings for storing
> tessellation factors and attribute data.
>
> It includes the register setup for the TF ring
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/vulkan/radv_cmd_buffer.c |   6 ++
>  src/amd/vulkan/radv_device.c | 210 
> ---
>  src/amd/vulkan/radv_private.h|   4 +
>  3 files changed, 207 insertions(+), 13 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c 
> b/src/amd/vulkan/radv_cmd_buffer.c
> index dbd74de..70f6fad 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -221,6 +221,7 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer 
> *cmd_buffer)
> cmd_buffer->compute_scratch_size_needed = 0;
> cmd_buffer->esgs_ring_size_needed = 0;
> cmd_buffer->gsvs_ring_size_needed = 0;
> +   cmd_buffer->tess_rings_needed = false;
>
> if (cmd_buffer->upload.upload_bo)
> cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
> @@ -1896,6 +1897,9 @@ void radv_CmdBindPipeline(
> if (pipeline->graphics.gsvs_ring_size > 
> cmd_buffer->gsvs_ring_size_needed)
> cmd_buffer->gsvs_ring_size_needed = 
> pipeline->graphics.gsvs_ring_size;
>
> +   if (radv_pipeline_has_tess(pipeline))
> +   cmd_buffer->tess_rings_needed = true;
> +
> if (radv_pipeline_has_gs(pipeline)) {
> struct ac_userdata_info *loc = 
> radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
>  
> AC_UD_SCRATCH_RING_OFFSETS);
> @@ -2063,6 +2067,8 @@ void radv_CmdExecuteCommands(
> primary->esgs_ring_size_needed = 
> secondary->esgs_ring_size_needed;
> if (secondary->gsvs_ring_size_needed > 
> primary->gsvs_ring_size_needed)
> primary->gsvs_ring_size_needed = 
> secondary->gsvs_ring_size_needed;
> +   if (secondary->tess_rings_needed)
> +   primary->tess_rings_needed = true;
>
> if (secondary->ring_offsets_idx != -1) {
> if (primary->ring_offsets_idx == -1)
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index fe531e1..b75d76b 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -845,6 +845,10 @@ radv_queue_finish(struct radv_queue *queue)
> queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
> if (queue->gsvs_ring_bo)
> queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
> +   if (queue->tess_factor_ring_bo)
> +   queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
> +   if (queue->tess_offchip_ring_bo)
> +   
> queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
> if (queue->compute_scratch_bo)
> queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
>  }
> @@ -1182,20 +1186,29 @@ static void radv_dump_trace(struct radv_device 
> *device,
>  }
>
>  static void
> -fill_geom_rings(struct radv_queue *queue,
> -   uint32_t *map,
> -   uint32_t esgs_ring_size,
> -   struct radeon_winsys_bo *esgs_ring_bo,
> -   uint32_t gsvs_ring_size,
> -   struct radeon_winsys_bo *gsvs_ring_bo)
> +fill_geom_tess_rings(struct radv_queue *queue,
> +uint32_t *map,
> +uint32_t esgs_ring_size,
> +struct radeon_winsys_bo *esgs_ring_bo,
> +uint32_t gsvs_ring_size,
> +struct radeon_winsys_bo *gsvs_ring_bo,
> +uint32_t tess_factor_ring_size,
> +struct radeon_winsys_bo *tess_factor_ring_bo,
> +uint32_t tess_offchip_ring_size,
> +struct radeon_winsys_bo *tess_offchip_ring_bo)
>  {
> uint64_t esgs_va = 0, gsvs_va = 0;
> +   uint64_t tess_factor_va = 0, tess_offchip_va = 0;
> uint32_t *desc = [4];
>
> if (esgs_ring_bo)
> esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
> if (gsvs_ring_bo)
> gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
> +   if (tess_factor_ring_bo)
> +   tess_factor_va = 
> queue->device->ws->buffer_get_va(tess_factor_ring_bo);
> +   if (tess_offchip_ring_bo)
> +   tess_offchip_va = 
> queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
>
> /* stride 0, num records - size, add tid, swizzle, elsize4,
>index stride 64 */
> @@ -1270,6 +1283,88 @@ fill_geom_rings(struct radv_queue *queue,
> S_008F0C_ELEMENT_SIZE(1)

Re: [Mesa-dev] [PATCH 02/25] radv: handle clip dist in es outputs.

On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 16c1eae..d3f6112 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -4570,6 +4570,10 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
>
> if (param_index > max_output_written)
> max_output_written = param_index;
> +   if (length > 4) {
> +   if (param_index + 1 > max_output_written)
> +   max_output_written = param_index + 1;
> +   }

can we use something like max_output_written =
MAX2(max_output_written, param_index + (length - 1) / 4) instead of
all these ifs?
>
> for (j = 0; j < length; j++) {
> LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, 
> out_ptr[j], "");
> --
> 2.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH V2] mesa: disable glthread when DEBUG_OUTPUT_SYNCHRONOUS is enabled


On 31.03.2017 02:45, Timothy Arceri wrote:

We could re-enable it also but I haven't tested that yet, and I'm
not sure we care much anyway.

V2: don't disable it from within the call itself. We need a custom
marshalling function or we get stuck waiting for the thread to
finish.


Yeah, I missed that, too.


---
 src/mapi/glapi/gen/gl_API.xml |  2 +-
 src/mesa/main/marshal.c   | 37 +
 src/mesa/main/marshal.h   |  8 
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index dfaeaaf..148387e 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -2354,21 +2354,21 @@
 
 
 
 

 
 
 
 

-
+

Re: [Mesa-dev] [PATCH] gallivm: add lp_build_emit_fetch_src() helper


On 30.03.2017 19:57, Samuel Pitoiset wrote:

lp_build_emit_fetch() is useful when the source type can be
inferred from the instruction opcode.

However, for bindless samplers/images we can't do that easily
because tgsi_opcode_infer_src_type() returns TGSI_TYPE_FLOAT for
TEX instructions, while we need TGSI_TYPE_UNSIGNED64 if the
resource register is bindless.

Signed-off-by: Samuel Pitoiset 


Reviewed-by: Nicolai Hähnle 



---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c | 22 +-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi.h |  7 +++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index d368f38d09..69863ab93c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -323,16 +323,14 @@ lp_build_tgsi_inst_llvm(


 LLVMValueRef
-lp_build_emit_fetch(
+lp_build_emit_fetch_src(
struct lp_build_tgsi_context *bld_base,
-   const struct tgsi_full_instruction *inst,
-   unsigned src_op,
+   const struct tgsi_full_src_register *reg,
+   enum tgsi_opcode_type stype,
const unsigned chan_index)
 {
-   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
unsigned swizzle;
LLVMValueRef res;
-   enum tgsi_opcode_type stype = 
tgsi_opcode_infer_src_type(inst->Instruction.Opcode);

if (chan_index == LP_CHAN_ALL) {
   swizzle = ~0u;
@@ -413,7 +411,21 @@ lp_build_emit_fetch(
}

return res;
+}
+
+
+LLVMValueRef
+lp_build_emit_fetch(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_instruction *inst,
+   unsigned src_op,
+   const unsigned chan_index)
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+   enum tgsi_opcode_type stype =
+  tgsi_opcode_infer_src_type(inst->Instruction.Opcode);

+   return lp_build_emit_fetch_src(bld_base, reg, stype, chan_index);
 }


diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index b6b3fe369b..22bd2a16ec 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -645,6 +645,13 @@ lp_build_tgsi_inst_llvm(
const struct tgsi_full_instruction *inst);

 LLVMValueRef
+lp_build_emit_fetch_src(
+   struct lp_build_tgsi_context *bld_base,
+   const struct tgsi_full_src_register *reg,
+   enum tgsi_opcode_type stype,
+   const unsigned chan_index);
+
+LLVMValueRef
 lp_build_emit_fetch(
struct lp_build_tgsi_context *bld_base,
const struct tgsi_full_instruction *inst,




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/1] glsl/blob: handle copy of NULL ptr in blob_write_string


On 30.03.2017 21:55, Gregory Hainaut wrote:

This typically happens when we want to copy an unnamed shader parameter in the
shader cache.


So this happens only when blob_write_string is called from nouveau?

By the way, please setup send-mail so that it threads your mails. That 
should be the default, so I'm not sure what happened here...


Thanks,
Nicolai



Note: it is safer to copy an empty string so we can read it back safely.

Fix piglit crashes of the 'texturegatheroffsets' tests

Signed-off-by: Gregory Hainaut 
---
 src/compiler/glsl/blob.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c
index 769ebf1..f84d7f3 100644
--- a/src/compiler/glsl/blob.c
+++ b/src/compiler/glsl/blob.c
@@ -176,7 +176,10 @@ blob_write_intptr(struct blob *blob, intptr_t value)
 bool
 blob_write_string(struct blob *blob, const char *str)
 {
-   return blob_write_bytes(blob, str, strlen(str) + 1);
+   if (str == NULL)
+  return blob_write_bytes(blob, "", 1);
+   else
+  return blob_write_bytes(blob, str, strlen(str) + 1);
 }

 void




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] mesa/glthread: Call unmarshal_batch directly in glthread_finish


On 30.03.2017 22:31, Bartosz Tomczyk wrote:

Call it directly when batch queue is empty. This avoids costly thread
synchronisation. This commit improves performance of games that have
previously regressed with mesa_glthread=true.


Reviewed-by: Nicolai Hähnle 


---
 src/mesa/main/glthread.c | 47 ++-
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index 06115b916d..4fcd322163 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -194,16 +194,12 @@ _mesa_glthread_restore_dispatch(struct gl_context *ctx)
}
 }

-void
-_mesa_glthread_flush_batch(struct gl_context *ctx)
+static void
+_mesa_glthread_flush_batch_locked(struct gl_context *ctx)
 {
struct glthread_state *glthread = ctx->GLThread;
-   struct glthread_batch *batch;
-
-   if (!glthread)
-  return;
-
-   batch = glthread->batch;
+   struct glthread_batch *batch = glthread->batch;
+
if (!batch->used)
   return;

@@ -223,10 +219,26 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
   return;
}

-   pthread_mutex_lock(&glthread->mutex);
*glthread->batch_queue_tail = batch;
glthread->batch_queue_tail = &batch->next;
pthread_cond_broadcast(&glthread->new_work);
+}
+
+void
+_mesa_glthread_flush_batch(struct gl_context *ctx)
+{
+   struct glthread_state *glthread = ctx->GLThread;
+   struct glthread_batch *batch;
+
+   if (!glthread)
+  return;
+
+   batch = glthread->batch;
+   if (!batch->used)
+  return;
+
+   pthread_mutex_lock(&glthread->mutex);
+   _mesa_glthread_flush_batch_locked(ctx);
pthread_mutex_unlock(&glthread->mutex);
 }

@@ -252,12 +264,21 @@ _mesa_glthread_finish(struct gl_context *ctx)
if (pthread_self() == glthread->thread)
   return;

-   _mesa_glthread_flush_batch(ctx);
-
pthread_mutex_lock(&glthread->mutex);

-   while (glthread->batch_queue || glthread->busy)
-  pthread_cond_wait(&glthread->work_done, &glthread->mutex);
+   if (!(glthread->batch_queue || glthread->busy)) {
+  if (glthread->batch && glthread->batch->used) {
+ struct _glapi_table *dispatch = _glapi_get_dispatch();
+ glthread_unmarshal_batch(ctx, glthread->batch);
+ _glapi_set_dispatch(dispatch);
+ glthread_allocate_batch(ctx);
+  }
+   }
+   else {
+  _mesa_glthread_flush_batch_locked(ctx);
+  while (glthread->batch_queue || glthread->busy)
+ pthread_cond_wait(&glthread->work_done, &glthread->mutex);
+   }

pthread_mutex_unlock(>mutex);
 }




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radeonsi: add load_image_desc()


On 30.03.2017 19:58, Samuel Pitoiset wrote:

Similar to load_sampler_desc(). Same deal for bindless.

Signed-off-by: Samuel Pitoiset 


Both patches:

Reviewed-by: Nicolai Hähnle 



---
 src/gallium/drivers/radeonsi/si_shader.c | 45 +---
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index cf725cebd8..6b02d61e17 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3230,6 +3230,24 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, 
int num_elements)
   CONST_ADDR_SPACE);
 }

+static LLVMValueRef load_image_desc(struct si_shader_context *ctx,
+   LLVMValueRef list, LLVMValueRef index,
+   unsigned target)
+{
+   LLVMBuilderRef builder = ctx->gallivm.builder;
+
+   if (target == TGSI_TEXTURE_BUFFER) {
+   index = LLVMBuildMul(builder, index,
+LLVMConstInt(ctx->i32, 2, 0), "");
+   index = LLVMBuildAdd(builder, index,
+LLVMConstInt(ctx->i32, 1, 0), "");
+   list = LLVMBuildPointerCast(builder, list,
+   const_array(ctx->v4i32, 0), "");
+   }
+
+   return ac_build_indexed_load_const(&ctx->ac, list, index);
+}
+
 /**
  * Load the resource descriptor for \p image.
  */
@@ -3243,8 +3261,8 @@ image_fetch_rsrc(
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
 SI_PARAM_IMAGES);
-   LLVMValueRef index, tmp;
-   bool dcc_off = target != TGSI_TEXTURE_BUFFER && is_store;
+   LLVMValueRef index;
+   bool dcc_off = is_store;

assert(image->Register.File == TGSI_FILE_IMAGE);

@@ -3255,8 +3273,7 @@ image_fetch_rsrc(

index = LLVMConstInt(ctx->i32, image->Register.Index, 0);

-   if (images_writemask & (1 << image->Register.Index) &&
-   target != TGSI_TEXTURE_BUFFER)
+   if (images_writemask & (1 << image->Register.Index))
dcc_off = true;
} else {
/* From the GL_ARB_shader_image_load_store extension spec:
@@ -3273,23 +3290,9 @@ image_fetch_rsrc(
   SI_NUM_IMAGES);
}

-   if (target == TGSI_TEXTURE_BUFFER) {
-   LLVMBuilderRef builder = ctx->gallivm.builder;
-
-   rsrc_ptr = LLVMBuildPointerCast(builder, rsrc_ptr,
-   const_array(ctx->v4i32, 0), "");
-   index = LLVMBuildMul(builder, index,
-LLVMConstInt(ctx->i32, 2, 0), "");
-   index = LLVMBuildAdd(builder, index,
-LLVMConstInt(ctx->i32, 1, 0), "");
-   *rsrc = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
-   return;
-   }
-
-   tmp = ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index);
-   if (dcc_off)
-   tmp = force_dcc_off(ctx, tmp);
-   *rsrc = tmp;
+   *rsrc = load_image_desc(ctx, rsrc_ptr, index, target);
+   if (dcc_off && target != TGSI_TEXTURE_BUFFER)
+   *rsrc = force_dcc_off(ctx, *rsrc);
 }

 static LLVMValueRef image_fetch_coords(




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/10] radeonsi/gfx9: fix linear mipmap CPU access


On 30.03.2017 19:16, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/drivers/radeon/r600_texture.c  | 8 +++-
 src/gallium/drivers/radeon/radeon_winsys.h | 4 ++--
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 877f555..77e9bec 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -186,22 +186,22 @@ static unsigned r600_texture_get_offset(struct 
r600_common_screen *rscreen,
if (rscreen->chip_class >= GFX9) {
*stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
*layer_stride = rtex->surface.u.gfx9.surf_slice_size;

if (!box)
return 0;

/* Each texture is an array of slices. Each slice is an array
 * of mipmap levels. */
return box->z * rtex->surface.u.gfx9.surf_slice_size +
-  ((rtex->surface.u.gfx9.surf_ymip_offset[level] +
-box->y / rtex->surface.blk_h) *
+  rtex->surface.u.gfx9.offset[level] +
+  (box->y / rtex->surface.blk_h *
rtex->surface.u.gfx9.surf_pitch +
box->x / rtex->surface.blk_w) * rtex->surface.bpe;


Does this part of the formula really not depend on the mip level? It 
looks like each mip level uses the same amount of memory and pitch?


Thanks,
Nicolai



} else {
*stride = rtex->surface.u.legacy.level[level].nblk_x *
  rtex->surface.bpe;
*layer_stride = rtex->surface.u.legacy.level[level].slice_size;

if (!box)
return rtex->surface.u.legacy.level[level].offset;

@@ -1616,23 +1616,21 @@ static void *r600_texture_transfer_map(struct 
pipe_context *ctx,

/* Tiled textures need to be converted into a linear texture 
for CPU
 * access. The staging texture is always linear and is placed 
in GART.
 *
 * Reading from VRAM or GTT WC is slow, always use the staging
 * texture in this case.
 *
 * Use the staging texture for uploads if the underlying BO
 * is busy.
 */
-   /* TODO: Linear CPU mipmap addressing is broken on GFX9: */
-   if (!rtex->surface.is_linear ||
-   (rctx->chip_class == GFX9 && level))
+   if (!rtex->surface.is_linear)
use_staging_texture = true;
else if (usage & PIPE_TRANSFER_READ)
use_staging_texture =
rtex->resource.domains & RADEON_DOMAIN_VRAM ||
rtex->resource.flags & RADEON_FLAG_GTT_WC;
/* Write & linear only: */
else if (r600_rings_is_buffer_referenced(rctx, 
rtex->resource.buf,
 
RADEON_USAGE_READWRITE) ||
 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
RADEON_USAGE_READWRITE)) {
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 43f74f6..a19ece6 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -357,22 +357,22 @@ struct gfx9_surf_layout {
 struct gfx9_surf_meta_flags dcc;   /* metadata of color */
 struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
 struct gfx9_surf_meta_flags cmask; /* metadata of fmask */

 enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
 uint64_tsurf_offset; /* 0 unless imported with an 
offset */
 /* The size of the 2D plane containing all mipmap levels. */
 uint64_tsurf_slice_size;
 uint16_tsurf_pitch; /* in blocks */
 uint16_tsurf_height;
-/* Y mipmap level offset in blocks. Only valid for LINEAR. */
-uint16_tsurf_ymip_offset[RADEON_SURF_MAX_LEVELS];
+/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
+uint32_toffset[RADEON_SURF_MAX_LEVELS];

 uint16_tdcc_pitch_max;  /* (mip chain pitch - 1) */

 uint64_tstencil_offset; /* separate stencil */
 uint64_tfmask_size;
 uint64_tcmask_size;

 uint32_tfmask_alignment;
 uint32_tcmask_alignment;
 };
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 7dfd6d2..7566087

[Mesa-dev] [Bug 97957] Awful screen tearing in a separate X server with DRI3

https://bugs.freedesktop.org/show_bug.cgi?id=97957

--- Comment #10 from Michel Dänzer  ---
Chris, are you planning to submit the fix for review?

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/10] radeonsi: adjust checking for SC bug workarounds


On 30.03.2017 19:15, Marek Olšák wrote:

From: Marek Olšák 

no change in behavior, just making sure that no later chips will use
the workarounds


Reviewed-by: Nicolai Hähnle 



---
 src/gallium/drivers/radeonsi/si_pipe.c  |  4 
 src/gallium/drivers/radeonsi/si_pipe.h  |  1 +
 src/gallium/drivers/radeonsi/si_state.c | 12 
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 18b56fa..c32546f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -835,20 +835,24 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
(sscreen->b.chip_class == CIK &&
 sscreen->b.info.pfp_fw_version >= 211 &&
 sscreen->b.info.me_fw_version >= 173) ||
(sscreen->b.chip_class == SI &&
 sscreen->b.info.pfp_fw_version >= 121 &&
 sscreen->b.info.me_fw_version >= 87);

sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
   sscreen->b.chip_class >= VI;

+   sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 
&&
+   sscreen->b.family <= 
CHIP_POLARIS12) ||
+  sscreen->b.family == CHIP_VEGA10;
+
sscreen->b.has_cp_dma = true;
sscreen->b.has_streamout = true;

/* Some chips have RB+ registers, but don't support RB+. Those must
 * always disable it.
 */
if (sscreen->b.family == CHIP_STONEY ||
sscreen->b.chip_class >= GFX9) {
sscreen->b.has_rbplus = true;

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 3a6503a..9225899 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -72,20 +72,21 @@ struct si_compute;
 struct hash_table;
 struct u_suballocator;

 struct si_screen {
struct r600_common_screen   b;
unsignedgs_table_depth;
unsignedtess_offchip_block_dw_size;
boolhas_distributed_tess;
boolhas_draw_indirect_multi;
boolhas_ds_bpermute;
+   boolhas_msaa_sample_loc_bug;

/* Whether shaders are monolithic (1-part) or separate (3-part). */
booluse_monolithic_shaders;
boolrecord_llvm_ir;

mtx_t   shader_parts_mutex;
struct si_shader_part   *vs_prologs;
struct si_shader_part   *vs_epilogs;
struct si_shader_part   *tcs_epilogs;
struct si_shader_part   *gs_prologs;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index f8c6faf..78d6996 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -914,21 +914,21 @@ static void si_bind_rs_state(struct pipe_context *ctx, 
void *state)
(struct si_state_rasterizer*)sctx->queued.named.rasterizer;
struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state;

if (!state)
return;

if (!old_rs || old_rs->multisample_enable != rs->multisample_enable) {
si_mark_atom_dirty(sctx, &sctx->db_render_state);

/* Update the small primitive filter workaround if necessary. */
-   if (sctx->b.family >= CHIP_POLARIS10 &&
+   if (sctx->screen->has_msaa_sample_loc_bug &&
sctx->framebuffer.nr_samples > 1)
si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
}

r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);

si_pm4_bind_state(sctx, rasterizer, rs);
si_update_poly_offset_state(sctx);

si_mark_atom_dirty(sctx, &sctx->clip_regs);
@@ -2854,50 +2854,54 @@ static void si_emit_framebuffer_state(struct si_context 
*sctx, struct r600_atom

sctx->framebuffer.dirty_cbufs = 0;
sctx->framebuffer.dirty_zsbuf = false;
 }

 static void si_emit_msaa_sample_locs(struct si_context *sctx,
 struct r600_atom *atom)
 {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned nr_samples = sctx->framebuffer.nr_samples;
+   bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;

/* Smoothing (only possible with nr_samples == 1) uses the same
 * sample locations as the MSAA it simulates.
 */
if (nr_samples <= 1 && sctx->smoothing_enabled)
nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;

/* On Polaris, the small primitive

Re: [Mesa-dev] [PATCH 1/2] st/mesa: add st_convert_image()


On 30.03.2017 18:55, Samuel Pitoiset wrote:

Should be used by the state tracker when glGetImageHandleARB()
is called in order to create a pipe_image_view template.

v2: - make 'st' const
- describe the function

Signed-off-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_atom_image.c | 103 ++---
 src/mesa/state_tracker/st_texture.h|   6 ++
 2 files changed, 62 insertions(+), 47 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_image.c 
b/src/mesa/state_tracker/st_atom_image.c
index 5dd2cd64f9..805014c44a 100644
--- a/src/mesa/state_tracker/st_atom_image.c
+++ b/src/mesa/state_tracker/st_atom_image.c
@@ -44,6 +44,61 @@
 #include "st_program.h"
 #include "st_format.h"

+void
+st_convert_image(const struct st_context *st, const struct gl_image_unit *u,
+ struct pipe_image_view *img)
+{
+   struct st_texture_object *stObj = st_texture_object(u->TexObj);
+
+   img->resource = stObj->pt;
+   img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
+
+   switch (u->Access) {
+   case GL_READ_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_READ;
+  break;
+   case GL_WRITE_ONLY:
+  img->access = PIPE_IMAGE_ACCESS_WRITE;
+  break;
+   case GL_READ_WRITE:
+  img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
+  break;
+   default:
+  unreachable("bad gl_image_unit::Access");
+   }
+
+   if (stObj->pt->target == PIPE_BUFFER) {
+  unsigned base, size;
+
+  base = stObj->base.BufferOffset;
+  assert(base < stObj->pt->width0);
+  size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);
+
+  img->u.buf.offset = base;
+  img->u.buf.size = size;
+   } else {
+  img->u.tex.level = u->Level + stObj->base.MinLevel;
+  if (stObj->pt->target == PIPE_TEXTURE_3D) {
+ if (u->Layered) {
+img->u.tex.first_layer = 0;
+img->u.tex.last_layer = u_minify(stObj->pt->depth0, 
img->u.tex.level) - 1;
+ } else {
+img->u.tex.first_layer = u->_Layer;
+img->u.tex.last_layer = u->_Layer;
+ }
+  } else {
+ img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
+ img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
+ if (u->Layered && img->resource->array_size > 1) {
+if (stObj->base.Immutable)
+   img->u.tex.last_layer += stObj->base.NumLayers - 1;
+else
+   img->u.tex.last_layer += img->resource->array_size - 1;
+ }
+  }
+   }
+}
+
 static void
 st_bind_images(struct st_context *st, struct gl_program *prog,
enum pipe_shader_type shader_type)
@@ -70,53 +125,7 @@ st_bind_images(struct st_context *st, struct gl_program 
*prog,
  continue;
   }

-  img->resource = stObj->pt;
-  img->format = st_mesa_format_to_pipe_format(st, u->_ActualFormat);
-
-  switch (u->Access) {
-  case GL_READ_ONLY:
- img->access = PIPE_IMAGE_ACCESS_READ;
- break;
-  case GL_WRITE_ONLY:
- img->access = PIPE_IMAGE_ACCESS_WRITE;
- break;
-  case GL_READ_WRITE:
- img->access = PIPE_IMAGE_ACCESS_READ_WRITE;
- break;
-  default:
- unreachable("bad gl_image_unit::Access");
-  }
-
-  if (stObj->pt->target == PIPE_BUFFER) {
- unsigned base, size;
-
- base = stObj->base.BufferOffset;
- assert(base < stObj->pt->width0);
- size = MIN2(stObj->pt->width0 - base, 
(unsigned)stObj->base.BufferSize);
-
- img->u.buf.offset = base;
- img->u.buf.size = size;
-  } else {
- img->u.tex.level = u->Level + stObj->base.MinLevel;
- if (stObj->pt->target == PIPE_TEXTURE_3D) {
-if (u->Layered) {
-   img->u.tex.first_layer = 0;
-   img->u.tex.last_layer = u_minify(stObj->pt->depth0, 
img->u.tex.level) - 1;
-} else {
-   img->u.tex.first_layer = u->_Layer;
-   img->u.tex.last_layer = u->_Layer;
-}
- } else {
-img->u.tex.first_layer = u->_Layer + stObj->base.MinLayer;
-img->u.tex.last_layer = u->_Layer + stObj->base.MinLayer;
-if (u->Layered && img->resource->array_size > 1) {
-   if (stObj->base.Immutable)
-  img->u.tex.last_layer += stObj->base.NumLayers - 1;
-   else
-  img->u.tex.last_layer += img->resource->array_size - 1;
-}
- }
-  }
+  st_convert_image(st, u, img);
}
cso_set_shader_images(st->cso_context, shader_type, 0,
  prog->info.num_images, images);
diff --git a/src/mesa/state_tracker/st_texture.h 
b/src/mesa/state_tracker/st_texture.h
index 0ce7989562..d9584c9acd 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -254,4 +254,10 @@ st_create_color_map_texture(struct gl_context *ctx);
 bool

Re: [Mesa-dev] [PATCH 2/2] radeonsi: decompress DCC in set_sampler_view instead of create_sampler_view (v2)


Both patches:

Reviewed-by: Nicolai Hähnle 


On 30.03.2017 16:30, Marek Olšák wrote:

From: Marek Olšák 

v2: don't add a new decompress helper function
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 14 +++---
 src/gallium/drivers/radeonsi/si_pipe.h|  1 +
 src/gallium/drivers/radeonsi/si_state.c   |  7 ---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index f28202f..2b91158 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -473,47 +473,55 @@ static void si_set_sampler_view(struct si_context *sctx,
struct si_sampler_views *views = &sctx->samplers[shader].views;
struct si_sampler_view *rview = (struct si_sampler_view*)view;
struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
uint32_t *desc = descs->list + slot * 16;

if (views->views[slot] == view && !disallow_early_out)
return;

if (view) {
struct r600_texture *rtex = (struct r600_texture 
*)view->texture;
+   bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
+
+   if (unlikely(!is_buffer && rview->dcc_incompatible)) {
+   if (vi_dcc_enabled(rtex, view->u.tex.first_level))
+   if (!r600_texture_disable_dcc(&sctx->b, rtex))
+   sctx->b.decompress_dcc(&sctx->b.b, rtex);
+
+   rview->dcc_incompatible = false;
+   }

assert(rtex); /* views with texture == NULL aren't supported */
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(desc, rview->state, 8*4);

-   if (rtex->resource.b.b.target == PIPE_BUFFER) {
+   if (is_buffer) {
rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;

si_set_buf_desc_address(&rtex->resource,
view->u.buf.offset,
desc + 4);
} else {
bool is_separate_stencil =
rtex->db_compatible &&
rview->is_stencil_sampler;

si_set_mutable_tex_desc_fields(sctx->screen, rtex,
   rview->base_level_info,
   rview->base_level,
   
rview->base.u.tex.first_level,
   rview->block_width,
   is_separate_stencil,
   desc);
}

-   if (rtex->resource.b.b.target != PIPE_BUFFER &&
-   rtex->fmask.size) {
+   if (!is_buffer && rtex->fmask.size) {
memcpy(desc + 8,
   rview->fmask_state, 8*4);
} else {
/* Disable FMASK and bind sampler state in [12:15]. */
memcpy(desc + 8,
   null_texture_descriptor, 4*4);

if (views->sampler_states[slot])
memcpy(desc + 12,
   views->sampler_states[slot]->val, 4*4);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index e1df3b6..3a6503a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -120,20 +120,21 @@ struct si_blend_color {
 struct si_sampler_view {
struct pipe_sampler_viewbase;
 /* [0..7] = image descriptor
  * [4..7] = buffer descriptor */
uint32_tstate[8];
uint32_tfmask_state[8];
const struct legacy_surf_level  *base_level_info;
unsignedbase_level;
unsignedblock_width;
bool is_stencil_sampler;
+   bool dcc_incompatible;
 };

 #define SI_SAMPLER_STATE_MAGIC 0x34f1c35a

 struct si_sampler_state {
 #ifdef DEBUG
unsignedmagic;
 #endif
uint32_tval[4];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 2c2e3c7..f8c6faf 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3430,23 +3430,24 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_X32_S8X24_UINT:

Re: [Mesa-dev] [PATCH 2/3] nvc0/ir: Handle TGSI_OPCODE_CLOCK

2017-03-31 Thread Boyan Ding

2017-03-31 11:24 GMT+08:00 Ilia Mirkin :
> On Thu, Mar 30, 2017 at 10:33 PM, Boyan Ding  wrote:
>> Signed-off-by: Boyan Ding 
>> ---
>>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 5 +
>>  1 file changed, 5 insertions(+)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index 7aaeedf8dd..9fbd3c0d30 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -3410,6 +3410,11 @@ Converter::handleInstruction(const struct 
>> tgsi_full_instruction *insn)
>>   mkCvt(OP_CVT, TYPE_U32, dst0[c], TYPE_U8, val0);
>>}
>>break;
>> +   case TGSI_OPCODE_CLOCK:
>> +  // The shifting is weird, but that's how they made it
>> +  mkOp1(OP_RDSV, TYPE_U32, dst0[1], mkSysVal(SV_CLOCK, 0))->fixed = 1;
>> +  mkOp1(OP_RDSV, TYPE_U32, dst0[0], mkSysVal(SV_CLOCK, 1))->fixed = 1;
>
> How sure are you about this? Shouldn't clocklo go into dst[0] and
> clockhi go into dst[1]? This is confirmed by
>
> """
> clock2x32ARB() returns
> the same value encoded as a two-component vector of 32-bit unsigned 
> integers
> with the first component containing the 32 least significant bits and the
> second component containing the 32 most significant bits.
> """
>
> Did the tests fail without that? Perhaps that indicates something else is 
> wrong?

I just noticed there are some issues with clock2x32ARB, will check it
more carefully.

>
>> +  break;
>> case TGSI_OPCODE_KILL_IF:
>>val0 = new_LValue(func, FILE_PREDICATE);
>>mask = 0;
>> --
>> 2.12.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC PATCH] egl/android: Dequeue buffers inside EGL calls

2017-03-31 Thread Tapani Pälli

On 03/31/2017 08:24 AM, Rob Clark wrote:

On Fri, Mar 31, 2017 at 12:22 AM, Tapani Pälli wrote:

On 03/30/2017 05:57 PM, Emil Velikov wrote:

On 30 March 2017 at 15:30, Tomasz Figa wrote:

On Thu, Mar 30, 2017 at 11:17 PM, Emil Velikov
wrote:

On 30 March 2017 at 11:55, Tomasz Figa wrote:

Android buffer queues can be abandoned, which results in failing to
dequeue next buffer. Currently this would fail somewhere deep within
the DRI stack calling loader's getBuffers*(), without any error
reporting to the client app. However Android framework code relies on
proper signaling of this event, so we move buffer dequeue to
createWindowSurface() and swapBuffers() call, which can generate proper
EGL errors. To keep the performance benefits of delayed buffer
handling,
if any, fence wait and DRI image creation is kept delayed until
getBuffers*() is called by the DRI driver.

Thank you Tomasz.

I'm fairly confident that this should resolve the crash [in
swap_buffers] that Mauro was seeing.
Mauro can you give it a test ?

Ah, I actually noticed a problem with existing code, supposedly fixed
by [1], but I'm afraid it's still wrong.

Right - I was wondering why we don't hit that on EGL/GBM or EGL/Wayland.
From a quick look - may be because EGL/Android drops the dpy mutex in
droid_window_enqueue_buffer().

My patch removes update_buffers() and changes the buffer management so
that there is always a buffer dequeued, starting from surface
creation, unless there was an error somewhere.

Off the top of your head - is there something stopping us from using
the same method on $other platforms?

[1]
https://cgit.freedesktop.org/mesa/mesa/commit/src/egl/drivers/dri2/platform_android.c?id=4d4558411db166d2d66f8cec9cb581149dbe1597

Not that huge of an expert on the Android specifics, so just a humble
request:
Can we seek the code resuffle (droid_{alloc,free}_local_buffer,

Oops silly typo - s/seek/split/.

other?) separate from the functionality changes ?

Sure. Thanks for suggestion.

Please give it a day or two for others to comment.

I'm trying to debug why this causes our homescreen (wallpaper) to be black.
Otherwise I haven't seen any issues with these changes.

wallpaper seems to be a special sorta hell.. I wonder if there is
somehow some sort of interaction with what I fixed / worked-around in
a5e733c6b52e93de3000647d075f5ca2f55fcb71 ??

Maybe at least try commenting out the temp-pbuffer thing to get max
texture size, and see if that "fixes" things

Can you give more details, I still live in la la land and don't know
about 'temp-pbuffer thing'?

// Tapani
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] [RFC v3] mesa/glthread: Call unmarshal_batch directly in glthread_finish

On 30.03.2017 14:20, Bartosz Tomczyk wrote:

Thanks Nicolai,

Adding Timothy who seems most active on glthread topic.

Guys, do you think we can land this, with above comments addressed?

Well, Edmondo's testing suggests the performance looks good, so I'd say 
go for it, and we'll see if anything regresses.

Cheers,
Nicolai

On Thu, Mar 30, 2017 at 1:40 PM, Nicolai Hähnle wrote:

On 30.03.2017 10:30, Bartosz Tomczyk wrote:

Thank you guys for testing.

I'll address all issues in next patch if we decide to merge it.

Nicolai,

Could you comment on _glapi_{get/set}_dispatch part. I'm not
familiar with it and I'm not sure if it's correct.

It looks good to me.

Cheers,
Nicolai

On Thu, Mar 30, 2017 at 5:38 AM, Michel Dänzer wrote:

On 30/03/17 02:31 AM, Bartosz Tomczyk wrote:
> Call it directly when batch queue is empty. This avoids
costly thread
> synchronisation. With this fix games that previously regressed
> with mesa_glthread=true like xonotic or grid autosport.

The second sentence here is missing a verb (at least).

--
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] glsl: use -O0 optimization for builtin_functions.cpp with MinGW