Re: [Mesa-dev] [PATCH 7/7] i965: Fix asynchronous mappings on !LLC platforms.

2017-07-06 Thread Kenneth Graunke
On Wednesday, July 5, 2017 2:24:55 PM PDT Chris Wilson wrote:
> Quoting Kenneth Graunke (2017-07-05 21:56:54)
> > ---
> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 15 +--
> >  1 file changed, 13 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> > b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > index 7756e2b5f6c..46696be3577 100644
> > --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> > @@ -56,6 +56,7 @@
> >  #ifndef ETIME
> >  #define ETIME ETIMEDOUT
> >  #endif
> > +#include "common/gen_clflush.h"
> >  #include "common/gen_debug.h"
> >  #include "common/gen_device_info.h"
> >  #include "libdrm_macros.h"
> > @@ -698,12 +699,22 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo 
> > *bo, unsigned flags)
> >   VG(VALGRIND_FREELIKE_BLOCK(map, 0));
> >   drm_munmap(map, bo->size);
> >}
> > +   } else if (!bo->cache_coherent) {
> > +  /* If we're reusing an existing CPU mapping, the CPU caches may
> > +   * contain stale data from the last time we read from that mapping.
> > +   * (With the BO cache, it might even be data from a previous buffer!)
> > +   *
> > +   * We need to invalidate those cachelines so that we see the latest
> > +   * contents.
> > +   */
> > +  gen_invalidate_range(bo->map_cpu, bo->size);
> > }
> 
> This leaves us in trouble on the first invocation with MAP_ASYNC, where
> we create the cpu mmapping but don't change any of its domains. (Not that
> GL allows for READ | UNSYNCHRONIZED if my reading of the spec was
> correct.) However, if you use
> 
>   if (!(flags & MAP_ASYNC))
>   wait_rendering(); /* teach me to use gem_wait! */
> 
>   if (!bo->cache_coherent)
>   gen_invalidate_range();
> 
> and a set_domain(GTT, 0) on creation that should cover everything.

I think I meant to set_domain(GTT, 0) on creation in this series, and
lost that patch somehow.  It seems like a good idea to add that.

You're correct, READ | UNSYNCHRONIZED is not allowed.  Since we disallow
CPU maps for writes, we should never see MAP_ASYNC here.  Which means we'll
always do gen_invalidate_range() and then set_domain(CPU).

I'm struggling to see how wait_rendering (aka set_domain(GTT)) helps us
here...there are no CPU writes to wait for...we disallowed them...

> In the meantime, s/else if (!bo->cache_coherent)/if (!bo->cache_coherent)/
> -Chris

Oh?  I can do that.  I figured that when we asked the kernel to create a
brand new CPU map for us, it would guarantee that the new virtual address
range didn't have any stale data in the CPU caches.  But, if it doesn't,
then we definitely need to clflush them out.

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] i965: Don't use PREAD for glGetBufferSubData().

2017-07-06 Thread Kenneth Graunke
On Thursday, July 6, 2017 4:21:28 AM PDT Chris Wilson wrote:
> Quoting Kenneth Graunke (2017-07-05 21:56:53)
> > diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
> > b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> > index a9ac29a6a81..2b0f7b9a698 100644
> > --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> > +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> > @@ -289,7 +289,10 @@ brw_get_buffer_subdata(struct gl_context *ctx,
> > if (brw_batch_references(>batch, intel_obj->buffer)) {
> >intel_batchbuffer_flush(brw);
> > }
> > -   brw_bo_get_subdata(intel_obj->buffer, offset, size, data);
> > +
> > +   void *map = brw_bo_map(brw, intel_obj->buffer, MAP_READ);
> 
> Be paranoid and wrap this in an if (map). Data pointer is provided by the
> user? otherwise you probably want to memset it on failure.
> 
> > +   memcpy(data, map + offset, size);
> > +   brw_bo_unmap(intel_obj->buffer);
> >  
> > mark_buffer_inactive(intel_obj);
> >  }

I suppose the paranoia is reasonable, but I'm not sure why I'd memset
it on failure...unless you're suggesting filling it with 0xd0d0d0d0 or
something to aid in debugging.  The only reasonably legitimate error
handling I can think of is to raise GL_OUT_OF_MEMORY, at which point
we may as well not write anything...

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: enable support for EXT_memory_object v2

2017-07-06 Thread Andres Rodriguez
v2: fix an indentation error

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/r600/r600_pipe.c   | 2 +-
 src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index e3abc10..dc225aa 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -296,6 +296,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_MEMOBJ:
return 1;
 
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -398,7 +399,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
-   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_DOUBLES:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index bc0e38f..60da3ce 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -506,6 +506,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 1;
 
case PIPE_CAP_INT64:
@@ -593,7 +594,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
-   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_QUERY_BUFFER_OBJECT:
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/25] radv: generate the same driver UUID as radeonsi

2017-07-06 Thread Andres Rodriguez
These need to match for interop compatibility queries.

Signed-off-by: Andres Rodriguez 
---
 src/amd/vulkan/radv_device.c  | 9 -
 src/amd/vulkan/radv_private.h | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 176e0c2..b2a867f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -63,6 +63,12 @@ radv_device_get_cache_uuid(enum radeon_family family, void 
*uuid)
 }
 
 static void
+radv_get_driver_uuid(void *uuid)
+{
+   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
+}
+
+static void
 radv_get_device_uuid(struct radeon_info *info, void *uuid)
 {
ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
@@ -300,6 +306,7 @@ radv_physical_device_init(struct radv_physical_device 
*device,
fprintf(stderr, "WARNING: radv is not a conformant vulkan 
implementation, testing use only.\n");
device->name = get_chip_name(device->rad_info.family);
 
+   radv_get_driver_uuid(>device_uuid);
radv_get_device_uuid(>rad_info, >device_uuid);
 
if (device->rad_info.family == CHIP_STONEY ||
@@ -744,7 +751,7 @@ void radv_GetPhysicalDeviceProperties2KHR(
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
VkPhysicalDeviceIDPropertiesKHX *properties = 
(VkPhysicalDeviceIDPropertiesKHX*)ext;
-   radv_device_get_cache_uuid(0, properties->driverUUID);
+   memcpy(properties->driverUUID, pdevice->driver_uuid, 
VK_UUID_SIZE);
memcpy(properties->deviceUUID, pdevice->device_uuid, 
VK_UUID_SIZE);
properties->deviceLUIDValid = false;
break;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 5130439..5f09d05 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -267,6 +267,7 @@ struct radv_physical_device {
struct radeon_info rad_info;
charpath[20];
const char *name;
+   uint8_t driver_uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
uint8_t cache_uuid[VK_UUID_SIZE];
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/25] radeonsi: enable support for EXT_memory_object

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/r600/r600_pipe.c   | 2 +-
 src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index e3abc10..078f12b 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -296,6 +296,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_MEMOBJ:
return 1;
 
case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -398,7 +399,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
-   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_DOUBLES:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index bc0e38f..60da3ce 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -506,6 +506,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_TGSI_TEX_TXF_LZ:
case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 1;
 
case PIPE_CAP_INT64:
@@ -593,7 +594,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
-   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_QUERY_BUFFER_OBJECT:
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/25] mesa: hook up queries for NUM_TILING_TYPES and TILING_TYPES

2017-07-06 Thread Andres Rodriguez
These are just basic implementations.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/formatquery.c | 17 +
 src/mesa/main/mtypes.h  |  3 +++
 src/mesa/main/texparam.c| 27 +++
 3 files changed, 47 insertions(+)

diff --git a/src/mesa/main/formatquery.c b/src/mesa/main/formatquery.c
index 07fb2f2..77c7faa 100644
--- a/src/mesa/main/formatquery.c
+++ b/src/mesa/main/formatquery.c
@@ -216,6 +216,8 @@ _legal_parameters(struct gl_context *ctx, GLenum target, 
GLenum internalformat,
case GL_CLEAR_BUFFER:
case GL_TEXTURE_VIEW:
case GL_VIEW_COMPATIBILITY_CLASS:
+   case GL_NUM_TILING_TYPES_EXT:
+   case GL_TILING_TYPES_EXT:
   /* The ARB_internalformat_query spec says:
*
* "If the  parameter to GetInternalformativ is not SAMPLES
@@ -284,6 +286,7 @@ _set_default_response(GLenum pname, GLint buffer[16])
 */
switch(pname) {
case GL_SAMPLES:
+   case GL_TILING_TYPES_EXT:
   break;
 
case GL_MAX_COMBINED_DIMENSIONS:
@@ -309,6 +312,7 @@ _set_default_response(GLenum pname, GLint buffer[16])
case GL_TEXTURE_COMPRESSED_BLOCK_WIDTH:
case GL_TEXTURE_COMPRESSED_BLOCK_HEIGHT:
case GL_TEXTURE_COMPRESSED_BLOCK_SIZE:
+   case GL_NUM_TILING_TYPES_EXT:
   buffer[0] = 0;
   break;
 
@@ -700,6 +704,13 @@ _mesa_query_internal_format_default(struct gl_context 
*ctx, GLenum target,
case GL_FILTER:
   params[0] = GL_FULL_SUPPORT;
   break;
+   case GL_NUM_TILING_TYPES_EXT:
+  params[0] = 2;
+  break;
+   case GL_TILING_TYPES_EXT:
+  params[0] = GL_OPTIMAL_TILING_EXT;
+  params[1] = GL_LINEAR_TILING_EXT;
+  break;
 
default:
   _set_default_response(pname, params);
@@ -1519,6 +1530,12 @@ _mesa_GetInternalformativ(GLenum target, GLenum 
internalformat, GLenum pname,
   }
   break;
 
+   case GL_NUM_TILING_TYPES_EXT:
+   case GL_TILING_TYPES_EXT:
+  ctx->Driver.QueryInternalFormat(ctx, target, internalformat, pname,
+  buffer);
+  break;
+
default:
   unreachable("bad param");
}
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 463f444..38055f3 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1057,6 +1057,9 @@ struct gl_texture_object
/** GL_ARB_shader_image_load_store */
GLenum ImageFormatCompatibilityType;
 
+   /** GL_EXT_memory_object */
+   GLenum TextureTiling;
+
/** GL_ARB_bindless_texture */
struct util_dynarray SamplerHandles;
struct util_dynarray ImageHandles;
diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c
index d8bbabf..b6e9150 100644
--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -622,6 +622,14 @@ set_tex_parameteri(struct gl_context *ctx,
   }
   goto invalid_pname;
 
+   case GL_TEXTURE_TILING_EXT:
+  if (ctx->Extensions.EXT_memory_object) {
+ texObj->TextureTiling = params[0];
+
+ return GL_TRUE;
+  }
+  goto invalid_pname;
+
default:
   goto invalid_pname;
}
@@ -778,6 +786,13 @@ set_tex_parameterf(struct gl_context *ctx,
   }
   return GL_TRUE;
 
+   case GL_TEXTURE_TILING_EXT:
+  if (ctx->Extensions.EXT_memory_object) {
+ texObj->TextureTiling = params[0];
+ return GL_TRUE;
+  }
+  goto invalid_pname;
+
default:
   goto invalid_pname;
}
@@ -2019,6 +2034,12 @@ get_tex_parameterfv(struct gl_context *ctx,
  *params = ENUM_TO_FLOAT(obj->Target);
  break;
 
+  case GL_TEXTURE_TILING_EXT:
+ if (!ctx->Extensions.EXT_memory_object)
+goto invalid_pname;
+ *params = ENUM_TO_FLOAT(obj->TextureTiling);
+ break;
+
   default:
  goto invalid_pname;
}
@@ -2251,6 +2272,12 @@ get_tex_parameteriv(struct gl_context *ctx,
  *params = (GLint) obj->Target;
  break;
 
+  case GL_TEXTURE_TILING_EXT:
+ if (!ctx->Extensions.EXT_memory_object)
+goto invalid_pname;
+ *params = (GLint) obj->TextureTiling;
+ break;
+
   default:
  goto invalid_pname;
}
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/25] radv: rename physical_device->uuid[] to cache_uuid[]

2017-07-06 Thread Andres Rodriguez
We have a few UUIDs, so let's be more specific.

Signed-off-by: Andres Rodriguez 
---
 src/amd/vulkan/radv_device.c | 4 ++--
 src/amd/vulkan/radv_pipeline_cache.c | 4 ++--
 src/amd/vulkan/radv_private.h| 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 8cf5442..d43892d 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -285,7 +285,7 @@ radv_physical_device_init(struct radv_physical_device 
*device,
goto fail;
}
 
-   if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
+   if (radv_device_get_cache_uuid(device->rad_info.family, 
device->cache_uuid)) {
radv_finish_wsi(device);
device->ws->destroy(device->ws);
result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
@@ -727,7 +727,7 @@ void radv_GetPhysicalDeviceProperties(
};
 
strcpy(pProperties->deviceName, pdevice->name);
-   memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
+   memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, 
VK_UUID_SIZE);
 }
 
 void radv_GetPhysicalDeviceProperties2KHR(
diff --git a/src/amd/vulkan/radv_pipeline_cache.c 
b/src/amd/vulkan/radv_pipeline_cache.c
index e57c99b..86479f6 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -332,7 +332,7 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
return;
if (header.device_id != device->physical_device->rad_info.pci_id)
return;
-   if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 
0)
+   if (memcmp(header.uuid, device->physical_device->cache_uuid, 
VK_UUID_SIZE) != 0)
return;
 
char *end = (void *) data + size;
@@ -431,7 +431,7 @@ VkResult radv_GetPipelineCacheData(
header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
header->vendor_id = 0x1002;
header->device_id = device->physical_device->rad_info.pci_id;
-   memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
+   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
p += header->header_size;
 
struct cache_entry *entry;
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a167409..5130439 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -267,8 +267,8 @@ struct radv_physical_device {
struct radeon_info rad_info;
charpath[20];
const char *name;
-   uint8_t uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
+   uint8_t cache_uuid[VK_UUID_SIZE];
 
int local_fd;
struct wsi_device   wsi_device;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/25] radv: generate same device UUID as radeonsi v2

2017-07-06 Thread Andres Rodriguez
This is required for interop use cases. The same device must report
identical UUIDs through the GL and Vulkan APIs so that users can
identify when it is safe to perform a memory object import.

v2: use ac helpers to calculate the uuid

Signed-off-by: Andres Rodriguez 
---
 src/amd/vulkan/radv_device.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index d43892d..176e0c2 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -63,12 +63,9 @@ radv_device_get_cache_uuid(enum radeon_family family, void 
*uuid)
 }
 
 static void
-radv_get_device_uuid(drmDevicePtr device, void *uuid) {
-   memset(uuid, 0, VK_UUID_SIZE);
-   memcpy((char*)uuid + 0, >businfo.pci->domain, 2);
-   memcpy((char*)uuid + 2, >businfo.pci->bus, 1);
-   memcpy((char*)uuid + 3, >businfo.pci->dev, 1);
-   memcpy((char*)uuid + 4, >businfo.pci->func, 1);
+radv_get_device_uuid(struct radeon_info *info, void *uuid)
+{
+   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
 }
 
 static const VkExtensionProperties instance_extensions[] = {
@@ -303,7 +300,7 @@ radv_physical_device_init(struct radv_physical_device 
*device,
fprintf(stderr, "WARNING: radv is not a conformant vulkan 
implementation, testing use only.\n");
device->name = get_chip_name(device->rad_info.family);
 
-   radv_get_device_uuid(drm_device, device->device_uuid);
+   radv_get_device_uuid(>rad_info, >device_uuid);
 
if (device->rad_info.family == CHIP_STONEY ||
device->rad_info.chip_class >= GFX9) {
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/25] gallium: introduce device/driver UUID queries

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/ddebug/dd_screen.c | 17 +
 src/gallium/include/pipe/p_defines.h   |  1 +
 src/gallium/include/pipe/p_screen.h| 13 +
 3 files changed, 31 insertions(+)

diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index fe9c841..955158f 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -197,6 +197,21 @@ dd_screen_get_driver_query_group_info(struct pipe_screen 
*_screen,
 }
 
 
+static void
+dd_screen_get_driver_uuid(struct pipe_screen *_screen, char *uuid)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_driver_uuid(screen, uuid);
+}
+
+static void
+dd_screen_get_device_uuid(struct pipe_screen *_screen, char *uuid)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_device_uuid(screen, uuid);
+}
 /
  * resource
  */
@@ -462,6 +477,8 @@ ddebug_screen_create(struct pipe_screen *screen)
SCR_INIT(get_driver_query_info);
SCR_INIT(get_driver_query_group_info);
SCR_INIT(get_compiler_options);
+   SCR_INIT(get_driver_uuid);
+   SCR_INIT(get_device_uuid);
 
 #undef SCR_INIT
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 91efa65..22d60d3 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -1066,6 +1066,7 @@ enum pipe_debug_type
PIPE_DEBUG_TYPE_CONFORMANCE,
 };
 
+#define PIPE_UUID_SIZE 16
 
 #ifdef __cplusplus
 }
diff --git a/src/gallium/include/pipe/p_screen.h 
b/src/gallium/include/pipe/p_screen.h
index d81625c..acfbd03 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -393,6 +393,19 @@ struct pipe_screen {
   struct pipe_memory_object 
*memobj,
   uint64_t offset);
 
+   /**
+* Fill @uuid with a unique driver identifier
+*
+* \param uuidpointer to a memory region of PIPE_UUID_SIZE bytes
+*/
+   void (*get_driver_uuid)(struct pipe_screen *screen, char *uuid);
+
+   /**
+* Fill @uuid with a unique device identifier
+*
+* \param uuidpointer to a memory region of PIPE_UUID_SIZE bytes
+*/
+   void (*get_device_uuid)(struct pipe_screen *screen, char *uuid);
 };
 
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/25] mesa: fix error handling for memory objects

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/externalobjects.c | 63 +
 1 file changed, 51 insertions(+), 12 deletions(-)

diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index 919a81c..73c9d4b 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -233,13 +233,30 @@ invalid_pname:
pname);
 }
 
+static GLboolean
+texstorage_validate(struct gl_context *ctx,
+struct gl_memory_object *memObj,
+const char *func)
+{
+   if (memObj->Name == 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "%s(memory=0)", func);
+  return GL_TRUE;
+   }
+
+   if (!memObj->Immutable) {
+  _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no associated memory)", func);
+  return GL_TRUE;
+   }
+
+   return GL_FALSE;
+}
 /**
  * Helper used by _mesa_TexStorageMem1/2/3DEXT().
  */
 static void
 texstorage_memory(GLuint dims, GLenum target, GLsizei levels, GLenum 
internalFormat,
   GLsizei width, GLsizei height, GLsizei depth, GLuint memory,
-  GLuint64 offset)
+  GLuint64 offset, const char *func)
 {
struct gl_texture_object *texObj;
struct gl_memory_object *memObj;
@@ -254,6 +271,9 @@ texstorage_memory(GLuint dims, GLenum target, GLsizei 
levels, GLenum internalFor
if (!memObj)
   return;
 
+   if (texstorage_validate(ctx, memObj, func))
+  return;
+
_mesa_texture_storage_memory(ctx, dims, texObj, memObj, target,
 levels, internalFormat,
 width, height, depth, offset, false);
@@ -278,6 +298,9 @@ texstorage_memory_ms(GLuint dims, GLenum target, GLsizei 
samples, GLenum interna
if (!memObj)
   return;
 
+   if (texstorage_validate(ctx, memObj, func))
+  return;
+
_mesa_texture_storage_ms_memory(ctx, dims, texObj, memObj, target, samples,
internalFormat, width, height, depth,
fixedSampleLocations, offset, func);
@@ -289,7 +312,8 @@ texstorage_memory_ms(GLuint dims, GLenum target, GLsizei 
samples, GLenum interna
 static void
 texturestorage_memory(GLuint dims, GLuint texture, GLsizei levels,
   GLenum internalFormat, GLsizei width, GLsizei height,
-  GLsizei depth, GLuint memory, GLuint64 offset)
+  GLsizei depth, GLuint memory, GLuint64 offset,
+  const char *func)
 {
struct gl_texture_object *texObj;
struct gl_memory_object *memObj;
@@ -304,6 +328,9 @@ texturestorage_memory(GLuint dims, GLuint texture, GLsizei 
levels,
if (!memObj)
   return;
 
+   if (texstorage_validate(ctx, memObj, func))
+  return;
+
_mesa_texture_storage_memory(ctx, dims, texObj, memObj, texObj->Target,
 levels, internalFormat,
 width, height, depth, offset, true);
@@ -328,6 +355,9 @@ texturestorage_memory_ms(GLuint dims, GLuint texture, 
GLsizei samples,
if (!memObj)
   return;
 
+   if (texstorage_validate(ctx, memObj, func))
+  return;
+
_mesa_texture_storage_ms_memory(ctx, dims, texObj, memObj, texObj->Target,
samples, internalFormat, width, height,
depth, fixedSampleLocations, offset, func);
@@ -342,7 +372,9 @@ _mesa_TexStorageMem2DEXT(GLenum target,
  GLuint memory,
  GLuint64 offset)
 {
-   texstorage_memory(2, target, levels, internalFormat, width, height, 1, 
memory, offset);
+   texstorage_memory(2, target, levels, internalFormat,
+ width, height, 1, memory, offset,
+ "glTexStorageMem2DEXT");
 }
 
 void GLAPIENTRY
@@ -370,7 +402,9 @@ _mesa_TexStorageMem3DEXT(GLenum target,
  GLuint memory,
  GLuint64 offset)
 {
-   texstorage_memory(3, target, levels, internalFormat, width, height, depth, 
memory, offset);
+   texstorage_memory(3, target, levels, internalFormat,
+ width, height, depth, memory, offset,
+ "glTexStorageMem3DEXT");
 }
 
 void GLAPIENTRY
@@ -398,7 +432,9 @@ _mesa_TextureStorageMem2DEXT(GLuint texture,
  GLuint memory,
  GLuint64 offset)
 {
-   texturestorage_memory(2, texture, levels, internalFormat, width, height, 1, 
memory, offset);
+   texturestorage_memory(2, texture, levels, internalFormat,
+ width, height, 1, memory, offset,
+ "glTexureStorageMem2DEXT");
 }
 
 void GLAPIENTRY
@@ -426,7 +462,9 @@ _mesa_TextureStorageMem3DEXT(GLuint texture,
  GLuint memory,
  GLuint64 offset)
 {
-   texturestorage_memory(3, texture, levels, internalFormat, width, 

[Mesa-dev] [PATCH 18/25] ac/gpu: add driver/device UUID query helpers

2017-07-06 Thread Andres Rodriguez
We need vulkan and gl to produce the same UUIDs. Therefore we should
keep the mechanism to compute these in a common location to guarantee
they are updated in lockstep.

Signed-off-by: Andres Rodriguez 
---
 src/amd/common/ac_gpu_info.c | 27 +++
 src/amd/common/ac_gpu_info.h |  5 +
 2 files changed, 32 insertions(+)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 3f39a08..1b94468 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -301,3 +301,30 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return true;
 }
 
+void ac_compute_driver_uuid(char *uuid, size_t size)
+{
+   char amd_uuid[] = "AMD-MESA-DRV";
+
+   assert(size >= sizeof(amd_uuid));
+
+   memset(uuid, 0, size);
+   strncpy(uuid, amd_uuid, size);
+}
+
+void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size)
+{
+   uint32_t *uint_uuid = (uint32_t*)uuid;
+
+   assert(size >= sizeof(uint32_t)*4);
+
+   /**
+* Use the device info directly instead of using a sha1. GL/VK UUIDs
+* are 16 byte vs 20 byte for sha1, and the truncation that would be
+* required would get rid of part of the little entropy we have.
+* */
+   memset(uuid, 0, size);
+   uint_uuid[0] = info->pci_domain;
+   uint_uuid[1] = info->pci_bus;
+   uint_uuid[2] = info->pci_dev;
+   uint_uuid[3] = info->pci_func;
+}
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 3091fed..516299a 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -26,6 +26,7 @@
 #ifndef AC_GPU_INFO_H
 #define AC_GPU_INFO_H
 
+#include 
 #include 
 #include 
 #include "amd_family.h"
@@ -104,6 +105,10 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
   struct radeon_info *info,
   struct amdgpu_gpu_info *amdinfo);
 
+void ac_compute_driver_uuid(char *uuid, size_t size);
+
+void ac_compute_device_uuid(struct radeon_info *info, char *uuid, size_t size);
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/25] radeonsi: hook up device/driver UUID queries

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index fd67d9a..c14d4eb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -895,6 +895,18 @@ static const char* r600_get_name(struct pipe_screen* 
pscreen)
return rscreen->renderer_string;
 }
 
+static void r600_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+   ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
+}
+
+static void r600_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+   struct r600_common_screen *rscreen = (struct r600_common_screen 
*)pscreen;
+
+   ac_compute_device_uuid(>info, uuid, PIPE_UUID_SIZE);
+}
+
 static float r600_get_paramf(struct pipe_screen* pscreen,
 enum pipe_capf param)
 {
@@ -1355,6 +1367,8 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
rscreen->b.get_name = r600_get_name;
rscreen->b.get_vendor = r600_get_vendor;
rscreen->b.get_device_vendor = r600_get_device_vendor;
+   rscreen->b.get_device_uuid = r600_get_device_uuid;
+   rscreen->b.get_driver_uuid = r600_get_driver_uuid;
rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] MRT rendering speed up from the pro driver

2017-07-06 Thread Dave Airlie
Hi,

Hopefully someone in here can help with this, and maybe ask the
internal Vulkan team how this works.

I've been looking into a large perf cliff in radv vs pro when MRT
rendering is enabled (I didn't know that
was what I was looking for until today - about 2-3 weeks ago I was
just digging around).

I looked at some traces from pro and finally spotted that it realigns
the non-zero MRT color bases and
dcc bases on 0x3800 (14336) multiples on my rx480 gpu, if I hack radv
to do the same I get the same
MRT speedups.

I think radeonsi should look into doing the same sort of alignments,
it might be easier there.

Now with vulkan changing the base address of the image after
allocation and image view creation
seems to be fraught with dangerous corner cases. Vulkan is pretty
explicit, and I've no idea
what would happen if you rendered to a target as MRT2 then later
decided to render to same target
as MRT1. I've no idea how we can propagate the MRT offset we find out
about at vkCreateFramebuffer
time into texture descriptors that we store in image views (short of
keeping a long list of every image
view that was ever created for the image and propagating the change
across all of them), and how would
that work if two threads did vkCreateFramebuffer on the same image, one
for MRT1 and one for MRT2?

Anyways just thought I'd share the results of the investigating and
hope someone can fill me in on
how to calculate 0x3800 and how we might do this cleanly in radv.

Dave.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/25] mesa: hook up UUID queries for driver and device v2

2017-07-06 Thread Andres Rodriguez
v2: respective changes for new gallium interface

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h  | 15 +++
 src/mesa/main/get.c | 17 +
 src/mesa/main/version.c | 13 +
 src/mesa/main/version.h |  6 ++
 src/mesa/state_tracker/st_context.c | 20 
 5 files changed, 71 insertions(+)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 27c6efc..f7fe217 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1108,6 +1108,21 @@ struct dd_function_table {
   GLenum usage,
   struct gl_buffer_object *bufObj);
 
+   /**
+* Fill uuid with an unique identifier for this driver
+*
+* uuid must point to GL_UUID_SIZE_EXT bytes of available memory
+*/
+   void (*GetDriverUuid)(struct gl_context *ctx, char *uuid);
+
+   /**
+* Fill uuid with an unique identifier for the device associated
+* to this driver
+*
+* uuid must point to GL_UUID_SIZE_EXT bytes of available memory
+*/
+   void (*GetDeviceUuid)(struct gl_context *ctx, char *uuid);
+
/*@}*/
 
/**
diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 9f26ad1..bcbec1a 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -40,6 +40,7 @@
 #include "framebuffer.h"
 #include "samplerobj.h"
 #include "stencil.h"
+#include "version.h"
 
 /* This is a table driven implemetation of the glGet*v() functions.
  * The basic idea is that most getters just look up an int somewhere
@@ -832,6 +833,14 @@ find_custom_value(struct gl_context *ctx, const struct 
value_desc *d, union valu
 ctx->Texture.Unit[unit].CurrentTex[d->offset]->Name;
   break;
 
+   /* GL_EXT_external_objects */
+   case GL_DRIVER_UUID_EXT:
+  _mesa_get_driver_uuid(ctx, v->value_int_4);
+  break;
+   case GL_DEVICE_UUID_EXT:
+  _mesa_get_device_uuid(ctx, v->value_int_4);
+  break;
+
/* GL_EXT_packed_float */
case GL_RGBA_SIGNED_COMPONENTS_EXT:
   {
@@ -2491,6 +2500,14 @@ find_value_indexed(const char *func, GLenum pname, 
GLuint index, union value *v)
  goto invalid_value;
   v->value_int = ctx->Const.MaxComputeVariableGroupSize[index];
   return TYPE_INT;
+
+   /* GL_EXT_external_objects */
+   case GL_DRIVER_UUID_EXT:
+  _mesa_get_driver_uuid(ctx, v->value_int_4);
+  return TYPE_INT_4;
+   case GL_DEVICE_UUID_EXT:
+  _mesa_get_device_uuid(ctx, v->value_int_4);
+  return TYPE_INT_4;
}
 
  invalid_enum:
diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c
index 34f8bbb..c8aa3ca 100644
--- a/src/mesa/main/version.c
+++ b/src/mesa/main/version.c
@@ -635,3 +635,16 @@ _mesa_compute_version(struct gl_context *ctx)
   break;
}
 }
+
+
+void
+_mesa_get_driver_uuid(struct gl_context *ctx, GLint *uuid)
+{
+   ctx->Driver.GetDriverUuid(ctx, (char*) uuid);
+}
+
+void
+_mesa_get_device_uuid(struct gl_context *ctx, GLint *uuid)
+{
+   ctx->Driver.GetDeviceUuid(ctx, (char*) uuid);
+}
diff --git a/src/mesa/main/version.h b/src/mesa/main/version.h
index ee7cb75..4cb5e5f 100644
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -47,4 +47,10 @@ _mesa_override_gl_version(struct gl_context *ctx);
 extern void
 _mesa_override_glsl_version(struct gl_constants *consts);
 
+extern void
+_mesa_get_driver_uuid(struct gl_context *ctx, GLint *uuid);
+
+extern void
+_mesa_get_device_uuid(struct gl_context *ctx, GLint *uuid);
+
 #endif /* VERSION_H */
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index a846be3..a8194ed 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -641,6 +641,24 @@ st_set_background_context(struct gl_context *ctx,
smapi->set_background_context(&st->iface, queue_info);
 }
 
+static void
+st_get_device_uuid(struct gl_context *ctx, char *uuid)
+{
+   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+
+   assert(GL_UUID_SIZE_EXT <= PIPE_UUID_SIZE);
+   screen->get_device_uuid(screen, uuid);
+}
+
+static void
+st_get_driver_uuid(struct gl_context *ctx, char *uuid)
+{
+   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
+
+   assert(GL_UUID_SIZE_EXT <= PIPE_UUID_SIZE);
+   screen->get_driver_uuid(screen, uuid);
+}
+
 void st_init_driver_functions(struct pipe_screen *screen,
   struct dd_function_table *functions)
 {
@@ -687,4 +705,6 @@ void st_init_driver_functions(struct pipe_screen *screen,
functions->UpdateState = st_invalidate_state;
functions->QueryMemoryInfo = st_query_memory_info;
functions->SetBackgroundContext = st_set_background_context;
+   functions->GetDriverUuid = st_get_device_uuid;
+   functions->GetDeviceUuid = st_get_driver_uuid;
 }
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org

[Mesa-dev] [PATCH 12/25] mesa: hook up (Named)BufferStorageMem api

2017-07-06 Thread Andres Rodriguez
Include no_error variants as well.

Signed-off-by: Andres Rodriguez 
---
 src/mapi/glapi/gen/EXT_external_objects.xml |  4 +-
 src/mesa/main/bufferobj.c   | 80 +++--
 src/mesa/main/bufferobj.h   | 16 +-
 src/mesa/main/externalobjects.c | 18 ---
 src/mesa/main/externalobjects.h | 12 -
 5 files changed, 81 insertions(+), 49 deletions(-)

diff --git a/src/mapi/glapi/gen/EXT_external_objects.xml 
b/src/mapi/glapi/gen/EXT_external_objects.xml
index 7c4fd6f..25e6879 100644
--- a/src/mapi/glapi/gen/EXT_external_objects.xml
+++ b/src/mapi/glapi/gen/EXT_external_objects.xml
@@ -108,7 +108,7 @@
 
 
 
-
+
 
 
 
@@ -159,7 +159,7 @@
 
 
 
-
+
 
 
 
diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c
index 419972e..13d16b1 100644
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -39,6 +39,7 @@
 #include "imports.h"
 #include "context.h"
 #include "bufferobj.h"
+#include "externalobjects.h"
 #include "mtypes.h"
 #include "teximage.h"
 #include "glformats.h"
@@ -1596,9 +1597,12 @@ validate_buffer_storage(struct gl_context *ctx,
 
 static void
 buffer_storage(struct gl_context *ctx, struct gl_buffer_object *bufObj,
-   GLenum target, GLsizeiptr size, const GLvoid *data,
-   GLbitfield flags, const char *func)
+   struct gl_memory_object *memObj, GLenum target,
+   GLsizeiptr size, const GLvoid *data, GLbitfield flags,
+   GLuint64 offset, const char *func)
 {
+   GLboolean err;
+
/* Unmap the existing buffer.  We'll replace it now.  Not an error. */
_mesa_buffer_unmap_all_mappings(ctx, bufObj);
 
@@ -1608,9 +1612,18 @@ buffer_storage(struct gl_context *ctx, struct 
gl_buffer_object *bufObj,
bufObj->Immutable = GL_TRUE;
bufObj->MinMaxCacheDirty = true;
 
-   assert(ctx->Driver.BufferData);
-   if (!ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
-   flags, bufObj)) {
+   if (memObj) {
+  assert(ctx->Driver.BufferData);
+  err = ctx->Driver.BufferDataMem(ctx, target, size, memObj, offset,
+  GL_DYNAMIC_DRAW, bufObj);
+   }
+   else {
+  assert(ctx->Driver.BufferData);
+  err = ctx->Driver.BufferData(ctx, target, size, data, GL_DYNAMIC_DRAW,
+   flags, bufObj);
+   }
+
+   if (err) {
   if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
  /* Even though the interaction between AMD_pinned_memory and
   * glBufferStorage is not described in the spec, Graham Sellers
@@ -1627,11 +1640,16 @@ buffer_storage(struct gl_context *ctx, struct 
gl_buffer_object *bufObj,
 
 static ALWAYS_INLINE void
 inlined_buffer_storage(GLenum target, GLuint buffer, GLsizeiptr size,
-   const GLvoid *data, GLbitfield flags, bool dsa,
-   bool no_error, const char *func)
+   const GLvoid *data, GLbitfield flags,
+   GLuint memory, GLuint64 offset,
+   bool dsa, bool mem, bool no_error, const char *func)
 {
GET_CURRENT_CONTEXT(ctx);
struct gl_buffer_object *bufObj;
+   struct gl_memory_object *memObj = NULL;
+
+   if (mem)
+  memObj = _mesa_lookup_memory_object(ctx, memory);
 
if (dsa) {
   if (no_error) {
@@ -1653,7 +1671,7 @@ inlined_buffer_storage(GLenum target, GLuint buffer, 
GLsizeiptr size,
}
 
if (no_error || validate_buffer_storage(ctx, bufObj, size, flags, func))
-  buffer_storage(ctx, bufObj, target, size, data, flags, func);
+  buffer_storage(ctx, bufObj, memObj, target, size, data, flags, offset, 
func);
 }
 
 
@@ -1661,8 +1679,8 @@ void GLAPIENTRY
 _mesa_BufferStorage_no_error(GLenum target, GLsizeiptr size,
  const GLvoid *data, GLbitfield flags)
 {
-   inlined_buffer_storage(target, 0, size, data, flags, false, true,
-  "glBufferStorage");
+   inlined_buffer_storage(target, 0, size, data, flags, GL_NONE, 0,
+  false, false, true, "glBufferStorage");
 }
 
 
@@ -1670,10 +1688,25 @@ void GLAPIENTRY
 _mesa_BufferStorage(GLenum target, GLsizeiptr size, const GLvoid *data,
 GLbitfield flags)
 {
-   inlined_buffer_storage(target, 0, size, data, flags, false, false,
-  "glBufferStorage");
+   inlined_buffer_storage(target, 0, size, data, flags, GL_NONE, 0,
+  false, false, false, "glBufferStorage");
 }
 
+void GLAPIENTRY
+_mesa_BufferStorageMemEXT_no_error(GLenum target, GLsizeiptr size,
+   GLuint memory, GLuint64 offset)
+{
+   inlined_buffer_storage(target, 0, size, NULL, 0, memory, offset,
+  false, true, true, "glBufferStorageMemEXT");
+}
+
+void GLAPIENTRY

[Mesa-dev] [PATCH 15/25] mesa: Implement glGetUnsignedByte{v|i_v}

2017-07-06 Thread Andres Rodriguez
These are used by EXT_external_objects to present UUIDs for the device
and the driver.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/get.c | 177 
 1 file changed, 177 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index b5c0723..9f26ad1 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1419,6 +1419,73 @@ static const int transpose[] = {
3, 7, 11, 15
 };
 
+static GLsizei
+get_value_size(enum value_type type, const union value *v)
+{
+   switch (type) {
+   case TYPE_INVALID:
+  return 0;
+   case TYPE_CONST:
+   case TYPE_UINT:
+   case TYPE_INT:
+  return sizeof(GLint);
+   case TYPE_INT_2:
+   case TYPE_UINT_2:
+  return sizeof(GLint) * 2;
+   case TYPE_INT_3:
+   case TYPE_UINT_3:
+  return sizeof(GLint) * 3;
+   case TYPE_INT_4:
+   case TYPE_UINT_4:
+  return sizeof(GLint) * 4;
+   case TYPE_INT_N:
+  return sizeof(GLint) * v->value_int_n.n;
+   case TYPE_INT64:
+  return sizeof(GLint64);
+  break;
+  break;
+   case TYPE_ENUM:
+  return sizeof(GLenum);
+   case TYPE_ENUM_2:
+  return sizeof(GLenum) * 2;
+   case TYPE_BOOLEAN:
+  return sizeof(GLboolean);
+   case TYPE_BIT_0:
+   case TYPE_BIT_1:
+   case TYPE_BIT_2:
+   case TYPE_BIT_3:
+   case TYPE_BIT_4:
+   case TYPE_BIT_5:
+   case TYPE_BIT_6:
+   case TYPE_BIT_7:
+  return 1;
+   case TYPE_FLOAT:
+   case TYPE_FLOATN:
+  return sizeof(GLfloat);
+   case TYPE_FLOAT_2:
+   case TYPE_FLOATN_2:
+  return sizeof(GLfloat) * 2;
+   case TYPE_FLOAT_3:
+   case TYPE_FLOATN_3:
+  return sizeof(GLfloat) * 3;
+   case TYPE_FLOAT_4:
+   case TYPE_FLOATN_4:
+  return sizeof(GLfloat) * 4;
+   case TYPE_FLOAT_8:
+  return sizeof(GLfloat) * 8;
+   case TYPE_DOUBLEN:
+  return sizeof(GLdouble);
+   case TYPE_DOUBLEN_2:
+  return sizeof(GLdouble) * 2;
+   case TYPE_MATRIX:
+  return sizeof (GLfloat) * 16;
+   case TYPE_MATRIX_T:
+  return sizeof (GLfloat) * 16;
+   default:
+  return -1;
+   }
+}
+
 void GLAPIENTRY
 _mesa_GetBooleanv(GLenum pname, GLboolean *params)
 {
@@ -1937,7 +2004,70 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
 void GLAPIENTRY
 _mesa_GetUnsignedBytevEXT(GLenum pname, GLubyte *data)
 {
+   const struct value_desc *d;
+   union value v;
+   int shift;
+   void *p;
+   GLsizei size;
+   const char *func = "glGetUnsignedBytevEXT";
+
+   GET_CURRENT_CONTEXT(ctx);
 
+   d = find_value(func, pname, &p, &v);
+   size = get_value_size(d->type, &v);
+   if (size <= 0) {
+  _mesa_error(ctx, GL_INVALID_ENUM, "%s(pname=%s)",
+  func, _mesa_enum_to_string(pname));
+   }
+
+   switch (d->type) {
+   case TYPE_BIT_0:
+   case TYPE_BIT_1:
+   case TYPE_BIT_2:
+   case TYPE_BIT_3:
+   case TYPE_BIT_4:
+   case TYPE_BIT_5:
+   case TYPE_BIT_6:
+   case TYPE_BIT_7:
+  shift = d->type - TYPE_BIT_0;
+  data[0] = (*(GLbitfield *) p >> shift) & 1;
+  break;
+   case TYPE_CONST:
+  memcpy(data, &d->offset, size);
+  break;
+   case TYPE_INT_N:
+  memcpy(data, &v.value_int_n.ints, size);
+  break;
+   case TYPE_UINT:
+   case TYPE_INT:
+   case TYPE_INT_2:
+   case TYPE_UINT_2:
+   case TYPE_INT_3:
+   case TYPE_UINT_3:
+   case TYPE_INT_4:
+   case TYPE_UINT_4:
+   case TYPE_INT64:
+   case TYPE_ENUM:
+   case TYPE_ENUM_2:
+   case TYPE_BOOLEAN:
+   case TYPE_FLOAT:
+   case TYPE_FLOATN:
+   case TYPE_FLOAT_2:
+   case TYPE_FLOATN_2:
+   case TYPE_FLOAT_3:
+   case TYPE_FLOATN_3:
+   case TYPE_FLOAT_4:
+   case TYPE_FLOATN_4:
+   case TYPE_FLOAT_8:
+   case TYPE_DOUBLEN:
+   case TYPE_DOUBLEN_2:
+   case TYPE_MATRIX:
+   case TYPE_MATRIX_T:
+  memcpy(data, p, size);
+  break;
+   default:
+  break; /* nothing - GL error was recorded */
+   }
 }
 
 /**
@@ -2648,7 +2778,54 @@ _mesa_GetDoublei_v(GLenum pname, GLuint index, GLdouble 
*params)
 void GLAPIENTRY
 _mesa_GetUnsignedBytei_vEXT(GLenum target, GLuint index, GLubyte *data)
 {
+   GLsizei size;
+   union value v;
+   enum value_type type;
+   const char *func = "glGetUnsignedBytei_vEXT";
+
+   GET_CURRENT_CONTEXT(ctx);
 
+   type = find_value_indexed(func, target, index, &v);
+   size = get_value_size(type, &v);
+   if (size <= 0) {
+  _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
+  func, _mesa_enum_to_string(target));
+   }
+
+   switch (type) {
+   case TYPE_UINT:
+   case TYPE_INT:
+   case TYPE_INT_2:
+   case TYPE_UINT_2:
+   case TYPE_INT_3:
+   case TYPE_UINT_3:
+   case TYPE_INT_4:
+   case TYPE_UINT_4:
+   case TYPE_INT64:
+   case TYPE_ENUM:
+   case TYPE_ENUM_2:
+   case TYPE_BOOLEAN:
+   case TYPE_FLOAT:
+   case TYPE_FLOATN:
+   case TYPE_FLOAT_2:
+   case TYPE_FLOATN_2:
+   case TYPE_FLOAT_3:
+   case TYPE_FLOATN_3:
+   case TYPE_FLOAT_4:
+   case TYPE_FLOATN_4:
+   case TYPE_FLOAT_8:
+   case TYPE_DOUBLEN:
+   case TYPE_DOUBLEN_2:
+   case TYPE_MATRIX:
+   case TYPE_MATRIX_T:
+  memcpy(data, &v.value_int, size);
+  

[Mesa-dev] [PATCH 14/25] mesa: expose EXT_memory_object and EXT_memory_object_fd v2

2017-07-06 Thread Andres Rodriguez
v2: use PIPE_CAP_MEMOBJ to guard the extension

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/extensions_table.h   | 2 ++
 src/mesa/main/mtypes.h | 2 ++
 src/mesa/state_tracker/st_extensions.c | 5 +
 3 files changed, 9 insertions(+)

diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 757b7bf..347a619 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -226,6 +226,8 @@ EXT(EXT_gpu_program_parameters  , 
EXT_gpu_program_parameters
 EXT(EXT_gpu_shader4 , EXT_gpu_shader4  
  , GLL, GLC,  x ,  x , 2006)
 EXT(EXT_gpu_shader5 , ARB_gpu_shader5  
  ,  x ,  x ,  x ,  31, 2014)
 EXT(EXT_map_buffer_range, ARB_map_buffer_range 
  ,  x ,  x , ES1, ES2, 2012)
+EXT(EXT_memory_object   , EXT_memory_object
  , GLL, GLC,  x , ES2, 2017)
+EXT(EXT_memory_object_fd, EXT_memory_object_fd 
  , GLL, GLC,  x , ES2, 2017)
 EXT(EXT_multi_draw_arrays   , dummy_true   
  , GLL,  x , ES1, ES2, 1999)
 EXT(EXT_packed_depth_stencil, dummy_true   
  , GLL, GLC,  x ,  x , 2005)
 EXT(EXT_packed_float, EXT_packed_float 
  , GLL, GLC,  x ,  x , 2004)
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8dcc1a8..463f444 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4125,6 +4125,8 @@ struct gl_extensions
GLboolean EXT_framebuffer_sRGB;
GLboolean EXT_gpu_program_parameters;
GLboolean EXT_gpu_shader4;
+   GLboolean EXT_memory_object;
+   GLboolean EXT_memory_object_fd;
GLboolean EXT_packed_float;
GLboolean EXT_pixel_buffer_object;
GLboolean EXT_point_parameters;
diff --git a/src/mesa/state_tracker/st_extensions.c 
b/src/mesa/state_tracker/st_extensions.c
index 74193cc..b00d7e8 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -1321,4 +1321,9 @@ void st_init_extensions(struct pipe_screen *screen,
   extensions->ARB_texture_cube_map_array &&
   extensions->ARB_texture_stencil8 &&
   extensions->ARB_texture_multisample;
+
+   if (screen->get_param(screen, PIPE_CAP_MEMOBJ)) {
+  extensions->EXT_memory_object = GL_TRUE;
+  extensions->EXT_memory_object_fd = GL_TRUE;
+   }
 }
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/25] gallium: introduce PIPE_CAP_MEMOBJ

2017-07-06 Thread Andres Rodriguez
This can be used to guard support for EXT_memory_object and related
extensions.

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 15 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index eefb51c..718f0ac 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -258,6 +258,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
/* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index a915d65..6c9c011 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -321,6 +321,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c 
b/src/gallium/drivers/i915/i915_screen.c
index 4ad98e2..3919978 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -310,6 +310,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
cap)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
b/src/gallium/drivers/llvmpipe/lp_screen.c
index e98e30d..7959015 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -356,6 +356,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
pipe_cap param)
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
}
/* should only get here on unhandled cases */
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index a352ff5..14d1b1a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -220,6 +220,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 8bbe403..571df1c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -301,6 +301,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_MEMOBJ:
   return 0;
 
case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index 5cdb248..37d102b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -242,6 +242,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum 
pipe_cap param)
 case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 case PIPE_CAP_POST_DEPTH_COVERAGE:
 case PIPE_CAP_BINDLESS_TEXTURE:
+case PIPE_CAP_MEMOBJ:
 return 0;
 
 /* SWTCL-only features. */
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index f0ea409..e3abc10 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ 

[Mesa-dev] [PATCH 07/25] mesa: hook up memory object multisamples tex(ture)storage api

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/externalobjects.c | 53 
 src/mesa/main/teximage.c| 76 +
 src/mesa/main/teximage.h| 10 ++
 3 files changed, 110 insertions(+), 29 deletions(-)

diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index d083ad9..0b4f5f5 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -261,10 +261,26 @@ texstorage_memory(GLuint dims, GLenum target, GLsizei 
levels, GLenum internalFor
 
 static void
 texstorage_memory_ms(GLuint dims, GLenum target, GLsizei samples, GLenum 
internalFormat,
- GLsizei width, GLsizei height, GLsizei depth, GLboolean 
fixedSampleLocations,
- GLuint memory, GLuint64 offset)
+ GLsizei width, GLsizei height, GLsizei depth,
+ GLboolean fixedSampleLocations, GLuint memory, GLuint64 
offset,
+ const char* func)
 {
+   struct gl_texture_object *texObj;
+   struct gl_memory_object *memObj;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+  return;
+
+   memObj = _mesa_lookup_memory_object(ctx, memory);
+   if (!memObj)
+  return;
 
+   _mesa_texture_storage_ms_memory(ctx, dims, texObj, memObj, target, samples,
+   internalFormat, width, height, depth,
+   fixedSampleLocations, offset, func);
 }
 
 /**
@@ -297,9 +313,24 @@ static void
 texturestorage_memory_ms(GLuint dims, GLuint texture, GLsizei samples,
  GLenum internalFormat, GLsizei width, GLsizei height,
  GLsizei depth, GLboolean fixedSampleLocations,
- GLuint memory, GLuint64 offset)
+ GLuint memory, GLuint64 offset, const char* func)
 {
+   struct gl_texture_object *texObj;
+   struct gl_memory_object *memObj;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture(ctx, texture);
+   if (!texObj)
+  return;
+
+   memObj = _mesa_lookup_memory_object(ctx, memory);
+   if (!memObj)
+  return;
 
+   _mesa_texture_storage_ms_memory(ctx, dims, texObj, memObj, texObj->Target,
+   samples, internalFormat, width, height,
+   depth, fixedSampleLocations, offset, func);
 }
 
 void GLAPIENTRY
@@ -324,7 +355,9 @@ _mesa_TexStorageMem2DMultisampleEXT(GLenum target,
 GLuint memory,
 GLuint64 offset)
 {
-   texstorage_memory_ms(2, target, samples, internalFormat, width, height, 1, 
fixedSampleLocations, memory, offset);
+   texstorage_memory_ms(2, target, samples, internalFormat, width, height, 1,
+fixedSampleLocations, memory, offset,
+"glTexStorageMem2DMultisampleEXT");
 }
 
 void GLAPIENTRY
@@ -351,7 +384,9 @@ _mesa_TexStorageMem3DMultisampleEXT(GLenum target,
 GLuint memory,
 GLuint64 offset)
 {
-   texstorage_memory_ms(3, target, samples, internalFormat, width, height, 
depth, fixedSampleLocations, memory, offset);
+   texstorage_memory_ms(3, target, samples, internalFormat, width, height, 
depth,
+fixedSampleLocations, memory, offset,
+"glTexStorageMem3DMultisampleEXT");
 }
 
 void GLAPIENTRY
@@ -385,7 +420,9 @@ _mesa_TextureStorageMem2DMultisampleEXT(GLuint texture,
 GLuint memory,
 GLuint64 offset)
 {
-   texturestorage_memory_ms(2, texture, samples, internalFormat, width, 
height, 1, fixedSampleLocations, memory, offset);
+   texturestorage_memory_ms(2, texture, samples, internalFormat, width, 
height, 1,
+fixedSampleLocations, memory, offset,
+"glTextureStorageMem2DMultisampleEXT");
 }
 
 void GLAPIENTRY
@@ -412,7 +449,9 @@ _mesa_TextureStorageMem3DMultisampleEXT(GLuint texture,
 GLuint memory,
 GLuint64 offset)
 {
-   texturestorage_memory_ms(3, texture, samples, internalFormat, width, 
height, depth, fixedSampleLocations, memory, offset);
+   texturestorage_memory_ms(3, texture, samples, internalFormat, width, 
height, depth,
+fixedSampleLocations, memory, offset,
+"glTextureStorageMem3DMultisampleEXT");
 }
 
 void GLAPIENTRY
diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 36747a7..e6aeaae 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -5617,11 +5617,13 @@ check_multisample_target(GLuint dims, GLenum target, 
bool dsa)
 static void
 texture_image_multisample(struct gl_context 

[Mesa-dev] [PATCH 09/25] radeonsi: add basic memory object support

2017-07-06 Thread Andres Rodriguez
From: Dave Airlie 

Signed-off-by: Andres Rodriguez 
---
 src/gallium/drivers/radeon/r600_pipe_common.h |   7 ++
 src/gallium/drivers/radeon/r600_texture.c | 112 ++
 2 files changed, 119 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b22a3a7..4c1a706 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -377,6 +377,13 @@ union r600_mmio_counters {
unsigned array[0];
 };
 
+struct r600_memory_object {
+   struct pipe_memory_object   b;
+   struct pb_buffer*buf;
+   uint32_tstride;
+   uint32_toffset;
+};
+
 struct r600_common_screen {
struct pipe_screen  b;
struct radeon_winsys*ws;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2deb56a..0baa0ee 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2822,10 +2822,122 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
}
 }
 
+static struct pipe_memory_object *
+r600_memobj_from_handle(struct pipe_screen *screen,
+   struct winsys_handle *whandle,
+   bool dedicated)
+{
+   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+   struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
+   struct pb_buffer *buf = NULL;
+   uint32_t stride, offset;
+
+   if (!memobj)
+   return NULL;
+
+   buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
+ &stride, &offset);
+   if (!buf)
+   return NULL;
+
+   memobj->b.dedicated = dedicated;
+   memobj->buf = buf;
+   memobj->stride = stride;
+   memobj->offset = offset;
+
+   return (struct pipe_memory_object *)memobj;
+}
+
+static void
+r600_memobj_destroy(struct pipe_screen *screen,
+   struct pipe_memory_object *memobj)
+{
+   free(memobj);
+}
+
+static struct pipe_resource *
+r600_texture_from_memobj(struct pipe_screen *screen,
+const struct pipe_resource *templ,
+struct pipe_memory_object *_memobj,
+uint64_t offset)
+{
+   int r;
+   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
+   struct r600_memory_object *memobj = (struct r600_memory_object 
*)_memobj;
+   struct r600_texture *rtex;
+   struct radeon_surf surface;
+   struct radeon_bo_metadata metadata = {};
+   unsigned array_mode;
+
+   if (memobj->b.dedicated) {
+   rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
+
+   surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
+   surface.u.legacy.bankw = metadata.u.legacy.bankw;
+   surface.u.legacy.bankh = metadata.u.legacy.bankh;
+   surface.u.legacy.tile_split = metadata.u.legacy.tile_split;
+   surface.u.legacy.mtilea = metadata.u.legacy.mtilea;
+   surface.u.legacy.num_banks = metadata.u.legacy.num_banks;
+
+   if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED)
+   array_mode = RADEON_SURF_MODE_2D;
+   else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED)
+   array_mode = RADEON_SURF_MODE_1D;
+   else
+   array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+   } else {
+   /**
+* The bo metadata is unset for un-dedicated images. So we fall
+* back to linear. See answer to question 5 of the
+* VK_KHX_external_memory spec for some details.
+*
+* It is possible that this case isn't going to work if the
+* surface pitch isn't correctly aligned by default.
+*
+* In order to support it correctly we require multi-image
+* metadata to be syncrhonized between radv and radeonsi. The
+* semantics of associating multiple image metadata to a memory
+* object on the vulkan export side are not concretely defined
+* either.
+*
+* All the use cases we are aware of at the moment for memory
+* objects use dedicated allocations. So lets keep the initial
+* implementation simple.
+*
+* A possible alternative is to attempt to reconstruct the
+* tiling information when the TexParameter TEXTURE_TILING_EXT
+* is set.
+*/
+   array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
+
+   }

[Mesa-dev] [PATCH 11/25] mesa: implement memory objects as a backend for buffer objects

2017-07-06 Thread Andres Rodriguez
Use a memory object instead of user memory.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h   | 12 +
 src/mesa/state_tracker/st_cb_bufferobjects.c | 66 +---
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index ec7a195..27c6efc 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1096,6 +1096,18 @@ struct dd_function_table {
  struct gl_memory_object 
*mem_obj,
  GLsizei levels, GLsizei 
width, GLsizei height,
  GLsizei depth, GLuint64 
offset);
+
+   /**
+* Use a memory object as the backing data for a buffer object
+*/
+   GLboolean (*BufferDataMem)(struct gl_context *ctx,
+  GLenum target,
+  GLsizeiptrARB size,
+  struct gl_memory_object *memObj,
+  GLuint64 offset,
+  GLenum usage,
+  struct gl_buffer_object *bufObj);
+
/*@}*/
 
/**
diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index 68ec250..940c65d 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -40,6 +40,7 @@
 
 #include "st_context.h"
 #include "st_cb_bufferobjects.h"
+#include "st_cb_memoryobjects.h"
 #include "st_debug.h"
 
 #include "pipe/p_context.h"
@@ -163,27 +164,22 @@ st_bufferobj_get_subdata(struct gl_context *ctx,
 offset, size, data);
 }
 
-
-/**
- * Allocate space for and store data in a buffer object.  Any data that was
- * previously stored in the buffer object is lost.  If data is NULL,
- * memory will be allocated, but no copy will occur.
- * Called via ctx->Driver.BufferData().
- * \return GL_TRUE for success, GL_FALSE if out of memory
- */
-static GLboolean
-st_bufferobj_data(struct gl_context *ctx,
-  GLenum target,
-  GLsizeiptrARB size,
-  const void * data,
-  GLenum usage,
-  GLbitfield storageFlags,
-  struct gl_buffer_object *obj)
+static ALWAYS_INLINE GLboolean
+bufferobj_data(struct gl_context *ctx,
+   GLenum target,
+   GLsizeiptrARB size,
+   const void * data,
+   struct gl_memory_object *memObj,
+   GLuint64 offset,
+   GLenum usage,
+   GLbitfield storageFlags,
+   struct gl_buffer_object *obj)
 {
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
struct pipe_screen *screen = pipe->screen;
struct st_buffer_object *st_obj = st_buffer_object(obj);
+   struct st_memory_object *st_mem_obj = st_memory_object(memObj);
unsigned bind, pipe_usage, pipe_flags = 0;
 
if (target != GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD &&
@@ -317,7 +313,12 @@ st_bufferobj_data(struct gl_context *ctx,
   buffer.depth0 = 1;
   buffer.array_size = 1;
 
-  if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
+  if (st_mem_obj) {
+ st_obj->buffer = screen->resource_from_memobj(screen, &buffer,
+   st_mem_obj->memory,
+   offset);
+  }
+  else if (target == GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD) {
  st_obj->buffer =
 screen->resource_from_user_memory(screen, &buffer, (void*)data);
   }
@@ -352,6 +353,36 @@ st_bufferobj_data(struct gl_context *ctx,
return GL_TRUE;
 }
 
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+st_bufferobj_data(struct gl_context *ctx,
+  GLenum target,
+  GLsizeiptrARB size,
+  const void * data,
+  GLenum usage,
+  GLbitfield storageFlags,
+  struct gl_buffer_object *obj)
+{
+   return bufferobj_data(ctx, target, size, data, NULL, 0, usage, 
storageFlags, obj);
+}
+
+static GLboolean
+st_bufferobj_data_mem(struct gl_context *ctx,
+  GLenum target,
+  GLsizeiptrARB size,
+  struct gl_memory_object *memObj,
+  GLuint64 offset,
+  GLenum usage,
+  struct gl_buffer_object *bufObj)
+{
+   return bufferobj_data(ctx, target, size, NULL, memObj, offset, usage, 0, 
bufObj);
+}
 
 /**
  * Called via glInvalidateBuffer(Sub)Data.
@@ -586,6 +617,7 @@ 

[Mesa-dev] [PATCH 10/25] mesa: fix inconsistent indentation of st_cb_bufferobjects.c

2017-07-06 Thread Andres Rodriguez
No changes, just re-indent.

Signed-off-by: Andres Rodriguez 
---
 src/mesa/state_tracker/st_cb_bufferobjects.c | 36 ++--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c 
b/src/mesa/state_tracker/st_cb_bufferobjects.c
index c1994d5..68ec250 100644
--- a/src/mesa/state_tracker/st_cb_bufferobjects.c
+++ b/src/mesa/state_tracker/st_cb_bufferobjects.c
@@ -96,9 +96,9 @@ st_bufferobj_free(struct gl_context *ctx, struct 
gl_buffer_object *obj)
  */
 static void
 st_bufferobj_subdata(struct gl_context *ctx,
-GLintptrARB offset,
-GLsizeiptrARB size,
-const void * data, struct gl_buffer_object *obj)
+ GLintptrARB offset,
+ GLsizeiptrARB size,
+ const void * data, struct gl_buffer_object *obj)
 {
struct st_buffer_object *st_obj = st_buffer_object(obj);
 
@@ -130,8 +130,8 @@ st_bufferobj_subdata(struct gl_context *ctx,
 * buffer directly.
 */
pipe_buffer_write(st_context(ctx)->pipe,
-st_obj->buffer,
-offset, size, data);
+ st_obj->buffer,
+ offset, size, data);
 }
 
 
@@ -173,12 +173,12 @@ st_bufferobj_get_subdata(struct gl_context *ctx,
  */
 static GLboolean
 st_bufferobj_data(struct gl_context *ctx,
- GLenum target,
- GLsizeiptrARB size,
- const void * data,
- GLenum usage,
+  GLenum target,
+  GLsizeiptrARB size,
+  const void * data,
+  GLenum usage,
   GLbitfield storageFlags,
- struct gl_buffer_object *obj)
+  struct gl_buffer_object *obj)
 {
struct st_context *st = st_context(ctx);
struct pipe_context *pipe = st->pipe;
@@ -263,7 +263,7 @@ st_bufferobj_data(struct gl_context *ctx,
   case GL_STATIC_DRAW:
   case GL_STATIC_COPY:
   default:
-pipe_usage = PIPE_USAGE_DEFAULT;
+ pipe_usage = PIPE_USAGE_DEFAULT;
  break;
   case GL_DYNAMIC_DRAW:
   case GL_DYNAMIC_COPY:
@@ -420,7 +420,7 @@ st_bufferobj_map_range(struct gl_context *ctx,
   flags |= PIPE_TRANSFER_COHERENT;
 
/* ... other flags ...
-*/
+   */
 
if (access & MESA_MAP_NOWAIT_BIT)
   flags |= PIPE_TRANSFER_DONTBLOCK;
@@ -431,10 +431,10 @@ st_bufferobj_map_range(struct gl_context *ctx,
assert(offset + length <= obj->Size);
 
obj->Mappings[index].Pointer = pipe_buffer_map_range(pipe,
-st_obj->buffer,
-offset, length,
-flags,
-_obj->transfer[index]);
+st_obj->buffer,
+offset, length,
+flags,
+
_obj->transfer[index]);
if (obj->Mappings[index].Pointer) {
   obj->Mappings[index].Offset = offset;
   obj->Mappings[index].Length = length;
@@ -556,8 +556,8 @@ st_clear_buffer_subdata(struct gl_context *ctx,
  */
 void
 st_bufferobj_validate_usage(struct st_context *st,
-   struct st_buffer_object *obj,
-   unsigned usage)
+struct st_buffer_object *obj,
+unsigned usage)
 {
 }
 
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/25] mesa/st: implement memory objects as a backend for texture storage v2

2017-07-06 Thread Andres Rodriguez
From: Dave Airlie 

Instead of allocating memory to back a texture, use the provided memory
object.

v2: split off extension exposure logic

Signed-off-by: Andres Rodriguez 
---
 src/mesa/state_tracker/st_cb_texture.c | 123 +
 1 file changed, 123 insertions(+)

diff --git a/src/mesa/state_tracker/st_cb_texture.c 
b/src/mesa/state_tracker/st_cb_texture.c
index 1847cc3..b59d9b2 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -53,6 +53,7 @@
 #include "state_tracker/st_cb_flush.h"
 #include "state_tracker/st_cb_texture.h"
 #include "state_tracker/st_cb_bufferobjects.h"
+#include "state_tracker/st_cb_memoryobjects.h"
 #include "state_tracker/st_format.h"
 #include "state_tracker/st_pbo.h"
 #include "state_tracker/st_texture.h"
@@ -2902,6 +2903,125 @@ st_TexParameter(struct gl_context *ctx,
}
 }
 
+/**
+ * Allocate a new pipe_resource object
+ * width0, height0, depth0 are the dimensions of the level 0 image
+ * (the highest resolution).  last_level indicates how many mipmap levels
+ * to allocate storage for.  For non-mipmapped textures, this will be zero.
+ */
+static struct pipe_resource *
+st_texture_create_memory(struct st_context *st,
+ struct st_memory_object *memObj,
+ GLuint64 offset,
+ enum pipe_texture_target target,
+ enum pipe_format format,
+ GLuint last_level,
+ GLuint width0,
+ GLuint height0,
+ GLuint depth0,
+ GLuint layers,
+ GLuint nr_samples,
+ GLuint bind )
+{
+   struct pipe_resource pt, *newtex;
+   struct pipe_screen *screen = st->pipe->screen;
+
+   assert(target < PIPE_MAX_TEXTURE_TYPES);
+   assert(width0 > 0);
+   assert(height0 > 0);
+   assert(depth0 > 0);
+   if (target == PIPE_TEXTURE_CUBE)
+  assert(layers == 6);
+
+   DBG("%s target %d format %s last_level %d\n", __func__,
+   (int) target, util_format_name(format), last_level);
+
+   assert(format);
+   assert(screen->is_format_supported(screen, format, target, 0,
+  PIPE_BIND_SAMPLER_VIEW));
+
+   memset(&pt, 0, sizeof(pt));
+   pt.target = target;
+   pt.format = format;
+   pt.last_level = last_level;
+   pt.width0 = width0;
+   pt.height0 = height0;
+   pt.depth0 = depth0;
+   pt.array_size = layers;
+   pt.usage = PIPE_USAGE_DEFAULT;
+   pt.bind = bind;
+   /* only set this for OpenGL textures, not renderbuffers */
+   pt.flags = PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY;
+   pt.nr_samples = nr_samples;
+
+   newtex = screen->resource_from_memobj(screen, &pt, memObj->memory, offset);
+
+   assert(!newtex || pipe_is_referenced(&newtex->reference));
+
+   return newtex;
+}
+
+
+static bool
+st_SetTextureStorageForMemoryObject(struct gl_context *ctx,
+struct gl_texture_object *texObj,
+struct gl_memory_object *memObj,
+GLsizei levels, GLsizei width,
+GLsizei height, GLsizei depth,
+GLuint64 offset)
+{
+   struct st_context *st = st_context(ctx);
+   struct gl_texture_image *texImage = texObj->Image[0][0];
+   struct st_texture_object *stObj = st_texture_object(texObj);
+   struct st_memory_object *smObj = st_memory_object(memObj);
+   struct pipe_screen *screen = st->pipe->screen;
+   unsigned ptWidth;
+   uint16_t ptHeight, ptDepth, ptLayers;
+   GLuint bindings;
+   enum pipe_format fmt;
+   GLuint num_samples = texImage->NumSamples;
+
+   stObj->lastLevel = levels - 1;
+
+   fmt = st_mesa_format_to_pipe_format(st, texImage->TexFormat);
+
+   bindings = default_bindings(st, fmt);
+
+   /* Raise the sample count if the requested one is unsupported. */
+   if (num_samples > 1) {
+  GLboolean found = GL_FALSE;
+
+  for (; num_samples <= ctx->Const.MaxSamples; num_samples++) {
+ if (screen->is_format_supported(screen, fmt, PIPE_TEXTURE_2D,
+ num_samples,
+ PIPE_BIND_SAMPLER_VIEW)) {
+/* Update the sample count in gl_texture_image as well. */
+texImage->NumSamples = num_samples;
+found = GL_TRUE;
+break;
+ }
+  }
+
+  if (!found)
+ return GL_FALSE;
+   }
+
+   st_gl_texture_dims_to_pipe_dims(texObj->Target,
+   width, height, depth,
+   &ptWidth, &ptHeight, &ptDepth, &ptLayers);
+   stObj->pt = st_texture_create_memory(st,
+smObj,
+offset,
+gl_target_to_pipe(texObj->Target),
+fmt,
+ 

[Mesa-dev] [PATCH 06/25] mesa: hook up memoryobject tex(ture)storage api

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/dd.h  |  9 
 src/mesa/main/externalobjects.c | 93 -
 src/mesa/main/texstorage.c  | 76 -
 src/mesa/main/texstorage.h  | 13 +-
 4 files changed, 160 insertions(+), 31 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 97ef5b8..ec7a195 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1087,6 +1087,15 @@ struct dd_function_table {
 */
void (*DeleteMemoryObject)(struct gl_context *ctx,
   struct gl_memory_object *mem_obj);
+
+   /**
+* Set the given memory object as the texture's storage.
+*/
+   GLboolean (*SetTextureStorageForMemoryObject)(struct gl_context *ctx,
+ struct gl_texture_object 
*tex_obj,
+ struct gl_memory_object 
*mem_obj,
+ GLsizei levels, GLsizei 
width, GLsizei height,
+ GLsizei depth, GLuint64 
offset);
/*@}*/
 
/**
diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index edcb5bf..d083ad9 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -24,6 +24,10 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "externalobjects.h"
+#include "teximage.h"
+#include "texobj.h"
+#include "glformats.h"
+#include "texstorage.h"
 
 /**
  * Allocate and initialize a new memory object.  But don't put it into the
@@ -229,6 +233,75 @@ invalid_pname:
pname);
 }
 
+/**
+ * Helper used by _mesa_TexStorageMem1/2/3DEXT().
+ */
+static void
+texstorage_memory(GLuint dims, GLenum target, GLsizei levels, GLenum 
internalFormat,
+  GLsizei width, GLsizei height, GLsizei depth, GLuint memory,
+  GLuint64 offset)
+{
+   struct gl_texture_object *texObj;
+   struct gl_memory_object *memObj;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   if (!texObj)
+  return;
+
+   memObj = _mesa_lookup_memory_object(ctx, memory);
+   if (!memObj)
+  return;
+
+   _mesa_texture_storage_memory(ctx, dims, texObj, memObj, target,
+levels, internalFormat,
+width, height, depth, offset, false);
+}
+
+static void
+texstorage_memory_ms(GLuint dims, GLenum target, GLsizei samples, GLenum 
internalFormat,
+ GLsizei width, GLsizei height, GLsizei depth, GLboolean 
fixedSampleLocations,
+ GLuint memory, GLuint64 offset)
+{
+
+}
+
+/**
+ * Helper used by _mesa_TextureStorageMem1/2/3DEXT().
+ */
+static void
+texturestorage_memory(GLuint dims, GLuint texture, GLsizei levels,
+  GLenum internalFormat, GLsizei width, GLsizei height,
+  GLsizei depth, GLuint memory, GLuint64 offset)
+{
+   struct gl_texture_object *texObj;
+   struct gl_memory_object *memObj;
+
+   GET_CURRENT_CONTEXT(ctx);
+
+   texObj = _mesa_lookup_texture(ctx, texture);
+   if (!texObj)
+  return;
+
+   memObj = _mesa_lookup_memory_object(ctx, memory);
+   if (!memObj)
+  return;
+
+   _mesa_texture_storage_memory(ctx, dims, texObj, memObj, texObj->Target,
+levels, internalFormat,
+width, height, depth, offset, true);
+}
+
+static void
+texturestorage_memory_ms(GLuint dims, GLuint texture, GLsizei samples,
+ GLenum internalFormat, GLsizei width, GLsizei height,
+ GLsizei depth, GLboolean fixedSampleLocations,
+ GLuint memory, GLuint64 offset)
+{
+
+}
+
 void GLAPIENTRY
 _mesa_TexStorageMem2DEXT(GLenum target,
  GLsizei levels,
@@ -238,7 +311,7 @@ _mesa_TexStorageMem2DEXT(GLenum target,
  GLuint memory,
  GLuint64 offset)
 {
-
+   texstorage_memory(2, target, levels, internalFormat, width, height, 1, 
memory, offset);
 }
 
 void GLAPIENTRY
@@ -251,7 +324,7 @@ _mesa_TexStorageMem2DMultisampleEXT(GLenum target,
 GLuint memory,
 GLuint64 offset)
 {
-
+   texstorage_memory_ms(2, target, samples, internalFormat, width, height, 1, 
fixedSampleLocations, memory, offset);
 }
 
 void GLAPIENTRY
@@ -264,7 +337,7 @@ _mesa_TexStorageMem3DEXT(GLenum target,
  GLuint memory,
  GLuint64 offset)
 {
-
+   texstorage_memory(3, target, levels, internalFormat, width, height, depth, 
memory, offset);
 }
 
 void GLAPIENTRY
@@ -278,7 +351,7 @@ _mesa_TexStorageMem3DMultisampleEXT(GLenum target,
 GLuint memory,
 GLuint64 offset)
 {
-
+   texstorage_memory_ms(3, 

[Mesa-dev] [PATCH 05/25] mesa/st: start adding memory object support v2

2017-07-06 Thread Andres Rodriguez
From: Dave Airlie 

v2: pass dedicated flag

Signed-off-by: Andres Rodriguez 
---
 src/mesa/Makefile.sources|  2 +
 src/mesa/state_tracker/st_cb_memoryobjects.c | 66 
 src/mesa/state_tracker/st_cb_memoryobjects.h | 25 +++
 src/mesa/state_tracker/st_context.c  |  2 +
 4 files changed, 95 insertions(+)
 create mode 100644 src/mesa/state_tracker/st_cb_memoryobjects.c
 create mode 100644 src/mesa/state_tracker/st_cb_memoryobjects.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 2ee1d2f..2e4b04c 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -467,6 +467,8 @@ STATETRACKER_FILES = \
state_tracker/st_cb_feedback.h \
state_tracker/st_cb_flush.c \
state_tracker/st_cb_flush.h \
+   state_tracker/st_cb_memoryobjects.c \
+   state_tracker/st_cb_memoryobjects.h \
state_tracker/st_cb_msaa.c \
state_tracker/st_cb_msaa.h \
state_tracker/st_cb_perfmon.c \
diff --git a/src/mesa/state_tracker/st_cb_memoryobjects.c 
b/src/mesa/state_tracker/st_cb_memoryobjects.c
new file mode 100644
index 000..219cf30
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_memoryobjects.c
@@ -0,0 +1,66 @@
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "main/externalobjects.h"
+
+#include "st_context.h"
+#include "st_cb_memoryobjects.h"
+
+#include "state_tracker/drm_driver.h"
+#include "pipe/p_context.h"
+#include "pipe/p_screen.h"
+
+static struct gl_memory_object *
+st_memoryobj_alloc(struct gl_context *ctx, GLuint name)
+{
+   struct st_memory_object *st_obj = ST_CALLOC_STRUCT(st_memory_object);
+   if (!st_obj)
+  return NULL;
+
+   _mesa_initialize_memory_object(ctx, &st_obj->Base, name);
+   return &st_obj->Base;
+}
+
+static void
+st_memoryobj_free(struct gl_context *ctx,
+  struct gl_memory_object *obj)
+{
+   _mesa_delete_memory_object(ctx, obj);
+}
+
+
+static void
+st_import_memoryobj_fd(struct gl_context *ctx,
+   struct gl_memory_object *obj,
+   GLuint64 size,
+   int fd)
+{
+   struct st_memory_object *st_obj = st_memory_object(obj);
+   struct st_context *st = st_context(ctx);
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct winsys_handle whandle;
+
+   whandle.type = DRM_API_HANDLE_TYPE_FD;
+   whandle.handle = fd;
+   whandle.offset = 0;
+   whandle.layer = 0;
+   whandle.stride = 0;
+
+   st_obj->memory = screen->memobj_create_from_handle(screen,
+  &whandle,
+  obj->Dedicated);
+
+   /* We own fd, but we no longer need it. So get rid of it */
+   close(fd);
+}
+
+void
+st_init_memoryobject_functions(struct dd_function_table *functions)
+{
+   _mesa_init_memory_object_functions(functions);
+
+   functions->NewMemoryObject = st_memoryobj_alloc;
+   functions->DeleteMemoryObject = st_memoryobj_free;
+   functions->ImportMemoryObjectFd = st_import_memoryobj_fd;
+}
diff --git a/src/mesa/state_tracker/st_cb_memoryobjects.h 
b/src/mesa/state_tracker/st_cb_memoryobjects.h
new file mode 100644
index 000..6606516
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_memoryobjects.h
@@ -0,0 +1,25 @@
+#ifndef ST_CB_MEMORYOBJECTS_H
+#define ST_CB_MEMORYOBJECTS_H
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+
+struct dd_function_table;
+struct pipe_screen;
+
+struct st_memory_object
+{
+   struct gl_memory_object Base;
+   struct pipe_memory_object *memory;
+};
+
+static inline struct st_memory_object *
+st_memory_object(struct gl_memory_object *obj)
+{
+   return (struct st_memory_object *)obj;
+}
+
+extern void
+st_init_memoryobject_functions(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/state_tracker/st_context.c 
b/src/mesa/state_tracker/st_context.c
index f535139..a846be3 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -53,6 +53,7 @@
 #include "st_cb_eglimage.h"
 #include "st_cb_fbo.h"
 #include "st_cb_feedback.h"
+#include "st_cb_memoryobjects.h"
 #include "st_cb_msaa.h"
 #include "st_cb_perfmon.h"
 #include "st_cb_program.h"
@@ -660,6 +661,7 @@ void st_init_driver_functions(struct pipe_screen *screen,
 
st_init_fbo_functions(functions);
st_init_feedback_functions(functions);
+   st_init_memoryobject_functions(functions);
st_init_msaa_functions(functions);
st_init_perfmon_functions(functions);
st_init_program_functions(functions);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/25] mapi: add EXT_external_objects and EXT_external_objects_fd

2017-07-06 Thread Andres Rodriguez
Includes implementation stubs.

Signed-off-by: Andres Rodriguez 
---
 src/mapi/glapi/gen/EXT_external_objects.xml| 234 +
 src/mapi/glapi/gen/EXT_external_objects_fd.xml |  28 +++
 src/mapi/glapi/gen/Makefile.am |   2 +
 src/mapi/glapi/gen/gl_API.xml  |   3 +
 src/mapi/glapi/gen/gl_genexec.py   |   1 +
 src/mesa/Makefile.sources  |   2 +
 src/mesa/main/externalobjects.c| 275 +
 src/mesa/main/externalobjects.h| 212 +++
 src/mesa/main/get.c|  12 ++
 src/mesa/main/get.h|   6 +
 src/mesa/main/tests/dispatch_sanity.cpp|  32 +++
 11 files changed, 807 insertions(+)
 create mode 100644 src/mapi/glapi/gen/EXT_external_objects.xml
 create mode 100644 src/mapi/glapi/gen/EXT_external_objects_fd.xml
 create mode 100644 src/mesa/main/externalobjects.c
 create mode 100644 src/mesa/main/externalobjects.h

diff --git a/src/mapi/glapi/gen/EXT_external_objects.xml 
b/src/mapi/glapi/gen/EXT_external_objects.xml
new file mode 100644
index 000..7c4fd6f
--- /dev/null
+++ b/src/mapi/glapi/gen/EXT_external_objects.xml
@@ -0,0 +1,234 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/EXT_external_objects_fd.xml 
b/src/mapi/glapi/gen/EXT_external_objects_fd.xml
new file mode 100644
index 000..6eaa3f5
--- /dev/null
+++ b/src/mapi/glapi/gen/EXT_external_objects_fd.xml
@@ -0,0 +1,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/mapi/glapi/gen/Makefile.am b/src/mapi/glapi/gen/Makefile.am
index bd04519..7f4a469 100644
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -192,6 +192,8 @@ API_XML = \
APPLE_object_purgeable.xml \
APPLE_vertex_array_object.xml \
EXT_draw_buffers2.xml \
+   EXT_external_objects.xml \
+   EXT_external_objects_fd.xml \
EXT_framebuffer_object.xml \
EXT_gpu_shader4.xml \
EXT_packed_depth_stencil.xml \
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 18839ec..6084518 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -13060,6 +13060,9 @@
 
 http://www.w3.org/2001/XInclude"/>
 
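+<xi:include href="EXT_external_objects.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+<xi:include href="EXT_external_objects_fd.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>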
+
 http://www.w3.org/2001/XInclude"/>
 
 
diff --git a/src/mapi/glapi/gen/gl_genexec.py b/src/mapi/glapi/gen/gl_genexec.py
index 57e155b..b7b2232 100644
--- a/src/mapi/glapi/gen/gl_genexec.py
+++ b/src/mapi/glapi/gen/gl_genexec.py
@@ -75,6 +75,7 @@ header = """/**
 #include "main/errors.h"
 #include "main/es1_conversion.h"
 #include "main/eval.h"
+#include "main/externalobjects.h"
 #include "main/get.h"
 #include "main/feedback.h"
 #include "main/fog.h"
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 86fbf39..2ee1d2f 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -85,6 +85,8 @@ MAIN_FILES = \
main/extensions.h \
main/extensions_table.c \
main/extensions_table.h \
+   main/externalobjects.c \
+   

[Mesa-dev] [PATCH 04/25] gallium: introduce memory object v2

2017-07-06 Thread Andres Rodriguez
From: Dave Airlie 

v2: fix comment regarding fd ownership, define pipe_memory_object
---
 src/gallium/drivers/ddebug/dd_screen.c | 40 ++
 src/gallium/include/pipe/p_screen.h| 36 ++
 src/gallium/include/pipe/p_state.h |  8 +++
 3 files changed, 84 insertions(+)

diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index a5d2be1..fe9c841 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -245,6 +245,22 @@ dd_screen_resource_from_user_memory(struct pipe_screen 
*_screen,
return res;
 }
 
+static struct pipe_resource *
+dd_screen_resource_from_memobj(struct pipe_screen *_screen,
+   const struct pipe_resource *templ,
+   struct pipe_memory_object *memobj,
+   uint64_t offset)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+   struct pipe_resource *res =
+  screen->resource_from_memobj(screen, templ, memobj, offset);
+
+   if (!res)
+  return NULL;
+   res->screen = _screen;
+   return res;
+}
+
 static void
 dd_screen_resource_changed(struct pipe_screen *_screen,
struct pipe_resource *res)
@@ -303,7 +319,28 @@ dd_screen_fence_finish(struct pipe_screen *_screen,
return screen->fence_finish(screen, ctx, fence, timeout);
 }
 
+/********************************************************************
+ * memobj
+ */
+
+static struct pipe_memory_object *
+dd_screen_memobj_create_from_handle(struct pipe_screen *_screen,
+struct winsys_handle *handle,
+bool dedicated)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->memobj_create_from_handle(screen, handle, dedicated);
+}
+
+static void
+dd_screen_memobj_destroy(struct pipe_screen *_screen,
+ struct pipe_memory_object *memobj)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
 
+   return screen->memobj_destroy(screen, memobj);
+}
 /
  * screen
  */
@@ -412,6 +449,7 @@ ddebug_screen_create(struct pipe_screen *screen)
SCR_INIT(can_create_resource);
dscreen->base.resource_create = dd_screen_resource_create;
dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
+   SCR_INIT(resource_from_memobj);
SCR_INIT(resource_from_user_memory);
dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
SCR_INIT(resource_changed);
@@ -419,6 +457,8 @@ ddebug_screen_create(struct pipe_screen *screen)
SCR_INIT(flush_frontbuffer);
SCR_INIT(fence_reference);
SCR_INIT(fence_finish);
+   SCR_INIT(memobj_create_from_handle);
+   SCR_INIT(memobj_destroy);
SCR_INIT(get_driver_query_info);
SCR_INIT(get_driver_query_group_info);
SCR_INIT(get_compiler_options);
diff --git a/src/gallium/include/pipe/p_screen.h 
b/src/gallium/include/pipe/p_screen.h
index 65e954a..d81625c 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -357,6 +357,42 @@ struct pipe_screen {
   enum pipe_format format, int max,
   uint64_t *modifiers,
   unsigned int *external_only, int *count);
+
+   /**
+* Create a memory object from a winsys handle
+*
+* The underlying memory is most often allocated by a foreign API.
+* The underlying memory object is then exported through interfaces
+* compatible with EXT_external_objects.
+*
+* Note: For DRM_API_HANDLE_TYPE_FD handles, the caller retains ownership
+* of the fd.
+*
+* \param handle  A handle representing the memory object to import
+*/
+   struct pipe_memory_object *(*memobj_create_from_handle)(struct pipe_screen 
*screen,
+   struct 
winsys_handle *handle,
+   bool dedicated);
+
+   /**
+* Destroy a memory object
+*
+* \param memobj  The memory object to destroy
+*/
+   void (*memobj_destroy)(struct pipe_screen *screen,
+  struct pipe_memory_object *memobj);
+
+   /**
+* Create a texture from a memory object
+*
+* \param t   texture template
+* \param memobj  The memory object used to back the texture
+*/
+   struct pipe_resource * (*resource_from_memobj)(struct pipe_screen *screen,
+  const struct pipe_resource 
*t,
+  struct pipe_memory_object 
*memobj,
+  uint64_t offset);
+
 };
 
 
diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h

[Mesa-dev] [PATCH 03/25] mesa: add support for memory object parameters

2017-07-06 Thread Andres Rodriguez
Signed-off-by: Andres Rodriguez 
---
 src/mesa/main/externalobjects.c | 54 -
 src/mesa/main/mtypes.h  |  5 +++-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index 2ae3f0b..edcb5bf 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -72,6 +72,8 @@ _mesa_initialize_memory_object(struct gl_context *ctx,
 {
memset(obj, 0, sizeof(struct gl_memory_object));
obj->Name = name;
+   obj->Dedicated = GL_FALSE;
+   obj->Protected = GL_FALSE;
 }
 
 void GLAPIENTRY
@@ -143,7 +145,6 @@ _mesa_CreateMemoryObjectsEXT(GLsizei n, GLuint 
*memoryObjects)
   GLsizei i;
   for (i = 0; i < n; i++) {
  struct gl_memory_object *memObj;
-
  memoryObjects[i] = first + i;
 
  /* allocate memory object */
@@ -167,7 +168,35 @@ _mesa_MemoryObjectParameterivEXT(GLuint memoryObject,
  GLenum pname,
  const GLint *params)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_memory_object *memObj;
+
+   memObj = _mesa_lookup_memory_object(ctx, memoryObject);
+   if (!memObj)
+  return;
+
+   if (memObj->Immutable) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glMemoryObjectParameterivEXT(memoryObject is immutable");
+  return;
+   }
 
+   switch (pname) {
+   case GL_DEDICATED_MEMORY_OBJECT_EXT:
+  memObj->Dedicated = (GLboolean) params[0];
+  break;
+   case GL_PROTECTED_MEMORY_OBJECT_EXT:
+  /* EXT_protected_textures not supported */
+  goto invalid_pname;
+   default:
+  goto invalid_pname;
+   }
+   return;
+
+invalid_pname:
+   _mesa_error(ctx, GL_INVALID_ENUM,
+   "glMemoryObjectParameterivEXT(pname=0x%x)",
+   pname);
 }
 
 void GLAPIENTRY
@@ -175,7 +204,29 @@ _mesa_GetMemoryObjectParameterivEXT(GLuint memoryObject,
 GLenum pname,
 GLint *params)
 {
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_memory_object *memObj;
+
+   memObj = _mesa_lookup_memory_object(ctx, memoryObject);
+   if (!memObj)
+  return;
+
+   switch (pname) {
+  case GL_DEDICATED_MEMORY_OBJECT_EXT:
+ *params = (GLint) memObj->Dedicated;
+ break;
+  case GL_PROTECTED_MEMORY_OBJECT_EXT:
+ /* EXT_protected_textures not supported */
+ goto invalid_pname;
+  default:
+ goto invalid_pname;
+   }
+   return;
 
+invalid_pname:
+   _mesa_error(ctx, GL_INVALID_ENUM,
+   "glMemoryObjectParameterivEXT(pname=0x%x)",
+   pname);
 }
 
 void GLAPIENTRY
@@ -403,6 +454,7 @@ _mesa_ImportMemoryFdEXT(GLuint memory,
}
 
ctx->Driver.ImportMemoryObjectFd(ctx, memObj, size, fd);
+   memObj->Immutable = GL_TRUE;
 }
 
 void GLAPIENTRY
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index a720b07..8dcc1a8 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4656,7 +4656,10 @@ struct gl_image_handle_object
 
 struct gl_memory_object
 {
-   GLuint Name;  /**< hash table ID/name */
+   GLuint Name;/**< hash table ID/name */
+   GLboolean Immutable;/**< denotes mutability state of parameters */
+   GLboolean Dedicated;/**< import memory from a dedicated allocation */
+   GLboolean Protected;/**< import memory from a protected allocation */
 };
 
 /**
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] Initial support for EXT_external_objects v2

2017-07-06 Thread Andres Rodriguez
This series is an initial step towards the implementation of
EXT_external_objects. It implements the functionality under
EXT_memory_object and EXT_memory_object_fd. 

This updated version of the series has the following changes:

 * Re-worked UUIDs to be provided by the gallium driver
 * Use a PIPE_CAP to gate the exposure of the extension
 * Add a comment for the non-dedicated memobj path
 * Fixed radeonsi and radv producing different driver UUIDs

Regards,
Andres

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/25] mesa: add support for memory object creation/import/delete

2017-07-06 Thread Andres Rodriguez
Used by EXT_external_objects and EXT_external_objects_fd

Signed-off-by: Andres Rodriguez 
---
 src/mesa/drivers/common/driverfuncs.c |   4 +
 src/mesa/main/dd.h|  36 +
 src/mesa/main/externalobjects.c   | 145 +-
 src/mesa/main/externalobjects.h   |  28 +++
 src/mesa/main/mtypes.h|   9 +++
 src/mesa/main/shared.c|  15 
 6 files changed, 234 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/common/driverfuncs.c 
b/src/mesa/drivers/common/driverfuncs.c
index 5008ae8..ddb4bb6 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -49,6 +49,7 @@
 #include "main/syncobj.h"
 #include "main/barrier.h"
 #include "main/transformfeedback.h"
+#include "main/externalobjects.h"
 
 #include "program/program.h"
 #include "tnl/tnl.h"
@@ -166,6 +167,9 @@ _mesa_init_driver_functions(struct dd_function_table 
*driver)
 
_mesa_init_sync_object_functions(driver);
 
+   /* memory objects */
+   _mesa_init_memory_object_functions(driver);
+
driver->NewFramebuffer = _mesa_new_framebuffer;
driver->NewRenderbuffer = _swrast_new_soft_renderbuffer;
driver->MapRenderbuffer = _swrast_map_soft_renderbuffer;
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 8e382e1..97ef5b8 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -1069,6 +1069,42 @@ struct dd_function_table {
void (*MakeImageHandleResident)(struct gl_context *ctx, GLuint64 handle,
GLenum access, bool resident);
/*@}*/
+
+
+   /**
+* \name GL_EXT_external_objects interface
+*/
+   /*@{*/
+  /**
+* Called to allocate a new memory object.  Drivers will usually
+* allocate/return a subclass of gl_memory_object.
+*/
+   struct gl_memory_object * (*NewMemoryObject)(struct gl_context *ctx,
+GLuint name);
+   /**
+* Called to delete/free a memory object.  Drivers should free the
+* object and any image data it contains.
+*/
+   void (*DeleteMemoryObject)(struct gl_context *ctx,
+  struct gl_memory_object *mem_obj);
+   /*@}*/
+
+   /**
+* \name GL_EXT_external_objects_fd interface
+*/
+   /*@{*/
+   /**
+* Called to import a memory object. The caller relinquishes ownership
+* of fd after the call returns.
+*
+* Accessing fd after ImportMemoryObjectFd returns results in undefined
+* behaviour. This is consistent with EXT_external_objects_fd.
+*/
+   void (*ImportMemoryObjectFd)(struct gl_context *ctx,
+struct gl_memory_object *mem_obj,
+GLuint64 size,
+int fd);
+   /*@}*/
 };
 
 
diff --git a/src/mesa/main/externalobjects.c b/src/mesa/main/externalobjects.c
index d6c5109..2ae3f0b 100644
--- a/src/mesa/main/externalobjects.c
+++ b/src/mesa/main/externalobjects.c
@@ -21,24 +21,145 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "macros.h"
+#include "mtypes.h"
 #include "externalobjects.h"
 
+/**
+ * Allocate and initialize a new memory object.  But don't put it into the
+ * memory object hash table.
+ *
+ * Called via ctx->Driver.NewMemoryObject, unless overridden by a device
+ * driver.
+ *
+ * \return pointer to new memory object.
+ */
+static struct gl_memory_object *
+_mesa_new_memory_object(struct gl_context *ctx, GLuint name)
+{
+   struct gl_memory_object *obj;
+   (void) ctx;
+   obj = MALLOC_STRUCT(gl_memory_object);
+
+   _mesa_initialize_memory_object(ctx, obj, name);
+   return obj;
+}
+
+/**
+ * Delete a memory object.  Called via ctx->Driver.DeleteMemoryObject().
+ * Not removed from hash table here.
+ */
+void
+_mesa_delete_memory_object(struct gl_context *ctx, struct gl_memory_object *mo)
+{
+   free(mo);
+}
+
+void
+_mesa_init_memory_object_functions(struct dd_function_table *driver)
+{
+   driver->NewMemoryObject = _mesa_new_memory_object;
+   driver->DeleteMemoryObject = _mesa_delete_memory_object;
+}
+
+/**
+ * Initialize a memory object to default values.
+ */
+void
+_mesa_initialize_memory_object(struct gl_context *ctx,
+   struct gl_memory_object *obj,
+   GLuint name)
+{
+   memset(obj, 0, sizeof(struct gl_memory_object));
+   obj->Name = name;
+}
+
 void GLAPIENTRY
 _mesa_DeleteMemoryObjectsEXT(GLsizei n, const GLuint *memoryObjects)
 {
-
+   GET_CURRENT_CONTEXT(ctx);
+   GLint i;
+
+   if (MESA_VERBOSE & (VERBOSE_API))
+  _mesa_debug(ctx, "glDeleteMemoryObjectsEXT(%d, %p)\n", n, memoryObjects);
+
+   if (n < 0) {
+  _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteMemoryObjects(n < 0)");
+  return;
+   }
+
+   if (!memoryObjects)
+  return;
+
+   _mesa_HashLockMutex(ctx->Shared->MemoryObjects);
+   for (i = 0; i < n; i++) {
+  if (memoryObjects[i] > 0) {
+ struct gl_memory_object *delObj
+= 

[Mesa-dev] [PATCH] loader/dri3: Use dri3_find_back in loader_dri3_swap_buffers_msc

2017-07-06 Thread Michel Dänzer
From: Thomas Hellstrom 

If the application hasn't done any drawing since the last call, we
would reuse the same back buffer which was used for the previous swap,
which may not have completed yet. This could result in various issues
such as tearing or application hangs.

In the normal case, the behaviour is unchanged.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97957
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101683
Cc: mesa-sta...@lists.freedesktop.org

[Michel Dänzer: Make Thomas' fix from bugzilla actually work as
 intended, write commit log]

Signed-off-by: Michel Dänzer 
---
 src/loader/loader_dri3_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 493a7f5218..1c93e7a96d 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -640,7 +640,7 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable 
*draw,
 
draw->vtable->flush_drawable(draw, flush_flags);
 
-   back = draw->buffers[LOADER_DRI3_BACK_ID(draw->cur_back)];
+   back = draw->buffers[dri3_find_back(draw)];
if (draw->is_different_gpu && back) {
   /* Update the linear buffer before presenting the pixmap */
   draw->ext->image->blitImage(dri_context,
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/20] nir: Support lowering vote intrinsics

2017-07-06 Thread Connor Abbott
On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
> ... trivially (as allowed by the spec!) by reusing the existing
> nir_opt_intrinsics code.
> ---
>  src/compiler/nir/nir.h| 4 
>  src/compiler/nir/nir_opt_intrinsics.c | 6 +++---
>  2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 44a1d0887e..401c41f155 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1821,6 +1821,10 @@ typedef struct nir_shader_compiler_options {
> bool lower_extract_byte;
> bool lower_extract_word;
>
> +   bool lower_vote_any;
> +   bool lower_vote_all;
> +   bool lower_vote_eq;

Since there are potentially multiple ways to lower these (voteAny(x)
-> !voteAll(!x), using ballotARB(), etc.), and the way they're lowered
is a little... unexpected (although admittedly legal!), why don't we
use a more descriptive name, like lower_vote_*_trivial? While we're at
it, I highly doubt that an implementation would want this kind of
lowering for just one of the intrinsics, so we can merge this into a
single flag, say lower_vote_trivial.

> +
> /**
>  * Does the driver support real 32-bit integers?  (Otherwise, integers
>  * are simulated by floats.)
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
> b/src/compiler/nir/nir_opt_intrinsics.c
> index b63449b4fe..0cd75d8b28 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -47,7 +47,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   switch (intrin->intrinsic) {
>   case nir_intrinsic_vote_any: {
>  nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
> -if (!val)
> +if (!val || b.shader->options->lower_vote_any)
> continue;
>
>  replacement = nir_imm_int(&b, val->i32[0]);
> @@ -55,7 +55,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   }
>   case nir_intrinsic_vote_all: {
>  nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
> -if (!val)
> +if (!val || b.shader->options->lower_vote_all)
> continue;
>
>  replacement = nir_imm_int(&b, val->i32[0]);
> @@ -63,7 +63,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
>   }
>   case nir_intrinsic_vote_eq: {
>  nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
> -if (!val)
> +if (!val || b.shader->options->lower_vote_eq)
> continue;
>
>  replacement = nir_imm_int(&b, NIR_TRUE);
> --
> 2.13.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/20] nir: Add a ballot32 intrinsic

2017-07-06 Thread Connor Abbott
I've thought about this a little bit, and I think we'd rather just
decrease the bitsize of the intrinsic rather than add a whole new one.
The separate intrinsic isn't really buying you anything, I don't think
it's going to make anything simpler.

On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
> Some hardware, like i965, doesn't support group sizes greater than 32.
> In that case, we can use the ballot32 intrinsic instead, which will
> simplify our code generation.
> ---
>  src/compiler/nir/nir.h|  2 ++
>  src/compiler/nir/nir_intrinsics.h |  3 +++
>  src/compiler/nir/nir_opt_intrinsics.c | 17 +
>  src/intel/compiler/brw_compiler.c |  1 +
>  4 files changed, 23 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 3591048574..a3d8df0b19 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -1844,6 +1844,8 @@ typedef struct nir_shader_compiler_options {
>  */
> bool use_interpolated_input_intrinsics;
>
> +   unsigned max_subgroup_size;
> +
> unsigned max_unroll_iterations;
>  } nir_shader_compiler_options;
>
> diff --git a/src/compiler/nir/nir_intrinsics.h 
> b/src/compiler/nir/nir_intrinsics.h
> index 96ecfbc338..61ad9f6643 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -106,6 +106,9 @@ INTRINSIC(ballot, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMIN
>  INTRINSIC(read_invocation, 2, ARR(0, 1), true, 0, 0, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
>  INTRINSIC(read_first_invocation, 1, ARR(0), true, 0, 0, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
>
> +/* Equivalent to ballot, but only returns the low 32-bits */
> +INTRINSIC(ballot32, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
> +
>  /*
>   * Memory barrier with semantics analogous to the compute shader
>   * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
> diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
> b/src/compiler/nir/nir_opt_intrinsics.c
> index 0cd75d8b28..023e169a99 100644
> --- a/src/compiler/nir/nir_opt_intrinsics.c
> +++ b/src/compiler/nir/nir_opt_intrinsics.c
> @@ -69,6 +69,23 @@ opt_intrinsics_impl(nir_function_impl *impl)
>  replacement = nir_imm_int(&b, NIR_TRUE);
>  break;
>   }
> + case nir_intrinsic_ballot: {
> +assert(b.shader->options->max_subgroup_size != 0);
> +if (b.shader->options->max_subgroup_size > 32)
> +   continue;
> +
> +nir_intrinsic_instr *ballot =
> +   nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot32);
> +nir_ssa_dest_init(&ballot->instr, &ballot->dest, 1, 32, NULL);
> +ballot->src[0] = intrin->src[0];
> +
> +nir_builder_instr_insert(&b, &ballot->instr);
> +
> +replacement = nir_pack_64_2x32_split(&b,
> + &ballot->dest.ssa,
> + nir_imm_int(&b, 0));
> +break;
> + }
>   default:
>  break;
>   }
> diff --git a/src/intel/compiler/brw_compiler.c 
> b/src/intel/compiler/brw_compiler.c
> index 9ad5d8e4ee..6970bcb40f 100644
> --- a/src/intel/compiler/brw_compiler.c
> +++ b/src/intel/compiler/brw_compiler.c
> @@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options 
> scalar_nir_options = {
> .lower_unpack_snorm_4x8 = true,
> .lower_unpack_unorm_2x16 = true,
> .lower_unpack_unorm_4x8 = true,
> +   .max_subgroup_size = 64, /* FIXME */
> .max_unroll_iterations = 32,
>  };
>
> --
> 2.13.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-06 Thread Connor Abbott
On Thu, Jul 6, 2017 at 6:36 PM, Matt Arsenault  wrote:
>
> On Jul 6, 2017, at 18:31, Connor Abbott  wrote:
>
> After looking into it some more, I think LLVM won't promote allocas to
> registers at all when there are non-constant indices in the mix, and
> fixing it seems kinda involved. I guess a better solution for now
>
>
> AMDGPUPromoteAlloca does this, but it doesn’t happen very often

Could we just bump the heuristic on maximum size there to match
radeonsi? That way we wouldn't need to have these heuristics in
radeonsi and radv. As-is, we're second-guessing the backend.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-06 Thread Matt Arsenault

> On Jul 6, 2017, at 18:31, Connor Abbott  wrote:
> 
> After looking into it some more, I think LLVM won't promote allocas to
> registers at all when there are non-constant indices in the mix, and
> fixing it seems kinda involved. I guess a better solution for now

AMDGPUPromoteAlloca does this, but it doesn’t happen very often
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-06 Thread Connor Abbott
On Thu, Jul 6, 2017 at 2:18 PM, Connor Abbott  wrote:
> On Thu, Jul 6, 2017 at 2:01 PM, Bas Nieuwenhuizen
>  wrote:
>> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott  
>> wrote:
>>> From: Connor Abbott 
>>>
>>> The old way was very TGSI-based, and couldn't handle indirect
>>> dereferences at all. Instead, pass through the type information NIR has
>>
>> I think the old code should handle indirect derefs just fine? See the
>> indir_index stuff. I'm kind of worried that LLVM doesn't promote
>> variables with indirect derefs aggressively enough to registers. IIRC
>> it only converts scalar arrays of up to 4 elements to vectors, while
>> radv always promotes and IIRC radeonsi promotes for all variables with
>> something like <= 20 dwords?
>
> Ah, true, I missed that while deleting it :). At the same time, if
> LLVM isn't putting stuff into registers aggressively enough, that's
> something that should be fixed in LLVM. Gathering everything into a
> vector every time something is used, like what this code
> does, is going to make it harder for LLVM to know what stuff aliases
> and optimize accordingly since LLVM won't be able to use its normal
> alias analysis. I don't know if LLVM will be able to optimize that
> pattern as well.

After looking into it some more, I think LLVM won't promote allocas to
registers at all when there are non-constant indices in the mix, and
fixing it seems kinda involved. I guess a better solution for now
would be to use nir_lower_locals_to_regs, since then each
register/array can get its own vector. I'll respin the series to
handle just shared variables, since that will fix the Feral issues,
but I don't want to leave too much other churn for Nicolai.

>
>>
>>
>>> about local variables to LLVM, and translate NIR dereferences directly
>>> into the equivalent GEP instructions in LLVM.
>>> ---
>>>  src/amd/common/ac_nir_to_llvm.c | 204 
>>> 
>>>  1 file changed, 146 insertions(+), 58 deletions(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index e72747a..f42d214 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>>>
>>> struct hash_table *defs;
>>> struct hash_table *phis;
>>> +   struct hash_table *vars;
>>>
>>> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>>> LLVMValueRef ring_offsets;
>>> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
>>> LLVMValueRef shared_memory;
>>> uint64_t input_mask;
>>> uint64_t output_mask;
>>> -   int num_locals;
>>> -   LLVMValueRef *locals;
>>> uint8_t num_output_clips;
>>> uint8_t num_output_culls;
>>>
>>> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>>> return result;
>>>  }
>>>
>>> +static LLVMValueRef
>>> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
>>> +   nir_deref_var *deref)
>>> +{
>>> +   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
>>> deref->var);
>>> +   assert(entry->data);
>>> +   LLVMValueRef val = entry->data;
>>> +   nir_deref *tail = deref->deref.child;
>>> +   while (tail != NULL) {
>>> +   LLVMValueRef offset;
>>> +   switch (tail->deref_type) {
>>> +   case nir_deref_type_array: {
>>> +   nir_deref_array *array = nir_deref_as_array(tail);
>>> +   offset = LLVMConstInt(ctx->i32, array->base_offset, 
>>> 0);
>>> +   if (array->deref_array_type ==
>>> +   nir_deref_array_type_indirect) {
>>> +   offset = LLVMBuildAdd(ctx->builder, offset,
>>> + get_src(ctx,
>>> + 
>>> array->indirect),
>>> + "");
>>> +   }
>>> +   break;
>>> +   }
>>> +   case nir_deref_type_struct: {
>>> +   nir_deref_struct *deref_struct =
>>> +   nir_deref_as_struct(tail);
>>> +   offset = LLVMConstInt(ctx->i32,
>>> + deref_struct->index, 0);
>>> +   break;
>>> +   }
>>> +   default:
>>> +   unreachable("bad deref type");
>>> +   }
>>> +   val = ac_build_gep0(&ctx->ac, val, offset);
>>> +   tail = tail->child;
>>> +   }
>>> +   return val;
>>> +}
>>> +
>>>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>>nir_intrinsic_instr *instr)
>>>  {
>>> @@ -2948,24 +2986,14 @@ 

Re: [Mesa-dev] [PATCH 01/20] nir: Add intrinsics from ARB_shader_group_vote

2017-07-06 Thread Matt Turner
On Thu, Jul 6, 2017 at 5:07 PM, Connor Abbott  wrote:
> FYI, I already have another series which adds ARB_shader_ballot and
> ARB_shader_group_vote intrinsics, in addition to adding some more
> precise semantics to represent the restrictions on ballotARB() and
> similar things [0]. The problem is that marking ballot as
> can_eliminate but not can_reorder is overly conservative, since we
> won't try to CSE it even in the same basic block. I think Jason had
> some concern he was thinking about, so I'll ask him again.
>
> [0] https://lists.freedesktop.org/archives/mesa-dev/2017-June/158672.html

It seems like your series has been stalled for nearly a month on
questions about the semantics of cross-lane attributes. That seems
worthwhile, but I'd prefer not to block my series on sorting that out.
As far as I can tell, the overlap seems to be almost entirely within
your [PATCH 04/17], which itself contains a dependence on cross-lane
attributes.

Given that my series has minimal overlap with yours, I'd prefer (given
review) to commit mine and do the trivial rebase of your 04/17 on top
of that, once the cross-lane attributes are settled.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [st/va] Fix leak in VAAPI subpictures

2017-07-06 Thread Andres Gomez
It looks like we could want this into -stable (?)

On Thu, 2017-07-06 at 21:10 +0300, Andres Gomez wrote:
> On Thu, 2017-06-22 at 09:25 +, Namburu, Chandu-babu wrote:
> > From: Chandu Babu N 
> > Subject: [PATCH] [st/va] Fix leak in VAAPI subpictures
> >  
> > sampler view allocated in vaAssociateSubpicture is not cleared
> > in vaDeassociateSubpicture.
> >  
> > Reviewed-by: Christian König 
> > ---
> > src/gallium/state_trackers/va/subpicture.c | 1 +
> > 1 file changed, 1 insertion(+)
> >  
> > diff --git a/src/gallium/state_trackers/va/subpicture.c 
> > b/src/gallium/state_trackers/va/subpicture.c
> > index 15d52b9..981a99c 100644
> > --- a/src/gallium/state_trackers/va/subpicture.c
> > +++ b/src/gallium/state_trackers/va/subpicture.c
> > @@ -283,6 +283,7 @@ vlVaDeassociateSubpicture(VADriverContextP ctx, 
> > VASubpictureID subpicture,
> >    while (surf->subpics.size && util_dynarray_top(>subpics, 
> > vlVaSubpicture *) == NULL)
> >       (void)util_dynarray_pop(>subpics, vlVaSubpicture *);
> >     }
> > +   pipe_sampler_view_reference(>sampler,NULL);
> >     mtx_unlock(>mutex);
> > return VA_STATUS_SUCCESS;
> > --
> > 2.7.4
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-- 
Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/17] nir: introduce new convergent and cross-thread attributes

2017-07-06 Thread Connor Abbott
On Mon, Jun 12, 2017 at 9:26 PM, Jason Ekstrand  wrote:
> On Mon, Jun 12, 2017 at 7:38 PM, Connor Abbott  wrote:
>>
>> On Mon, Jun 12, 2017 at 7:19 PM, Jason Ekstrand 
>> wrote:
>> > On Mon, Jun 12, 2017 at 11:58 AM, Nicolai Hähnle 
>> > wrote:
>> >>
>> >> On 12.06.2017 20:50, Connor Abbott wrote:
>> >>>
>> >>> On Mon, Jun 12, 2017 at 2:17 AM, Nicolai Hähnle 
>> >>> wrote:
>> 
>>  On 10.06.2017 01:44, Connor Abbott wrote:
>> >
>> >
>> > From: Connor Abbott 
>> >
>> > These are properties of the instruction that must be respected when
>> > moving it around, in addition to the usual SSA dominance guarantee.
>> > Previously, we only had special handling for fddx and fddy, in a
>> > very
>> > ad-hoc way. But with arb_shader_ballot and arb_shader_group_vote,
>> > we'll
>> > have to start handling a lot more instructions with similar
>> > constraints,
>> > so we want to add a more formal model of what the optimizer can and
>> > cannot do.
>> >
>> > v2: don't add attribute for ALU instructions
>> > v3: special-case derivative ALU instructions
>> > Signed-off-by: Connor Abbott 
>> > ---
>> >src/compiler/nir/nir.h | 80
>> > ++
>> >1 file changed, 80 insertions(+)
>> >
>> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> > index 3b827bf..64caccb 100644
>> > --- a/src/compiler/nir/nir.h
>> > +++ b/src/compiler/nir/nir.h
>> > @@ -985,6 +985,25 @@ typedef enum {
>> >* intrinsic are due to the register reads/writes.
>> >*/
>> >   NIR_INTRINSIC_CAN_REORDER = (1 << 1),
>> > +
>> > +   /**
>> > +* Indicates whether this intrinsic is "cross-thread". An
>> > operation
>> > is
>> > +* cross-thread if results in one thread depend on inputs in
>> > another
>> > thread,
>> > +* and therefore optimizations cannot change the execution mask
>> > when
>> > the
>> > +* operation is called. Examples of cross-thread operations
>> > include
>> > +* screen-space derivatives, the "any" reduction which returns
>> > "true"
>> > in
>> > +* all threads if any thread inputs "true", etc.
>> > +*/
>> > +   NIR_INTRINSIC_CROSS_THREAD,
>> > +
>> > +   /**
>> > +* Indicates that this intrinsic is "convergent". An operation
>> > is
>> > +* convergent when it must always be called in convergent
>> > control
>> > flow,
>> > +* that is, control flow with the same execution mask as when
>> > the
>> > program
>> > +* started. If an operation is convergent, it must be
>> > cross-thread
>> > as
>> > well,
>> > +* since the optimizer must maintain the guarantee.
>> > +*/
>> > +   NIR_INTRINSIC_CONVERGENT,
>> 
>> 
>> 
>>  This is inconsistent with LLVM's definition of 'convergent', and I'd
>>  like
>>  you to change it to match up with LLVM.
>> 
>>  LLVM's definition of convergent is: "The operation must not be made
>>  control-dependent on additional values."
>> 
>>  In the language of execution masks, this means that optimizations
>>  must
>>  guarantee that the execution mask for the instruction can only become
>>  a
>>  superset of what it was originally. This means lifting is actually
>>  okay.
>> 
>>  This is relevant because e.g. texture instructions with implicit
>>  derivatives
>>  are actually convergent operations (in the LLVM sense), but obviously
>>  they
>>  can be called with exec masks that are subsets of the exec mask at
>>  program
>>  start.
>> >>>
>> >>>
>> >>> Actually, according to GLSL (and I think SPIR-V, although I'm not 100%
>> >>> sure), they can't be called that way -- results are undefined if
>> >>> derivatives (or textures that take implicit derivatives) aren't called
>> >>> in uniform control flow, full stop. That's why I changed the
>> >>> definition compared to LLVM - this definition of convergent allows all
>> >>> the optimizations that the LLVM definition does, but it opens up
>> >>> additional optimization opportunities since we can assume that control
>> >>> flow is always uniform when doing divergence analysis. Also, as-is,
>> >>> the definition matches the GLSL/SPIR-V semantics closely, and since
>> >>> the purpose of the convergent attribute is to model derivatives in
>> >>> GLSL and SPIR-V, I'd like to keep that. If GLSL or SPIR-V change their
>> >>> semantics to allow what you describe, then we can add something
>> >>> closer to the LLVM convergent semantics. If you want me to
>> >>> change the name to avoid confusion with LLVM, that's fair though --
>> 

[Mesa-dev] [PATCH 09/11] i965: Push UBO data, but don't use it just yet.

2017-07-06 Thread Kenneth Graunke
This patch starts uploading UBO data via 3DSTATE_CONSTANT_* packets,
and updates the compiler to know that there's extra payload data, so
things continue working.  However, it still issues pull loads for all
data.  I wanted to separate the two aspects for greater bisectability.
---
 src/intel/compiler/brw_fs.cpp|  9 +++-
 src/intel/compiler/brw_vec4.cpp  |  3 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c| 69 +---
 4 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 43b6e342043..cb8d093d00b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1383,7 +1383,14 @@ fs_visitor::emit_gs_thread_end()
 void
 fs_visitor::assign_curb_setup()
 {
-   prog_data->curb_read_length = ALIGN(stage_prog_data->nr_params, 8) / 8;
+   unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
+
+   unsigned ubo_push_length = 0;
+   for (int i = 0; i < 4; i++) {
+  ubo_push_length += stage_prog_data->ubo_ranges[i].length;
+   }
+
+   prog_data->curb_read_length = uniform_push_length + ubo_push_length;
 
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_block_and_inst(block, fs_inst, inst, cfg) {
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 3de7d931dde..410922c62b2 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1776,6 +1776,9 @@ vec4_visitor::setup_uniforms(int reg)
   reg += ALIGN(uniforms, 2) / 2;
}
 
+   for (int i = 0; i < 4; i++)
+  reg += stage_prog_data->ubo_ranges[i].length;
+
stage_prog_data->nr_params = this->uniforms * 4;
 
prog_data->base.curb_read_length =
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 3615c1805ec..9293bea992c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1444,6 +1444,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, struct 
gl_program *prog,
   }
}
 
+   stage_state->push_constants_dirty = true;
+
if (prog->info.num_ubos || prog->info.num_ssbos)
   brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
 }
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 264e999b86a..c5d38e641d7 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2743,6 +2743,8 @@ UNUSED static const uint32_t push_constant_opcodes[] = {
 static void
 genX(upload_push_constant_packets)(struct brw_context *brw)
 {
+   struct gl_context *ctx = &brw->ctx;
+
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
 
struct brw_stage_state *stage_states[] = {
@@ -2759,19 +2761,71 @@ genX(upload_push_constant_packets)(struct brw_context 
*brw)
 
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
   struct brw_stage_state *stage_state = stage_states[stage];
-  bool active = stage_state->prog_data && stage_state->push_const_size > 0;
+  struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
 
   if (!stage_state->push_constants_dirty)
  continue;
 
   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
  pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
- if (active) {
+ if (stage_state->prog_data) {
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
-pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
-pkt.ConstantBody.Buffer[2] =
-   render_ro_bo(stage_state->push_const_bo,
-stage_state->push_const_offset);
+/* The Skylake PRM contains the following restriction:
+ *
+ *"The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the buffers in the highest slots.
+ * This way, slot 0 is only used if slot 3 is also used.
+ */
+int n = 3;
+
+for (int i = 3; i >= 0; i--) {
+   const struct brw_ubo_range *range =
+  _state->prog_data->ubo_ranges[i];
+
+   if (range->length == 0)
+  continue;
+
+   const struct gl_uniform_block *block =
+  prog->sh.UniformBlocks[range->block];
+   const struct gl_uniform_buffer_binding *binding =
+  >UniformBufferBindings[block->Binding];
+
+   if (binding->BufferObject == 

[Mesa-dev] [PATCH 08/11] i965: Pad buffer objects by 2kB in robust contexts to avoid OOB access.

2017-07-06 Thread Kenneth Graunke
This is an annoyingly big hammer, but it seems less mean than disabling
UBO pushing, and I'm not sure what else to do.
---
 src/mesa/drivers/dri/i965/intel_buffer_objects.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c 
b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
index a9ac29a6a81..0ecc2fd556c 100644
--- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -59,8 +59,26 @@ static void
 alloc_buffer_object(struct brw_context *brw,
 struct intel_buffer_object *intel_obj)
 {
-   intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj",
- intel_obj->Base.Size, 64);
+   const struct gl_context *ctx = &brw->ctx;
+
+   uint64_t size = intel_obj->Base.Size;
+   if (ctx->Const.RobustAccess) {
+  /* Pad out buffer objects with an extra 2kB (half a page).
+   *
+   * When pushing UBOs, we need to safeguard against 3DSTATE_CONSTANT_*
+   * reading out of bounds memory.  The application might bind a UBO that's
+   * smaller than what the program expects.  Ideally, we'd bind an extra
+   * push buffer containing zeros, but we have a limited number of those,
+   * so it's not always viable.  Our only safe option is to pad all buffer
+   * objects by the maximum push data length, so that it will never read
+   * past the end of a BO.
+   *
+   * This is unfortunate, but it should result in at most 1 extra page,
+   * which probably isn't too terrible.
+   */
+  size += 64 * 32; /* max read length of 64 256-bit units */
+   }
+   intel_obj->buffer = brw_bo_alloc(brw->bufmgr, "bufferobj", size, 64);
 
/* the buffer might be bound as a uniform buffer, need to update it
 */
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/11] i965: Require a UBO offset alignment of 32 bytes.

2017-07-06 Thread Kenneth Graunke
Soon, we're going to start providing UBO data to shaders as push
constants, rather than requiring them to issue pull loads.  The
3DSTATE_CONSTANT_* commands require 32 byte aligned pointers.

So, we need to increase this from 16 to 32.
---
 src/mesa/drivers/dri/i965/brw_context.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index e921a41c827..e4828512ac8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -735,8 +735,11 @@ brw_initialize_context_constants(struct brw_context *brw)
 *  the element in the buffer."
 *
 * However, unaligned accesses are slower, so enforce buffer alignment.
+*
+* In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
+* restriction: the start of the buffer needs to be 32B aligned.
 */
-   ctx->Const.UniformBufferOffsetAlignment = 16;
+   ctx->Const.UniformBufferOffsetAlignment = 32;
 
/* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
 * that we can safely have the CPU and GPU writing the same SSBO on
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/11] i965: Store per-stage push constant BO pointers.

2017-07-06 Thread Kenneth Graunke
Right now, we always upload new push constant data, and immediately
emit 3DSTATE_CONSTANT_* packets.  We call intel_upload_space and store
the resulting BO pointer in brw->curbe.curbe_bo.  We read that when
emitting the packets.  This works today, but is fragile - it depends on
upload and packet emission being interleaved.

If we instead were to upload all the data, then emit all the packets,
then upload BO wrapping will get us into trouble.  For example, the VS
constants may land in one upload BO, but the FS constants may not fit
and land in a second upload BO.  Uploading FS constants would overwrite
the brw->curbe.curbe_bo pointer, so when we emitted 3DSTATE_CONSTANT_VS,
we'd get the wrong BO.

I intend to separate out this code in a future commit, so I need to fix
this.  To fix it, we simply store a per-stage BO pointer.
---
 src/mesa/drivers/dri/i965/brw_context.h | 3 ++-
 src/mesa/drivers/dri/i965/gen6_constant_state.c | 3 ++-
 src/mesa/drivers/dri/i965/genX_state_upload.c   | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 817396dddf6..9587c74304d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -552,7 +552,8 @@ struct brw_stage_state
/** Offset in the batchbuffer to Gen4-5 pipelined state (VS/WM/GS_STATE). */
uint32_t state_offset;
 
-   uint32_t push_const_offset; /* Offset in the batchbuffer */
+   struct brw_bo *push_const_bo; /* NULL if using the batchbuffer */
+   uint32_t push_const_offset; /* Offset in the push constant BO or batch */
int push_const_size; /* in 256-bit register increments */
 
/* Binding table: pointers to SURFACE_STATE entries. */
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c 
b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index f3927c5dba2..920f502ca37 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -65,7 +65,8 @@ gen6_upload_push_constants(struct brw_context *brw,
   const int size = prog_data->nr_params * sizeof(gl_constant_value);
   gl_constant_value *param;
   if (brw->gen >= 8 || brw->is_haswell) {
- param = intel_upload_space(brw, size, 32, &brw->curbe.curbe_bo,
+ param = intel_upload_space(brw, size, 32,
+&stage_state->push_const_bo,
 &stage_state->push_const_offset);
   } else {
  param = brw_state_batch(brw, size, 32,
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 06b9cd50728..bc47e5a16fe 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2754,7 +2754,8 @@ upload_constant_state(struct brw_context *brw,
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
  pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
  pkt.ConstantBody.Buffer[2] =
-render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
+render_ro_bo(stage_state->push_const_bo,
+ stage_state->push_const_offset);
 #else
  pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
  pkt.ConstantBody.Buffer[0].offset =
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/11] i965: Use pushed UBO data in the scalar backend.

2017-07-06 Thread Kenneth Graunke
This actually takes advantage of the newly pushed UBO data, avoiding
pull loads.

XXX: quote performance numbers
---
 src/intel/compiler/brw_fs.cpp | 35 ++-
 src/intel/compiler/brw_fs.h   |  2 ++
 src/intel/compiler/brw_fs_nir.cpp | 28 
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 49e714f1c1f..68648eda64d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -1386,7 +1386,9 @@ fs_visitor::assign_curb_setup()
unsigned uniform_push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
 
unsigned ubo_push_length = 0;
+   unsigned ubo_push_start[4];
for (int i = 0; i < 4; i++) {
+  ubo_push_start[i] = 8 * (ubo_push_length + uniform_push_length);
   ubo_push_length += stage_prog_data->ubo_ranges[i].length;
}
 
@@ -1398,7 +1400,11 @@ fs_visitor::assign_curb_setup()
 if (inst->src[i].file == UNIFORM) {
 int uniform_nr = inst->src[i].nr + inst->src[i].offset / 4;
 int constant_nr;
-if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
+if (inst->src[i].nr >= UBO_START) {
+   /* constant_nr is in 32-bit units, the rest are in bytes */
+   constant_nr = ubo_push_start[inst->src[i].nr - UBO_START] +
+ inst->src[i].offset / 4;
+} else if (uniform_nr >= 0 && uniform_nr < (int) uniforms) {
constant_nr = push_constant_loc[uniform_nr];
 } else {
/* Section 5.11 of the OpenGL 4.1 spec says:
@@ -2069,6 +2075,20 @@ fs_visitor::assign_constant_locations()
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
 
+   /* Now that we know how many regular uniforms we'll push, reduce the
+* UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
+*/
+   unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
+   for (int i = 0; i < 4; i++) {
+  struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+
+  if (push_length + range->length > 64)
+ range->length = 64 - push_length;
+
+  push_length += range->length;
+   }
+   assert(push_length <= 64);
+
/* Up until now, the param[] array has been indexed by reg + offset
 * of UNIFORM registers.  Move pull constants into pull_param[] and
 * condense param[] to only contain the uniforms we chose to push.
@@ -2103,6 +2123,19 @@ fs_visitor::get_pull_locs(const fs_reg &src,
 {
assert(src.file == UNIFORM);
 
+   if (src.nr >= UBO_START) {
+  const struct brw_ubo_range *range =
+ &prog_data->ubo_ranges[src.nr - UBO_START];
+
+  /* If this access is in our (reduced) range, use the push data. */
+  if (src.offset / 32 < range->length && !getenv("PULL"))
+ return false;
+
+  *out_surf_index = prog_data->binding_table.ubo_start + range->block;
+  *out_pull_index = (32 * range->start + src.offset) / 4;
+  return true;
+   }
+
const unsigned location = src.nr + src.offset / 4;
 
if (location < uniforms && pull_constant_loc[location] != -1) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index beb0d971b4c..f1ba193de7e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -50,6 +50,8 @@ offset(const fs_reg &reg, const brw::fs_builder &bld, 
unsigned delta)
return offset(reg, bld.dispatch_width(), delta);
 }
 
+#define UBO_START ((1 << 16) - 4)
+
 /**
  * The fragment shader front-end.
  *
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index a9dce42c38d..34e8cd1427d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3822,6 +3822,34 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   * and we have to split it if necessary.
   */
  const unsigned type_size = type_sz(dest.type);
+
+ /* See if we've selected this as a push constant candidate */
+ if (const_index) {
+const unsigned ubo_block = const_index->u32[0];
+const unsigned offset_256b = const_offset->u32[0] / 32;
+
+fs_reg push_reg;
+for (int i = 0; i < 4; i++) {
+   const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
+   if (range->block == ubo_block &&
+   offset_256b >= range->start &&
+   offset_256b < range->start + range->length) {
+
+  push_reg = fs_reg(UNIFORM, UBO_START + i, dest.type);
+  push_reg.offset = const_offset->u32[0] - 32 * range->start;
+  break;
+   }
+}
+
+if (push_reg.file != BAD_FILE) {
+   for (unsigned i = 0; i < instr->num_components; i++) {
+  bld.MOV(offset(dest, bld, i),
+  

[Mesa-dev] [PATCH 03/11] i965: Select ranges of UBO data to be uploaded as push constants.

2017-07-06 Thread Kenneth Graunke
This adds a NIR pass that decides which portions of UBOs we should
upload as push constants, rather than pull constants.
---
 src/intel/Makefile.sources  |   1 +
 src/intel/compiler/brw_compiler.h   |  11 +
 src/intel/compiler/brw_nir.h|   4 +
 src/intel/compiler/brw_nir_analyze_ubo_ranges.c | 271 
 src/mesa/drivers/dri/i965/brw_gs.c  |   2 +
 src/mesa/drivers/dri/i965/brw_tcs.c |   2 +
 src/mesa/drivers/dri/i965/brw_tes.c |   2 +
 src/mesa/drivers/dri/i965/brw_vs.c  |   2 +
 src/mesa/drivers/dri/i965/brw_wm.c  |   2 +
 9 files changed, 297 insertions(+)
 create mode 100644 src/intel/compiler/brw_nir_analyze_ubo_ranges.c

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index b672e615c52..f0a8bf517a1 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -73,6 +73,7 @@ COMPILER_FILES = \
compiler/brw_nir.h \
compiler/brw_nir.c \
compiler/brw_nir_analyze_boolean_resolves.c \
+   compiler/brw_nir_analyze_ubo_ranges.c \
compiler/brw_nir_attribute_workarounds.c \
compiler/brw_nir_intrinsics.c \
compiler/brw_nir_opt_peephole_ffma.c \
diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index e4c22e31177..d8e7717e867 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -468,6 +468,15 @@ struct brw_image_param {
  */
 #define BRW_SHADER_TIME_STRIDE 64
 
+struct brw_ubo_range
+{
+   // XXX: jason says that 255 won't be enough for vulkan - we may have
+   // large amounts of UBOs in the future.  use uint16_t.
+   uint8_t block;
+   uint8_t start;
+   uint8_t length;
+};
+
 struct brw_stage_prog_data {
struct {
   /** size of our binding table. */
@@ -488,6 +497,8 @@ struct brw_stage_prog_data {
   /** @} */
} binding_table;
 
+   struct brw_ubo_range ubo_ranges[4];
+
GLuint nr_params;   /**< number of float params/constants */
GLuint nr_pull_params;
unsigned nr_image_params;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 5d866b86ac8..560027c3662 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -142,6 +142,10 @@ void brw_nir_setup_glsl_uniforms(nir_shader *shader,
 void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
 struct brw_stage_prog_data *stage_prog_data);
 
+void brw_nir_analyze_ubo_ranges(const struct brw_compiler *compiler,
+nir_shader *nir,
+struct brw_ubo_range out_ranges[4]);
+
 bool brw_nir_opt_peephole_ffma(nir_shader *shader);
 
 #define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
diff --git a/src/intel/compiler/brw_nir_analyze_ubo_ranges.c 
b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
new file mode 100644
index 000..3535e67758c
--- /dev/null
+++ b/src/intel/compiler/brw_nir_analyze_ubo_ranges.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "compiler/nir/nir.h"
+#include "util/u_dynarray.h"
+
+/**
+ * \file brw_nir_analyze_ubo_ranges.c
+ *
+ * This pass decides which portions of UBOs to upload as push constants,
+ * so shaders can access them as part of the thread payload, rather than
+ * having to issue expensive memory reads to pull the data.
+ *
+ * The 3DSTATE_CONSTANT_* mechanism can push data from up to 4 different
+ * buffers, in GRF (256-bit/32-byte) units.
+ *
+ * To do this, we examine NIR load_ubo intrinsics, recording the number of
+ * loads at each offset.  We track offsets at a 32-byte granularity, so even
+ * fields with a bit of padding between them tend to fall into contiguous
+ * ranges.  We build a list of these 

[Mesa-dev] [PATCH 10/11] i965: Factor out push locations.

2017-07-06 Thread Kenneth Graunke
With UBOs, the answer of "have we decided to push this uniform" gets
a bit more complicated - for one, we have multiple surfaces.  This
patch refactors things so we can add the new code in a single place.
---
 src/intel/compiler/brw_fs.cpp | 39 +++
 src/intel/compiler/brw_fs.h   |  2 ++
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index cb8d093d00b..49e714f1c1f 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2096,6 +2096,25 @@ fs_visitor::assign_constant_locations()
  new_thread_local_id_index;
 }
 
+bool
+fs_visitor::get_pull_locs(const fs_reg &src,
+  unsigned *out_surf_index,
+  unsigned *out_pull_index)
+{
+   assert(src.file == UNIFORM);
+
+   const unsigned location = src.nr + src.offset / 4;
+
+   if (location < uniforms && pull_constant_loc[location] != -1) {
+  /* A regular uniform push constant */
+  *out_surf_index = stage_prog_data->binding_table.pull_constants_start;
+  *out_pull_index = pull_constant_loc[location];
+  return true;
+   }
+
+   return false;
+}
+
 /**
  * Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
  * or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
@@ -2103,7 +2122,7 @@ fs_visitor::assign_constant_locations()
 void
 fs_visitor::lower_constant_loads()
 {
-   const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+   unsigned index, pull_index;
 
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
   /* Set up the annotation tracking for new generated instructions. */
@@ -2117,13 +2136,7 @@ fs_visitor::lower_constant_loads()
  if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
 continue;
 
- unsigned location = inst->src[i].nr + inst->src[i].offset / 4;
- if (location >= uniforms)
-continue; /* Out of bounds access */
-
- int pull_index = pull_constant_loc[location];
-
- if (pull_index == -1)
+ if (!get_pull_locs(inst->src[i], &index, &pull_index))
continue;
 
  assert(inst->src[i].stride == 0);
@@ -2148,14 +2161,8 @@ fs_visitor::lower_constant_loads()
   if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
   inst->src[0].file == UNIFORM) {
 
- unsigned location = inst->src[0].nr + inst->src[0].offset / 4;
- if (location >= uniforms)
-continue; /* Out of bounds access */
-
- int pull_index = pull_constant_loc[location];
-
- if (pull_index == -1)
-   continue;
+ if (!get_pull_locs(inst->src[0], &index, &pull_index))
+continue;
 
  VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
 brw_imm_ud(index),
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 6c8c0275a76..beb0d971b4c 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -125,6 +125,8 @@ public:
void split_virtual_grfs();
bool compact_virtual_grfs();
void assign_constant_locations();
+   bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
+  unsigned *out_pull_index);
void lower_constant_loads();
void invalidate_live_intervals();
void calculate_live_intervals();
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/11] i965: Stop re-uploading push constants after URB reconfiguration.

2017-07-06 Thread Kenneth Graunke
Previously we would re-upload the constant data to the batchbuffer,
then re-emit the packets.  We only need to do the last step (causing
the existing data in the batchbuffer to be re-uploaded to the push
constant staging area in the L3).

Now that we've separated the two, it's pretty easy to accomplish.
---
 src/mesa/drivers/dri/i965/gen7_urb.c  | 6 +-
 src/mesa/drivers/dri/i965/genX_state_upload.c | 9 ++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 525c9c4d3da..8e614b1176e 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -101,7 +101,11 @@ gen7_allocate_push_constants(struct brw_context *brw)
 * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
 * commands.
 */
-   brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
+   brw->vs.base.push_constants_dirty = true;
+   brw->tcs.base.push_constants_dirty = true;
+   brw->tes.base.push_constants_dirty = true;
+   brw->gs.base.push_constants_dirty = true;
+   brw->wm.base.push_constants_dirty = true;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index a8641a31768..264e999b86a 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2816,7 +2816,6 @@ static const struct brw_tracked_state 
genX(vs_push_constants) = {
_NEW_TRANSFORM,
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_PROG_DATA,
},
@@ -2847,8 +2846,7 @@ static const struct brw_tracked_state 
genX(gs_push_constants) = {
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_GEOMETRY_PROGRAM |
-   BRW_NEW_GS_PROG_DATA |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   BRW_NEW_GS_PROG_DATA,
},
.emit = genX(upload_gs_push_constants),
 };
@@ -2873,8 +2871,7 @@ static const struct brw_tracked_state 
genX(wm_push_constants) = {
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_FRAGMENT_PROGRAM |
-   BRW_NEW_FS_PROG_DATA |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+   BRW_NEW_FS_PROG_DATA,
},
.emit = genX(upload_wm_push_constants),
 };
@@ -3690,7 +3687,6 @@ static const struct brw_tracked_state 
genX(tes_push_constants) = {
   .mesa  = _NEW_PROGRAM_CONSTANTS,
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TES_PROG_DATA,
},
@@ -3720,7 +3716,6 @@ static const struct brw_tracked_state 
genX(tcs_push_constants) = {
   .brw   = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_DEFAULT_TESS_LEVELS |
-   BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TCS_PROG_DATA,
},
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/11] i965: Introduce a BRW_NEW_DRAW_CALL dirty bit.

2017-07-06 Thread Kenneth Graunke
This allows us to have atoms which are signalled on every draw call.
---
 src/mesa/drivers/dri/i965/brw_context.h  | 2 ++
 src/mesa/drivers/dri/i965/brw_draw.c | 5 +
 src/mesa/drivers/dri/i965/brw_state_upload.c | 1 +
 3 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 9587c74304d..3f233d815b2 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -214,6 +214,7 @@ enum brw_state_id {
BRW_STATE_BLORP,
BRW_STATE_VIEWPORT_COUNT,
BRW_STATE_CONSERVATIVE_RASTERIZATION,
+   BRW_STATE_DRAW_CALL,
BRW_NUM_STATE_BITS
 };
 
@@ -304,6 +305,7 @@ enum brw_state_id {
 #define BRW_NEW_CC_STATE(1ull << BRW_STATE_CC_STATE)
 #define BRW_NEW_BLORP   (1ull << BRW_STATE_BLORP)
 #define BRW_NEW_CONSERVATIVE_RASTERIZATION (1ull << 
BRW_STATE_CONSERVATIVE_RASTERIZATION)
+#define BRW_NEW_DRAW_CALL   (1ull << BRW_STATE_DRAW_CALL)
 
 struct brw_state_flags {
/** State update flags signalled by mesa internals */
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 821f1e24e94..1d2c69a35cb 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -536,6 +536,11 @@ brw_try_draw_prims(struct gl_context *ctx,
   estimated_max_prim_size += 1024; /* gen6 WM push constants */
   estimated_max_prim_size += 512; /* misc. pad */
 
+  /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
+   * atoms that happen on every draw call.
+   */
+  brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;
+
   /* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
* primitives.
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index e14ed02e2e1..8ee2dff072c 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -340,6 +340,7 @@ static struct dirty_bit_map brw_bits[] = {
DEFINE_BIT(BRW_NEW_BLORP),
DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
+   DEFINE_BIT(BRW_NEW_DRAW_CALL),
{0, 0, 0}
 };
 
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/11] i965: Separate uploading push constant data from the pointer packets.

2017-07-06 Thread Kenneth Graunke
I hope to upload UBO data via 3DSTATE_CONSTANT_XS packets, in addition to
normal uniforms.  In order to do that, I'll need to re-emit the packets
when UBOs change.  But I don't want to re-copy the regular uniform data
to the batchbuffer every time.

This patch separates out the data uploading from the packet submission.
We're running low on dirty bits, so I made the new atom happen on every
draw call, and added a flag to stage_state indicating that we want the
packet for that stage emitted.

I would have preferred to do this outside the atom system, but it has
to happen between the uploading of push constant data and the binding
table upload.
---
 src/mesa/drivers/dri/i965/brw_context.h |  3 +
 src/mesa/drivers/dri/i965/gen6_constant_state.c |  2 +
 src/mesa/drivers/dri/i965/genX_state_upload.c   | 81 ++---
 3 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 3f233d815b2..29dbc1726c3 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -565,6 +565,9 @@ struct brw_stage_state
/** SAMPLER_STATE count and table offset */
uint32_t sampler_count;
uint32_t sampler_offset;
+
+   /** Need to re-emit 3DSTATE_CONSTANT_XS? */
+   bool push_constants_dirty;
 };
 
 enum brw_predicate_state {
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c 
b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index 920f502ca37..dd4e224aada 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -119,4 +119,6 @@ gen6_upload_push_constants(struct brw_context *brw,
*/
   assert(stage_state->push_const_size <= 32);
}
+
+   stage_state->push_constants_dirty = true;
 }
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
b/src/mesa/drivers/dri/i965/genX_state_upload.c
index bc47e5a16fe..a8641a31768 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -2741,31 +2741,58 @@ UNUSED static const uint32_t push_constant_opcodes[] = {
 };
 
 static void
-upload_constant_state(struct brw_context *brw,
-  struct brw_stage_state *stage_state,
-  bool active, uint32_t stage)
+genX(upload_push_constant_packets)(struct brw_context *brw)
 {
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
-   active = active && stage_state->push_const_size != 0;
 
-   brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
-  pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
-  if (active) {
+   struct brw_stage_state *stage_states[] = {
+  &brw->vs.base,
+  &brw->tcs.base,
+  &brw->tes.base,
+  &brw->gs.base,
+  &brw->wm.base,
+   };
+
+   if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail &&
+   stage_states[MESA_SHADER_VERTEX]->push_constants_dirty)
+  gen7_emit_vs_workaround_flush(brw);
+
+   for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+  struct brw_stage_state *stage_state = stage_states[stage];
+  bool active = stage_state->prog_data && stage_state->push_const_size > 0;
+
+  if (!stage_state->push_constants_dirty)
+ continue;
+
+  brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
+ pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
+ if (active) {
 #if GEN_GEN >= 8 || GEN_IS_HASWELL
- pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[2] =
-render_ro_bo(stage_state->push_const_bo,
- stage_state->push_const_offset);
+pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
+pkt.ConstantBody.Buffer[2] =
+   render_ro_bo(stage_state->push_const_bo,
+stage_state->push_const_offset);
 #else
- pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[0].offset =
-stage_state->push_const_offset | mocs;
+pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
+pkt.ConstantBody.Buffer[0].offset =
+   stage_state->push_const_offset | mocs;
 #endif
+ }
   }
+
+  stage_state->push_constants_dirty = false;
}
 
brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
 }
+
+const struct brw_tracked_state genX(push_constant_packets) = {
+   .dirty = {
+  .mesa  = 0,
+  .brw   = BRW_NEW_DRAW_CALL,
+   },
+   .emit = genX(upload_push_constant_packets),
+};
 #endif
 
 #if GEN_GEN >= 6
@@ -2781,14 +2808,6 @@ genX(upload_vs_push_constants)(struct brw_context *brw)
 
_mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
gen6_upload_push_constants(brw, &vp->program, prog_data, stage_state);
-
-#if GEN_GEN >= 7
-   if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail)
-  

[Mesa-dev] [PATCH 00/11] i965: UBO pushing for fun and profit?

2017-07-06 Thread Kenneth Graunke
Hello,

This series begins pushing UBOs (rather than resorting to pull loads)
for scalar shaders on Gen7.5+, for the OpenGL driver.  Future work is
to hook it up for Vulkan (haven't started), for the vec4 shader stages
(I have about 75% of the code written), and for Gen7 (I have a plan).

Note that compute shaders unfortunately still resort to pull messages,
because I haven't found a way to make the constant commands absolute
addresses instead of being relative to dynamic state base address.

This has long been a gap in our UBO support - we pushed regular
uniform data, but always resorted to pulls for UBOs, making them
slower than regular uniforms.

I started this project a year and a half ago, and it initially looked
very promising - up to 30% faster in Tomb Raider, for example.  However,
Curro improved the performance of pull messages significantly since then.
Now, it doesn't seem to have as large of an impact.  Jason thinks this
would help close the GL/Vulkan gap in Talos Principle, when we finally
hook it up in Vulkan.  One place where it does help is GLBenchmark 3.1
Manhattan, which improves 3-4% on most platforms, and 6-7% on SKL GT4.
This is primarily because it avoids doing a pull load in a loop, though,
which could be solved by using the global code motion pass...

I figured I'd at least send it out for an initial review, and we can
continue collecting benchmark data...

--Ken

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/11] i965: Switch to absolute addressing for constant buffer 0.

2017-07-06 Thread Kenneth Graunke
By default, 3DSTATE_CONSTANT_* Constant Buffer 0 is relative to dynamic
state base address.  This makes it unusable for pushing UBOs.  I'd like
to be able to use all four push buffers.

There is a bit in the INSTPM register (or CS_DEBUG_MODE2 on Skylake)
which controls whether buffer 0 is relative to dynamic state base
address, or simply a normal pointer.  Setting that gives us full
flexibility.

We can't currently write this on Haswell and earlier, and will need
to update the kernel command parser, and then do the whole version
checking song and dance.
---
 src/intel/compiler/brw_compiler.h|  6 ++
 src/mesa/drivers/dri/i965/brw_defines.h  |  6 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c | 24 
 src/mesa/drivers/dri/i965/intel_screen.c |  1 +
 4 files changed, 37 insertions(+)

diff --git a/src/intel/compiler/brw_compiler.h 
b/src/intel/compiler/brw_compiler.h
index 3f383403883..e4c22e31177 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -100,6 +100,12 @@ struct brw_compiler {
 * This can negatively impact performance.
 */
bool precise_trig;
+
+   /**
+* Is 3DSTATE_CONSTANT_*'s Constant Buffer 0 relative to Dynamic State
+* Base Address?  (If not, it's a normal GPU address.)
+*/
+   bool constant_buffer_0_is_relative;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index a4794c6a1d2..2a8dbf8cb9a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1680,4 +1680,10 @@ enum brw_pixel_shader_coverage_mask_mode {
 # define GEN8_L3CNTLREG_ALL_ALLOC_SHIFT25
 # define GEN8_L3CNTLREG_ALL_ALLOC_MASK INTEL_MASK(31, 25)
 
+#define INSTPM 0x20c0
+# define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
+
+#define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
+# define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 5e82c1b4ce8..e14ed02e2e1 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -90,6 +90,30 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
   OUT_BATCH(0);
   ADVANCE_BATCH();
}
+
+   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
+* 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
+*
+* On Gen6-7.5, we use an execbuf parameter to do this for us.
+* However, the kernel ignores that when execlists are in use.
+* Fortunately, we can just write the registers from userspace
+* on Gen8+, and they're context saved/restored.
+*/
+   if (brw->gen >= 9) {
+  BEGIN_BATCH(3);
+  OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+  OUT_BATCH(CS_DEBUG_MODE2);
+  OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+  ADVANCE_BATCH();
+   } else if (brw->gen == 8) {
+  BEGIN_BATCH(3);
+  OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+  OUT_BATCH(INSTPM);
+  OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
+INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
+  ADVANCE_BATCH();
+   }
 }
 
 static inline const struct brw_tracked_state *
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index c75f2125d4b..5f2c512fc51 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2307,6 +2307,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
screen->compiler = brw_compiler_create(screen, devinfo);
screen->compiler->shader_debug_log = shader_debug_log_mesa;
screen->compiler->shader_perf_log = shader_perf_log_mesa;
+   screen->compiler->constant_buffer_0_is_relative = devinfo->gen < 8;
screen->program_id = 1;
 
screen->has_exec_fence =
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/20] nir: Add intrinsics from ARB_shader_group_vote

2017-07-06 Thread Connor Abbott
FYI, I already have another series which adds ARB_shader_ballot and
ARB_shader_group_vote intrinsics, in addition to adding some more
precise semantics to represent the restrictions on ballotARB() and
similar things [0]. The problem is that marking ballot as
can_eliminate but not can_reorder is overly conservative, since we
won't try to CSE it even in the same basic block. I think Jason had
some concern he was thinking about, so I'll ask him again.

[0] https://lists.freedesktop.org/archives/mesa-dev/2017-June/158672.html

On Thu, Jul 6, 2017 at 4:48 PM, Matt Turner  wrote:
> These are intrinsics rather than opcodes, because they operate across
> channels.
> ---
>  src/compiler/glsl/glsl_to_nir.cpp | 22 ++
>  src/compiler/nir/nir_intrinsics.h |  5 +
>  2 files changed, 27 insertions(+)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
> b/src/compiler/glsl/glsl_to_nir.cpp
> index 21530040a9..43d7e07042 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -799,6 +799,15 @@ nir_visitor::visit(ir_call *ir)
>case ir_intrinsic_shared_atomic_comp_swap:
>   op = nir_intrinsic_shared_atomic_comp_swap;
>   break;
> +  case ir_intrinsic_vote_any:
> + op = nir_intrinsic_vote_any;
> + break;
> +  case ir_intrinsic_vote_all:
> + op = nir_intrinsic_vote_all;
> + break;
> +  case ir_intrinsic_vote_eq:
> + op = nir_intrinsic_vote_eq;
> + break;
>default:
>   unreachable("not reached");
>}
> @@ -1135,6 +1144,19 @@ nir_visitor::visit(ir_call *ir)
>   nir_builder_instr_insert(, >instr);
>   break;
>}
> +  case nir_intrinsic_vote_any:
> +  case nir_intrinsic_vote_all:
> +  case nir_intrinsic_vote_eq: {
> + nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
> +
> + instr->variables[0] = evaluate_deref(>instr, 
> ir->return_deref);
> +
> + ir_instruction *value = (ir_instruction 
> *)ir->actual_parameters.get_head();
> + instr->src[0] = 
> nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
> +
> + nir_builder_instr_insert(, >instr);
> + break;
> +  }
>default:
>   unreachable("not reached");
>}
> diff --git a/src/compiler/nir/nir_intrinsics.h 
> b/src/compiler/nir/nir_intrinsics.h
> index 21e7d904b7..8a838df027 100644
> --- a/src/compiler/nir/nir_intrinsics.h
> +++ b/src/compiler/nir/nir_intrinsics.h
> @@ -107,6 +107,11 @@ BARRIER(memory_barrier_shared)
>  /** A conditional discard, with a single boolean source. */
>  INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
>
> +/** ARB_shader_group_vote intrinsics */
> +INTRINSIC(vote_any, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
> +INTRINSIC(vote_all, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
> +INTRINSIC(vote_eq,  1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
> NIR_INTRINSIC_CAN_ELIMINATE)
> +
>  /**
>   * Basic Geometry Shader intrinsics.
>   *
> --
> 2.13.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101712] CPU lockup after ring 0 stalled

2017-07-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101712

guiscara...@gmail.com changed:

   What|Removed |Added

   Assignee|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
 QA Contact|mesa-dev@lists.freedesktop. |dri-devel@lists.freedesktop
   |org |.org
  Component|Drivers/X11 |Drivers/Gallium/radeonsi

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/20] nir: Narrow unnecessary 64-bit operations to 32-bits

2017-07-06 Thread Matt Turner
If we know the high bits are zero, we can just do a 32-bit comparison on
the low bytes instead.
---
 src/compiler/nir/nir_opt_algebraic.py | 14 +-
 src/compiler/nir/nir_search_helpers.h | 48 +++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index df5854270c..a9c3e80929 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -44,7 +44,7 @@ d = 'd'
 # however, be used for backend-requested lowering operations as those need to
 # happen regardless of precision.
 #
-# Variable names are specified as "[#]name[@type][(cond)]" where "#" inicates
+# Variable names are specified as "[#]name[@type][(cond)]" where "#" indicates
 # that the given variable will only match constants and the type indicates that
 # the given variable will only match values from ALU instructions with the
 # given output type, and (cond) specifies an additional condition function
@@ -144,6 +144,16 @@ optimizations = [
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),
 
+   # Unnecessary 64-bit comparisons
+   (('ieq', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ieq', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ine', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ine', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ilt', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ilt', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ige', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ige', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('ult', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('ult', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+   (('uge', 'a@64(fits_in_32_bits)', 'b@64(fits_in_32_bits)'), ('uge', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b))),
+
+   (('iand', 'a@64(fits_in_32_bits)', 'b@64'), ('pack_64_2x32_split', ('iand', 
('unpack_64_2x32_split_x', a), ('unpack_64_2x32_split_x', b)), 0)),
+
# 0.0 >= b2f(a)
# b2f(a) <= 0.0
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
@@ -315,6 +325,8 @@ optimizations = [
(('pack_64_2x32_split', ('unpack_64_2x32_split_x', a),
('unpack_64_2x32_split_y', a)), a),
 
+   (('unpack_64_2x32_split_y', 'a(fits_in_32_bits)'), 0),
+
# Byte extraction
(('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
(('iand', 0xff, ('ushr', a, 16)), ('extract_u8', a, 2), 
'!options->lower_extract_byte'),
diff --git a/src/compiler/nir/nir_search_helpers.h 
b/src/compiler/nir/nir_search_helpers.h
index 200f2471f8..c29ea5b9dd 100644
--- a/src/compiler/nir/nir_search_helpers.h
+++ b/src/compiler/nir/nir_search_helpers.h
@@ -115,6 +115,54 @@ is_zero_to_one(nir_alu_instr *instr, unsigned src, 
unsigned num_components,
 }
 
 static inline bool
+fits_in_32_bits(nir_alu_instr *instr, unsigned src, unsigned num_components,
+const uint8_t *swizzle)
+{
+   if (instr->src[src].src.is_ssa &&
+   instr->src[src].src.ssa->parent_instr->type == nir_instr_type_alu) {
+  nir_alu_instr *parent_instr =
+ nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
+
+  switch (parent_instr->op) {
+  case nir_op_pack_64_2x32_split: {
+ nir_const_value *val =
+nir_src_as_const_value(parent_instr->src[1].src);
+
+ if (val && val->u32[0] == 0)
+return true;
+ break;
+  }
+  default:
+ break;
+  }
+
+  return false;
+   }
+
+   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
+
+   if (!val)
+  return false;
+
+   for (unsigned i = 0; i < num_components; i++) {
+  switch (nir_op_infos[instr->op].input_types[src]) {
+  case nir_type_int:
+ if (val->i64[swizzle[i]] != (int)val->i64[swizzle[i]])
+return false;
+ break;
+  case nir_type_uint:
+ if (val->u64[swizzle[i]] != (unsigned)val->u64[swizzle[i]])
+return false;
+ break;
+  default:
+ return false;
+  }
+   }
+
+   return true;
+}
+
+static inline bool
 is_not_const(nir_alu_instr *instr, unsigned src, unsigned num_components,
  const uint8_t *swizzle)
 {
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/20] i965/fs: Add support for ballot32

2017-07-06 Thread Matt Turner
No use in taking a 64-bit value when we know the high 32-bits are zero.
---
 src/intel/compiler/brw_compiler.c | 2 +-
 src/intel/compiler/brw_fs_nir.cpp | 9 +++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index 6970bcb40f..6d93103b3d 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -57,7 +57,7 @@ static const struct nir_shader_compiler_options 
scalar_nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
-   .max_subgroup_size = 64, /* FIXME */
+   .max_subgroup_size = 32,
.max_unroll_iterations = 32,
 };
 
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 25e9b703eb..58d3788c30 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4138,7 +4138,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
-   case nir_intrinsic_ballot: {
+   case nir_intrinsic_ballot:
+   case nir_intrinsic_ballot32: {
   const fs_reg value = retype(get_nir_src(instr->src[0]),
   BRW_REGISTER_TYPE_UD);
   const struct brw_reg flag = retype(brw_flag_reg(0, 0),
@@ -4147,7 +4148,11 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   bld.exec_all().MOV(flag, brw_imm_ud(0u));
   bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
 
-  dest.type = BRW_REGISTER_TYPE_UQ;
+  if (instr->intrinsic == nir_intrinsic_ballot) {
+ dest.type = BRW_REGISTER_TYPE_UQ;
+  } else {
+ dest.type = BRW_REGISTER_TYPE_UD;
+  }
   bld.MOV(dest, flag);
   break;
}
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/20] i965: Normalize types for FBL, FBH, etc

2017-07-06 Thread Matt Turner
Allows the instructions to be compacted. The documentation claims that
some of these only accept UD types, even though the type doesn't change
the operation performed. Just normalize the types to ensure we get
instruction compaction.

The only functional changes are for FBL and CBIT (always use UD types)
and FBH (always use the same types).
---
 src/intel/compiler/brw_fs_generator.cpp   | 14 ++
 src/intel/compiler/brw_vec4_generator.cpp | 12 +---
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index 2ade486705..011eefaa35 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1793,27 +1793,25 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
 break;
   case BRW_OPCODE_BFREV:
  assert(devinfo->gen >= 7);
- /* BFREV only supports UD type for src and dst. */
  brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
-  retype(src[0], BRW_REGISTER_TYPE_UD));
+   retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_FBH:
  assert(devinfo->gen >= 7);
- /* FBH only supports UD type for dst. */
- brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_FBH(p, retype(dst, src[0].type), src[0]);
  break;
   case BRW_OPCODE_FBL:
  assert(devinfo->gen >= 7);
- /* FBL only supports UD type for dst. */
- brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_LZD:
  brw_LZD(p, dst, src[0]);
  break;
   case BRW_OPCODE_CBIT:
  assert(devinfo->gen >= 7);
- /* CBIT only supports UD type for dst. */
- brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD),
+  retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_ADDC:
  assert(devinfo->gen >= 7);
diff --git a/src/intel/compiler/brw_vec4_generator.cpp 
b/src/intel/compiler/brw_vec4_generator.cpp
index 334933d15a..6a3a0a080c 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1646,27 +1646,25 @@ generate_code(struct brw_codegen *p,
 
   case BRW_OPCODE_BFREV:
  assert(devinfo->gen >= 7);
- /* BFREV only supports UD type for src and dst. */
  brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_FBH:
  assert(devinfo->gen >= 7);
- /* FBH only supports UD type for dst. */
- brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_FBH(p, retype(dst, src[0].type), src[0]);
  break;
   case BRW_OPCODE_FBL:
  assert(devinfo->gen >= 7);
- /* FBL only supports UD type for dst. */
- brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_LZD:
  brw_LZD(p, dst, src[0]);
  break;
   case BRW_OPCODE_CBIT:
  assert(devinfo->gen >= 7);
- /* CBIT only supports UD type for dst. */
- brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD),
+  retype(src[0], BRW_REGISTER_TYPE_UD));
  break;
   case BRW_OPCODE_ADDC:
  assert(devinfo->gen >= 7);
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/20] i965: Enable ARB_shader_ballot on Gen8+

2017-07-06 Thread Matt Turner
---
 docs/features.txt| 2 +-
 docs/relnotes/17.2.0.html| 1 +
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index ec78447e88..1f628e1c03 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -292,7 +292,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_sample_locations   not started
   GL_ARB_seamless_cubemap_per_texture   DONE (i965, nvc0, 
radeonsi, r600, softpipe, swr)
   GL_ARB_shader_atomic_counter_ops  DONE (i965/gen7+, 
nvc0, radeonsi, softpipe)
-  GL_ARB_shader_ballot  DONE (nvc0, radeonsi)
+  GL_ARB_shader_ballot  DONE (i965/gen8+, 
nvc0, radeonsi)
   GL_ARB_shader_clock   DONE (i965/gen7+, 
nv50, nvc0, radeonsi)
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
   GL_ARB_shader_group_vote  DONE (i965, nvc0, 
radeonsi)
diff --git a/docs/relnotes/17.2.0.html b/docs/relnotes/17.2.0.html
index c1e79ab6cb..ec4568db4d 100644
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 GL_ARB_bindless_texture on radeonsi
 GL_ARB_post_depth_coverage on nvc0 (GM200+)
+GL_ARB_shader_ballot on i965/gen8+
 GL_ARB_shader_group_vote on i965 (with a no-op vec4 implementation)
 GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)
 GL_AMD_vertex_shader_layer on nvc0 (GM200+)
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 3270e655ee..b91bbdc8d9 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -271,6 +271,7 @@ intelInitExtensions(struct gl_context *ctx)
 
if (brw->gen >= 8) {
   ctx->Extensions.ARB_gpu_shader_int64 = true;
+  ctx->Extensions.ARB_shader_ballot = true; /* requires 
ARB_gpu_shader_int64 */
   ctx->Extensions.ARB_ES3_2_compatibility = true;
}
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/20] nir: Optimize find_lsb/imsb/umsb error checks

2017-07-06 Thread Matt Turner
Two of the ARB_shader_ballot piglit tests hit the find_lsb case;
removing some of the noise allowed me to better debug the test when it
was failing.
---
 src/compiler/nir/nir_opt_algebraic.py | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index fe6e33d313..df5854270c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -357,6 +357,17 @@ optimizations = [
(('~fadd', '#a', ('fadd', b, '#c')), ('fadd', ('fadd', a, c), b)),
(('iadd', '#a', ('iadd', b, '#c')), ('iadd', ('iadd', a, c), b)),
 
+   # By definition...
+   (('bcsel', ('ige', ('find_lsb', a), 0), ('find_lsb', a), -1), ('find_lsb', 
a)),
+   (('bcsel', ('ige', ('ifind_msb', a), 0), ('ifind_msb', a), -1), 
('ifind_msb', a)),
+   (('bcsel', ('ige', ('ufind_msb', a), 0), ('ufind_msb', a), -1), 
('ufind_msb', a)),
+
+   (('bcsel', ('ine', a, 0), ('find_lsb', a), -1), ('find_lsb', a)),
+   (('bcsel', ('ine', a, 0), ('ifind_msb', a), -1), ('ifind_msb', a)),
+   (('bcsel', ('ine', a, 0), ('ufind_msb', a), -1), ('ufind_msb', a)),
+
+   (('bcsel', ('ine', a, -1), ('ifind_msb', a), -1), ('ifind_msb', a)),
+
# Misc. lowering
(('fmod@32', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b, 
'options->lower_fmod32'),
(('fmod@64', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b, 
'options->lower_fmod64'),
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/20] i965/fs: Handle explicit flag sources in flags_read()

2017-07-06 Thread Matt Turner
The implementations of the ARB_shader_ballot intrinsics will explicitly
read the flag as a source register.
---
 src/intel/compiler/brw_fs.cpp | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 97908a4563..de3aafafb1 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -862,9 +862,6 @@ namespace {
 unsigned
 fs_inst::flags_read(const gen_device_info *devinfo) const
 {
-   /* XXX - This doesn't consider explicit uses of the flag register as source
-*   region.
-*/
if (predicate == BRW_PREDICATE_ALIGN1_ANYV ||
predicate == BRW_PREDICATE_ALIGN1_ALLV) {
   /* The vertical predication modes combine corresponding bits from
@@ -875,7 +872,20 @@ fs_inst::flags_read(const gen_device_info *devinfo) const
} else if (predicate) {
   return flag_mask(this);
} else {
-  return 0;
+  unsigned mask = 0;
+  for (int i = 0; i < sources; i++) {
+ if (src[i].file == ARF) {
+if (src[i].nr == BRW_ARF_FLAG + 0 && src[i].subnr == 0)
+   mask |= 0b0001;
+if (src[i].nr == BRW_ARF_FLAG + 0 && src[i].subnr == 1)
+   mask |= 0b0010;
+if (src[i].nr == BRW_ARF_FLAG + 1 && src[i].subnr == 0)
+   mask |= 0b0100;
+if (src[i].nr == BRW_ARF_FLAG + 1 && src[i].subnr == 1)
+   mask |= 0b1000;
+ }
+  }
+  return mask;
}
 }
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/20] i965/fs: Implement ARB_shader_ballot operations

2017-07-06 Thread Matt Turner
---
 src/intel/compiler/brw_fs_nir.cpp | 41 +++
 src/intel/compiler/brw_nir.c  |  1 +
 2 files changed, 42 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 17f35e081d..25e9b703eb 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4075,6 +4075,10 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_load_subgroup_size:
+  bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), brw_imm_d(dispatch_width));
+  break;
+
case nir_intrinsic_load_subgroup_invocation: {
   fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
   dest = retype(dest, BRW_REGISTER_TYPE_UD);
@@ -4090,6 +4094,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_load_subgroup_eq_mask:
+   case nir_intrinsic_load_subgroup_ge_mask:
+   case nir_intrinsic_load_subgroup_gt_mask:
+   case nir_intrinsic_load_subgroup_le_mask:
+   case nir_intrinsic_load_subgroup_lt_mask:
+  unreachable("not reached");
+
case nir_intrinsic_vote_any: {
   const fs_builder ubld = bld.exec_all();
   ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
@@ -4126,6 +4137,36 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
 bld.SEL(dest, dest, brw_imm_d(0)));
   break;
}
+
+   case nir_intrinsic_ballot: {
+  const fs_reg value = retype(get_nir_src(instr->src[0]),
+  BRW_REGISTER_TYPE_UD);
+  const struct brw_reg flag = retype(brw_flag_reg(0, 0),
+ BRW_REGISTER_TYPE_UD);
+
+  bld.exec_all().MOV(flag, brw_imm_ud(0u));
+  bld.CMP(bld.null_reg_ud(), value, brw_imm_ud(0u), BRW_CONDITIONAL_NZ);
+
+  dest.type = BRW_REGISTER_TYPE_UQ;
+  bld.MOV(dest, flag);
+  break;
+   }
+
+   case nir_intrinsic_read_invocation: {
+  const fs_reg value = get_nir_src(instr->src[0]);
+  const fs_reg invocation = get_nir_src(instr->src[1]);
+  bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+  bld.emit_uniformize(value, invocation));
+  break;
+   }
+
+   case nir_intrinsic_read_first_invocation: {
+  const fs_reg value = get_nir_src(instr->src[0]);
+  bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
+  bld.emit_uniformize(value));
+  break;
+   }
+
default:
   unreachable("unknown intrinsic");
}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index cede77fbc8..ce21c01669 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -620,6 +620,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
OPT(nir_lower_tex, _options);
OPT(nir_normalize_cubemap_coords);
+   OPT(nir_lower_read_invocation_to_scalar);
 
OPT(nir_lower_global_vars_to_local);
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/20] nir: Add a ballot32 intrinsic

2017-07-06 Thread Matt Turner
Some hardware, like i965, doesn't support group sizes greater than 32.
In that case, we can use the ballot32 intrinsic instead, which will
simplify our code generation.
---
 src/compiler/nir/nir.h|  2 ++
 src/compiler/nir/nir_intrinsics.h |  3 +++
 src/compiler/nir/nir_opt_intrinsics.c | 17 +
 src/intel/compiler/brw_compiler.c |  1 +
 4 files changed, 23 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3591048574..a3d8df0b19 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1844,6 +1844,8 @@ typedef struct nir_shader_compiler_options {
 */
bool use_interpolated_input_intrinsics;
 
+   unsigned max_subgroup_size;
+
unsigned max_unroll_iterations;
 } nir_shader_compiler_options;
 
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 96ecfbc338..61ad9f6643 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -106,6 +106,9 @@ INTRINSIC(ballot, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMIN
 INTRINSIC(read_invocation, 2, ARR(0, 1), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
 INTRINSIC(read_first_invocation, 1, ARR(0), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
 
+/* Equivalent to ballot, but only returns the low 32-bits */
+INTRINSIC(ballot32, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+
 /*
  * Memory barrier with semantics analogous to the compute shader
  * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
index 0cd75d8b28..023e169a99 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -69,6 +69,23 @@ opt_intrinsics_impl(nir_function_impl *impl)
 replacement = nir_imm_int(, NIR_TRUE);
 break;
  }
+ case nir_intrinsic_ballot: {
+assert(b.shader->options->max_subgroup_size != 0);
+if (b.shader->options->max_subgroup_size > 32)
+   continue;
+
+nir_intrinsic_instr *ballot =
+   nir_intrinsic_instr_create(b.shader, nir_intrinsic_ballot32);
+nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+ballot->src[0] = intrin->src[0];
+
+nir_builder_instr_insert(, >instr);
+
+replacement = nir_pack_64_2x32_split(,
+ >dest.ssa,
+ nir_imm_int(, 0));
+break;
+ }
  default:
 break;
  }
diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index 9ad5d8e4ee..6970bcb40f 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options 
scalar_nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
+   .max_subgroup_size = 64, /* FIXME */
.max_unroll_iterations = 32,
 };
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/20] i965/fs: Do not move MOVs writing the flag outside of control flow

2017-07-06 Thread Matt Turner
The implementation of ballotARB() will start by zeroing the flags
register. So, doing something like

if (gl_SubGroupInvocationARB % 2u == 0u) {
... = ballotARB(true);
[...]
} else {
... = ballotARB(true);
[...]
}

(like fs-ballot-if-else.shader_test does) would generate identical MOVs
to the same destination (the flag register!), and we definitely do not
want to pull that out of the control flow.
---
 src/intel/compiler/brw_fs_sel_peephole.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_sel_peephole.cpp 
b/src/intel/compiler/brw_fs_sel_peephole.cpp
index 8cd897f72e..fd02792beb 100644
--- a/src/intel/compiler/brw_fs_sel_peephole.cpp
+++ b/src/intel/compiler/brw_fs_sel_peephole.cpp
@@ -68,7 +68,8 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst 
*else_mov[MAX_MOVS],
 {
int then_movs = 0;
foreach_inst_in_block(fs_inst, inst, then_block) {
-  if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV)
+  if (then_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV ||
+  inst->flags_written())
  break;
 
   then_mov[then_movs] = inst;
@@ -77,7 +78,8 @@ count_movs_from_if(fs_inst *then_mov[MAX_MOVS], fs_inst 
*else_mov[MAX_MOVS],
 
int else_movs = 0;
foreach_inst_in_block(fs_inst, inst, else_block) {
-  if (else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV)
+  if (else_movs == MAX_MOVS || inst->opcode != BRW_OPCODE_MOV ||
+  inst->flags_written())
  break;
 
   else_mov[else_movs] = inst;
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/20] i965: Enable ARB_shader_group_vote

2017-07-06 Thread Matt Turner
---
 docs/features.txt| 2 +-
 docs/relnotes/17.2.0.html| 1 +
 src/mesa/drivers/dri/i965/intel_extensions.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index 79b71de543..ec78447e88 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -295,7 +295,7 @@ Khronos, ARB, and OES extensions that are not part of any 
OpenGL or OpenGL ES ve
   GL_ARB_shader_ballot  DONE (nvc0, radeonsi)
   GL_ARB_shader_clock   DONE (i965/gen7+, 
nv50, nvc0, radeonsi)
   GL_ARB_shader_draw_parameters DONE (i965, nvc0, 
radeonsi)
-  GL_ARB_shader_group_vote  DONE (nvc0, radeonsi)
+  GL_ARB_shader_group_vote  DONE (i965, nvc0, 
radeonsi)
   GL_ARB_shader_stencil_export  DONE (i965/gen9+, 
radeonsi, softpipe, llvmpipe, swr)
   GL_ARB_shader_viewport_layer_arrayDONE (i965/gen6+, 
nvc0, radeonsi)
   GL_ARB_sparse_buffer  DONE (radeonsi/CIK+)
diff --git a/docs/relnotes/17.2.0.html b/docs/relnotes/17.2.0.html
index 5859274d12..c1e79ab6cb 100644
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with 
certain drivers.
 
 GL_ARB_bindless_texture on radeonsi
 GL_ARB_post_depth_coverage on nvc0 (GM200+)
+GL_ARB_shader_group_vote on i965 (with a no-op vec4 implementation)
 GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)
 GL_AMD_vertex_shader_layer on nvc0 (GM200+)
 GL_AMD_vertex_shader_viewport_index on nvc0 (GM200+)
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 7921b12c16..3270e655ee 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -69,6 +69,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.ARB_seamless_cube_map = true;
ctx->Extensions.ARB_shader_bit_encoding = true;
ctx->Extensions.ARB_shader_draw_parameters = true;
+   ctx->Extensions.ARB_shader_group_vote = true;
ctx->Extensions.ARB_shader_texture_lod = true;
ctx->Extensions.ARB_shading_language_packing = true;
ctx->Extensions.ARB_shadow = true;
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/20] nir: Add system values from ARB_shader_ballot

2017-07-06 Thread Matt Turner
We already had a channel_num system value, which I'm renaming to
subgroup_invocation to match the rest of the new system values.

Note that while ballotARB(true) will return zeros in the high 32-bits on
systems where gl_SubGroupSizeARB <= 32, the gl_SubGroup??MaskARB
variables do not consider whether channels are enabled. See issue (1) of
ARB_shader_ballot.
---
 src/compiler/nir/nir.c |  4 
 src/compiler/nir/nir_intrinsics.h  |  8 +++-
 src/compiler/nir/nir_lower_system_values.c | 28 
 src/intel/compiler/brw_fs_nir.cpp  |  2 +-
 src/intel/compiler/brw_nir_intrinsics.c|  4 ++--
 5 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 491b908396..9827e129ca 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1908,6 +1908,10 @@ nir_intrinsic_from_system_value(gl_system_value val)
   return nir_intrinsic_load_helper_invocation;
case SYSTEM_VALUE_VIEW_INDEX:
   return nir_intrinsic_load_view_index;
+   case SYSTEM_VALUE_SUBGROUP_SIZE:
+  return nir_intrinsic_load_subgroup_size;
+   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
+  return nir_intrinsic_load_subgroup_invocation;
default:
   unreachable("system value does not directly correspond to intrinsic");
}
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 6c6ba4cf59..96ecfbc338 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -344,10 +344,16 @@ SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
 SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
 SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(alpha_ref_float, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_le_mask, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_lt_mask, 1, 0, xx, xx, xx)
 
 /* Blend constant color values.  Float values are clamped. */
 SYSTEM_VALUE(blend_const_color_r_float, 1, 0, xx, xx, xx)
diff --git a/src/compiler/nir/nir_lower_system_values.c 
b/src/compiler/nir/nir_lower_system_values.c
index 810100a081..faf0c3c9da 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -116,6 +116,34 @@ convert_block(nir_block *block, nir_builder *b)
nir_load_base_instance(b));
  break;
 
+  case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+  case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+  case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+  case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+  case SYSTEM_VALUE_SUBGROUP_LT_MASK: {
+ nir_ssa_def *count = nir_load_subgroup_invocation(b);
+
+ switch (var->data.location) {
+ case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, 1ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_GE_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, ~0ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_GT_MASK:
+sysval = nir_ishl(b, nir_imm_int64(b, ~1ull), count);
+break;
+ case SYSTEM_VALUE_SUBGROUP_LE_MASK:
+sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~1ull), count));
+break;
+ case SYSTEM_VALUE_SUBGROUP_LT_MASK:
+sysval = nir_inot(b, nir_ishl(b, nir_imm_int64(b, ~0ull), count));
+break;
+ default:
+unreachable("you seriously can't tell this is unreachable?");
+ }
+  }
+
   default:
  break;
   }
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index 264398f38e..17f35e081d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4075,7 +4075,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
-   case nir_intrinsic_load_channel_num: {
+   case nir_intrinsic_load_subgroup_invocation: {
   fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
   dest = retype(dest, BRW_REGISTER_TYPE_UD);
   const fs_builder allbld8 = bld.group(8, 0).exec_all();
diff --git a/src/intel/compiler/brw_nir_intrinsics.c 
b/src/intel/compiler/brw_nir_intrinsics.c
index d63570fa2a..abbbc6f93e 100644
--- a/src/intel/compiler/brw_nir_intrinsics.c
+++ b/src/intel/compiler/brw_nir_intrinsics.c
@@ -88,10 +88,10 @@ lower_cs_intrinsics_convert_block(struct 
lower_intrinsics_state *state,
  /* We construct the local invocation index from:
   *
   *gl_LocalInvocationIndex =
-   

[Mesa-dev] [PATCH 08/20] nir: Add intrinsics from ARB_shader_ballot

2017-07-06 Thread Matt Turner
---
 src/compiler/glsl/glsl_to_nir.cpp | 45 +++
 src/compiler/nir/nir_intrinsics.h | 13 +++
 2 files changed, 58 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 43d7e07042..23632f27c2 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -808,6 +808,15 @@ nir_visitor::visit(ir_call *ir)
   case ir_intrinsic_vote_eq:
  op = nir_intrinsic_vote_eq;
  break;
+  case ir_intrinsic_ballot:
+ op = nir_intrinsic_ballot;
+ break;
+  case ir_intrinsic_read_invocation:
+ op = nir_intrinsic_read_invocation;
+ break;
+  case ir_intrinsic_read_first_invocation:
+ op = nir_intrinsic_read_first_invocation;
+ break;
   default:
  unreachable("not reached");
   }
@@ -1150,6 +1159,42 @@ nir_visitor::visit(ir_call *ir)
  nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
 
  instr->variables[0] = evaluate_deref(>instr, ir->return_deref);
+ ir_instruction *value = (ir_instruction 
*)ir->actual_parameters.get_head();
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
+
+ nir_builder_instr_insert(, >instr);
+ break;
+  }
+
+  case nir_intrinsic_ballot: {
+ nir_ssa_dest_init(>instr, >dest,
+   ir->return_deref->type->vector_elements, 64, NULL);
+
+ ir_instruction *value = (ir_instruction 
*)ir->actual_parameters.get_head();
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
+
+ nir_builder_instr_insert(, >instr);
+ break;
+  }
+  case nir_intrinsic_read_invocation: {
+ nir_ssa_dest_init(>instr, >dest,
+   ir->return_deref->type->vector_elements, 32, NULL);
+ instr->num_components = ir->return_deref->type->vector_elements;
+
+ ir_instruction *value = (ir_instruction 
*)ir->actual_parameters.get_head();
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
+
+ ir_instruction *invocation = (ir_instruction *)value->get_next();
+ instr->src[1] = 
nir_src_for_ssa(evaluate_rvalue(invocation->as_rvalue()));
+
+ nir_builder_instr_insert(, >instr);
+ break;
+  }
+  case nir_intrinsic_read_first_invocation: {
+ nir_ssa_dest_init(>instr, >dest,
+   ir->return_deref->type->vector_elements, 32, NULL);
+ instr->num_components = ir->return_deref->type->vector_elements;
 
  ir_instruction *value = (ir_instruction 
*)ir->actual_parameters.get_head();
  instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 8a838df027..6c6ba4cf59 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -94,6 +94,19 @@ BARRIER(memory_barrier)
 INTRINSIC(shader_clock, 0, ARR(0), true, 2, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
 
 /*
+ * Shader ballot intrinsics with semantics analogous to the
+ *
+ *ballotARB()
+ *readInvocationARB()
+ *readFirstInvocationARB()
+ *
+ * GLSL functions from ARB_shader_ballot.
+ */
+INTRINSIC(ballot, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(read_invocation, 2, ARR(0, 1), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(read_first_invocation, 1, ARR(0), true, 0, 0, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
  * Memory barrier with semantics analogous to the compute shader
  * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
  * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/20] i965/fs: Implement ARB_shader_group_vote operations

2017-07-06 Thread Matt Turner
---
 src/intel/compiler/brw_fs_nir.cpp | 36 
 1 file changed, 36 insertions(+)

diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index a9dce42c38..264398f38e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4090,6 +4090,42 @@ fs_visitor::nir_emit_intrinsic(const fs_builder , 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_vote_any: {
+  const fs_builder ubld = bld.exec_all();
+  ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0));
+  bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), 
BRW_CONDITIONAL_NZ);
+  bld.MOV(dest, brw_imm_d(-1));
+  set_predicate(dispatch_width == 8 ?
+BRW_PREDICATE_ALIGN1_ANY8H :
+BRW_PREDICATE_ALIGN1_ANY16H,
+bld.SEL(dest, dest, brw_imm_d(0)));
+  break;
+   }
+   case nir_intrinsic_vote_all: {
+  const fs_builder ubld = bld.exec_all();
+  ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0x));
+  bld.CMP(bld.null_reg_d(), get_nir_src(instr->src[0]), brw_imm_d(0), 
BRW_CONDITIONAL_NZ);
+  bld.MOV(dest, brw_imm_d(-1));
+  set_predicate(dispatch_width == 8 ?
+BRW_PREDICATE_ALIGN1_ALL8H :
+BRW_PREDICATE_ALIGN1_ALL16H,
+bld.SEL(dest, dest, brw_imm_d(0)));
+  break;
+   }
+   case nir_intrinsic_vote_eq: {
+  fs_reg value = get_nir_src(instr->src[0]);
+  fs_reg uniformized = bld.emit_uniformize(value);
+
+  const fs_builder ubld = bld.exec_all();
+  ubld.MOV(brw_flag_reg(0, 0), brw_imm_uw(0x));
+  bld.CMP(bld.null_reg_d(), value, uniformized, BRW_CONDITIONAL_Z);
+  bld.MOV(dest, brw_imm_d(-1));
+  set_predicate(dispatch_width == 8 ?
+BRW_PREDICATE_ALIGN1_ALL8H :
+BRW_PREDICATE_ALIGN1_ALL16H,
+bld.SEL(dest, dest, brw_imm_d(0)));
+  break;
+   }
default:
   unreachable("unknown intrinsic");
}
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/20] i965/fs: Allow emit_uniformize() to broadcast a specific channel

2017-07-06 Thread Matt Turner
This function will be used to implement read_invocation (by specifying a
specific channel) and read_first_invocation (by not specifying a
channel).
---
 src/intel/compiler/brw_fs_builder.h | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_builder.h 
b/src/intel/compiler/brw_fs_builder.h
index 87394bc17b..144ee54916 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -393,7 +393,7 @@ namespace brw {
* Copy any live channel from \p src to the first channel of the result.
*/
   src_reg
-  emit_uniformize(const src_reg ) const
+  emit_uniformize(const src_reg , dst_reg chan_index = dst_reg()) const
   {
  /* FIXME: We use a vector chan_index and dst to allow constant and
   * copy propagration to move result all the way into the consuming
@@ -403,10 +403,13 @@ namespace brw {
   * should go back to scalar destinations here.
   */
  const fs_builder ubld = exec_all();
- const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
  const dst_reg dst = vgrf(src.type);
 
- ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ if (chan_index.file == BAD_FILE) {
+chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ }
+
  ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 
0));
 
  return src_reg(component(dst, 0));
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/20] nir: Add pass to scalarize read_invocation/read_first_invocation

2017-07-06 Thread Matt Turner
i965 will want these to be scalar operations.
---
 src/compiler/Makefile.sources  |   1 +
 src/compiler/nir/nir.h |   2 +-
 .../nir/nir_lower_read_invocation_to_scalar.c  | 112 +
 3 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 src/compiler/nir/nir_lower_read_invocation_to_scalar.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index b0f1c14b87..95f64f7a91 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -229,6 +229,7 @@ NIR_FILES = \
nir/nir_lower_passthrough_edgeflags.c \
nir/nir_lower_patch_vertices.c \
nir/nir_lower_phis_to_scalar.c \
+   nir/nir_lower_read_invocation_to_scalar.c \
nir/nir_lower_regs_to_ssa.c \
nir/nir_lower_returns.c \
nir/nir_lower_samplers.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 401c41f155..3591048574 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2433,7 +2433,7 @@ bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
 bool nir_lower_vec_to_movs(nir_shader *shader);
 bool nir_lower_alu_to_scalar(nir_shader *shader);
 bool nir_lower_load_const_to_scalar(nir_shader *shader);
-
+bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
 bool nir_lower_phis_to_scalar(nir_shader *shader);
 void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask);
 
diff --git a/src/compiler/nir/nir_lower_read_invocation_to_scalar.c 
b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
new file mode 100644
index 00..edac7f5271
--- /dev/null
+++ b/src/compiler/nir/nir_lower_read_invocation_to_scalar.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_lower_read_invocation_to_scalar.c
+ *
+ * Replaces nir_intrinsic_read_invocation/nir_intrinsic_read_first_invocation
+ * operations with num_components != 1 with individual per-channel operations.
+ */
+
+static void
+lower_read_invocation_to_scalar(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+   b->cursor = nir_before_instr(>instr);
+
+   nir_ssa_def *value = nir_ssa_for_src(b, intrin->src[0], 
intrin->num_components);
+   nir_ssa_def *reads[4];
+
+   for (unsigned i = 0; i < intrin->num_components; i++) {
+  nir_intrinsic_instr *chan_intrin =
+ nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
+  nir_ssa_dest_init(_intrin->instr, _intrin->dest,
+1, intrin->dest.ssa.bit_size, NULL);
+  chan_intrin->num_components = 1;
+
+  /* value */
+  chan_intrin->src[0] = nir_src_for_ssa(nir_channel(b, value, i));
+  /* invocation */
+  if (intrin->intrinsic == nir_intrinsic_read_invocation)
+ chan_intrin->src[1] = intrin->src[1];
+
+  nir_builder_instr_insert(b, _intrin->instr);
+
+  reads[i] = _intrin->dest.ssa;
+   }
+
+   nir_ssa_def_rewrite_uses(>dest.ssa,
+nir_src_for_ssa(nir_vec(b, reads,
+intrin->num_components)));
+   nir_instr_remove(>instr);
+}
+
+static bool
+nir_lower_read_invocation_to_scalar_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+   nir_builder b;
+   nir_builder_init(, impl);
+
+   nir_foreach_block(block, impl) {
+  nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ if (intrin->num_components == 1)
+continue;
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_read_first_invocation:
+lower_read_invocation_to_scalar(, intrin);
+progress = true;
+ 

[Mesa-dev] [PATCH 05/20] i965/fs: Handle explicit flag destinations in flags_written()

2017-07-06 Thread Matt Turner
The implementations of the ARB_shader_group_vote intrinsics will
explicitly write the flag as the destination register.
---
 src/intel/compiler/brw_fs.cpp | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 43b6e34204..97908a4563 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -890,9 +890,17 @@ fs_inst::flags_written() const
 opcode != BRW_OPCODE_WHILE)) ||
opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
   return flag_mask(this);
-   } else {
-  return 0;
+   } else if (dst.file == ARF) {
+  if (dst.nr == BRW_ARF_FLAG + 0 && dst.subnr == 0)
+ return 0b0001;
+  if (dst.nr == BRW_ARF_FLAG + 0 && dst.subnr == 1)
+ return 0b0010;
+  if (dst.nr == BRW_ARF_FLAG + 1 && dst.subnr == 0)
+ return 0b0100;
+  if (dst.nr == BRW_ARF_FLAG + 1 && dst.subnr == 1)
+ return 0b1000;
}
+   return 0;
 }
 
 /**
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/20] nir: Add pass to optimize intrinsics

2017-07-06 Thread Matt Turner
Specifically, constant fold intrinsics from ARB_shader_group_vote, but I
suspect it'll be useful for other things in the future.
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/nir.h|   2 +
 src/compiler/nir/nir_opt_intrinsics.c | 102 ++
 src/intel/compiler/brw_nir.c  |   1 +
 4 files changed, 106 insertions(+)
 create mode 100644 src/compiler/nir/nir_opt_intrinsics.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index d3447fb9de..b0f1c14b87 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -254,6 +254,7 @@ NIR_FILES = \
nir/nir_opt_gcm.c \
nir/nir_opt_global_to_local.c \
nir/nir_opt_if.c \
+   nir/nir_opt_intrinsics.c \
nir/nir_opt_loop_unroll.c \
nir/nir_opt_move_comparisons.c \
nir/nir_opt_peephole_select.c \
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index c41b0dc931..44a1d0887e 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2644,6 +2644,8 @@ bool nir_opt_gcm(nir_shader *shader, bool value_number);
 
 bool nir_opt_if(nir_shader *shader);
 
+bool nir_opt_intrinsics(nir_shader *shader);
+
 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
 
 bool nir_opt_move_comparisons(nir_shader *shader);
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
new file mode 100644
index 00..b63449b4fe
--- /dev/null
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/**
+ * \file nir_opt_intrinsics.c
+ */
+
+static bool
+opt_intrinsics_impl(nir_function_impl *impl)
+{
+   nir_builder b;
+   nir_builder_init(, impl);
+   bool progress = false;
+
+   nir_foreach_block(block, impl) {
+  nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ nir_ssa_def *replacement = NULL;
+ b.cursor = nir_before_instr(instr);
+
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_vote_any: {
+nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+if (!val)
+   continue;
+
+replacement = nir_imm_int(, val->i32[0]);
+break;
+ }
+ case nir_intrinsic_vote_all: {
+nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+if (!val)
+   continue;
+
+replacement = nir_imm_int(, val->i32[0]);
+break;
+ }
+ case nir_intrinsic_vote_eq: {
+nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
+if (!val)
+   continue;
+
+replacement = nir_imm_int(, NIR_TRUE);
+break;
+ }
+ default:
+break;
+ }
+
+ if (!replacement)
+continue;
+
+ nir_ssa_def_rewrite_uses(>dest.ssa,
+  nir_src_for_ssa(replacement));
+ nir_instr_remove(instr);
+ nir_metadata_preserve(impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ progress = true;
+  }
+   }
+
+   return progress;
+}
+
+/**
+ * Runs opt_intrinsics_impl() on every function in \p shader that has an
+ * implementation.
+ *
+ * \return true if opt_intrinsics_impl() reported progress for at least one
+ *         function, false otherwise.
+ */
+bool
+nir_opt_intrinsics(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl)
+         progress |= opt_intrinsics_impl(function->impl);
+   }
+
+   /* Report the accumulated result; returning a hard-coded false would hide
+    * every replacement made above from the caller.
+    */
+   return progress;
+}
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 49d3cf3656..cede77fbc8 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ 

[Mesa-dev] [PATCH 04/20] i965/vec4: Lower ARB_shader_group_vote intrinsics

2017-07-06 Thread Matt Turner
I don't expect anyone is going to care about using this in vec4 programs
(vertex/tessellation/geometry on Gen6/7), and no one has come up with a good
way to implement it, much less test it.
---
 src/intel/compiler/brw_compiler.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/intel/compiler/brw_compiler.c 
b/src/intel/compiler/brw_compiler.c
index f31f29d2ad..9ad5d8e4ee 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -78,6 +78,9 @@ static const struct nir_shader_compiler_options 
vector_nir_options = {
.lower_unpack_unorm_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+   .lower_vote_any = true,
+   .lower_vote_all = true,
+   .lower_vote_eq = true,
.max_unroll_iterations = 32,
 };
 
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/20] nir: Support lowering vote intrinsics

2017-07-06 Thread Matt Turner
... trivially (as allowed by the spec!) by reusing the existing
nir_opt_intrinsics code.
---
 src/compiler/nir/nir.h| 4 
 src/compiler/nir/nir_opt_intrinsics.c | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 44a1d0887e..401c41f155 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1821,6 +1821,10 @@ typedef struct nir_shader_compiler_options {
bool lower_extract_byte;
bool lower_extract_word;
 
+   bool lower_vote_any;
+   bool lower_vote_all;
+   bool lower_vote_eq;
+
/**
 * Does the driver support real 32-bit integers?  (Otherwise, integers
 * are simulated by floats.)
diff --git a/src/compiler/nir/nir_opt_intrinsics.c 
b/src/compiler/nir/nir_opt_intrinsics.c
index b63449b4fe..0cd75d8b28 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -47,7 +47,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
  switch (intrin->intrinsic) {
  case nir_intrinsic_vote_any: {
 nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
-if (!val)
+if (!val || b.shader->options->lower_vote_any)
continue;
 
 replacement = nir_imm_int(, val->i32[0]);
@@ -55,7 +55,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
  }
  case nir_intrinsic_vote_all: {
 nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
-if (!val)
+if (!val || b.shader->options->lower_vote_all)
continue;
 
 replacement = nir_imm_int(, val->i32[0]);
@@ -63,7 +63,7 @@ opt_intrinsics_impl(nir_function_impl *impl)
  }
  case nir_intrinsic_vote_eq: {
 nir_const_value *val = nir_src_as_const_value(intrin->src[0]);
-if (!val)
+if (!val || b.shader->options->lower_vote_eq)
continue;
 
 replacement = nir_imm_int(, NIR_TRUE);
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/20] nir: Add intrinsics from ARB_shader_group_vote

2017-07-06 Thread Matt Turner
These are intrinsics rather than opcodes, because they operate across
channels.
---
 src/compiler/glsl/glsl_to_nir.cpp | 22 ++
 src/compiler/nir/nir_intrinsics.h |  5 +
 2 files changed, 27 insertions(+)

diff --git a/src/compiler/glsl/glsl_to_nir.cpp 
b/src/compiler/glsl/glsl_to_nir.cpp
index 21530040a9..43d7e07042 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -799,6 +799,15 @@ nir_visitor::visit(ir_call *ir)
   case ir_intrinsic_shared_atomic_comp_swap:
  op = nir_intrinsic_shared_atomic_comp_swap;
  break;
+  case ir_intrinsic_vote_any:
+ op = nir_intrinsic_vote_any;
+ break;
+  case ir_intrinsic_vote_all:
+ op = nir_intrinsic_vote_all;
+ break;
+  case ir_intrinsic_vote_eq:
+ op = nir_intrinsic_vote_eq;
+ break;
   default:
  unreachable("not reached");
   }
@@ -1135,6 +1144,19 @@ nir_visitor::visit(ir_call *ir)
  nir_builder_instr_insert(, >instr);
  break;
   }
+  case nir_intrinsic_vote_any:
+  case nir_intrinsic_vote_all:
+  case nir_intrinsic_vote_eq: {
+ nir_ssa_dest_init(>instr, >dest, 1, 32, NULL);
+
+ instr->variables[0] = evaluate_deref(>instr, ir->return_deref);
+
+ ir_instruction *value = (ir_instruction 
*)ir->actual_parameters.get_head();
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value->as_rvalue()));
+
+ nir_builder_instr_insert(, >instr);
+ break;
+  }
   default:
  unreachable("not reached");
   }
diff --git a/src/compiler/nir/nir_intrinsics.h 
b/src/compiler/nir/nir_intrinsics.h
index 21e7d904b7..8a838df027 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -107,6 +107,11 @@ BARRIER(memory_barrier_shared)
 /** A conditional discard, with a single boolean source. */
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
 
+/** ARB_shader_group_vote intrinsics */
+INTRINSIC(vote_any, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(vote_all, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(vote_eq,  1, ARR(1), true, 1, 1, 0, xx, xx, xx, 
NIR_INTRINSIC_CAN_ELIMINATE)
+
 /**
  * Basic Geometry Shader intrinsics.
  *
-- 
2.13.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] MOCS versioning

2017-07-06 Thread Ben Widawsky
Copying the kernel commit message:

Starting with GEN9, Memory Object Control State (MOCS) becomes an index
into a table as opposed to the direct programming within the command.
The table has 62 usable entries (ie 6 bits can represent all settings),
and each buffer type may use one of these 62 entries to describe
cacheability type, and age (and some other less useful fields).

Because we hadn't dealt with MOCS settings like this, we didn't think
ahead too well and have ended up with a mess for GEN9 (and soon GEN10)
platforms. The plan for future platforms is that the ideal MOCS
settings will be determined, defined, and written in the public PRMs.
After this point, the i915.ko will absorb these settings and sometime
afterwards flip the alpha switch. All driver releases without the final
MOCS table must be considered alpha. Here on, userspace can assume the
MOCS table is definitively done. There will be some reserved entries for
'oh shit' scenarios. This avoids versioning the MOCS table which leaves
somewhat of a mess in userspace trying to handle arbitrarily many MOCS
versions.

But we do have a mess on GEN9. In the beginning, the MOCS table entries
were pre-populated by the hardware based on estimations made prior to
tapeout and we could just use that. Subsequently much performance tuning
was done to determine optimal settings that the i915 driver should load
on top of the hardware defaults. That was posted last as v6 of the
original per-engine MOCS settings:
https://patchwork.freedesktop.org/patch/53237/. Since the MOCS table is
not context saved/restored, it isn't feasible to let userspace upload
its own MOCS table. After a good amount of debate, it was decided that
we'd utilize only the minimal set of entries in mesa anyway, and so we
took only those entries for our MOCS entries.

Now we've come to the realization that indeed there are other MOCS
entries which are more optimal for various buffer types and workloads.
The problem is that the meaning of the indices is ABI (we assume index 0
is the uncached entry, and that there are only 3 entries total).

What this patch [simply] aims to do is expose a parameter to inform
userspace which "version" of the table was loaded by i915. Upon
sufficient data, new entries can be added, and the version can be
bumped. For example, from my original mesa mocs branch:

commit c9b0481bce24af032386701de0266eb5bc24e988
Author: Ben Widawsky 
Date:   Fri Apr 8 10:21:16 2016 -0700

i965: Use PTE mocs

Signed-off-by: Ben Widawsky 

diff --git a/src/mesa/drivers/dri/i965/brw_mocs.c 
b/src/mesa/drivers/dri/i965/brw_mocs.c
index 5df154eb86..b7bfdab671 100644
--- a/src/mesa/drivers/dri/i965/brw_mocs.c
+++ b/src/mesa/drivers/dri/i965/brw_mocs.c
@@ -14,6 +14,9 @@
 /* Skylake: MOCS is now an index into an array of 62 different caching
  * configurations programmed by the kernel.
  */
+
+/* TC=PTE, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE_PTE (3 << 1)
 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
 #define SKL_MOCS_WB  (2 << 1)
 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
@@ -26,6 +29,9 @@ brw_mocs_get_control_state(const struct brw_context *brw,
switch (brw->gen) {
default:
case 9:
+  if (brw->intelScreen->mocs_version > 1)
+ return SKL_MOCS_PTE_PTE;
+
   return type == INTEL_MOCS_PTE ? SKL_MOCS_PTE : SKL_MOCS_WB;
case 8:
   return type == INTEL_MOCS_PTE ? BDW_MOCS_PTE : BDW_MOCS_WB;

tl;dr: A versioned MOCS table will allow userspace to be aware of new
and potentially interesting cacheability settings. Next GEN platforms
will not be considered production worthy until the MOCS table is
finalized.

Ben Widawsky (1):
  drm/i915: Version the MOCS settings

 drivers/gpu/drm/i915/i915_drv.c |  3 +++
 drivers/gpu/drm/i915/i915_drv.h |  2 ++
 drivers/gpu/drm/i915/i915_pci.c | 13 +
 include/uapi/drm/i915_drm.h |  8 
 4 files changed, 22 insertions(+), 4 deletions(-)

Ben Widawsky (2):
  intel: Merge latest i915 uapi
  intel: Make driver aware of MOCS table version

 src/intel/drm/i915_drm.h  |  8 
 src/intel/vulkan/anv_device.c | 12 
 src/intel/vulkan/anv_private.h|  2 ++
 src/mesa/drivers/dri/i915/intel_context.c |  7 ++-
 src/mesa/drivers/dri/i965/intel_screen.c  | 14 ++
 src/mesa/drivers/dri/i965/intel_screen.h  |  2 ++
 6 files changed, 44 insertions(+), 1 deletion(-)

-- 
2.13.2
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] intel: Make driver aware of MOCS table version

2017-07-06 Thread Ben Widawsky
We don't yet have optimal MOCS settings, but we have enough to know how
to at least determine when we might have non-optimal settings within our
driver.

Signed-off-by: Ben Widawsky 
---
 src/intel/vulkan/anv_device.c | 12 
 src/intel/vulkan/anv_private.h|  2 ++
 src/mesa/drivers/dri/i915/intel_context.c |  7 ++-
 src/mesa/drivers/dri/i965/intel_screen.c  | 14 ++
 src/mesa/drivers/dri/i965/intel_screen.h  |  2 ++
 5 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 3dc55dbb8d..8e180dbf18 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -368,6 +368,18 @@ anv_physical_device_init(struct anv_physical_device 
*device,
  device->info.max_cs_threads = max_cs_threads;
}
 
+   if (device->info.gen >= 9) {
+  device->mocs_version = anv_gem_get_param(fd,
+   I915_PARAM_MOCS_TABLE_VERSION);
+  switch (device->mocs_version) {
+  default:
+ anv_perf_warn("Kernel exposes newer MOCS table\n");
+  case 1:
+  case 0:
+ device->mocs_version = MOCS_TABLE_VERSION;
+  }
+   }
+
brw_process_intel_debug_variable();
 
device->compiler = brw_compiler_create(NULL, >info);
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 573778dad5..b8241a9b22 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -684,6 +684,8 @@ struct anv_physical_device {
 uint32_teu_total;
 uint32_tsubslice_total;
 
+uint8_t mocs_version;
+
 struct {
   uint32_t  type_count;
   struct anv_memory_typetypes[VK_MAX_MEMORY_TYPES];
diff --git a/src/mesa/drivers/dri/i915/intel_context.c 
b/src/mesa/drivers/dri/i915/intel_context.c
index e0766a0e3f..9169ea650e 100644
--- a/src/mesa/drivers/dri/i915/intel_context.c
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -521,8 +521,13 @@ intelInitContext(struct intel_context *intel,
INTEL_DEBUG = parse_debug_string(getenv("INTEL_DEBUG"), debug_control);
if (INTEL_DEBUG & DEBUG_BUFMGR)
   dri_bufmgr_set_debug(intel->bufmgr, true);
-   if (INTEL_DEBUG & DEBUG_PERF)
+   if (INTEL_DEBUG & DEBUG_PERF) {
   intel->perf_debug = true;
+  if (screen->mocs_version > MOCS_TABLE_VERSION) {
+ fprintf(stderr, "Kernel exposes newer MOCS table\n");
+ screen->mocs_version = MOCS_TABLE_VERSION;
+  }
+   }
 
if (INTEL_DEBUG & DEBUG_AUB)
   drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true);
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index c75f2125d4..c53f133d49 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2301,6 +2301,20 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
  (ret != -1 || errno != EINVAL);
}
 
+   if (devinfo->gen >= 9) {
+  screen->mocs_version = intel_get_integer(screen,
+   I915_PARAM_MOCS_TABLE_VERSION);
+  switch (screen->mocs_version) {
+  case 1:
+  case 0:
+ screen->mocs_version = MOCS_TABLE_VERSION;
+ break;
+  default:
+ /* We want to perf debug, but we can't yet */
+ break;
+  }
+   }
+
dri_screen->extensions = !screen->has_context_reset_notification
   ? screenExtensions : intelRobustScreenExtensions;
 
diff --git a/src/mesa/drivers/dri/i965/intel_screen.h 
b/src/mesa/drivers/dri/i965/intel_screen.h
index f78b3e8f74..eb801f8155 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.h
+++ b/src/mesa/drivers/dri/i965/intel_screen.h
@@ -112,6 +112,8 @@ struct intel_screen
bool mesa_format_supports_texture[MESA_FORMAT_COUNT];
bool mesa_format_supports_render[MESA_FORMAT_COUNT];
enum isl_format mesa_to_isl_render_format[MESA_FORMAT_COUNT];
+
+   unsigned mocs_version;
 };
 
 extern void intelDestroyContext(__DRIcontext * driContextPriv);
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101703] No stencil buffer allocated when requested by GLUT

2017-07-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101703

Brian Paul  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #2 from Brian Paul  ---
Thanks, Olivier.  I've pushed this patch and the wglUseFontBitmaps patch
(commits 80c6598cdba36edb, 43dea65ad2e8259).
Closing this bug.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] intel: Merge latest i915 uapi

2017-07-06 Thread Ben Widawsky
Signed-off-by: Ben Widawsky 
---
 src/intel/drm/i915_drm.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/intel/drm/i915_drm.h b/src/intel/drm/i915_drm.h
index c26bf7c125..69e38ce89f 100644
--- a/src/intel/drm/i915_drm.h
+++ b/src/intel/drm/i915_drm.h
@@ -431,6 +431,14 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
 
+/* What version of the MOCS table we have. For GEN9 GPUs, the PRM defined
+ * non-optimal settings for the MOCS table. As a result, we were required to 
use a
+ * small subset, and later add new settings. This param allows userspace to
+ * determine which settings are there.
+ */
+#define MOCS_TABLE_VERSION   1 /* Build time MOCS table version */
+#define I915_PARAM_MOCS_TABLE_VERSION   49
+
 typedef struct drm_i915_getparam {
__s32 param;
/*
-- 
2.13.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/1] drm/i915: Version the MOCS settings

2017-07-06 Thread Ben Widawsky
From: Ben Widawsky 

Starting with GEN9, Memory Object Control State (MOCS) becomes an index
into a table as opposed to the direct programming within the command.
The table has 62 usable entries (ie 6 bits can represent all settings),
and each buffer type may use one of these 62 entries to describe
cacheability type, and age (and some other less useful fields).

Because we hadn't dealt with MOCS settings like this, we didn't think
ahead too well and have ended up with a mess for GEN9 (and soon GEN10)
platform. The plan for future platforms is that the ideal MOCS
settings will be determined, defined, and written in the public PRMs.
After this point, the i915.ko will absorb these settings and sometime
afterwards flip the alpha switch. All driver releases without the final
MOCS table must be considered alpha. Here on, userspace can assume the
MOCS table is definitively done. There will be some reserved entries for
'oh shit' scenarios. This avoids versioning the MOCS table which leaves
somewhat of a mess in userspace trying to handle arbitrarily many MOCS
versions.

But we do have a mess on GEN9. In the beginning, the MOCS table entries
were pre-populated by the hardware based on estimations made prior to
tapeout and we could just use that. Subsequently much performance tuning
was done to determine optimal settings that the i915 driver should load
on top of the hardware defaults. That was posted last as v6 of the
original per-engine MOCS settings:
https://patchwork.freedesktop.org/patch/53237/. Since the MOCS table is
not context saved/restored, it isn't feasible to let userspace upload
its own MOCS table. After a good amount of debate, it was decided that
we'd utilize only the minimal set of entries in mesa anyway, and so we
took only those entries for our MOCS entries.

Now we've come to the realization that indeed there are other MOCS
entries which are more optimal for various buffer types and workloads.
The problem is that the meaning of the indices is ABI (we assume index 0
is the uncached entry, and that there are only 3 entries total).

What this patch [simply] aims to do is expose a parameter to inform
userspace which "version" of the table was loaded by i915. Upon
sufficient data, new entries can be added, and the version can be
bumped. For example, from my original mesa mocs branch:

commit c9b0481bce24af032386701de0266eb5bc24e988
Author: Ben Widawsky 
Date:   Fri Apr 8 10:21:16 2016 -0700

i965: Use PTE mocs

Signed-off-by: Ben Widawsky 

diff --git a/src/mesa/drivers/dri/i965/brw_mocs.c 
b/src/mesa/drivers/dri/i965/brw_mocs.c
index 5df154eb86..b7bfdab671 100644
--- a/src/mesa/drivers/dri/i965/brw_mocs.c
+++ b/src/mesa/drivers/dri/i965/brw_mocs.c
@@ -14,6 +14,9 @@
 /* Skylake: MOCS is now an index into an array of 62 different caching
  * configurations programmed by the kernel.
  */
+
+/* TC=PTE, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE_PTE (3 << 1)
 /* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
 #define SKL_MOCS_WB  (2 << 1)
 /* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
@@ -26,6 +29,9 @@ brw_mocs_get_control_state(const struct brw_context *brw,
switch (brw->gen) {
default:
case 9:
+  if (brw->intelScreen->mocs_version > 1)
+ return SKL_MOCS_PTE_PTE;
+
   return type == INTEL_MOCS_PTE ? SKL_MOCS_PTE : SKL_MOCS_WB;
case 8:
   return type == INTEL_MOCS_PTE ? BDW_MOCS_PTE : BDW_MOCS_WB;

tl;dr: A versioned MOCS table will allow userspace to be aware of new
and potentially interesting cacheability settings. Next GEN platforms
will not be considered production worthy until the MOCS table is
finalized.

v2: Update 1.5 year old patch. Add comments. Update commit message.

Signed-off-by: Ben Widawsky 
---
 drivers/gpu/drm/i915/i915_drv.c |  3 +++
 drivers/gpu/drm/i915/i915_drv.h |  2 ++
 drivers/gpu/drm/i915/i915_pci.c | 13 +
 include/uapi/drm/i915_drm.h |  8 
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9167a73f3c69..26c27b6ae814 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -401,6 +401,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
if (!value)
return -ENODEV;
break;
+   case I915_PARAM_MOCS_TABLE_VERSION:
+   value = INTEL_INFO(dev_priv)->mocs_version;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index effbe4f72a64..9b30f6e6ef9b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -859,6 +859,8 @@ struct intel_device_info {
u16 degamma_lut_size;
u16 

Re: [Mesa-dev] [PATCH 3/3] mesa: fix glPrimitiveRestartNV crash inside a render list.

2017-07-06 Thread Brian Paul

Thanks for the patches, Olivier!

I'm a bit short on time, but I'll test/commit them ASAP.  I'll take a 
closer look at the VBO issue too.


-Brian

On 07/06/2017 09:45 AM, Olivier Lauffenburger wrote:

glPrimitiveRestartNV crashes when it is called during the compilation
of a render list.

There are two reasons:
- ctx->Driver.CurrentSavePrimitive is not set to the current primitive
- save_PrimitiveRestartNV() calls _save_Begin() which only sets an
   OpenGL error, instead of calling vbo_save_NotifyBegin().

This patch correctly calls vbo_save_NotifyBegin() but it detects
the current primitive mode by looking at the latest saved primitive.
It works but it is not the most elegant method.
However, correctly setting ctx->Driver.CurrentSavePrimitive requires
a deeper understanding of the code than I have.

Signed-off-by: Olivier Lauffenburger 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101464
---
  src/mesa/vbo/vbo_save_api.c | 12 +---
  1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/mesa/vbo/vbo_save_api.c b/src/mesa/vbo/vbo_save_api.c
index a0735f63f2..cb10921ce7 100644
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1116,10 +1116,16 @@ _save_PrimitiveRestartNV(void)
 GLenum curPrim;
 GET_CURRENT_CONTEXT(ctx);

-   curPrim = ctx->Driver.CurrentSavePrimitive;
+   /* get current primitive mode */
+   struct vbo_save_context *save = &vbo_context(ctx)->save;
+   if (save->prim_count == 0) return;
+
+   curPrim = save->prim[save->prim_count - 1].mode;

-   _save_End();
-   _save_Begin(curPrim);
+   /* restart primitive */
+   CALL_End(GET_DISPATCH(), ());
+   vbo_save_NotifyBegin(ctx, (curPrim | VBO_SAVE_PRIM_WEAK
+  | VBO_SAVE_PRIM_NO_CURRENT_UPDATE));
  }





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101712] CPU lockup after ring 0 stalled

2017-07-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101712

--- Comment #2 from guiscara...@gmail.com ---
Created attachment 132488
  --> https://bugs.freedesktop.org/attachment.cgi?id=132488&action=edit
DSMEG (more detailed log)

Here is a more detailed log

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101712] CPU lockup after ring 0 stalled

2017-07-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101712

guiscara...@gmail.com changed:

   What|Removed |Added

   Priority|medium  |high

--- Comment #1 from guiscara...@gmail.com ---
It's making the OS completely unusable, which is why I marked it as urgent

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 101712] CPU lockup after ring 0 stalled

2017-07-06 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=101712

Bug ID: 101712
   Summary: CPU lockup after ring 0 stalled
   Product: Mesa
   Version: unspecified
  Hardware: x86-64 (AMD64)
OS: Linux (All)
Status: NEW
  Severity: critical
  Priority: medium
 Component: Drivers/X11
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: guiscara...@gmail.com
QA Contact: mesa-dev@lists.freedesktop.org

Created attachment 132487
  --> https://bugs.freedesktop.org/attachment.cgi?id=132487&action=edit
Error log about the crash

When trying to render any "heavy" graphics or anything that uses particle
animations (which I suppose is what causes it), the computer crashes and can only
be recovered by a hard reset.
I've already posted at Manjaro
(https://forum.manjaro.org/t/xorg-freezes-when-playing-dota/25938/11) more
details about this issue, but it's getting worse every day.

Please, I've already tried everything that was possible. I don't have any
alternatives...

Computer specs:
System:Host: Manjaro-AMD Kernel: 4.9.30-1-MANJARO x86_64 (64 bit gcc:
6.3.1)
   Desktop: MATE 1.18.0 (Gtk 3.22.15) Distro: Manjaro Linux
Machine:   Device: desktop Mobo: ASUSTeK model: M5A97 LE R2.0 v: Rev 1.xx
   UEFI: American Megatrends v: 2701 date: 03/24/2016
CPU:   Octa core AMD FX-8300 Eight-Core (-MCP-) cache: 16384 KB
   flags: (lm nx sse sse2 sse3 sse4_1 sse4_2 sse4a ssse3 svm) bmips:
52993
   clock speeds: max: 3300 MHz 1: 1400 MHz 2: 1400 MHz 3: 1400 MHz
   4: 1400 MHz 5: 3300 MHz 6: 3300 MHz 7: 1400 MHz 8: 1400 MHz
Graphics:  Card: Advanced Micro Devices [AMD/ATI] Turks PRO [Radeon HD
6570/7570/8550]
   bus-ID: 01:00.0
   Display Server: X.Org 1.19.3 driver: radeon
   Resolution: 1360x768@60.02hz
   GLX Renderer: Gallium 0.4 on AMD TURKS (DRM 2.48.0 /
4.9.30-1-MANJARO, LLVM 4.0.0)
   GLX Version: 3.0 Mesa 17.1.1 Direct Rendering: Yes
Audio: Card-1 Advanced Micro Devices [AMD/ATI] Turks HDMI Audio [Radeon HD
6500/6600 / 6700M Series]
   driver: snd_hda_intel bus-ID: 01:00.1
   Card-2 Advanced Micro Devices [AMD/ATI] SBx00 Azalia (Intel HDA)
   driver: snd_hda_intel bus-ID: 00:14.2
   Card-3 Microsoft driver: USB Audio usb-ID: 004-002
   Sound: Advanced Linux Sound Architecture v: k4.9.30-1-MANJARO
Network:   Card-1: Realtek RTL8111/8168/8411 PCI Express Gigabit Ethernet
Controller
   driver: r8168 v: 8.044.02-NAPI port: d000 bus-ID: 02:00.0
   IF: enp2s0 state: down mac: 
   Card-2: Microsoft Xbox 360 Wireless Adapter usb-ID: 006-004
   IF: null-if-id state: N/A mac: N/A
Drives:HDD Total Size: 1000.2GB (5.0% used)
   ID-1: /dev/sda model: SAMSUNG_HD502HI size: 500.1GB
   ID-2: /dev/sdb model: ST500LT012 size: 500.1GB
Partition: ID-1: / size: 449G used: 39G (9%) fs: ext4 dev: /dev/sda2
   ID-2: swap-1 size: 9.45GB used: 0.00GB (0%) fs: swap dev: /dev/sda3
Sensors:   System Temperatures: cpu: 48.2C mobo: N/A gpu: 42.5
   Fan Speeds (in rpm): cpu: 0
Info:  Processes: 222 Uptime: 1:09 Memory: 2561.3/7730.2MB
   Init: systemd Gcc sys: 7.1.1
   Client: Shell (bash 4.4.121) inxi: 2.3.9

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] ac/nir: Fix ordering of parameters for image atomic cmpswap intrinsics

2017-07-06 Thread Bas Nieuwenhuizen
Thanks! Pushed and cc'd it to stable.

Not pushing the first patch as I assume that is superseded by Connor's patches.

On Fri, Jun 30, 2017 at 12:15 PM, Alex Smith
 wrote:
> The NIR parameters are ordered "compare, data", matching GLSL, but both
> the image and buffer LLVM intrinsics take them the other way around.
> This is already handled correctly for SSBO atomics.
>
> Signed-off-by: Alex Smith 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 6845df8..89b7103 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3442,9 +3442,9 @@ static LLVMValueRef visit_image_atomic(struct 
> nir_to_llvm_context *ctx,
> abort();
> }
>
> -   params[param_count++] = get_src(ctx, instr->src[2]);
> if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
> params[param_count++] = get_src(ctx, instr->src[3]);
> +   params[param_count++] = get_src(ctx, instr->src[2]);
>
> if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
> params[param_count++] = get_sampler_desc(ctx, 
> instr->variables[0], DESC_BUFFER);
> --
> 2.9.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] etnaviv: fix refcnt initialization in etna_screen

2017-07-06 Thread Aleksander Morgado
On Thu, Jul 6, 2017 at 11:18 PM, Aleksander Morgado
 wrote:
> Despite being a member of the etna_screen struct, 'refcnt' is used by
> the winsys-specific logic to track the reference count of the object
> managed in a hash table. When the count reaches zero, the pipe screen
> is removed from the table and destroyed.
>
> Fix the logic by initializing the refcnt to 1 when the screen is created.
> This initialization is done in etna_screen_create(), to follow the
> same logic as in freedreno and virgl.
>

For reference, this is the kind of backtrace I was getting due to this
issue. The dri2_create_image_from_winsys() call was trying to run
pscreen->resource_from_handle, but the pscreen had already been freed.

If the item is added to the HT with refcnt = 0, getting an extra
reference from the HT would have set refcnt = 1, and when that extra
reference was removed it would have gone to refcnt = 0, triggering at
this point the destroy and removal from the HT, while the original
reference was still around and assumed valid, and finally arriving at
the use-after-free seen here.

01-01 00:00:51.546   621   621 F libc: Fatal signal 11 (SIGSEGV),
code 1, fault addr 0x0 in tid 621 (ndroid.systemui)
01-01 00:00:51.679  1062  1062 F DEBUG   : *** *** *** *** *** *** ***
*** *** *** *** *** *** *** *** ***
01-01 00:00:51.679  1062  1062 F DEBUG   : Build fingerprint:
'Android/linaro_arm/linaro_arm:7.1.1/N6F26U/aleksa04281944:userdebug/test-keys'
01-01 00:00:51.679  1062  1062 F DEBUG   : Revision: '0'
01-01 00:00:51.679  1062  1062 F DEBUG   : ABI: 'arm'
01-01 00:00:51.680  1062  1062 F DEBUG   : pid: 621, tid: 621, name:
ndroid.systemui  >>> com.android.systemui <<<
01-01 00:00:51.680  1062  1062 F DEBUG   : signal 11 (SIGSEGV), code 1
(SEGV_MAPERR), fault addr 0x0
01-01 00:00:51.680  1062  1062 F DEBUG   : r0 91e65d00  r1
bec0d3d8  r2 bec0d470  r3 0006
01-01 00:00:51.681  1062  1062 F DEBUG   : r4 bec0d470  r5
91e65d00  r6   r7 bec0d3ea
01-01 00:00:51.681  1062  1062 F DEBUG   : r8 914deb27  r9
  sl 8ef239c0  fp 1005
01-01 00:00:51.681  1062  1062 F DEBUG   : ip bec0cef4  sp
bec0d3c0  lr 91242bd9  pc   cpsr 2010
01-01 00:00:52.711  1062  1062 F DEBUG   :
01-01 00:00:52.711  1062  1062 F DEBUG   : backtrace:
01-01 00:00:52.711  1062  1062 F DEBUG   : #00 pc   
01-01 00:00:52.711  1062  1062 F DEBUG   : #01 pc 00066bd7
/system/lib/dri/gallium_dri.so (dri2_create_image_from_winsys+462)
01-01 00:00:52.711  1062  1062 F DEBUG   : #02 pc 00066f49
/system/lib/dri/gallium_dri.so (dri2_create_image_from_fd+592)
01-01 00:00:52.712  1062  1062 F DEBUG   : #03 pc 00065e07
/system/lib/dri/gallium_dri.so (dri2_from_dma_bufs2+54)
01-01 00:00:52.712  1062  1062 F DEBUG   : #04 pc 1fb7
/system/lib/libgbm.so (gbm_dri_bo_import+418)
01-01 00:00:52.712  1062  1062 F DEBUG   : #05 pc 127f
/system/lib/hw/gralloc.gbm.so
(_ZL15validate_handlePK13native_handleP10gbm_device+334)
01-01 00:00:52.712  1062  1062 F DEBUG   : #06 pc 111f
/system/lib/hw/gralloc.gbm.so (gralloc_gbm_handle_register+2)
01-01 00:00:52.712  1062  1062 F DEBUG   : #07 pc 160f
/system/lib/hw/gralloc.gbm.so
(_ZL23gbm_mod_register_bufferPK16gralloc_module_tPK13native_handle+28)
01-01 00:00:52.712  1062  1062 F DEBUG   : #08 pc cbdd
/system/lib/libui.so
(_ZN7android18Gralloc1On0Adapter6retainEPKNS_13GraphicBufferE+80)
01-01 00:00:52.712  1062  1062 F DEBUG   : #09 pc f45b
/system/lib/libui.so
(_ZN7android19GraphicBufferMapper14registerBufferEPKNS_13GraphicBufferE+58)
01-01 00:00:52.713  1062  1062 F DEBUG   : #10 pc e7c5
/system/lib/libui.so
(_ZN7android13GraphicBuffer9unflattenERPKvRjRPKiS4_+300)
01-01 00:00:52.713  1062  1062 F DEBUG   : #11 pc 00083951
/system/lib/libandroid_runtime.so
(_ZN7android6Parcel17FlattenableHelperINS_13GraphicBufferEE9unflattenEPKvjPKij+20)
01-01 00:00:52.713  1062  1062 F DEBUG   : #12 pc 00044c23
/system/lib/libbinder.so
(_ZNK7android6Parcel4readERNS0_26FlattenableHelperInterfaceE+338)
01-01 00:00:52.713  1062  1062 F DEBUG   : #13 pc 000460fd
/system/lib/libgui.so
(_ZN7android23BpGraphicBufferProducer13requestBufferEiPNS_2spINS_13GraphicBufferEEE+128)
01-01 00:00:52.713  1062  1062 F DEBUG   : #14 pc 00050fc7
/system/lib/libgui.so
(_ZN7android7Surface13dequeueBufferEPP19ANativeWindowBufferPi+322)
01-01 00:00:52.713  1062  1062 F DEBUG   : #15 pc c931
/system/lib/egl/libGLES_mesa.so (update_buffers+52)
01-01 00:00:52.713  1062  1062 F DEBUG   : #16 pc ca2d
/system/lib/egl/libGLES_mesa.so (droid_image_get_buffers+16)
01-01 00:00:52.713  1062  1062 F DEBUG   : #17 pc 00067523
/system/lib/dri/gallium_dri.so (dri2_allocate_textures+378)
01-01 00:00:52.713  1062  1062 F DEBUG   : #18 pc 000648b7
/system/lib/dri/gallium_dri.so (dri_st_framebuffer_validate+134)
01-01 00:00:52.714  1062  1062 F DEBUG   : #19 pc 00182237
/system/lib/dri/gallium_dri.so 

Re: [Mesa-dev] [PATCH 2/6] nir/lower_io_to_temporaries: don't set compact on shadow vars

2017-07-06 Thread Jason Ekstrand
Reviewed-by: Jason Ekstrand 

On Thu, Jul 6, 2017 at 12:48 PM, Connor Abbott 
wrote:

> From: Connor Abbott 
>
> The compact flag doesn't make sense on local variables, since the
> packing on them is up to the driver. This fixes nir_validate assertions
> in some cases, particularly when lower_io_to_temporaries is used on
> per-vertex inputs/outputs.
> ---
>  src/compiler/nir/nir_lower_io_to_temporaries.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/compiler/nir/nir_lower_io_to_temporaries.c
> b/src/compiler/nir/nir_lower_io_to_temporaries.c
> index 6031bbd..d2df14e 100644
> --- a/src/compiler/nir/nir_lower_io_to_temporaries.c
> +++ b/src/compiler/nir/nir_lower_io_to_temporaries.c
> @@ -141,6 +141,7 @@ create_shadow_temp(struct lower_io_state *state,
> nir_variable *var)
> temp->data.mode = nir_var_global;
> temp->data.read_only = false;
> temp->data.fb_fetch_output = false;
> +   temp->data.compact = false;
>
> return nvar;
>  }
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] radv: don't lower indirect derefs

2017-07-06 Thread Bas Nieuwenhuizen
On Thu, Jul 6, 2017 at 9:50 PM, Connor Abbott  wrote:
> From: Connor Abbott 
>
> Radeonsi doesn't either. As of the last commit, these should be handled
> properly as long as LLVM has scratch support. We also should use
> nir_lower_io_to_temporaries() for inputs instead of generating an
> if-ladder, since that should be more efficient.
> ---
>  src/amd/vulkan/radv_pipeline.c | 14 ++
>  1 file changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 237e146..0991304 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -272,11 +272,17 @@ radv_shader_compile_to_nir(struct radv_device *device,
>
> nir_shader_gather_info(nir, entry_point->impl);
>
> -   nir_variable_mode indirect_mask = 0;
> -   indirect_mask |= nir_var_shader_in;
> -   indirect_mask |= nir_var_local;
> +   if (device->llvm_supports_spill) {
> +   nir_lower_io_to_temporaries(nir, entry_point->impl,
> +   false /* outputs */,
> +   true /* inputs */);

We only need temporaries for VS/FS inputs, the other stages get the
inputs from ringbuffers and I think those have all been wired up for
indirect access. (might make sense to enable output temporaries for FS
and whatever maps to hardware VS?)

Also sounds like the current indirect index handling for inputs is
useless, you might be able to simplify that.

> +   } else {
> +   nir_variable_mode indirect_mask = 0;
> +   indirect_mask |= nir_var_shader_in;
> +   indirect_mask |= nir_var_local;
> +   nir_lower_indirect_derefs(nir, indirect_mask);
> +   }
>
> -   nir_lower_indirect_derefs(nir, indirect_mask);
>
> static const nir_lower_tex_options tex_options = {
>   .lower_txp = ~0,
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radv/ac: drop setting xnack

2017-07-06 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Jul 7, 2017 at 12:10 AM, Dave Airlie  wrote:
> From: Dave Airlie 
>
> Since radv uses compute rings and we can't know when we are setting
> up the shaders what ring they are to be used on, we should just use
> the default xnack setting. This may be suboptimal in some places,
> but if we hit a problem, we likely should try and address this
> between llvm and mesa.
>
> Signed-off-by: Dave Airlie 
> ---
>  src/amd/common/ac_llvm_util.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index d14057f..ea752e0 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -126,8 +126,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
> radeon_family family, enum ac
> LLVMTargetRef target = ac_get_llvm_target(triple);
>
> snprintf(features, sizeof(features),
> -"+DumpCode,+vgpr-spilling,-fp32-denormals%s%s",
> -family >= CHIP_VEGA10 ? ",+xnack" : ",-xnack",
> +"+DumpCode,+vgpr-spilling,-fp32-denormals%s",
>  tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "");
>
> LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] radv: Rewrite local & shared variable handling

2017-07-06 Thread Alex Smith
On 6 July 2017 at 22:20, Connor Abbott  wrote:
> On Thu, Jul 6, 2017 at 12:48 PM, Connor Abbott
>  wrote:
>> From: Connor Abbott 
>>
>> This series grew out of trying to get rid of the copy-n-pasted index
>> calculation code in radv's NIR-to-LLVM path, in particular in
>> radv_get_deref_offset(). I realized for IO it's probably better to
>> switch to using nir_lower_io after Nicolai's series, but for local
>> variables and shared variables which can't be accessed by the host and
>> don't have to match between stages, it's simpler to translate the NIR
>> variable directly into an LLVM variable instead.
>>
>> This cleanup gives us indirect addressing of local variables without
>> generating if-ladders in NIR for free, and makes our shared variable
>> handling much better which fixes an upcoming Feral title.
>
> Sorry, I worded this wrong -- I don't have any inside information on
> Feral titles, and Alex's original patch didn't mention any upcoming
> titles. I apologize for any confusion.

Don't worry about it :P

For the series:
Tested-by: Alex Smith 

>
>> It should also
>> let LLVM do its thing much better, even better than the existing
>> radeonsi path, by preserving type information that TGSI doesn't for
>> local variables.
>>
>> Connor Abbott (6):
>>   nir: don't segfault when printing variables with no name
>>   nir/lower_io_to_temporaries: don't set compact on shadow vars
>>   ac/nir: rewrite local variable handling
>>   ac/nir: rewrite shared variable handling
>>   radv: reorder and add passes to match i965
>>   radv: don't lower indirect derefs
>>
>>  src/amd/common/ac_nir_to_llvm.c| 310 
>> +
>>  src/amd/vulkan/radv_pipeline.c |  28 ++-
>>  src/compiler/nir/nir_lower_io_to_temporaries.c |   1 +
>>  src/compiler/nir/nir_print.c   |   2 +-
>>  4 files changed, 187 insertions(+), 154 deletions(-)
>>
>> --
>> 2.9.4
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv/ac: drop setting xnack

2017-07-06 Thread Dave Airlie
From: Dave Airlie 

Since radv uses compute rings and we can't know when we are setting
up the shaders what ring they are to be used on, we should just use
the default xnack setting. This may be suboptimal in some places,
but if we hit a problem, we likely should try and address this
between llvm and mesa.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_util.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index d14057f..ea752e0 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -126,8 +126,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
radeon_family family, enum ac
LLVMTargetRef target = ac_get_llvm_target(triple);
 
snprintf(features, sizeof(features),
-"+DumpCode,+vgpr-spilling,-fp32-denormals%s%s",
-family >= CHIP_VEGA10 ? ",+xnack" : ",-xnack",
+"+DumpCode,+vgpr-spilling,-fp32-denormals%s",
 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "");

LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
-- 
2.9.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/19] radeonsi: add basic memory object support

2017-07-06 Thread Andres Rodriguez



On 2017-07-04 12:39 PM, Andres Rodriguez wrote:



On 2017-07-04 09:30 AM, Christian König wrote:

Am 04.07.2017 um 15:13 schrieb Nicolai Hähnle:

On 01.07.2017 01:03, Andres Rodriguez wrote:

From: Dave Airlie 

Signed-off-by: Andres Rodriguez 
---
  src/gallium/drivers/radeon/r600_pipe_common.h |  6 ++
  src/gallium/drivers/radeon/r600_texture.c | 82 
+++

  2 files changed, 88 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h

index b22a3a7..c5734b4 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -377,6 +377,12 @@ union r600_mmio_counters {
  unsigned array[0];
  };
  +struct r600_memory_object {
+struct pb_buffer*buf;
+uint32_tstride;
+uint32_toffset;
+};
+
  struct r600_common_screen {
  struct pipe_screenb;
  struct radeon_winsys*ws;
diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c

index d68587b..3bc8b0c 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2784,10 +2784,92 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,

  }
  }
  +static struct pipe_memory_object *
+r600_memobj_from_handle(struct pipe_screen *screen,
+struct winsys_handle *whandle)
+{
+struct r600_common_screen *rscreen = (struct 
r600_common_screen*)screen;
+struct r600_memory_object *memobj = 
CALLOC_STRUCT(r600_memory_object);

+struct pb_buffer *buf = NULL;
+uint32_t stride, offset;
+
+if (!memobj)
+return NULL;
+
+buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
+  , );
+if (!buf)
+return NULL;
+
+memobj->buf = buf;
+memobj->stride = stride;
+memobj->offset = offset;
+
+return (struct pipe_memory_object *)memobj;
+}
+
+static void
+r600_memobj_destroy(struct pipe_screen *screen,
+struct pipe_memory_object *memobj)
+{
+free(memobj);
+}
+
+static struct pipe_resource *
+r600_texture_from_memobj(struct pipe_screen *screen,
+ const struct pipe_resource *templ,
+ struct pipe_memory_object *_memobj,
+ uint64_t offset)
+{
+int r;
+struct r600_common_screen *rscreen = (struct 
r600_common_screen*)screen;
+struct r600_memory_object *memobj = (struct r600_memory_object 
*)_memobj;

+struct r600_texture *rtex;
+struct radeon_surf surface;
+struct radeon_bo_metadata metadata = {};
+unsigned array_mode;
+rscreen->ws->buffer_get_metadata(memobj->buf, );
+
+surface.u.legacy.pipe_config = metadata.u.legacy.pipe_config;
+surface.u.legacy.bankw = metadata.u.legacy.bankw;
+surface.u.legacy.bankh = metadata.u.legacy.bankh;
+surface.u.legacy.tile_split = metadata.u.legacy.tile_split;
+surface.u.legacy.mtilea = metadata.u.legacy.mtilea;
+surface.u.legacy.num_banks = metadata.u.legacy.num_banks;


How can this possibly work when multiple textures are in the same 
memory object?


Good point. This is taking advantage of NV_dedicated_allocation on the 
vulkan side, so the correspondence is one image per memory object. This 
codepath should really be wrapped in an "if (memObj->Dedicated)" check.


I'll check with Marek on the multi-texture metadata that Christian 
mentioned below.


Regards,
Andres



As far as I understand the extension, we basically have to keep 
radeonsi and radv in sync in how they compute those parameters. Since 
it's only Vulkan -> OpenGL, this means we need to mirror precisely 
how radv calls addrlib here.


Hmm. Keeping in mind that there's another Vulkan driver as well, is 
there any chance we can get a future extension where the exporting 
API provides an opaque blob of metadata and the importing API 
receives that blob? We'd use the same format that is currently used 
for the buffer metadata, but it'd work with multiple textures in the 
same memory object.


Hey Nicolai,

Didn't see you online on IRC and wanted to let you know about a section of 
the VK_KHX_external_memory that rules out usage of external blobs.


See the answer to question 5 here (warning, giant spec website, not for 
browsers faint of heart):

https://www.khronos.org/registry/vulkan/specs/1.0-extensions/html/vkspec.html#VK_KHX_external_memory

Regards,
Andres

Didn't Marek explicitly design the metadata so that multiple 
textures can be in the same memory object?

 > I didn't take a too close look at that, but from the ten miles high
perspective it looked like that should work.

Regards,
Christian.



Cheers,
Nicolai



+
+if (metadata.u.legacy.macrotile == RADEON_LAYOUT_TILED)
+array_mode = RADEON_SURF_MODE_2D;
+else if (metadata.u.legacy.microtile == RADEON_LAYOUT_TILED)
+array_mode = RADEON_SURF_MODE_1D;
+else
+ 

Re: [Mesa-dev] [PATCH 1/4] radeonsi: fix DCC fast clear for luminance and alpha formats

2017-07-06 Thread Marek Olšák
On Thu, Jul 6, 2017 at 8:24 PM, Andres Gomez  wrote:
> Marek, would we want this series in -stable or we shouldn't bother ?

Don't bother. Patch 1 isn't that important. Other patches are for
features not enabled in 17.1.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] radv: Rewrite local & shared variable handling

2017-07-06 Thread Connor Abbott
On Thu, Jul 6, 2017 at 12:48 PM, Connor Abbott
 wrote:
> From: Connor Abbott 
>
> This series grew out of trying to get rid of the copy-n-pasted index
> calculation code in radv's NIR-to-LLVM path, in particular in
> radv_get_deref_offset(). I realized for IO it's probably better to
> switch to using nir_lower_io after Nicolai's series, but for local
> variables and shared variables which can't be accessed by the host and
> don't have to match between stages, it's simpler to translate the NIR
> variable directly into an LLVM variable instead.
>
> This cleanup gives us indirect addressing of local variables without
> generating if-ladders in NIR for free, and makes our shared variable
> handling much better which fixes an upcoming Feral title.

Sorry, I worded this wrong -- I don't have any inside information on
Feral titles, and Alex's original patch didn't mention any upcoming
titles. I apologize for any confusion.

> It should also
> let LLVM do its thing much better, even better than the existing
> radeonsi path, by preserving type information that TGSI doesn't for
> local variables.
>
> Connor Abbott (6):
>   nir: don't segfault when printing variables with no name
>   nir/lower_io_to_temporaries: don't set compact on shadow vars
>   ac/nir: rewrite local variable handling
>   ac/nir: rewrite shared variable handling
>   radv: reorder and add passes to match i965
>   radv: don't lower indirect derefs
>
>  src/amd/common/ac_nir_to_llvm.c| 310 
> +
>  src/amd/vulkan/radv_pipeline.c |  28 ++-
>  src/compiler/nir/nir_lower_io_to_temporaries.c |   1 +
>  src/compiler/nir/nir_print.c   |   2 +-
>  4 files changed, 187 insertions(+), 154 deletions(-)
>
> --
> 2.9.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] etnaviv: fix refcnt initialization in etna_screen

2017-07-06 Thread Aleksander Morgado
Despite being a member of the etna_screen struct, 'refcnt' is used by
the winsys-specific logic to track the reference count of the object
managed in a hash table. When the count reaches zero, the pipe screen
is removed from the table and destroyed.

Fix the logic by initializing the refcnt to 1 when the screen is created.
This initialization is done in etna_screen_create(), to follow the
same logic as in freedreno and virgl.

Signed-off-by: Aleksander Morgado 
---
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index eefb51c5da..fa0cbd9076 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -772,6 +772,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu 
*gpu,
screen->dev = dev;
screen->gpu = gpu;
screen->ro = renderonly_dup(ro);
+   screen->refcnt = 1;
 
if (!screen->ro) {
   DBG("could not create renderonly object");
-- 
2.13.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/6] radv: reorder and add passes to match i965

2017-07-06 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Thu, Jul 6, 2017 at 9:50 PM, Connor Abbott  wrote:
> From: Connor Abbott 
>
> This makes the radv shader pipeline much closer to brw_preprocess_nir().
> The main changes are:
>
> - Now we call nir_split_var_copies(), which is necessary for
> nir_lower_var_copies() to work correctly. The SPIR-V frontend never
> generates variable copies, so we didn't notice this, but this will
> prevent regressions after the next commit that turns on
> nir_lower_io_to_temporaries, which does use copies. We add a call to
> radv_optimize() after nir_split_var_copies() but before lowering var
> copies and other stuff so that NIR optimization passes have a chance to
> work on the un-lowered form. This matches i965.
> - We now call nir_lower_load_const_to_scalar(), which should provide
> more optimization opportunities for NIR.
> - We now get rid of local variables after optimization is done, in case
> an optimization removed one.
> ---
>  src/amd/vulkan/radv_pipeline.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 49610a1..237e146 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -284,12 +284,20 @@ radv_shader_compile_to_nir(struct radv_device *device,
>
> nir_lower_tex(nir, _options);
>
> -   nir_lower_vars_to_ssa(nir);
> -   nir_lower_var_copies(nir);
> nir_lower_global_vars_to_local(nir);
> -   nir_remove_dead_variables(nir, nir_var_local);
> +
> +   nir_split_var_copies(nir);
> +
> radv_optimize_nir(nir);
>
> +   nir_lower_load_const_to_scalar(nir);
> +
> +   nir_lower_var_copies(nir);
> +
> +   radv_optimize_nir(nir);
> +
> +   nir_remove_dead_variables(nir, nir_var_local);
> +
> if (dump)
> nir_print_shader(nir, stderr);
>
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-06 Thread Connor Abbott
On Thu, Jul 6, 2017 at 2:01 PM, Bas Nieuwenhuizen
 wrote:
> On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott  
> wrote:
>> From: Connor Abbott 
>>
>> The old way was very TGSI-based, and couldn't handle indirect
>> dereferences at all. Instead, pass through the type information NIR has
>
> I think the old code should handle indirect derefs just fine? See the
> indir_index stuff. I'm kind of worried that LLVM doesn't promote
> variables with indirect derefs agrresively enough to registers. IIRC
> it only converts scalar arrays of up to 4 elements to vectors, while
> radv always promotes and IIRC radeonsi promotes for all variables with
> something like <= 20 dwords?

Ah, true, I missed that while deleting it :). At the same time, if
LLVM isn't putting stuff into registers aggressively enough, that's
something that should be fixed in LLVM. Gathering everything into a
vector every time something is used, like what this code
does, is going to make it harder for LLVM to know what stuff aliases
and optimize accordingly since LLVM won't be able to use its normal
alias analysis. I don't know if LLVM will be able to optimize that
pattern as well.

>
>
>> about local variables to LLVM, and translate NIR dereferences directly
>> into the equivalent GEP instructions in LLVM.
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 204 
>> 
>>  1 file changed, 146 insertions(+), 58 deletions(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c 
>> b/src/amd/common/ac_nir_to_llvm.c
>> index e72747a..f42d214 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>>
>> struct hash_table *defs;
>> struct hash_table *phis;
>> +   struct hash_table *vars;
>>
>> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
>> LLVMValueRef ring_offsets;
>> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
>> LLVMValueRef shared_memory;
>> uint64_t input_mask;
>> uint64_t output_mask;
>> -   int num_locals;
>> -   LLVMValueRef *locals;
>> uint8_t num_output_clips;
>> uint8_t num_output_culls;
>>
>> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
>> return result;
>>  }
>>
>> +static LLVMValueRef
>> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
>> +   nir_deref_var *deref)
>> +{
>> +   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
>> deref->var);
>> +   assert(entry->data);
>> +   LLVMValueRef val = entry->data;
>> +   nir_deref *tail = deref->deref.child;
>> +   while (tail != NULL) {
>> +   LLVMValueRef offset;
>> +   switch (tail->deref_type) {
>> +   case nir_deref_type_array: {
>> +   nir_deref_array *array = nir_deref_as_array(tail);
>> +   offset = LLVMConstInt(ctx->i32, array->base_offset, 
>> 0);
>> +   if (array->deref_array_type ==
>> +   nir_deref_array_type_indirect) {
>> +   offset = LLVMBuildAdd(ctx->builder, offset,
>> + get_src(ctx,
>> + 
>> array->indirect),
>> + "");
>> +   }
>> +   break;
>> +   }
>> +   case nir_deref_type_struct: {
>> +   nir_deref_struct *deref_struct =
>> +   nir_deref_as_struct(tail);
>> +   offset = LLVMConstInt(ctx->i32,
>> + deref_struct->index, 0);
>> +   break;
>> +   }
>> +   default:
>> +   unreachable("bad deref type");
>> +   }
>> +   val = ac_build_gep0(>ac, val, offset);
>> +   tail = tail->child;
>> +   }
>> +   return val;
>> +}
>> +
>>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>>nir_intrinsic_instr *instr)
>>  {
>> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct 
>> nir_to_llvm_context *ctx,
>> values[chan] = ctx->inputs[idx + chan + 
>> const_index * 4];
>> }
>> break;
>> -   case nir_var_local:
>> -   for (unsigned chan = 0; chan < ve; chan++) {
>> -   if (indir_index) {
>> -   unsigned count = glsl_count_attribute_slots(
>> -   instr->variables[0]->var->type, 
>> false);
>> -   count -= chan / 4;
>> -   LLVMValueRef tmp_vec = 
>> 

Re: [Mesa-dev] [PATCH 3/4] ac/llvm: set xnack like radeonsi does.

2017-07-06 Thread Alex Deucher
On Thu, Jul 6, 2017 at 4:56 PM, Marek Olšák  wrote:
> On Thu, Jul 6, 2017 at 8:12 PM, Alex Deucher  wrote:
>> On Thu, Jul 6, 2017 at 1:13 PM, Jan Vesely  wrote:
>>> On Thu, 2017-07-06 at 12:09 +1000, Dave Airlie wrote:
 From: Dave Airlie 

 Use family, but only set xnack+ for gfx9.

 Signed-off-by: Dave Airlie 
 ---
  src/amd/common/ac_llvm_util.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

 diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
 index 4f32d4b..84ed73f 100644
 --- a/src/amd/common/ac_llvm_util.c
 +++ b/src/amd/common/ac_llvm_util.c
 @@ -126,7 +126,9 @@ LLVMTargetMachineRef ac_create_target_machine(enum 
 radeon_family family, enum ac
   LLVMTargetRef target = ac_get_llvm_target(triple);

   snprintf(features, sizeof(features),
 -  "+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack");
 +  "+DumpCode,+vgpr-spilling,-fp32-denormals%s",
 +  family >= CHIP_VEGA10 ? ",+xnack" : ",-xnack");
>>>
>>> isn't xnack also supported on carrizo?
>>
>> Only on memory mapped via ATC which we only use with ROCm at the moment.
>
> xnack is currently enabled on all compute rings on Carrizo and Stoney.
> ATC or not, can the hw update the xnack SGPR pair? If yes, the
> compiler should reserve it to make sure the registers aren't used as
> general purpose registers.
>
> With current LLVM, the following applies: If you use a compute ring,
> use the default LLVM setting. If not, set -xnack.

That's a question for John or Felix.  As far as I know, on CZ/ST xnack
only works on requests that go through iommuv2.

Alex
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/6] ac/nir: rewrite shared variable handling

2017-07-06 Thread Bas Nieuwenhuizen
Patches 3-4 look technically correct to me, so for just using it for shared vars

Reviewed-by: Bas Nieuwenhuizen 

On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott  wrote:
> From: Connor Abbott 
>
> Similar to before, do the direct NIR->LLVM translation instead of
> lowering to an array then back to a variable. This should fix indirect
> dereferences, make shared variables more tightly packed, and make LLVM's
> alias analysis more precise.
> ---
>  src/amd/common/ac_nir_to_llvm.c | 116 
> 
>  1 file changed, 23 insertions(+), 93 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index f42d214..743cc1d 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -155,7 +155,6 @@ struct nir_to_llvm_context {
> LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
> LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
>
> -   LLVMValueRef shared_memory;
> uint64_t input_mask;
> uint64_t output_mask;
> uint8_t num_output_clips;
> @@ -386,23 +385,6 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, 
> int num_elements)
>CONST_ADDR_SPACE);
>  }
>
> -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
> - int idx,
> - LLVMTypeRef type)
> -{
> -   LLVMValueRef offset;
> -   LLVMValueRef ptr;
> -   int addr_space;
> -
> -   offset = LLVMConstInt(ctx->i32, idx * 16, false);
> -
> -   ptr = ctx->shared_memory;
> -   ptr = LLVMBuildGEP(ctx->builder, ptr, , 1, "");
> -   addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
> -   ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, 
> addr_space), "");
> -   return ptr;
> -}
> -
>  static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, 
> LLVMTypeRef t)
>  {
> if (t == ctx->f16 || t == ctx->i16)
> @@ -2986,7 +2968,8 @@ static LLVMValueRef visit_load_var(struct 
> nir_to_llvm_context *ctx,
> values[chan] = ctx->inputs[idx + chan + 
> const_index * 4];
> }
> break;
> -   case nir_var_local: {
> +   case nir_var_local:
> +   case nir_var_shared: {
> LLVMValueRef address = build_gep_for_deref(ctx,
>
> instr->variables[0]);
> LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
> @@ -3016,23 +2999,6 @@ static LLVMValueRef visit_load_var(struct 
> nir_to_llvm_context *ctx,
> }
> }
> break;
> -   case nir_var_shared: {
> -   LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
> -   LLVMValueRef derived_ptr;
> -
> -   if (indir_index)
> -   indir_index = LLVMBuildMul(ctx->builder, indir_index, 
> LLVMConstInt(ctx->i32, 4, false), "");
> -
> -   for (unsigned chan = 0; chan < ve; chan++) {
> -   LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 
> false);
> -   if (indir_index)
> -   index = LLVMBuildAdd(ctx->builder, index, 
> indir_index, "");
> -   derived_ptr = LLVMBuildGEP(ctx->builder, ptr, , 
> 1, "");
> -
> -   values[chan] = LLVMBuildLoad(ctx->builder, 
> derived_ptr, "");
> -   }
> -   break;
> -   }
> default:
> unreachable("unhandle variable mode");
> }
> @@ -3107,7 +3073,8 @@ visit_store_var(struct nir_to_llvm_context *ctx,
> }
> }
> break;
> -   case nir_var_local: {
> +   case nir_var_local:
> +   case nir_var_shared: {
> int writemask = instr->const_index[0];
> LLVMValueRef address = build_gep_for_deref(ctx,
>
> instr->variables[0]);
> @@ -3137,28 +3104,6 @@ visit_store_var(struct nir_to_llvm_context *ctx,
> }
> break;
> }
> -   case nir_var_shared: {
> -   LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
> -
> -   if (indir_index)
> -   indir_index = LLVMBuildMul(ctx->builder, indir_index, 
> LLVMConstInt(ctx->i32, 4, false), "");
> -
> -   for (unsigned chan = 0; chan < 8; chan++) {
> -   if (!(writemask & (1 << chan)))
> -   continue;
> -   LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 
> false);
> -   LLVMValueRef derived_ptr;
> -
> -   if (indir_index)
> -  

Re: [Mesa-dev] [PATCH mesa] build systems: move git_sha1_gen.sh to bin/

2017-07-06 Thread Brian Paul

LGTM.

Reviewed-by: Brian Paul 


On 07/06/2017 02:08 PM, Eric Engestrom wrote:

There was no reason for this script to live outside the scripts
directory.

Suggested-by: Brian Paul 
Signed-off-by: Eric Engestrom 
---
  Makefile.am| 2 +-
  git_sha1_gen.sh => bin/git_sha1_gen.sh | 2 +-
  src/Makefile.am| 2 +-
  src/SConscript | 2 +-
  src/mesa/Android.libmesa_git_sha1.mk   | 2 +-
  5 files changed, 5 insertions(+), 5 deletions(-)
  rename git_sha1_gen.sh => bin/git_sha1_gen.sh (69%)

diff --git a/Makefile.am b/Makefile.am
index d1eadee1e1..51542af3b8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -53,7 +53,7 @@ EXTRA_DIST = \
common.py \
docs \
doxygen \
-   git_sha1_gen.sh \
+   bin/git_sha1_gen.sh \
scons \
SConstruct

diff --git a/git_sha1_gen.sh b/bin/git_sha1_gen.sh
similarity index 69%
rename from git_sha1_gen.sh
rename to bin/git_sha1_gen.sh
index 20ab8df8ea..898e590758 100755
--- a/git_sha1_gen.sh
+++ b/bin/git_sha1_gen.sh
@@ -4,7 +4,7 @@
  cd "$(dirname "$0")"

  # don't print anything if git fails
-if ! git_sha1=$(git --git-dir=.git rev-parse --short=10 HEAD 2>/dev/null)
+if ! git_sha1=$(git --git-dir=../.git rev-parse --short=10 HEAD 2>/dev/null)
  then
exit
  fi
diff --git a/src/Makefile.am b/src/Makefile.am
index 36995e08a4..5aee6b0141 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,7 +21,7 @@

  .PHONY: git_sha1.h.tmp
  git_sha1.h.tmp:
-   @sh $(top_srcdir)/git_sha1_gen.sh > $@
+   @sh $(top_srcdir)/bin/git_sha1_gen.sh > $@

  git_sha1.h: git_sha1.h.tmp
@echo "updating git_sha1.h"
diff --git a/src/SConscript b/src/SConscript
index c31e4ec06f..37b3f8572e 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -24,7 +24,7 @@ def write_git_sha1_h_file(filename):

  tempfile = "git_sha1.h.tmp"
  with open(tempfile, "w") as f:
-args = [ 'sh', Dir('#').abspath + '/git_sha1_gen.sh' ]
+args = [ 'sh', Dir('#').abspath + '/bin/git_sha1_gen.sh' ]
  try:
  subprocess.Popen(args, stdout=f).wait()
  except:
diff --git a/src/mesa/Android.libmesa_git_sha1.mk 
b/src/mesa/Android.libmesa_git_sha1.mk
index a5a1ebb37f..7d64b1c809 100644
--- a/src/mesa/Android.libmesa_git_sha1.mk
+++ b/src/mesa/Android.libmesa_git_sha1.mk
@@ -46,7 +46,7 @@ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, 
git_sha1.h)
  $(intermediates)/git_sha1.h: $(wildcard $(MESA_TOP)/.git/logs/HEAD)
@mkdir -p $(dir $@)
@echo "GIT-SHA1: $(PRIVATE_MODULE) <= git"
-   $(hide) sh $(MESA_TOP)/git_sha1_gen.sh > $@
+   $(hide) sh $(MESA_TOP)/bin/git_sha1_gen.sh > $@

  LOCAL_EXPORT_C_INCLUDE_DIRS := $(intermediates)




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/6] nir: don't segfault when printing variables with no name

2017-07-06 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott  wrote:
> From: Connor Abbott 
>
> While normally we give variables whose name field is NULL a temporary
> name when called from nir_print_shader(), when we were calling from
> nir_print_instr() we never bothered, meaning that we just segfaulted
> when trying to print out instructions with such a variable. Since
> nir_print_instr() is meant to be called while debugging, we don't need
> to bother too much about giving a consistent name, but we don't want to
> crash in the middle of debugging.
> ---
>  src/compiler/nir/nir_print.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
> index 66c0669..f4811fe 100644
> --- a/src/compiler/nir/nir_print.c
> +++ b/src/compiler/nir/nir_print.c
> @@ -257,7 +257,7 @@ static const char *
>  get_var_name(nir_variable *var, print_state *state)
>  {
> if (state->ht == NULL)
> -  return var->name;
> +  return var->name ? var->name : "unnamed";
>
> assert(state->syms);
>
> --
> 2.9.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/6] ac/nir: rewrite local variable handling

2017-07-06 Thread Bas Nieuwenhuizen
On Thu, Jul 6, 2017 at 9:48 PM, Connor Abbott  wrote:
> From: Connor Abbott 
>
> The old way was very TGSI-based, and couldn't handle indirect
> dereferences at all. Instead, pass through the type information NIR has

I think the old code should handle indirect derefs just fine? See the
indir_index stuff. I'm kind of worried that LLVM doesn't promote
variables with indirect derefs aggressively enough to registers. IIRC
variables with indirect derefs agrresively enough to registers. IIRC
it only converts scalar arrays of up to 4 elements to vectors, while
radv always promotes and IIRC radeonsi promotes for all variables with
something like <= 20 dwords?


> about local variables to LLVM, and translate NIR dereferences directly
> into the equivalent GEP instructions in LLVM.
> ---
>  src/amd/common/ac_nir_to_llvm.c | 204 
> 
>  1 file changed, 146 insertions(+), 58 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index e72747a..f42d214 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -65,6 +65,7 @@ struct nir_to_llvm_context {
>
> struct hash_table *defs;
> struct hash_table *phis;
> +   struct hash_table *vars;
>
> LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
> LLVMValueRef ring_offsets;
> @@ -157,8 +158,6 @@ struct nir_to_llvm_context {
> LLVMValueRef shared_memory;
> uint64_t input_mask;
> uint64_t output_mask;
> -   int num_locals;
> -   LLVMValueRef *locals;
> uint8_t num_output_clips;
> uint8_t num_output_culls;
>
> @@ -2905,6 +2904,45 @@ load_gs_input(struct nir_to_llvm_context *ctx,
> return result;
>  }
>
> +static LLVMValueRef
> +build_gep_for_deref(struct nir_to_llvm_context *ctx,
> +   nir_deref_var *deref)
> +{
> +   struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, 
> deref->var);
> +   assert(entry->data);
> +   LLVMValueRef val = entry->data;
> +   nir_deref *tail = deref->deref.child;
> +   while (tail != NULL) {
> +   LLVMValueRef offset;
> +   switch (tail->deref_type) {
> +   case nir_deref_type_array: {
> +   nir_deref_array *array = nir_deref_as_array(tail);
> +   offset = LLVMConstInt(ctx->i32, array->base_offset, 
> 0);
> +   if (array->deref_array_type ==
> +   nir_deref_array_type_indirect) {
> +   offset = LLVMBuildAdd(ctx->builder, offset,
> + get_src(ctx,
> + 
> array->indirect),
> + "");
> +   }
> +   break;
> +   }
> +   case nir_deref_type_struct: {
> +   nir_deref_struct *deref_struct =
> +   nir_deref_as_struct(tail);
> +   offset = LLVMConstInt(ctx->i32,
> + deref_struct->index, 0);
> +   break;
> +   }
> +   default:
> +   unreachable("bad deref type");
> +   }
> +   val = ac_build_gep0(>ac, val, offset);
> +   tail = tail->child;
> +   }
> +   return val;
> +}
> +
>  static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
>nir_intrinsic_instr *instr)
>  {
> @@ -2948,24 +2986,14 @@ static LLVMValueRef visit_load_var(struct 
> nir_to_llvm_context *ctx,
> values[chan] = ctx->inputs[idx + chan + 
> const_index * 4];
> }
> break;
> -   case nir_var_local:
> -   for (unsigned chan = 0; chan < ve; chan++) {
> -   if (indir_index) {
> -   unsigned count = glsl_count_attribute_slots(
> -   instr->variables[0]->var->type, 
> false);
> -   count -= chan / 4;
> -   LLVMValueRef tmp_vec = 
> ac_build_gather_values_extended(
> -   >ac, ctx->locals + idx + 
> chan, count,
> -   4, true);
> -
> -   values[chan] = 
> LLVMBuildExtractElement(ctx->builder,
> -  
> tmp_vec,
> -  
> indir_index, "");
> -   } else {
> -   values[chan] = LLVMBuildLoad(ctx->builder, 
> ctx->locals[idx + chan + const_index * 4], "");
> -   }
> -   }
> -   break;
> +   case 

  1   2   >