[Mesa-dev] [PATCH kmscube] Init and clean up VT settings

2017-12-01 Thread Kristian H. Kristensen
This puts VT input into raw (unbuffered) mode so that we can detect
single key strokes. Also uses KD_GRAPHICS mode so that fbcon gets
restored properly on exit and inhibits VT switching since we don't
properly get/set drm master. Finally, handle signals and clean up if
we catch one.
---
 common.c | 88 +++-
 common.h |  6 +
 configure.ac |  3 +++
 drm-atomic.c |  2 +-
 drm-legacy.c |  2 +-
 kmscube.c|  3 +++
 6 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/common.c b/common.c
index b76c994..c495187 100644
--- a/common.c
+++ b/common.c
@@ -24,10 +24,20 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
+#include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "common.h"
 
@@ -288,3 +298,79 @@ int link_program(unsigned program)
 
return 0;
 }
+
+static struct termios save_tio;
+
+static void restore_vt(void)
+{
+   struct vt_mode mode = { .mode = VT_AUTO };
+   ioctl(STDIN_FILENO, VT_SETMODE, &mode);
+
+   tcsetattr(STDIN_FILENO, TCSANOW, &save_tio);
+   ioctl(STDIN_FILENO, KDSETMODE, KD_TEXT);
+}
+
+static void handle_signal(int sig)
+{
+   restore_vt();
+
+   raise(sig);
+}
+
+int init_vt(void)
+{
+   struct termios tio;
+   struct stat buf;
+   int ret;
+
+   /* If we're not on a VT, we're probably logged in as root over
+* ssh. Skip all this then. */
+   ret = fstat(STDIN_FILENO, &buf);
+   if (ret == -1 || major(buf.st_rdev) != TTY_MAJOR)
+   return 0;
+
+   /* First, save term io setting so we can restore properly. */
+   tcgetattr(STDIN_FILENO, &save_tio);
+
+   /* We don't drop drm master, so block VT switching while we're
+* running. Otherwise, switching to X on another VT will crash X when it
+* fails to get drm master. */
+   struct vt_mode mode = { .mode = VT_PROCESS, .relsig = 0, .acqsig = 0 };
+   ret = ioctl(STDIN_FILENO, VT_SETMODE, &mode);
+   if (ret == -1) {
+   printf("failed to take control of vt handling\n");
+   return -1;
+   }
+
+   /* Set KD_GRAPHICS to disable fbcon while we render. */
+   ret = ioctl(STDIN_FILENO, KDSETMODE, KD_GRAPHICS);
+   if (ret == -1) {
+   printf("failed to switch console to graphics mode\n");
+   return -1;
+   }
+
+   atexit(restore_vt);
+
+   /* Set console input to raw mode. */
+   tio = save_tio;
+   tio.c_lflag &= ~(ICANON | ECHO);
+   tcsetattr(STDIN_FILENO, TCSANOW, &tio);
+
+   /* Restore console on SIGINT and friends. */
+   struct sigaction act = {
+   .sa_handler = handle_signal,
+   .sa_flags = SA_RESETHAND
+   };
+   sigaction(SIGINT, &act, NULL);
+   sigaction(SIGSEGV, &act, NULL);
+   sigaction(SIGABRT, &act, NULL);
+
+   return 0;
+}
+
+bool key_pressed(void)
+{
+   struct pollfd pfd[1] = { { .fd = 0, .events = POLLIN } };
+
+   return poll(pfd, 1, 0) == 1;
+}
diff --git a/common.h b/common.h
index 11ec26e..e6a3c93 100644
--- a/common.h
+++ b/common.h
@@ -24,6 +24,8 @@
 #ifndef _COMMON_H
 #define _COMMON_H
 
+#include 
+
 #include 
 #include 
 #include 
@@ -130,4 +132,8 @@ init_cube_video(const struct gbm *gbm, const char *video)
 }
 #endif
 
+int init_vt(void);
+bool key_pressed(void);
+
+
 #endif /* _COMMON_H */
diff --git a/configure.ac b/configure.ac
index 8397f7b..3ee11ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -31,6 +31,9 @@ AM_INIT_AUTOMAKE([foreign dist-bzip2])
 
 AC_PROG_CC
 
+# For sigaction
+AC_USE_SYSTEM_EXTENSIONS
+
 # Enable quiet compiles on automake 1.11.
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
diff --git a/drm-atomic.c b/drm-atomic.c
index 82531d3..4c0b16e 100644
--- a/drm-atomic.c
+++ b/drm-atomic.c
@@ -191,7 +191,7 @@ static int atomic_run(const struct gbm *gbm, const struct 
egl *egl)
/* Allow a modeset change for the first commit only. */
flags |= DRM_MODE_ATOMIC_ALLOW_MODESET;
 
-   while (1) {
+   while (!key_pressed()) {
struct gbm_bo *next_bo;
EGLSyncKHR gpu_fence = NULL;   /* out-fence from gpu, in-fence 
to kms */
EGLSyncKHR kms_fence = NULL;   /* in-fence to gpu, out-fence 
from kms */
diff --git a/drm-legacy.c b/drm-legacy.c
index a0b419a..d3a9391 100644
--- a/drm-legacy.c
+++ b/drm-legacy.c
@@ -73,7 +73,7 @@ static int legacy_run(const struct gbm *gbm, const struct egl 
*egl)
return ret;
}
 
-   while (1) {
+   while (!key_pressed()) {
struct gbm_bo *next_bo;
int waiting_for_flip = 1;
 
diff --git a/kmscube.c b/kmscube.c
index 3a2c4dd..4615430 100644
--- a/kmscube.c
+++ b/kmscube.c
@@ -153,5 +153,8 @@ int main(int argc, char *argv[])
glClearColor(0.5, 0.5, 0.5, 1.0);
glClear(GL_COLOR_BUFFER_BIT);
 
+   if (init_vt())
+   

Re: [Mesa-dev] [PATCH 8/8] radeonsi: make const and stream uploaders allocate read-only memory

2017-12-01 Thread Dieter Nützel

For the series:

Tested-by: Dieter Nützel 

on RX580

with UH, UV, Blender 2.79, glmark2

Shouldn't we set R600_DEBUG=sisched for UH, UV, Blender at least?
Maybe general (with LLVM 5.0/6.0)?

I'm seeing ~7-10 (> 10%) more fps with UH,UV and Blender (even with 
tess).


Greetings,
Dieter

Am 01.12.2017 21:19, schrieb Marek Olšák:

From: Marek Olšák 

and anything that clones these uploaders, like u_threaded_context.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index d85f9f0..23d8bf7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,28 @@ bool si_common_context_init(struct
r600_common_context *rctx,
return false;
}

rctx->allocator_zeroed_memory =
u_suballocator_create(>b, sscreen->info.gart_page_size,
  0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;

rctx->b.stream_uploader = u_upload_create(>b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;

rctx->b.const_uploader = u_upload_create(>b, 128 * 1024,
-0, PIPE_USAGE_DEFAULT, 0);
+0, PIPE_USAGE_DEFAULT,
+R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;

rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
return false;

if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags &
DBG(NO_ASYNC_DMA))) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
   r600_flush_dma_ring,

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Serialize nir later in the linking process

2017-12-01 Thread Kenneth Graunke
On Friday, December 1, 2017 3:08:07 PM PST Jordan Justen wrote:
> Fixes MESA_GLSL=cache_fb with piglit
> tests/spec/glsl-1.50/execution/geometry/clip-distance-vs-gs-out.shader_test
> 
> Fixes: 0610a624a12 i965/link: Serialize program to nir after linking for 
> shader cache
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103988
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_link.cpp | 25 -
>  1 file changed, 16 insertions(+), 9 deletions(-)

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/32] vulkan/wsi: Set a proper pWaitDstStageMask on the dummy submit

2017-12-01 Thread Chad Versace
On Fri 01 Dec 2017, Chad Versace wrote:
> On Tue 28 Nov 2017, Jason Ekstrand wrote:
> > Neither mesa driver really cares, but we should set it none the less for
> > the sake of correctness.
> > ---
> >  src/vulkan/wsi/wsi_common.c | 17 +
> >  1 file changed, 17 insertions(+)
> > 
> > diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
> > index 4f6648f..e5a9a28 100644
> > --- a/src/vulkan/wsi/wsi_common.c
> > +++ b/src/vulkan/wsi/wsi_common.c
> > @@ -542,14 +542,31 @@ wsi_common_queue_present(const struct wsi_device *wsi,
> >   .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
> >   .pNext = NULL,
> >};
> > +  VkPipelineStageFlags *stage_flags = NULL;
> >if (i == 0) {
> >   /* We only need/want to wait on semaphores once.  After that, 
> > we're
> >* guaranteed ordering since it all happens on the same queue.
> >*/
> >   submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount,
> >   submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores,
> > +
> > + /* Set up the pWaitDstStageMasks */
> > + stage_flags = vk_alloc(>alloc,
> > +sizeof(VkPipelineStageFlags) *
> > +pPresentInfo->waitSemaphoreCount,
> > +8,
> > +VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
> > + if (!stage_flags) {
> > +result = VK_ERROR_OUT_OF_HOST_MEMORY;
> > +goto fail_present;
> > + }
> > + for (uint32_t s = 0; s < pPresentInfo->waitSemaphoreCount; s++)
> > +stage_flags[s] = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
> > +
> > + submit_info.pWaitDstStageMask = stage_flags;
> 
> Since VkSwapchain is required to be externally synchronized, you could
> embed stage_flags directly in struct wsi_swapchain, doubling its size
> when needed. But meh.
> 
> Reviewed-by: Chad Versace 

I have to quit for the day. I wish I could've completed all the review
by end-of-week, but I've had a busy two days preparing today's Chrome OS
release branchpoint. I'll continue reviewing over breakfast tomorrow,
but don't let me hinder the series. Push it when you're ready; don't let
my slowness become your slowness.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 21/32] anv/wsi: Use the common QueuePresent code

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> ---
>  src/intel/vulkan/anv_wsi.c | 63 
> +-
>  1 file changed, 6 insertions(+), 57 deletions(-)

Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 20/32] vulkan/wsi: Set a proper pWaitDstStageMask on the dummy submit

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> Neither mesa driver really cares, but we should set it none the less for
> the sake of correctness.
> ---
>  src/vulkan/wsi/wsi_common.c | 17 +
>  1 file changed, 17 insertions(+)
> 
> diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
> index 4f6648f..e5a9a28 100644
> --- a/src/vulkan/wsi/wsi_common.c
> +++ b/src/vulkan/wsi/wsi_common.c
> @@ -542,14 +542,31 @@ wsi_common_queue_present(const struct wsi_device *wsi,
>   .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
>   .pNext = NULL,
>};
> +  VkPipelineStageFlags *stage_flags = NULL;
>if (i == 0) {
>   /* We only need/want to wait on semaphores once.  After that, we're
>* guaranteed ordering since it all happens on the same queue.
>*/
>   submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount,
>   submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores,
> +
> + /* Set up the pWaitDstStageMasks */
> + stage_flags = vk_alloc(>alloc,
> +sizeof(VkPipelineStageFlags) *
> +pPresentInfo->waitSemaphoreCount,
> +8,
> +VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
> + if (!stage_flags) {
> +result = VK_ERROR_OUT_OF_HOST_MEMORY;
> +goto fail_present;
> + }
> + for (uint32_t s = 0; s < pPresentInfo->waitSemaphoreCount; s++)
> +stage_flags[s] = VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
> +
> + submit_info.pWaitDstStageMask = stage_flags;

Since VkSwapchain is required to be externally synchronized, you could
embed stage_flags directly in struct wsi_swapchain, doubling its size
when needed. But meh.

Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 19/32] vulkan/wsi: Only wait on semaphores on the first swapchain

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> ---
>  src/vulkan/wsi/wsi_common.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)

Smart. I was expecting a patch like this after reading patch 17.
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 18/32] vulkan/wsi: Refactor result handling in queue_present

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> ---
>  src/vulkan/wsi/wsi_common.c | 54 
> +++--
>  1 file changed, 28 insertions(+), 26 deletions(-)
> 
> diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
> index 5920359..f149846 100644
> --- a/src/vulkan/wsi/wsi_common.c
> +++ b/src/vulkan/wsi/wsi_common.c
> @@ -514,14 +514,14 @@ wsi_common_queue_present(const struct wsi_device *wsi,
>   int queue_family_index,
>   const VkPresentInfoKHR *pPresentInfo)
>  {
> -   VkResult result = VK_SUCCESS;
> +   VkResult final_result = VK_SUCCESS;

Yes please. The goto improves this code.

Reviewed-by: Chad Versace 



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 17/32] radv/wsi: Move the guts of QueuePresent to wsi common

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> From: Dave Airlie 
> 
> v2 (Jason Ekstrand):
>  - Better commit message
>  - Rebase
>  - Re-indent to follow wsi_common style
>  - Drop the unneeded _swapchain from the newly added helper
>  - Make the clone more true to the original (as per the rebase)
> ---
>  src/amd/vulkan/radv_wsi.c   | 92 
> +++--
>  src/vulkan/wsi/wsi_common.c | 78 ++
>  src/vulkan/wsi/wsi_common.h | 10 +
>  3 files changed, 93 insertions(+), 87 deletions(-)

> +VkResult
> +wsi_common_queue_present(const struct wsi_device *wsi,
> + VkDevice device,
> + VkQueue queue,
> + int queue_family_index,
> + const VkPresentInfoKHR *pPresentInfo)
> +{
> +   VkResult result = VK_SUCCESS;
> +
> +   const VkPresentRegionsKHR *regions =
> +  vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
> +
> +   for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
> +  WSI_FROM_HANDLE(wsi_swapchain, swapchain, 
> pPresentInfo->pSwapchains[i]);
> +  VkResult item_result;
> +
> +  if (swapchain->fences[0] == VK_NULL_HANDLE) {
> + const VkFenceCreateInfo fence_info = {
> +.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
> +.pNext = NULL,
> +.flags = 0,
> + };
> + item_result = wsi->CreateFence(device, &fence_info,
> +&swapchain->alloc,
> +&swapchain->fences[0]);

As part of moving fence creation to the common code, the fence
destruction should also be moved from (anv|radv)_DestroySwapchainKHR to
wsi_swapchain_finish(). But, since that migration doesn't affect the
correctness of the patch, this patch is

Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 16/32] vulkan/wsi: Add a WSI_FROM_HANDLE macro

2017-12-01 Thread Chad Versace
Patches 15 and 16 are
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] anv: Enable UBO pushing

2017-12-01 Thread Jason Ekstrand
---
 src/intel/vulkan/anv_device.c   | 1 +
 src/intel/vulkan/anv_pipeline.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 937efb9..43781bd 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -405,6 +405,7 @@ anv_physical_device_init(struct anv_physical_device *device,
device->compiler->shader_debug_log = compiler_debug_log;
device->compiler->shader_perf_log = compiler_perf_log;
device->compiler->supports_pull_constants = false;
+   device->compiler->constant_buffer_0_is_relative = true;
 
isl_device_init(>isl_dev, >info, swizzled);
 
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 907b24a..3bb19ff 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -385,6 +385,9 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
  struct brw_stage_prog_data *prog_data,
  struct anv_pipeline_bind_map *map)
 {
+   const struct brw_compiler *compiler =
+  pipeline->device->instance->physicalDevice.compiler;
+
nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx,
module, entrypoint, stage,
spec_info);
@@ -436,6 +439,9 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
if (pipeline->layout)
   anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);
 
+   if (stage != MESA_SHADER_COMPUTE)
+  brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges);
+
assert(nir->num_uniforms == prog_data->nr_params * 4);
 
return nir;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] anv/cmd_buffer: Add support for pushing UBO ranges

2017-12-01 Thread Jason Ekstrand
In order to do this we have to modify push constant set up to handle
ranges.  We also have to tweak the way we handle dirty bits a bit so
that we re-push whenever a descriptor set changes.
---
 src/intel/vulkan/genX_cmd_buffer.c | 142 -
 src/intel/vulkan/genX_pipeline.c   |   3 +-
 2 files changed, 112 insertions(+), 33 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 16b4ca6..0bd3874 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1843,9 +1843,12 @@ cmd_buffer_emit_descriptor_pointers(struct 
anv_cmd_buffer *cmd_buffer,
}
 }
 
-static uint32_t
-cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
+static void
+cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
+VkShaderStageFlags dirty_stages)
 {
+   UNUSED const struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
+
static const uint32_t push_constant_opcodes[] = {
   [MESA_SHADER_VERTEX]  = 21,
   [MESA_SHADER_TESS_CTRL]   = 25, /* HS */
@@ -1857,39 +1860,117 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer)
 
VkShaderStageFlags flushed = 0;
 
-   anv_foreach_stage(stage, cmd_buffer->state.push_constants_dirty) {
-  if (stage == MESA_SHADER_COMPUTE)
- continue;
-
+   anv_foreach_stage(stage, dirty_stages) {
   assert(stage < ARRAY_SIZE(push_constant_opcodes));
   assert(push_constant_opcodes[stage] > 0);
 
-  struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, 
stage);
+  anv_batch_emit(_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
+ c._3DCommandSubOpcode = push_constant_opcodes[stage];
 
-  if (state.offset == 0) {
- anv_batch_emit(_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c)
-c._3DCommandSubOpcode = push_constant_opcodes[stage];
-  } else {
- anv_batch_emit(_buffer->batch, GENX(3DSTATE_CONSTANT_VS), c) {
-c._3DCommandSubOpcode = push_constant_opcodes[stage],
-c.ConstantBody = (struct GENX(3DSTATE_CONSTANT_BODY)) {
-#if GEN_GEN >= 9
-   .Buffer[2] = { 
_buffer->device->dynamic_state_pool.block_pool.bo, state.offset },
-   .ReadLength[2] = DIV_ROUND_UP(state.alloc_size, 32),
+ if (anv_pipeline_has_stage(cmd_buffer->state.pipeline, stage)) {
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+const struct brw_stage_prog_data *prog_data =
+   pipeline->shaders[stage]->prog_data;
+const struct anv_pipeline_bind_map *bind_map =
+   >shaders[stage]->bind_map;
+
+/* The Skylake PRM contains the following restriction:
+ *
+ *"The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the buffers in the highest slots.
+ * This way, slot 0 is only used if slot 3 is also used.
+ */
+int n = 3;
+
+for (int i = 3; i >= 0; i--) {
+   const struct brw_ubo_range *range = _data->ubo_ranges[i];
+   if (range->length == 0)
+  continue;
+
+   const unsigned surface =
+  prog_data->binding_table.ubo_start + range->block;
+
+   assert(surface <= bind_map->surface_count);
+   const struct anv_pipeline_binding *binding =
+  _map->surface_to_descriptor[surface];
+
+   const struct anv_descriptor *desc =
+  anv_descriptor_for_binding(cmd_buffer, binding);
+
+   struct anv_address read_addr;
+   uint32_t read_len;
+   if (desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+  read_len = MIN2(range->length,
+ DIV_ROUND_UP(desc->buffer_view->range, 32) - 
range->start);
+  read_addr = (struct anv_address) {
+ .bo = desc->buffer_view->bo,
+ .offset = desc->buffer_view->offset +
+   range->start * 32,
+  };
+   } else {
+  assert(desc->type == 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
+
+  uint32_t dynamic_offset =
+ dynamic_offset_for_binding(cmd_buffer, pipeline, binding);
+  uint32_t buf_offset =
+ MIN2(desc->offset + dynamic_offset, desc->buffer->size);
+  uint32_t buf_range =
+ MIN2(desc->range, desc->buffer->size - buf_offset);
+
+  read_len = 

[Mesa-dev] [PATCH 0/6] anv: Add support for pushing ranges of UBOs

2017-12-01 Thread Jason Ekstrand
We've had this optimization in the GL driver since July but never got
around to hooking it up in Vulkan.  This lets us turn UBOs into push
constants which are significantly faster since they read the UBO data
once and the shader dispatch shoves it into the shader prior to program
execution.

I have zero benchmark numbers at the moment but I expect this to help
anything which uses UBOs and has constant offsets into them which is
basically everything.  Unfortunately, I'm at home and not near any
hardware with stable enough performance characteristics to get decent
numbers today.

Cc: Kenneth Graunke 
Cc: Eero Tamminen 

Jason Ekstrand (6):
  anv/pipeline: Translate vulkan_resource_index to a constant when
possible
  anv/cmd_buffer: Add some helpers for working with descriptor sets
  anv/cmd_buffer: Add some stage asserts
  anv/cmd_buffer: Add support for pushing UBO ranges
  anv/device: Increase the UBO alignment requirement to 32
  anv: Enable UBO pushing

 src/intel/vulkan/anv_device.c|  13 +-
 src/intel/vulkan/anv_nir_apply_pipeline_layout.c |  15 +-
 src/intel/vulkan/anv_pipeline.c  |   6 +
 src/intel/vulkan/genX_cmd_buffer.c   | 191 ++-
 src/intel/vulkan/genX_pipeline.c |   3 +-
 5 files changed, 179 insertions(+), 49 deletions(-)

-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/6] anv/device: Increase the UBO alignment requirement to 32

2017-12-01 Thread Jason Ekstrand
Push constants work in terms of 32-byte chunks so if we want to be able
to push UBOs, everything needs to be 32-byte aligned.  Currently, we
only require 16-byte which is too small.
---
 src/intel/vulkan/anv_device.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index b5577ee..937efb9 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -822,7 +822,8 @@ void anv_GetPhysicalDeviceProperties(
   .viewportSubPixelBits = 13, /* We take a float? */
   .minMemoryMapAlignment= 4096, /* A page */
   .minTexelBufferOffsetAlignment= 1,
-  .minUniformBufferOffsetAlignment  = 16,
+  /* We need 16 for UBO block reads to work and 32 for push UBOs */
+  .minUniformBufferOffsetAlignment  = 32,
   .minStorageBufferOffsetAlignment  = 4,
   .minTexelOffset   = -8,
   .maxTexelOffset   = 7,
@@ -1833,8 +1834,15 @@ void anv_GetBufferMemoryRequirements(
  memory_types |= (1u << i);
}
 
+   /* Base alignment requirement of a cache line */
+   uint32_t alignment = 16;
+
+   /* We need an alignment of 32 for pushing UBOs */
+   if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
+  alignment = MAX2(alignment, 32);
+
pMemoryRequirements->size = buffer->size;
-   pMemoryRequirements->alignment = 16;
+   pMemoryRequirements->alignment = alignment;
pMemoryRequirements->memoryTypeBits = memory_types;
 }
 
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] anv/cmd_buffer: Add some helpers for working with descriptor sets

2017-12-01 Thread Jason Ekstrand
---
 src/intel/vulkan/genX_cmd_buffer.c | 45 --
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index ab5590d..e4362d1 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1432,6 +1432,35 @@ cmd_buffer_alloc_push_constants(struct anv_cmd_buffer 
*cmd_buffer)
cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_ALL_GRAPHICS;
 }
 
+static const struct anv_descriptor *
+anv_descriptor_for_binding(const struct anv_cmd_buffer *cmd_buffer,
+   const struct anv_pipeline_binding *binding)
+{
+   assert(binding->set < MAX_SETS);
+   const struct anv_descriptor_set *set =
+  cmd_buffer->state.descriptors[binding->set];
+   const uint32_t offset =
+  set->layout->binding[binding->binding].descriptor_index;
+   return &set->descriptors[offset + binding->index];
+}
+
+static uint32_t
+dynamic_offset_for_binding(const struct anv_cmd_buffer *cmd_buffer,
+   const struct anv_pipeline *pipeline,
+   const struct anv_pipeline_binding *binding)
+{
+   assert(binding->set < MAX_SETS);
+   const struct anv_descriptor_set *set =
+  cmd_buffer->state.descriptors[binding->set];
+
+   uint32_t dynamic_offset_idx =
+  pipeline->layout->set[binding->set].dynamic_offset_start +
+  set->layout->binding[binding->binding].dynamic_offset_index +
+  binding->index;
+
+   return cmd_buffer->state.dynamic_offsets[dynamic_offset_idx];
+}
+
 static VkResult
 emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
@@ -1534,10 +1563,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
  continue;
   }
 
-  struct anv_descriptor_set *set =
- cmd_buffer->state.descriptors[binding->set];
-  uint32_t offset = 
set->layout->binding[binding->binding].descriptor_index;
-  struct anv_descriptor *desc = >descriptors[offset + binding->index];
+  const struct anv_descriptor *desc =
+ anv_descriptor_for_binding(cmd_buffer, binding);
 
   switch (desc->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -1611,14 +1638,10 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
 
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- uint32_t dynamic_offset_idx =
-pipeline->layout->set[binding->set].dynamic_offset_start +
-set->layout->binding[binding->binding].dynamic_offset_index +
-binding->index;
-
  /* Compute the offset within the buffer */
- uint64_t offset = desc->offset +
-cmd_buffer->state.dynamic_offsets[dynamic_offset_idx];
+ uint32_t dynamic_offset =
+dynamic_offset_for_binding(cmd_buffer, pipeline, binding);
+ uint64_t offset = desc->offset + dynamic_offset;
  /* Clamp to the buffer size */
  offset = MIN2(offset, desc->buffer->size);
  /* Clamp the range to the buffer size */
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/6] anv/pipeline: Translate vulkan_resource_index to a constant when possible

2017-12-01 Thread Jason Ekstrand
We want to call brw_nir_analyze_ubo_ranges immediately after
anv_nir_apply_pipeline_layout and it badly wants constants.  We could
run an optimization step and let constant folding do it but that's way
more expensive than needed.  It's really easy to just handle constants
in apply_pipeline_layout.
---
 src/intel/vulkan/anv_nir_apply_pipeline_layout.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c 
b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
index f8d8164..4f7680b 100644
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -116,12 +116,19 @@ lower_res_index_intrinsic(nir_intrinsic_instr *intrin,
uint32_t array_size =
   state->layout->set[set].layout->binding[binding].array_size;
 
-   nir_ssa_def *block_index = nir_ssa_for_src(b, intrin->src[0], 1);
+   nir_const_value *const_block_index = nir_src_as_const_value(intrin->src[0]);
 
-   if (state->add_bounds_checks)
-  block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 1));
+   nir_ssa_def *block_index;
+   if (const_block_index) {
+  block_index = nir_imm_int(b, surface_index + const_block_index->u32[0]);
+   } else {
+  block_index = nir_ssa_for_src(b, intrin->src[0], 1);
 
-   block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
+  if (state->add_bounds_checks)
+ block_index = nir_umin(b, block_index, nir_imm_int(b, array_size - 
1));
+
+  block_index = nir_iadd(b, nir_imm_int(b, surface_index), block_index);
+   }
 
assert(intrin->dest.is_ssa);
nir_ssa_def_rewrite_uses(>dest.ssa, nir_src_for_ssa(block_index));
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] anv/cmd_buffer: Add some stage asserts

2017-12-01 Thread Jason Ekstrand
There are several places where we look up opcodes in an array of stages.
Assert that we don't end up going out-of-bounds.
---
 src/intel/vulkan/genX_cmd_buffer.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index e4362d1..16b4ca6 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1822,6 +1822,9 @@ cmd_buffer_emit_descriptor_pointers(struct anv_cmd_buffer 
*cmd_buffer,
};
 
anv_foreach_stage(s, stages) {
+  assert(s < ARRAY_SIZE(binding_table_opcodes));
+  assert(binding_table_opcodes[s] > 0);
+
   if (cmd_buffer->state.samplers[s].alloc_size > 0) {
  anv_batch_emit(_buffer->batch,
 GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ssp) {
@@ -1858,6 +1861,9 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer 
*cmd_buffer)
   if (stage == MESA_SHADER_COMPUTE)
  continue;
 
+  assert(stage < ARRAY_SIZE(push_constant_opcodes));
+  assert(push_constant_opcodes[stage] > 0);
+
   struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, 
stage);
 
   if (state.offset == 0) {
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] configure: avoid testing for negative compiler options

2017-12-01 Thread Dylan Baker
Thanks for looking into this, I've gone ahead and pushed this to master.

Dylan

Quoting Marc Dietrich (2017-11-29 13:25:05)
> gcc seems to always accept unsupported negative compiler warning options:
> 
> echo "int i;" | gcc -c -xc -Wno-bob - # no error
> echo "int i;" | gcc -c -xc -Walice -  # unsupported compiler option
> 
> Inverting the options fixes the tests.
> 
> V2: fix options in meson build
> 
> Reviewed-by: Matt Turner 
> Signed-off-by: Marc Dietrich 
> ---
>  configure.ac |  6 --
>  meson.build  | 23 +++
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 1344c12884..3f9a5c85b1 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -394,8 +394,10 @@ esac
>  AC_SUBST([VISIBILITY_CFLAGS])
>  AC_SUBST([VISIBILITY_CXXFLAGS])
>  
> -AX_CHECK_COMPILE_FLAG([-Wno-override-init],
> [WNO_OVERRIDE_INIT="$WNO_OVERRIDE_INIT -Wno-override-init"]) # gcc
> -AX_CHECK_COMPILE_FLAG([-Wno-initializer-overrides],
> [WNO_OVERRIDE_INIT="$WNO_OVERRIDE_INIT -Wno-initializer-overrides"]) # clang
> +dnl For some reason, the test for -Wno-foo always succeeds with gcc, even if 
> the
> +dnl option is not supported. Hence, check for -Wfoo instead.
> +AX_CHECK_COMPILE_FLAG([-Woverride-init],
> [WNO_OVERRIDE_INIT="$WNO_OVERRIDE_INIT -Wno-override-init"]) # gcc
> +AX_CHECK_COMPILE_FLAG([-Winitializer-overrides],
> [WNO_OVERRIDE_INIT="$WNO_OVERRIDE_INIT -Wno-initializer-overrides"]) # clang
>  AC_SUBST([WNO_OVERRIDE_INIT])
>  
>  dnl
> diff --git a/meson.build b/meson.build
> index 919f1c2d41..a55d5ed391 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -683,11 +683,25 @@ endif
>  cpp = meson.get_compiler('cpp')
>  cpp_args = []
>  foreach a : ['-Wall', '-fno-math-errno', '-fno-trapping-math',
> - '-Qunused-arguments', '-Wno-non-virtual-dtor']
> + '-Qunused-arguments']
>if cpp.has_argument(a)
>  cpp_args += a
>endif
>  endforeach
> +
> +# For some reason, the test for -Wno-foo always succeeds with gcc, even if 
> the
> +# option is not supported. Hence, check for -Wfoo instead.
> +if cpp.has_argument('-Wnon-virtual-dtor')
> +  cpp_args += '-Wno-non-virtual-dtor'
> +endif
> +
> +no_override_init_args = []
> +foreach a : ['override-init', 'initializer-overrides']
> +  if cc.has_argument('-W' + a)
> +no_override_init_args += '-Wno-' + a
> +  endif
> +endforeach
> +
>  cpp_vis_args = []
>  if cpp.has_argument('-fvisibility=hidden')
>cpp_vis_args += '-fvisibility=hidden'
> @@ -707,13 +721,6 @@ foreach a : ['-Werror=pointer-arith', '-Werror=vla']
>endif
>  endforeach
>  
> -no_override_init_args = []
> -foreach a : ['-Wno-override-init', '-Wno-initializer-overrides']
> -  if cc.has_argument(a)
> -no_override_init_args += a
> -  endif
> -endforeach
> -
>  if host_machine.cpu_family().startswith('x86')
>pre_args += '-DHAVE_SSE41'
>with_sse41 = true
> -- 
> 2.15.0
> 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/cnl: Avoid fast-clearing sRGB render buffers

2017-12-01 Thread Jason Ekstrand
On Fri, Dec 1, 2017 at 2:44 PM, Nanley Chery  wrote:

> Gen10 doesn't automatically decode the clear color of sRGB buffers. To
> get correct rendering, avoid fast-clearing such buffers for now.
>
> The driver now passes the following piglit tests:
> * spec@arb_framebuffer_srgb@msaa-fast-clear
> * spec@ext_texture_srgb@multisample-fast-clear gl_ext_texture_srgb
>
> Suggested-by: Kenneth Graunke 
> Suggested-by: Jason Ekstrand 
> Signed-off-by: Nanley Chery 
> ---
>
> This patch is currently going through the jenkins pipeline.
>
>  src/mesa/drivers/dri/i965/brw_meta_util.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c
> b/src/mesa/drivers/dri/i965/brw_meta_util.c
> index ba92168934..54dc6a5ff9 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_util.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
> @@ -298,13 +298,23 @@ brw_is_color_fast_clear_compatible(struct
> brw_context *brw,
>  * resolved in intel_update_state. In that case it's pointless to do a
>  * fast clear because it's very likely to be immediately resolved.
>  */
> +   const bool srgb_rb = _mesa_get_srgb_format_linear(mt->format) !=
> mt->format;
> if (devinfo->gen >= 9 &&
> mt->surf.samples == 1 &&
> -   ctx->Color.sRGBEnabled &&
> -   _mesa_get_srgb_format_linear(mt->format) != mt->format)
> +   ctx->Color.sRGBEnabled && srgb_rb)
>return false;
>
> +  /* Gen10 doesn't automatically decode the clear color of sRGB buffers.
> Since
> +   * we currently don't perform this decode in software, avoid a
> fast-clear
> +   * altogether. TODO: Do this in software.
> +   */
> const mesa_format format = _mesa_get_render_format(ctx, mt->format);
> +   if (devinfo->gen >= 10 && srgb_rb) {
>

We could be a bit more precise and only disable it for non-0/1.  Either way,

Reviewed-by: Jason Ekstrand 


> +  perf_debug("sRGB fast clear not enabled for (%s)",
> + _mesa_get_format_name(format));
>

Thanks for leaving a perf_debug!


> +  return false;
> +   }
> +
> if (_mesa_is_format_integer_color(format)) {
>if (devinfo->gen >= 8) {
>   perf_debug("Integer fast clear not enabled for (%s)",
> --
> 2.15.1
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Serialize nir later in the linking process

2017-12-01 Thread Jason Ekstrand
Based on what you pasted on IRC the other day, I'm guessing the problem was
the missing unify_interfaces().  In any case, this looks much better.  The
closer serialization happens to back-end shader compilation, the better.

Reviewed-by: Jason Ekstrand 

On Fri, Dec 1, 2017 at 3:08 PM, Jordan Justen 
wrote:

> Fixes MESA_GLSL=cache_fb with piglit
> tests/spec/glsl-1.50/execution/geometry/clip-
> distance-vs-gs-out.shader_test
>
> Fixes: 0610a624a12 i965/link: Serialize program to nir after linking for
> shader cache
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103988
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/brw_link.cpp | 25 -
>  1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp
> b/src/mesa/drivers/dri/i965/brw_link.cpp
> index d18521e792d..6177c8f5ebd 100644
> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> @@ -302,15 +302,6 @@ brw_link_shader(struct gl_context *ctx, struct
> gl_shader_program *shProg)
>NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo,
>   prog->nir->info.num_abos);
>
> -  if (brw->ctx.Cache) {
> - struct blob writer;
> - blob_init();
> - nir_serialize(, prog->nir);
> - prog->driver_cache_blob = ralloc_size(NULL, writer.size);
> - memcpy(prog->driver_cache_blob, writer.data, writer.size);
> - prog->driver_cache_blob_size = writer.size;
> -  }
> -
>infos[stage] = >nir->info;
>
>update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
> @@ -357,6 +348,22 @@ brw_link_shader(struct gl_context *ctx, struct
> gl_shader_program *shProg)
>}
> }
>
> +   if (brw->ctx.Cache) {
> +  for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders);
> stage++) {
> + struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
> + if (!shader)
> +continue;
> +
> + struct gl_program *prog = shader->Program;
> + struct blob writer;
> + blob_init();
> + nir_serialize(, prog->nir);
> + prog->driver_cache_blob = ralloc_size(NULL, writer.size);
> + memcpy(prog->driver_cache_blob, writer.data, writer.size);
> + prog->driver_cache_blob_size = writer.size;
> +  }
> +   }
> +
> if (brw->precompile && !brw_shader_precompile(ctx, shProg))
>return false;
>
> --
> 2.15.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/cnl: Avoid fast-clearing sRGB render buffers

2017-12-01 Thread Nanley Chery
On Fri, Dec 01, 2017 at 02:44:42PM -0800, Nanley Chery wrote:
> Gen10 doesn't automatically decode the clear color of sRGB buffers. To
> get correct rendering, avoid fast-clearing such buffers for now.
> 
> The driver now passes the following piglit tests:
> * spec@arb_framebuffer_srgb@msaa-fast-clear
> * spec@ext_texture_srgb@multisample-fast-clear gl_ext_texture_srgb
> 
> Suggested-by: Kenneth Graunke 
> Suggested-by: Jason Ekstrand 
> Signed-off-by: Nanley Chery 
> ---
> 
> This patch is currently going through the jenkins pipeline.
> 

The run came back green.

-Nanley

>  src/mesa/drivers/dri/i965/brw_meta_util.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c 
> b/src/mesa/drivers/dri/i965/brw_meta_util.c
> index ba92168934..54dc6a5ff9 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_util.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
> @@ -298,13 +298,23 @@ brw_is_color_fast_clear_compatible(struct brw_context 
> *brw,
>  * resolved in intel_update_state. In that case it's pointless to do a
>  * fast clear because it's very likely to be immediately resolved.
>  */
> +   const bool srgb_rb = _mesa_get_srgb_format_linear(mt->format) != 
> mt->format;
> if (devinfo->gen >= 9 &&
> mt->surf.samples == 1 &&
> -   ctx->Color.sRGBEnabled &&
> -   _mesa_get_srgb_format_linear(mt->format) != mt->format)
> +   ctx->Color.sRGBEnabled && srgb_rb)
>return false;
>  
> +  /* Gen10 doesn't automatically decode the clear color of sRGB buffers. 
> Since
> +   * we currently don't perform this decode in software, avoid a fast-clear
> +   * altogether. TODO: Do this in software.
> +   */
> const mesa_format format = _mesa_get_render_format(ctx, mt->format);
> +   if (devinfo->gen >= 10 && srgb_rb) {
> +  perf_debug("sRGB fast clear not enabled for (%s)",
> + _mesa_get_format_name(format));
> +  return false;
> +   }
> +
> if (_mesa_is_format_integer_color(format)) {
>if (devinfo->gen >= 8) {
>   perf_debug("Integer fast clear not enabled for (%s)",
> -- 
> 2.15.1
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 18/44] i965/fs: Add byte scattered write message and fs support

2017-12-01 Thread Jason Ekstrand
Assuming the changes you describe below, this patch would get a

Reviewed-by: Jason Ekstrand 

I think that's enough for you to land the UBO/SSBO portion of 16bit
storage.  Feel free to send v2 versions of any patches you want me to look
at again before you push but I think it's in pretty good shape.

--Jason

On Fri, Dec 1, 2017 at 1:21 PM, Chema Casanova 
wrote:

> On 30/11/17 21:42, Jason Ekstrand wrote:
> > On Wed, Nov 29, 2017 at 6:08 PM, Jose Maria Casanova Crespo
> > > wrote:
> >
> > v2: (Jason Ekstrand)
> > - Enable bit_size parameter to scattered messages to enable
> > different
> >   bitsizes byte/word/dword.
> > - Remove use of brw_send_indirect_scattered_message in favor of
> >   brw_send_indirect_surface_message.
> > - Move scattered messages to surface messages namespace.
> > - Assert align1 for scattered messages and assume Gen8+.
> > - Inline brw_set_dp_byte_scattered_write.
> >
> > Signed-off-by: Jose Maria Casanova Crespo  > >
> > Signed-off-by: Alejandro Piñeiro  > >
> > ---
> >  src/intel/compiler/brw_eu.h|  7 +
> >  src/intel/compiler/brw_eu_defines.h| 17 +++
> >  src/intel/compiler/brw_eu_emit.c   | 42
> > ++
> >  src/intel/compiler/brw_fs.cpp  | 14 +
> >  src/intel/compiler/brw_fs_copy_propagation.cpp |  2 ++
> >  src/intel/compiler/brw_fs_generator.cpp|  6 
> >  src/intel/compiler/brw_fs_surface_builder.cpp  | 11 +++
> >  src/intel/compiler/brw_fs_surface_builder.h|  7 +
> >  src/intel/compiler/brw_shader.cpp  |  7 +
> >  9 files changed, 113 insertions(+)
> >
> > diff --git a/src/intel/compiler/brw_eu.h
> b/src/intel/compiler/brw_eu.h
> > index 343dcd867d..3ac3b4342a 100644
> > --- a/src/intel/compiler/brw_eu.h
> > +++ b/src/intel/compiler/brw_eu.h
> > @@ -485,6 +485,13 @@ brw_typed_surface_write(struct brw_codegen *p,
> >  unsigned msg_length,
> >  unsigned num_channels);
> >
> > +void
> > +brw_byte_scattered_write(struct brw_codegen *p,
> > + struct brw_reg payload,
> > + struct brw_reg surface,
> > + unsigned msg_length,
> > + unsigned bit_size);
> > +
> >  void
> >  brw_memory_fence(struct brw_codegen *p,
> >   struct brw_reg dst);
> > diff --git a/src/intel/compiler/brw_eu_defines.h
> > b/src/intel/compiler/brw_eu_defines.h
> > index 9d5cf05c86..de6330ee54 100644
> > --- a/src/intel/compiler/brw_eu_defines.h
> > +++ b/src/intel/compiler/brw_eu_defines.h
> > @@ -402,6 +402,16 @@ enum opcode {
> >
> > SHADER_OPCODE_RND_MODE,
> >
> > +   /**
> > +* Byte scattered write/read opcodes.
> > +*
> > +* LOGICAL opcodes are eventually translated to the matching
> > non-LOGICAL
> > +* opcode, but instead of taking a single payload blog they
> > expect their
> > +* arguments separately as individual sources, like untyped
> > write/read.
> > +*/
> > +   SHADER_OPCODE_BYTE_SCATTERED_WRITE,
> > +   SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
> > +
> > SHADER_OPCODE_MEMORY_FENCE,
> >
> > SHADER_OPCODE_GEN4_SCRATCH_READ,
> > @@ -1255,4 +1265,11 @@ enum PACKED brw_rnd_mode {
> > BRW_RND_MODE_UNSPECIFIED,  /* Unspecified rounding mode */
> >  };
> >
> > +/* MDC_DS - Data Size Message Descriptor Control Field */
> > +enum PACKED brw_data_size {
> >
> >
> > I'm not sure how I feel about this being an enum with such a generic
> name.
>
> Right, the PRM uses the more exact name "Data Elements", but this field is
> only used by byte_scattered/scaled writes/reads. As I will follow your next
> suggestion of using #define, I'm changing the name to:
>
> #define GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE  0
> #define GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD  1
> #define GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD 2
>
> I'll include in the comment about MDC_DS
> "Specifies the number of Bytes to be read or written per Dword used at
> byte_scattered read/write and byte_scaled read/write messages."
>
> >
> >
> > +   GEN7_BYTE_SCATTERED_DATA_SIZE_BYTE = 0,
> > +   GEN7_BYTE_SCATTERED_DATA_SIZE_WORD = 1,
> > +   GEN7_BYTE_SCATTERED_DATA_SIZE_DWORD = 2
> > +};
> > +
> >  #endif /* BRW_EU_DEFINES_H */
> > diff --git a/src/intel/compiler/brw_eu_emit.c
> > b/src/intel/compiler/brw_eu_emit.c
> > index ca97ff7325..ded7e228cf 100644
> > --- 

[Mesa-dev] [PATCH] util: Add backtrace support based on gallium u_debug_stack

2017-12-01 Thread Jordan Justen
Only the libunwind support is retained from u_debug_stack.

Signed-off-by: Jordan Justen 
---
 src/util/Makefile.am  |   4 +-
 src/util/Makefile.sources |   2 +
 src/util/backtrace.c  | 223 ++
 src/util/backtrace.h  |  55 
 src/util/meson.build  |   2 +
 5 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 src/util/backtrace.c
 create mode 100644 src/util/backtrace.h

diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index a5241ad27ba..25ed47ab248 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -42,6 +42,7 @@ libmesautil_la_CPPFLAGS = \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
$(VISIBILITY_CFLAGS) \
+   $(LIBUNWIND_CFLAGS) \
$(MSVC2013_COMPAT_CFLAGS) \
$(ZLIB_CFLAGS)
 
@@ -52,7 +53,8 @@ libmesautil_la_SOURCES = \
 libmesautil_la_LIBADD = \
$(CLOCK_LIB) \
$(ZLIB_LIBS) \
-   $(LIBATOMIC_LIBS)
+   $(LIBATOMIC_LIBS) \
+   $(LIBUNWIND_LIBS)
 
 libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES)
 libxmlconfig_la_CFLAGS = \
diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
index 104ecae8ed3..39e67bd6f44 100644
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -1,4 +1,6 @@
 MESA_UTIL_FILES := \
+   backtrace.c \
+   backtrace.h \
bitscan.c \
bitscan.h \
bitset.h \
diff --git a/src/util/backtrace.c b/src/util/backtrace.c
new file mode 100644
index 000..df629c877ac
--- /dev/null
+++ b/src/util/backtrace.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Stack backtracing.
+ *
+ * @author Jose Fonseca 
+ */
+
+#include "backtrace.h"
+
+#if defined(HAVE_LIBUNWIND)
+
+#define UNW_LOCAL_ONLY
+#include 
+
+/**
+ * Represent a frame from a stack backtrace.
+ */
+#ifdef HAVE_LIBUNWIND
+struct debug_stack_frame
+{
+   unw_word_t start_ip;
+   unsigned int off;
+   const char *procname;
+};
+#else
+struct debug_stack_frame;
+#endif
+
+
+// void
+// debug_backtrace_capture(struct debug_stack_frame *backtrace,
+// unsigned start_frame,
+// unsigned nr_frames);
+
+// void
+// debug_backtrace_dump(const struct debug_stack_frame *backtrace,
+//  unsigned nr_frames);
+
+// void
+// debug_backtrace_print(FILE *f,
+//   const struct debug_stack_frame *backtrace,
+//   unsigned nr_frames);
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include 
+
+#include "u_thread.h"
+#include "hash_table.h"
+
+static struct hash_table* symbols_hash = NULL;;
+static mtx_t symbols_mutex = _MTX_INITIALIZER_NP;
+
+static const char *
+symbol_name_cached(unw_cursor_t *cursor, unw_proc_info_t *pip)
+{
+   void *addr = (void *)(uintptr_t)pip->start_ip;
+   struct hash_entry *entry;
+   char *name;
+
+   mtx_lock(_mutex);
+   if(!symbols_hash)
+  symbols_hash = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+   entry = _mesa_hash_table_search(symbols_hash, addr);
+   if(entry) {
+  name = entry->data;
+   } else {
+  char procname[256];
+  unw_word_t off;
+  int ret;
+
+  ret = unw_get_proc_name(cursor, procname, sizeof(procname), );
+  if (ret && ret != -UNW_ENOMEM) {
+ procname[0] = '?';
+ procname[1] = 0;
+  }
+
+  asprintf(, "%s%s", procname, ret == -UNW_ENOMEM ? "..." : "");
+
+  _mesa_hash_table_insert(symbols_hash, addr, name);
+   }
+   mtx_unlock(_mutex);
+
+   return name;
+}
+
+static void
+debug_backtrace_capture(struct debug_stack_frame *backtrace,
+unsigned start_frame,
+  

[Mesa-dev] [PATCH] i965: Serialize nir later in the linking process

2017-12-01 Thread Jordan Justen
Fixes MESA_GLSL=cache_fb with piglit
tests/spec/glsl-1.50/execution/geometry/clip-distance-vs-gs-out.shader_test

Fixes: 0610a624a12 i965/link: Serialize program to nir after linking for shader 
cache
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103988
Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_link.cpp | 25 -
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index d18521e792d..6177c8f5ebd 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -302,15 +302,6 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo,
  prog->nir->info.num_abos);
 
-  if (brw->ctx.Cache) {
- struct blob writer;
- blob_init();
- nir_serialize(, prog->nir);
- prog->driver_cache_blob = ralloc_size(NULL, writer.size);
- memcpy(prog->driver_cache_blob, writer.data, writer.size);
- prog->driver_cache_blob_size = writer.size;
-  }
-
   infos[stage] = >nir->info;
 
   update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);
@@ -357,6 +348,22 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
   }
}
 
+   if (brw->ctx.Cache) {
+  for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
+ struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
+ if (!shader)
+continue;
+
+ struct gl_program *prog = shader->Program;
+ struct blob writer;
+ blob_init();
+ nir_serialize(, prog->nir);
+ prog->driver_cache_blob = ralloc_size(NULL, writer.size);
+ memcpy(prog->driver_cache_blob, writer.data, writer.size);
+ prog->driver_cache_blob_size = writer.size;
+  }
+   }
+
if (brw->precompile && !brw_shader_precompile(ctx, shProg))
   return false;
 
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] meson: fix underlinkage without dri3

2017-12-01 Thread Dylan Baker
There are some cases where the dri3 loader is covering for underlinkage
for GLX and EGL; provide the linkage that they actually need.

Signed-off-by: Dylan Baker 
---
 src/egl/meson.build | 2 +-
 src/glx/meson.build | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/egl/meson.build b/src/egl/meson.build
index f32715265ce..df6e8b49dac 100644
--- a/src/egl/meson.build
+++ b/src/egl/meson.build
@@ -106,7 +106,7 @@ if with_platform_x11
 files_egl += files('drivers/dri2/platform_x11_dri3.c')
 link_for_egl += libloader_dri3_helper
   endif
-  deps_for_egl += [dep_xcb_dri2, dep_xcb_xfixes]
+  deps_for_egl += [dep_x11_xcb, dep_xcb_dri2, dep_xcb_xfixes]
 endif
 if with_platform_drm
   files_egl += files('drivers/dri2/platform_drm.c')
diff --git a/src/glx/meson.build b/src/glx/meson.build
index 02bd79082fc..a73cf859666 100644
--- a/src/glx/meson.build
+++ b/src/glx/meson.build
@@ -171,7 +171,8 @@ if with_glx == 'dri'
 link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl],
 dependencies : [
   dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb,
-  dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, extra_deps_libgl,
+  dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, dep_xext, dep_xfixes,
+  dep_xdamage, extra_deps_libgl,
 ],
 version : gl_lib_version,
 install : true,
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] meson: Reformat meson code to match more common style

2017-12-01 Thread Dylan Baker
Generally in our meson build, large arrays are formatted in the form:
[
  ..., ..., ..., $
  ...,
]

So use that form

Signed-off-by: Dylan Baker 
---
 src/glx/meson.build | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/glx/meson.build b/src/glx/meson.build
index 2ffef4cf223..02bd79082fc 100644
--- a/src/glx/meson.build
+++ b/src/glx/meson.build
@@ -142,8 +142,10 @@ libglx = static_library(
 inc_common, inc_glapi, inc_loader,
 include_directories('../../include/GL/internal'),
   ],
-  c_args : [c_vis_args, gl_lib_cargs,
-'-DGL_LIB_NAME="lib@0@.so.@1@"'.format(gl_lib_name, 
gl_lib_version.split('.')[0])],
+  c_args : [
+c_vis_args, gl_lib_cargs,
+'-DGL_LIB_NAME="lib@0@.so.@1@"'.format(gl_lib_name, 
gl_lib_version.split('.')[0]),
+  ],
   link_with : [libloader, libloader_dri3_helper, libmesa_util, libxmlconfig, 
extra_libs_libglx],
   dependencies : [dep_libdrm, dep_dri2proto, dep_glproto, dep_x11, dep_glvnd],
   build_by_default : false,
@@ -167,9 +169,10 @@ if with_glx == 'dri'
 link_with : [libglapi_static, libglapi],
 link_whole : libglx,
 link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl],
-dependencies : [dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11,
-dep_xcb_glx, dep_xcb, dep_x11_xcb,
-dep_xcb_dri2, dep_xcb_dri3, extra_deps_libgl],
+dependencies : [
+  dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb,
+  dep_x11_xcb, dep_xcb_dri2, dep_xcb_dri3, extra_deps_libgl,
+],
 version : gl_lib_version,
 install : true,
   )
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] meson: Fix overlinkage of dri3 loader

2017-12-01 Thread Dylan Baker
This was covering for underlinkage elsewhere. With that fixed these can
be removed.

Signed-off-by: Dylan Baker 
---
 src/loader/meson.build | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/loader/meson.build b/src/loader/meson.build
index 2f97cdc83bf..ce7e13c3542 100644
--- a/src/loader/meson.build
+++ b/src/loader/meson.build
@@ -27,8 +27,7 @@ if with_platform_x11 and with_dri3
 c_args : c_vis_args,
 include_directories : inc_include,
 dependencies : [
-  dep_xshmfence, dep_xcb_present, dep_xcb_dri3, dep_xcb_sync, dep_x11_xcb,
-  dep_xext, dep_xdamage, dep_xcb_glx, dep_libdrm,
+  dep_xshmfence, dep_xcb_present, dep_xcb_dri3, dep_xcb_sync, dep_libdrm,
 ],
 build_by_default : false,
   )
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/3] Fix linkage for libEGL and libGLX without dri3

2017-12-01 Thread Dylan Baker
As we discussed elsewhere, this should fix the linkage of the dri3 loader and
glx and egl.

Dylan Baker (3):
  meson: Reformat meson code to match more common style
  meson: fix underlinkage without dri3
  meson: Fix overlinkage of dri3 loader

 src/egl/meson.build|  2 +-
 src/glx/meson.build| 14 +-
 src/loader/meson.build |  3 +--
 3 files changed, 11 insertions(+), 8 deletions(-)

-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/cnl: Avoid fast-clearing sRGB render buffers

2017-12-01 Thread Nanley Chery
Gen10 doesn't automatically decode the clear color of sRGB buffers. To
get correct rendering, avoid fast-clearing such buffers for now.

The driver now passes the following piglit tests:
* spec@arb_framebuffer_srgb@msaa-fast-clear
* spec@ext_texture_srgb@multisample-fast-clear gl_ext_texture_srgb

Suggested-by: Kenneth Graunke 
Suggested-by: Jason Ekstrand 
Signed-off-by: Nanley Chery 
---

This patch is currently going through the jenkins pipeline.

 src/mesa/drivers/dri/i965/brw_meta_util.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c 
b/src/mesa/drivers/dri/i965/brw_meta_util.c
index ba92168934..54dc6a5ff9 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
@@ -298,13 +298,23 @@ brw_is_color_fast_clear_compatible(struct brw_context 
*brw,
 * resolved in intel_update_state. In that case it's pointless to do a
 * fast clear because it's very likely to be immediately resolved.
 */
+   const bool srgb_rb = _mesa_get_srgb_format_linear(mt->format) != mt->format;
if (devinfo->gen >= 9 &&
mt->surf.samples == 1 &&
-   ctx->Color.sRGBEnabled &&
-   _mesa_get_srgb_format_linear(mt->format) != mt->format)
+   ctx->Color.sRGBEnabled && srgb_rb)
   return false;
 
+  /* Gen10 doesn't automatically decode the clear color of sRGB buffers. Since
+   * we currently don't perform this decode in software, avoid a fast-clear
+   * altogether. TODO: Do this in software.
+   */
const mesa_format format = _mesa_get_render_format(ctx, mt->format);
+   if (devinfo->gen >= 10 && srgb_rb) {
+  perf_debug("sRGB fast clear not enabled for (%s)",
+ _mesa_get_format_name(format));
+  return false;
+   }
+
if (_mesa_is_format_integer_color(format)) {
   if (devinfo->gen >= 8) {
  perf_debug("Integer fast clear not enabled for (%s)",
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] meson: Install dri.pc file when building gallium dri drivers

2017-12-01 Thread Dylan Baker
Currently this pkg-config file is only installed if a classic dri driver
is built. This is wrong, it should be installed if any dri driver is
installed, which includes the gallium dri target.

Reported-by: Marc Dietrich 
Signed-off-by: Dylan Baker 
---
 src/mesa/drivers/dri/meson.build | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/meson.build b/src/mesa/drivers/dri/meson.build
index 217f1e5c71c..4ec2f343df2 100644
--- a/src/mesa/drivers/dri/meson.build
+++ b/src/mesa/drivers/dri/meson.build
@@ -53,6 +53,17 @@ if dri_drivers != []
 link_args : ['-Wl,--build-id=sha1', ld_args_bsymbolic, 
ld_args_gc_sections],
   )
 
+  meson.add_install_script(
+join_paths(meson.source_root(), 'bin/install_megadrivers.py'),
+libmesa_dri_drivers.full_path(),
+dri_drivers_path,
+dri_link,
+  )
+endif
+
+# This needs to be installed if any dri drivers (including gallium dri drivers)
+# are built.
+if with_dri
   pkg.generate(
 name : 'dri',
 filebase : 'dri',
@@ -61,11 +72,4 @@ if dri_drivers != []
 variables : ['dridriverdir=${prefix}/' + dri_drivers_path],
 requires_private : ['libdrm >= 2.4.75'],  # FIXME: don't hardcode this
   )
-
-  meson.add_install_script(
-join_paths(meson.source_root(), 'bin/install_megadrivers.py'),
-libmesa_dri_drivers.full_path(),
-dri_drivers_path,
-dri_link,
-  )
 endif
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GBM and the Device Memory Allocator Proposals

2017-12-01 Thread Miguel Angel Vico


On Fri, 1 Dec 2017 13:38:41 -0500
Rob Clark  wrote:

> On Fri, Dec 1, 2017 at 12:09 PM, Nicolai Hähnle  wrote:
> > On 01.12.2017 16:06, Rob Clark wrote:  
> >>
> >> On Thu, Nov 30, 2017 at 5:43 PM, Nicolai Hähnle 
> >> wrote:  
> >>>
> >>> Hi,
> >>>
> >>> I've had a chance to look a bit more closely at the allocator prototype
> >>> repository now. There's a whole bunch of low-level API design feedback,
> >>> but
> >>> for now let's focus on the high-level stuff first.
> >>>
> >>> Going by the 4.5 major object types (as also seen on slide 5 of your
> >>> presentation [0]), assertions and usages make sense to me.
> >>>
> >>> Capabilities and capability sets should be cleaned up in my opinion, as
> >>> the
> >>> status quo is overly obfuscating things. What capability sets really
> >>> represent, as far as I understand them, is *memory layouts*, and so
> >>> that's
> >>> what they should be called.
> >>>
> >>> This conceptually simplifies `derive_capabilities` significantly without
> >>> any
> >>> loss of expressiveness as far as I can see. Given two lists of memory
> >>> layouts, we simply look for which memory layouts appear in both lists,
> >>> and
> >>> then merge their constraints and capabilities.
> >>>
> >>> Merging constraints looks good to me.
> >>>
> >>> Capabilities need some more thought. The prototype removes capabilities
> >>> when
> >>> merging layouts, but I'd argue that that is often undesirable. (In fact,
> >>> I
> >>> cannot think of capabilities which we'd always want to remove.)
> >>>
> >>> A typical example for this is compression (i.e. DCC in our case). For
> >>> rendering usage, we'd return something like:
> >>>
> >>> Memory layout: AMD/tiled; constraints(alignment=64k); caps(AMD/DCC)
> >>>
> >>> For display usage, we might return (depending on hardware):
> >>>
> >>> Memory layout: AMD/tiled; constraints(alignment=64k); caps(none)
> >>>
> >>> Merging these in the prototype would remove the DCC capability, even
> >>> though
> >>> it might well make sense to keep it there for rendering. Dealing with the
> >>> fact that display usage does not have this capability is precisely one of
> >>> the two things that transitions are about! The other thing that
> >>> transitions
> >>> are about is caches.
> >>>
> >>> I think this is kind of what Rob was saying in one of his mails.  
> >>
> >>
> >> Perhaps "layout" is a better name than "caps".. either way I think of
> >> both AMD/tiled and AMD/DCC as the same type of "thing".. the
> >> difference between AMD/tiled and AMD/DCC is that a transition can be
> >> provided for AMD/DCC.  Other than that they are both things describing
> >> the layout.  
> >
> >
> > The reason that a transition can be provided is that they aren't quite the
> > same thing, though. In a very real sense, AMD/DCC is a "child" property of
> > AMD/tiled: DCC is implemented as a meta surface whose memory layout depends
> > on the layout of the main surface.  
> 
> I suppose this is six-of-one, half-dozen of the other..
> 
> what you are calling a layout is what I'm calling a cap that just
> happens not to have an associated transition
> 
> > Although, if there are GPUs that can do an in-place "transition" between
> > different tiling layouts, then the distinction is perhaps really not as
> > clear-cut. I guess that would only apply to tiled renderers.  
> 
> I suppose the advantage of just calling both layout and caps the same
> thing, and just saying that a "cap" (or "layout" if you prefer that
> name) can optionally have one or more associated transitions, is that
> you can deal with cases where sometimes a tiled format might actually
> have an in-place transition ;-)
> 
> >  
> >> So lets say you have a setup where both display and GPU supported
> >> FOO/tiled, but only GPU supported compressed (FOO/CC) and cached
> >> (FOO/cached).  But the GPU supported the following transitions:
> >>
> >>trans_a: FOO/CC -> null
> >>trans_b: FOO/cached -> null
> >>
> >> Then the sets for each device (in order of preference):
> >>
> >> GPU:
> >>1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=32k)
> >>2: caps(FOO/tiled, FOO/CC); constraints(alignment=32k)
> >>3: caps(FOO/tiled); constraints(alignment=32k)
> >>
> >> Display:
> >>1: caps(FOO/tiled); constraints(alignment=64k)
> >>
> >> Merged Result:
> >>1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=64k);
> >>   transition(GPU->display: trans_a, trans_b; display->GPU: none)
> >>2: caps(FOO/tiled, FOO/CC); constraints(alignment=64k);
> >>   transition(GPU->display: trans_a; display->GPU: none)
> >>3: caps(FOO/tiled); constraints(alignment=64k);
> >>   transition(GPU->display: none; display->GPU: none)  
> >
> >
> > We definitely don't want to expose a way of getting uncached rendering
> > surfaces for radeonsi. I mean, I think we are supposed to be able to program
> > our hardware so that the backend 

[Mesa-dev] [Bug 104024] xinit hangs on black screen with cursor, kernel oopses , bisected

2017-12-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=104024

Bug ID: 104024
   Summary: xinit hangs on black screen with cursor, kernel oopses
, bisected
   Product: Mesa
   Version: git
  Hardware: Other
OS: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Mesa core
  Assignee: mesa-dev@lists.freedesktop.org
  Reporter: freedesk...@voorpost.net
QA Contact: mesa-dev@lists.freedesktop.org

videocard is an rx460

The xinit session hangs at startup, showing a black screen with a white cursor in the top-left corner.


[  328.303157] Oops:  [#2] PREEMPT SMP
[  328.303160] Modules linked in: snd_cmipci snd_opl3_lib snd_mpu401_uart
snd_usb_audio snd_usbmidi_lib snd_rawmidi snd_hda_codec_realtek
snd_hda_codec_hdmi snd_hda_codec_generic amdgpu snd_hda_intel snd_hda_codec
backlight snd_hwdep snd_hda_core
[  328.303187] CPU: 3 PID: 1982 Comm: X Tainted: G  D 4.14.3 #1
[  328.303190] Hardware name: MICRO-STAR INTERNATIONAL CO.,LTD
MS-7596/785GM-E51 (MS-7596), BIOS V2.11 12/08/2010
[  328.303194] task: 88022ec50740 task.stack: c96f8000
[  328.303233] RIP: 0010:amdgpu_info_ioctl+0xe7e/0x1330 [amdgpu]
[  328.303236] RSP: 0018:c96fbb20 EFLAGS: 00010212
[  328.303241] RAX:  RBX: 88022b238000 RCX:
00062459
[  328.303244] RDX: 880238a4 RSI:  RDI:
34040300
[  328.303247] RBP: c96fbcd8 R08: e200 R09:

[  328.303250] R10:  R11: c96fbd68 R12:
c96fbd68
[  328.303253] R13: a00526b0 R14: 0020 R15:
7ffc516dbbc0
[  328.303257] FS:  7f9f536178c0() GS:880237d8()
knlGS:
[  328.303260] CS:  0010 DS:  ES:  CR0: 80050033
[  328.303263] CR2: 880238a4 CR3: 00022741 CR4:
06e0
[  328.303266] Call Trace:
[  328.303278]  ? vsnprintf+0x252/0x570
[  328.303284]  ? sprintf+0x42/0x50
[  328.303321]  ? amdgpu_debugfs_add_files+0xb0/0xb0 [amdgpu]
[  328.303329]  drm_ioctl_kernel+0x64/0xb0
[  328.303336]  drm_ioctl+0x2cd/0x390
[  328.303372]  ? amdgpu_debugfs_add_files+0xb0/0xb0 [amdgpu]
[  328.303380]  ? call_rcu+0x18/0x20
[  328.303386]  ? dentry_free+0x33/0x70
[  328.303390]  ? __dentry_kill+0x10f/0x160
[  328.303426]  amdgpu_drm_ioctl+0x9/0x10 [amdgpu]
[  328.303431]  do_vfs_ioctl+0x8d/0x5a0
[  328.303436]  ? fput+0x9/0x10
[  328.303441]  SyS_ioctl+0x3c/0x70
[  328.303446]  do_syscall_64+0x74/0x330
[  328.303451]  ? rcu_user_enter+0x43/0x50
[  328.303456]  ? __context_tracking_enter+0x43/0x70
[  328.303462]  entry_SYSCALL64_slow_path+0x25/0x25
[  328.303465] RIP: 0033:0x7f9f526c22d7
[  328.303468] RSP: 002b:7ffc516dba18 EFLAGS: 0246 ORIG_RAX:
0010
[  328.303473] RAX: ffda RBX: 008a81b8 RCX:
7f9f526c22d7
[  328.303475] RDX: 7ffc516dba60 RSI: 40206445 RDI:
000e
[  328.303478] RBP: 7ffc516dba60 R08:  R09:

[  328.303481] R10:  R11: 0246 R12:
40206445
[  328.303484] R13: 000e R14: 008a8420 R15:
008a8420
[  328.303487] Code: 75 eb 41 b8 01 00 00 00 b9 09 00 00 00 e9 c1 fe ff ff 8b
bb f8 6b 00 00 85 ff 0f 84 72 04 00 00 48 8d 93 4c 66 00 00 31 f6 31 c9 <0f> b6
02 48 81 c2 18 02 00 00 d3 e0 83 c1 01 09 c6 39 cf 75 eb 
[  328.303586] RIP: amdgpu_info_ioctl+0xe7e/0x1330 [amdgpu] RSP:
c96fbb20
[  328.303589] CR2: 880238a4
[  328.303593] ---[ end trace c4ed8ae89345f0f6 ]---



git bisected to this commit:
436a3f8d6d52921d91a2dab53fcfca192037125e is the first bad commit
commit 436a3f8d6d52921d91a2dab53fcfca192037125e
Author: Boyuan Zhang 
Date:   Tue Nov 7 15:41:40 2017 -0500

radeon/common: add vcn enc ip info query

New ip info query is needed for vcn encode

Signed-off-by: Boyuan Zhang 
Acked-by: Christian König 

:04 04 86c3d1c20a68bdf64f0e63b35ae3a25d5bc75fe2
0be7a9c819bb30959800448fb174b87e6c8fc895 M  src

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 18/44] i965/fs: Add byte scattered write message and fs support

2017-12-01 Thread Chema Casanova
On 30/11/17 21:42, Jason Ekstrand wrote:
> On Wed, Nov 29, 2017 at 6:08 PM, Jose Maria Casanova Crespo
> > wrote:
> 
> v2: (Jason Ekstrand)
>     - Enable bit_size parameter to scattered messages to enable
> different
>       bitsizes byte/word/dword.
>     - Remove use of brw_send_indirect_scattered_message in favor of
>       brw_send_indirect_surface_message.
>     - Move scattered messages to surface messages namespace.
>     - Assert align1 for scattered messages and assume Gen8+.
>     - Inline brw_set_dp_byte_scattered_write.
> 
> Signed-off-by: Jose Maria Casanova Crespo  >
> Signed-off-by: Alejandro Piñeiro  >
> ---
>  src/intel/compiler/brw_eu.h                    |  7 +
>  src/intel/compiler/brw_eu_defines.h            | 17 +++
>  src/intel/compiler/brw_eu_emit.c               | 42
> ++
>  src/intel/compiler/brw_fs.cpp                  | 14 +
>  src/intel/compiler/brw_fs_copy_propagation.cpp |  2 ++
>  src/intel/compiler/brw_fs_generator.cpp        |  6 
>  src/intel/compiler/brw_fs_surface_builder.cpp  | 11 +++
>  src/intel/compiler/brw_fs_surface_builder.h    |  7 +
>  src/intel/compiler/brw_shader.cpp              |  7 +
>  9 files changed, 113 insertions(+)
> 
> diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
> index 343dcd867d..3ac3b4342a 100644
> --- a/src/intel/compiler/brw_eu.h
> +++ b/src/intel/compiler/brw_eu.h
> @@ -485,6 +485,13 @@ brw_typed_surface_write(struct brw_codegen *p,
>                          unsigned msg_length,
>                          unsigned num_channels);
> 
> +void
> +brw_byte_scattered_write(struct brw_codegen *p,
> +                         struct brw_reg payload,
> +                         struct brw_reg surface,
> +                         unsigned msg_length,
> +                         unsigned bit_size);
> +
>  void
>  brw_memory_fence(struct brw_codegen *p,
>                   struct brw_reg dst);
> diff --git a/src/intel/compiler/brw_eu_defines.h
> b/src/intel/compiler/brw_eu_defines.h
> index 9d5cf05c86..de6330ee54 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -402,6 +402,16 @@ enum opcode {
> 
>     SHADER_OPCODE_RND_MODE,
> 
> +   /**
> +    * Byte scattered write/read opcodes.
> +    *
> +    * LOGICAL opcodes are eventually translated to the matching
> non-LOGICAL
> +    * opcode, but instead of taking a single payload blob they
> expect their
> +    * arguments separately as individual sources, like untyped
> write/read.
> +    */
> +   SHADER_OPCODE_BYTE_SCATTERED_WRITE,
> +   SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
> +
>     SHADER_OPCODE_MEMORY_FENCE,
> 
>     SHADER_OPCODE_GEN4_SCRATCH_READ,
> @@ -1255,4 +1265,11 @@ enum PACKED brw_rnd_mode {
>     BRW_RND_MODE_UNSPECIFIED,  /* Unspecified rounding mode */
>  };
> 
> +/* MDC_DS - Data Size Message Descriptor Control Field */
> +enum PACKED brw_data_size {
> 
> 
> I'm not sure how I feel about this being an enum with such a generic name.

Right, the PRM uses the more exact term "Data Elements", but this field is
only used by byte_scattered/scaled writes/reads. As I will follow your next
suggestion of using #define, I'm changing the names to:

#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_BYTE  0
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_WORD  1
#define GEN7_BYTE_SCATTERED_DATA_ELEMENT_DWORD 2

I'll include the following in the comment about MDC_DS:
"Specifies the number of Bytes to be read or written per Dword used at
byte_scattered read/write and byte_scaled read/write messages."

>  
> 
> +   GEN7_BYTE_SCATTERED_DATA_SIZE_BYTE = 0,
> +   GEN7_BYTE_SCATTERED_DATA_SIZE_WORD = 1,
> +   GEN7_BYTE_SCATTERED_DATA_SIZE_DWORD = 2
> +};
> +
>  #endif /* BRW_EU_DEFINES_H */
> diff --git a/src/intel/compiler/brw_eu_emit.c
> b/src/intel/compiler/brw_eu_emit.c
> index ca97ff7325..ded7e228cf 100644
> --- a/src/intel/compiler/brw_eu_emit.c
> +++ b/src/intel/compiler/brw_eu_emit.c
> @@ -2580,6 +2580,7 @@ brw_send_indirect_surface_message(struct
> brw_codegen *p,
>     return insn;
>  }
> 
> +
>  static bool
>  while_jumps_before_offset(const struct gen_device_info *devinfo,
>                            brw_inst *insn, int while_offset, int
> start_offset)
> @@ -2983,6 +2984,47 @@ brw_untyped_surface_write(struct brw_codegen *p,
>        p, insn, num_channels);
>  }
> 
> +static enum brw_data_size brw_data_size_from_bit_size(unsigned
> bit_size)
> 
> 
> Please put the return type on 

Re: [Mesa-dev] [PATCH 5/6] glx: Prepare driFetchDrawable for no-config contexts

2017-12-01 Thread Adam Jackson
On Tue, 2017-11-14 at 14:03 -0800, Ian Romanick wrote:
> On 11/14/2017 12:13 PM, Adam Jackson wrote:
> > diff --git a/src/glx/glx_pbuffer.c b/src/glx/glx_pbuffer.c
> > index 933b5d9ecd..42e7996e37 100644
> > --- a/src/glx/glx_pbuffer.c
> > +++ b/src/glx/glx_pbuffer.c
> > @@ -272,7 +272,7 @@ DestroyDRIDrawable(Display *dpy, GLXDrawable drawable, 
> > int destroy_xdrawable)
> >   * 10.  Given that, this routine should try to use an array on the stack to
> >   * capture the reply rather than always calling Xmalloc.
> >   */
> > -static int
> > +int
> >  GetDrawableAttribute(Display * dpy, GLXDrawable drawable,
> 
> I'm trying to decide whether or not this needs a __glX prefix now.  Yes?

Eh, sure. Anyone whose toolchain still doesn't have visibility control
is losing pretty hard, but we may as well be consistent.

> Other than that, this patch is
> 
> Reviewed-by: Ian Romanick 

Merged 1 2 and 5, will respin the rest.

- ajax
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 07/44] spirv/nir: Handle 16-bit types

2017-12-01 Thread Chema Casanova
On 30/11/17 21:24, Jason Ekstrand wrote:
> I sprinkled a few mostly trivial comments below.  With those fixed,
> 
> Reviewed-by: Jason Ekstrand  >
> 
> On Wed, Nov 29, 2017 at 6:07 PM, Jose Maria Casanova Crespo
> > wrote:
> 
> From: Eduardo Lima Mitev >
> 
> v2: Added more missing implementations of 16-bit types. (Jason Ekstrand)
> 
> v3: Store values in values[0].u16[i] (Jason Ekstrand)
>     Include switches based on bitsize for 16-bit types
>     (Chema Casanova)
> 
> Signed-off-by: Jose Maria Casanova Crespo  >
> Signed-off-by: Eduardo Lima >
> ---
>  src/compiler/spirv/spirv_to_nir.c  | 111
> +++--
>  src/compiler/spirv/vtn_variables.c |  21 +++
>  2 files changed, 115 insertions(+), 17 deletions(-)
> 
> diff --git a/src/compiler/spirv/spirv_to_nir.c
> b/src/compiler/spirv/spirv_to_nir.c
> index 027efab88d..f745373473 100644
> --- a/src/compiler/spirv/spirv_to_nir.c
> +++ b/src/compiler/spirv/spirv_to_nir.c
> @@ -104,10 +104,13 @@ vtn_const_ssa_value(struct vtn_builder *b,
> nir_constant *constant,
>     switch (glsl_get_base_type(type)) {
>     case GLSL_TYPE_INT:
>     case GLSL_TYPE_UINT:
> +   case GLSL_TYPE_INT16:
> +   case GLSL_TYPE_UINT16:
>     case GLSL_TYPE_INT64:
>     case GLSL_TYPE_UINT64:
>     case GLSL_TYPE_BOOL:
>     case GLSL_TYPE_FLOAT:
> +   case GLSL_TYPE_FLOAT16:
>     case GLSL_TYPE_DOUBLE: {
>        int bit_size = glsl_get_bit_size(type);
>        if (glsl_type_is_vector_or_scalar(type)) {
> @@ -751,16 +754,38 @@ vtn_handle_type(struct vtn_builder *b, SpvOp
> opcode,
>        int bit_size = w[2];
>        const bool signedness = w[3];
>        val->type->base_type = vtn_base_type_scalar;
> -      if (bit_size == 64)
> +      switch (bit_size) {
> +      case 64:
>           val->type->type = (signedness ? glsl_int64_t_type() :
> glsl_uint64_t_type());
> -      else
> +         break;
> +      case 32:
>           val->type->type = (signedness ? glsl_int_type() :
> glsl_uint_type());
> +         break;
> +      case 16:
> +         val->type->type = (signedness ? glsl_int16_t_type() :
> glsl_uint16_t_type());
> +         break;
> +      default:
> +         unreachable("Invalid int bit size");
> +      }
>        break;
>     }
> +
>     case SpvOpTypeFloat: {
>        int bit_size = w[2];
>        val->type->base_type = vtn_base_type_scalar;
> -      val->type->type = bit_size == 64 ? glsl_double_type() :
> glsl_float_type();
> +      switch (bit_size) {
> +      case 16:
> +         val->type->type = glsl_float16_t_type();
> +         break;
> +      case 32:
> +         val->type->type = glsl_float_type();
> +         break;
> +      case 64:
> +         val->type->type = glsl_double_type();
> +         break;
> +      default:
> +         assert(!"Invalid float bit size");
> 
> 
> unreachable()

Fixed locally.

> +      }
>        break;
>     }
> 
> @@ -980,10 +1005,13 @@ vtn_null_constant(struct vtn_builder *b,
> const struct glsl_type *type)
>     switch (glsl_get_base_type(type)) {
>     case GLSL_TYPE_INT:
>     case GLSL_TYPE_UINT:
> +   case GLSL_TYPE_INT16:
> +   case GLSL_TYPE_UINT16:
>     case GLSL_TYPE_INT64:
>     case GLSL_TYPE_UINT64:
>     case GLSL_TYPE_BOOL:
>     case GLSL_TYPE_FLOAT:
> +   case GLSL_TYPE_FLOAT16:
>     case GLSL_TYPE_DOUBLE:
>        /* Nothing to do here.  It's already initialized to zero */
>        break;
> @@ -1106,12 +1134,20 @@ vtn_handle_constant(struct vtn_builder *b,
> SpvOp opcode,
>     case SpvOpConstant: {
>        assert(glsl_type_is_scalar(val->const_type));
>        int bit_size = glsl_get_bit_size(val->const_type);
> -      if (bit_size == 64) {
> +      switch (bit_size) {
> +      case 64: {
>           val->constant->values->u32[0] = w[3];
>           val->constant->values->u32[1] = w[4];
> 
> 
> A bit unrelated but this should be using vtn_u64_literal and setting
> u64[0] instead of assuming the aliasing works out between u32 and u64.

Let it be:

 val->constant->values->u64[0] = vtn_u64_literal(&w[3]);


> -      } else {
> -         assert(bit_size == 32);
> +         break;
> +      }
> 
> 
> You don't need braces around the 64-bit case.

Ok.

> +      case 32:
>           val->constant->values->u32[0] = w[3];
> +         break;
> +      case 16:
> +         

Re: [Mesa-dev] [PATCH 14/29] anv/cmd_buffer: Apply subpass flushes before set_subpass

2017-12-01 Thread Jason Ekstrand
On Fri, Dec 1, 2017 at 5:47 AM, Pohjolainen, Topi <
topi.pohjolai...@gmail.com> wrote:

> On Mon, Nov 27, 2017 at 07:06:04PM -0800, Jason Ekstrand wrote:
> > This seems slightly more correct because it means that the flushes
> > happen before any clears or resolves implied by the subpass transition.
>
> After reading the next patch this patch seems incomplete both before
> and after. Next patch seems to explicitly consider that flushes are
> needed before and after whereas at this point it would be only
> before (when this patch is applied) or after (without this patch).
>
> I guess something else holds things together, I'm just not seeing
> it?
>

I think so.  In either case, what really matters is that the subpass
flushes happen before the next draw call.  The only change made here is
that before they would get triggered by the next draw call and now they may
get triggered by a clear or resolve that happens as part of set_subpass.


> > ---
> >  src/intel/vulkan/genX_cmd_buffer.c | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> > index 2d47179..bbe97f5 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -3197,10 +3197,10 @@ void genX(CmdBeginRenderPass)(
> >
> > genX(flush_pipeline_select_3d)(cmd_buffer);
> >
> > -   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
> > -
> > cmd_buffer->state.pending_pipe_bits |=
> >cmd_buffer->state.pass->subpass_flushes[0];
> > +
> > +   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
> >  }
> >
> >  void genX(CmdNextSubpass)(
> > @@ -3220,11 +3220,11 @@ void genX(CmdNextSubpass)(
> >  */
> > cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
> >
> > -   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass
> + 1);
> > -
> > uint32_t subpass_id = anv_get_subpass_id(&cmd_buffer->state);
> > cmd_buffer->state.pending_pipe_bits |=
> >cmd_buffer->state.pass->subpass_flushes[subpass_id];
> > +
> > +   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass
> + 1);
> >  }
> >
> >  void genX(CmdEndRenderPass)(
> > --
> > 2.5.0.400.gff86faf
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] gallium/u_upload_mgr: allow drivers to specify pipe_resource::flags

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/auxiliary/util/u_upload_mgr.c| 9 ++---
 src/gallium/auxiliary/util/u_upload_mgr.h| 2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_context.c | 2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_context.c | 2 +-
 src/gallium/drivers/freedreno/a5xx/fd5_context.c | 2 +-
 src/gallium/drivers/r300/r300_context.c  | 2 +-
 src/gallium/drivers/r600/r600_pipe_common.c  | 4 ++--
 src/gallium/drivers/radeon/r600_pipe_common.c| 4 ++--
 src/gallium/drivers/svga/svga_context.c  | 6 +++---
 src/gallium/drivers/svga/svga_resource_texture.c | 2 +-
 src/gallium/drivers/virgl/virgl_context.c| 2 +-
 11 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c 
b/src/gallium/auxiliary/util/u_upload_mgr.c
index 4bb14d6..e3b0fb3 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -37,43 +37,45 @@
 
 #include "u_upload_mgr.h"
 
 
 struct u_upload_mgr {
struct pipe_context *pipe;
 
unsigned default_size;  /* Minimum size of the upload buffer, in bytes. */
unsigned bind;  /* Bitmask of PIPE_BIND_* flags. */
enum pipe_resource_usage usage;
+   unsigned flags;
unsigned map_flags; /* Bitmask of PIPE_TRANSFER_* flags. */
boolean map_persistent; /* If persistent mappings are supported. */
 
struct pipe_resource *buffer;   /* Upload buffer. */
struct pipe_transfer *transfer; /* Transfer object for the upload buffer. */
uint8_t *map;/* Pointer to the mapped upload buffer. */
unsigned offset; /* Aligned offset to the upload buffer, pointing
  * at the first unused byte. */
 };
 
 
 struct u_upload_mgr *
 u_upload_create(struct pipe_context *pipe, unsigned default_size,
-unsigned bind, enum pipe_resource_usage usage)
+unsigned bind, enum pipe_resource_usage usage, unsigned flags)
 {
struct u_upload_mgr *upload = CALLOC_STRUCT(u_upload_mgr);
if (!upload)
   return NULL;
 
upload->pipe = pipe;
upload->default_size = default_size;
upload->bind = bind;
upload->usage = usage;
+   upload->flags = flags;
 
upload->map_persistent =
   pipe->screen->get_param(pipe->screen,
   PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT);
 
if (upload->map_persistent) {
   upload->map_flags = PIPE_TRANSFER_WRITE |
   PIPE_TRANSFER_UNSYNCHRONIZED |
   PIPE_TRANSFER_PERSISTENT |
   PIPE_TRANSFER_COHERENT;
@@ -87,28 +89,28 @@ u_upload_create(struct pipe_context *pipe, unsigned 
default_size,
return upload;
 }
 
 struct u_upload_mgr *
 u_upload_create_default(struct pipe_context *pipe)
 {
return u_upload_create(pipe, 1024 * 1024,
   PIPE_BIND_VERTEX_BUFFER |
   PIPE_BIND_INDEX_BUFFER |
   PIPE_BIND_CONSTANT_BUFFER,
-  PIPE_USAGE_STREAM);
+  PIPE_USAGE_STREAM, 0);
 }
 
 struct u_upload_mgr *
 u_upload_clone(struct pipe_context *pipe, struct u_upload_mgr *upload)
 {
return u_upload_create(pipe, upload->default_size, upload->bind,
-  upload->usage);
+  upload->usage, upload->flags);
 }
 
 static void
 upload_unmap_internal(struct u_upload_mgr *upload, boolean destroying)
 {
if (!destroying && upload->map_persistent)
   return;
 
if (upload->transfer) {
   struct pipe_box *box = &upload->transfer->box;
@@ -162,20 +164,21 @@ u_upload_alloc_buffer(struct u_upload_mgr *upload, 
unsigned min_size)
 
/* Allocate a new one:
 */
size = align(MAX2(upload->default_size, min_size), 4096);
 
memset(&buffer, 0, sizeof buffer);
buffer.target = PIPE_BUFFER;
buffer.format = PIPE_FORMAT_R8_UNORM; /* want TYPELESS or similar */
buffer.bind = upload->bind;
buffer.usage = upload->usage;
+   buffer.flags = upload->flags;
buffer.width0 = size;
buffer.height0 = 1;
buffer.depth0 = 1;
buffer.array_size = 1;
 
if (upload->map_persistent) {
   buffer.flags = PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
  PIPE_RESOURCE_FLAG_MAP_COHERENT;
}
 
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h 
b/src/gallium/auxiliary/util/u_upload_mgr.h
index 536467e..875fd9a 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -45,21 +45,21 @@ extern "C" {
 /**
  * Create the upload manager.
  *
  * \param pipe  Pipe driver.
  * \param default_size  Minimum size of the upload buffer, in bytes.
  * \param bind  Bitmask of PIPE_BIND_* flags.
  * \param usage PIPE_USAGE_*
  */
 struct u_upload_mgr *
 u_upload_create(struct pipe_context *pipe, unsigned default_size,
-unsigned bind, enum pipe_resource_usage usage);
+

[Mesa-dev] [PATCH 8/8] radeonsi: make const and stream uploaders allocate read-only memory

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

and anything that clones these uploaders, like u_threaded_context.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index d85f9f0..23d8bf7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -438,26 +438,28 @@ bool si_common_context_init(struct r600_common_context 
*rctx,
return false;
}
 
rctx->allocator_zeroed_memory =
u_suballocator_create(&rctx->b, sscreen->info.gart_page_size,
  0, PIPE_USAGE_DEFAULT, 0, true);
if (!rctx->allocator_zeroed_memory)
return false;
 
rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024,
- 0, PIPE_USAGE_STREAM, 0);
+ 0, PIPE_USAGE_STREAM,
+ R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.stream_uploader)
return false;
 
rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024,
-0, PIPE_USAGE_DEFAULT, 0);
+0, PIPE_USAGE_DEFAULT,
+R600_RESOURCE_FLAG_READ_ONLY);
if (!rctx->b.const_uploader)
return false;
 
rctx->ctx = rctx->ws->ctx_create(rctx->ws);
if (!rctx->ctx)
return false;
 
if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & 
DBG(NO_ASYNC_DMA))) {
rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA,
   r600_flush_dma_ring,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] radeonsi: make IBs and shader binaries read-only for the GPU

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

Now Mesa can't corrupt them from the GPU.
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 3 +++
 src/gallium/drivers/radeon/r600_pipe_common.h   | 1 +
 src/gallium/drivers/radeonsi/si_shader.c| 8 +---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c   | 1 +
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index ec282d5..55400ab 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -167,20 +167,23 @@ void si_init_resource_fields(struct si_screen *sscreen,
 
/* Displayable and shareable surfaces are not suballocated. */
if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
else
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
if (sscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;
 
+   if (res->b.b.flags & R600_RESOURCE_FLAG_READ_ONLY)
+   res->flags |= RADEON_FLAG_READ_ONLY;
+
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
res->max_forced_staging_uploads = 0;
res->b.max_forced_staging_uploads = 0;
 
if (res->domains & RADEON_DOMAIN_VRAM) {
res->vram_usage = size;
 
res->max_forced_staging_uploads =
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 498a741..d1fdea0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -46,20 +46,21 @@
 
 struct u_log_context;
 struct si_screen;
 struct si_context;
 
 #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV << 
0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 #define R600_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 
3)
 #define R600_RESOURCE_FLAG_UNMAPPABLE  (PIPE_RESOURCE_FLAG_DRV_PRIV << 
4)
+#define R600_RESOURCE_FLAG_READ_ONLY   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
5)
 
 /* Debug flags. */
 enum {
/* Shader logging options: */
DBG_VS = PIPE_SHADER_VERTEX,
DBG_PS = PIPE_SHADER_FRAGMENT,
DBG_GS = PIPE_SHADER_GEOMETRY,
DBG_TCS = PIPE_SHADER_TESS_CTRL,
DBG_TES = PIPE_SHADER_TESS_EVAL,
DBG_CS = PIPE_SHADER_COMPUTE,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index d3e5e97..a75feb2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5010,23 +5010,25 @@ int si_shader_binary_upload(struct si_screen *sscreen, 
struct si_shader *shader)
 
assert(!prolog || !prolog->rodata_size);
assert(!previous_stage || !previous_stage->rodata_size);
assert(!prolog2 || !prolog2->rodata_size);
assert((!prolog && !previous_stage && !prolog2 && !epilog) ||
   !mainb->rodata_size);
assert(!epilog || !epilog->rodata_size);
 
r600_resource_reference(&shader->bo, NULL);
shader->bo = (struct r600_resource*)
-pipe_buffer_create(&sscreen->b, 0,
-   PIPE_USAGE_IMMUTABLE,
-   align(bo_size, SI_CPDMA_ALIGNMENT));
+si_aligned_buffer_create(&sscreen->b,
+  R600_RESOURCE_FLAG_READ_ONLY,
+  PIPE_USAGE_IMMUTABLE,
+  align(bo_size, 
SI_CPDMA_ALIGNMENT),
+  256);
if (!shader->bo)
return -ENOMEM;
 
/* Upload. */
ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED);
 
/* Don't use util_memcpy_cpu_to_le32. LLVM binaries are
 * endian-independent. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 089a358..63cd632 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -642,20 +642,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys 
*ws, struct amdgpu_ib *ib,
   buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
   break;
default:
   unreachable("unhandled IB type");
}
 
pb = ws->base.buffer_create(>base, buffer_size,
ws->info.gart_page_size,
RADEON_DOMAIN_GTT,

[Mesa-dev] [PATCH 3/8] gallium/radeon: move setting VRAM|GTT into winsyses

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

The combined VRAM|GTT heap will be removed.
---
 src/gallium/drivers/r600/r600_buffer_common.c   | 14 --
 src/gallium/drivers/radeon/r600_buffer_common.c | 14 --
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c   | 14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c   |  7 +++
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c   |  8 
 5 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_buffer_common.c 
b/src/gallium/drivers/r600/r600_buffer_common.c
index 5ff25ae..501b96f 100644
--- a/src/gallium/drivers/r600/r600_buffer_common.c
+++ b/src/gallium/drivers/r600/r600_buffer_common.c
@@ -169,34 +169,20 @@ void r600_init_resource_fields(struct r600_common_screen 
*rscreen,
 
/* Only displayable single-sample textures can be shared between
 * processes. */
if (res->b.b.target == PIPE_BUFFER ||
res->b.b.nr_samples >= 2 ||
(rtex->surface.micro_tile_mode != RADEON_MICRO_MODE_DISPLAY &&
 /* Raven doesn't use display micro mode for 32bpp, so check this: 
*/
 !(res->b.b.bind & PIPE_BIND_SCANOUT)))
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-   /* If VRAM is just stolen system memory, allow both VRAM and
-* GTT, whichever has free space. If a buffer is evicted from
-* VRAM to GTT, it will stay there.
-*
-* DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
-* placements even with a low amount of stolen VRAM.
-*/
-   if (!rscreen->info.has_dedicated_vram &&
-   (rscreen->info.drm_major < 3 || rscreen->info.drm_minor < 6) &&
-   res->domains == RADEON_DOMAIN_VRAM) {
-   res->domains = RADEON_DOMAIN_VRAM_GTT;
-   res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with 
VRAM_GTT */
-   }
-
if (rscreen->debug_flags & DBG_NO_WC)
res->flags &= ~RADEON_FLAG_GTT_WC;
 
if (res->b.b.bind & PIPE_BIND_SHARED)
res->flags |= RADEON_FLAG_NO_SUBALLOC;
 
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
 
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index d162eea..ec282d5 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -164,34 +164,20 @@ void si_init_resource_fields(struct si_screen *sscreen,
res->flags |= RADEON_FLAG_NO_CPU_ACCESS |
 RADEON_FLAG_GTT_WC;
}
 
/* Displayable and shareable surfaces are not suballocated. */
if (res->b.b.bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
res->flags |= RADEON_FLAG_NO_SUBALLOC; /* shareable */
else
res->flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
-   /* If VRAM is just stolen system memory, allow both VRAM and
-* GTT, whichever has free space. If a buffer is evicted from
-* VRAM to GTT, it will stay there.
-*
-* DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
-* placements even with a low amount of stolen VRAM.
-*/
-   if (!sscreen->info.has_dedicated_vram &&
-   (sscreen->info.drm_major < 3 || sscreen->info.drm_minor < 6) &&
-   res->domains == RADEON_DOMAIN_VRAM) {
-   res->domains = RADEON_DOMAIN_VRAM_GTT;
-   res->flags &= ~RADEON_FLAG_NO_CPU_ACCESS; /* disallowed with 
VRAM_GTT */
-   }
-
if (sscreen->debug_flags & DBG(NO_WC))
res->flags &= ~RADEON_FLAG_GTT_WC;
 
/* Set expected VRAM and GART usage for the buffer. */
res->vram_usage = 0;
res->gart_usage = 0;
res->max_forced_staging_uploads = 0;
res->b.max_forced_staging_uploads = 0;
 
if (res->domains & RADEON_DOMAIN_VRAM) {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 6ec7cb7..9ab8f67 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -379,36 +379,48 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct 
amdgpu_winsys *ws,
  unsigned pb_cache_bucket)
 {
struct amdgpu_bo_alloc_request request = {0};
amdgpu_bo_handle buf_handle;
uint64_t va = 0;
struct amdgpu_winsys_bo *bo;
amdgpu_va_handle va_handle;
unsigned va_gap_size;
int r;
 
-   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
+   /* VRAM or GTT must be specified, but not both at the same time. */
+   assert(util_bitcount(initial_domain & RADEON_DOMAIN_VRAM_GTT) == 1);
+
bo = CALLOC_STRUCT(amdgpu_winsys_bo);
if (!bo) {
   return NULL;
}
 
pb_cache_init_entry(>bo_cache, >u.real.cache_entry, 

[Mesa-dev] [PATCH 4/8] gallium/radeon: remove RADEON_HEAP_VRAM_GTT

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

Only winsyses can set VRAM|GTT. Drivers shouldn't if they want to use
winsys allocators.
---
 src/gallium/drivers/radeon/radeon_winsys.h | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 7ab110a..1d59b28 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -647,74 +647,69 @@ static inline void radeon_emit(struct radeon_winsys_cs 
*cs, uint32_t value)
 static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
 const uint32_t *values, unsigned count)
 {
 memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
 cs->current.cdw += count;
 }
 
 enum radeon_heap {
 RADEON_HEAP_VRAM_NO_CPU_ACCESS,
 RADEON_HEAP_VRAM,
-RADEON_HEAP_VRAM_GTT, /* combined heaps */
 RADEON_HEAP_GTT_WC,
 RADEON_HEAP_GTT,
 RADEON_MAX_SLAB_HEAPS,
 RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
 };
 
 static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap 
heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 case RADEON_HEAP_VRAM:
 return RADEON_DOMAIN_VRAM;
-case RADEON_HEAP_VRAM_GTT:
-return RADEON_DOMAIN_VRAM_GTT;
 case RADEON_HEAP_GTT_WC:
 case RADEON_HEAP_GTT:
 return RADEON_DOMAIN_GTT;
 default:
 assert(0);
 return (enum radeon_bo_domain)0;
 }
 }
 
 static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 return RADEON_FLAG_GTT_WC |
RADEON_FLAG_NO_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
 case RADEON_HEAP_VRAM:
-case RADEON_HEAP_VRAM_GTT:
 case RADEON_HEAP_GTT_WC:
 return RADEON_FLAG_GTT_WC |
RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
 case RADEON_HEAP_GTT:
 default:
 return RADEON_FLAG_NO_INTERPROCESS_SHARING;
 }
 }
 
 /* The pb cache bucket is chosen to minimize pb_cache misses.
  * It must be between 0 and 3 inclusive.
  */
 static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 return 0;
 case RADEON_HEAP_VRAM:
-case RADEON_HEAP_VRAM_GTT:
 return 1;
 case RADEON_HEAP_GTT_WC:
 return 2;
 case RADEON_HEAP_GTT:
 default:
 return 3;
 }
 }
 
 /* Return the heap index for winsys allocators, or -1 on failure. */
@@ -735,22 +730,21 @@ static inline int radeon_get_heap_index(enum 
radeon_bo_domain domain,
   RADEON_FLAG_NO_CPU_ACCESS |
   RADEON_FLAG_NO_INTERPROCESS_SHARING))
 return -1;
 
 switch (domain) {
 case RADEON_DOMAIN_VRAM:
 if (flags & RADEON_FLAG_NO_CPU_ACCESS)
 return RADEON_HEAP_VRAM_NO_CPU_ACCESS;
 else
 return RADEON_HEAP_VRAM;
-case RADEON_DOMAIN_VRAM_GTT:
-return RADEON_HEAP_VRAM_GTT;
 case RADEON_DOMAIN_GTT:
 if (flags & RADEON_FLAG_GTT_WC)
 return RADEON_HEAP_GTT_WC;
 else
 return RADEON_HEAP_GTT;
+default:
+return -1;
 }
-return -1;
 }
 
 #endif
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] winsys/amdgpu: add RADEON_FLAG_READ_ONLY

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeon/radeon_winsys.h | 47 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  |  9 +-
 2 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 1d59b28..d1c761f 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -45,20 +45,21 @@ enum radeon_bo_domain { /* bitfield */
 RADEON_DOMAIN_VRAM = 4,
 RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT
 };
 
 enum radeon_bo_flag { /* bitfield */
 RADEON_FLAG_GTT_WC =(1 << 0),
 RADEON_FLAG_NO_CPU_ACCESS = (1 << 1),
 RADEON_FLAG_NO_SUBALLOC =   (1 << 2),
 RADEON_FLAG_SPARSE =(1 << 3),
 RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 4),
+RADEON_FLAG_READ_ONLY = (1 << 5),
 };
 
 enum radeon_bo_usage { /* bitfield */
 RADEON_USAGE_READ = 2,
 RADEON_USAGE_WRITE = 4,
 RADEON_USAGE_READWRITE = RADEON_USAGE_READ | RADEON_USAGE_WRITE,
 
 /* The winsys ensures that the CS submission will be scheduled after
  * previously flushed CSs referencing this BO in a conflicting way.
  */
@@ -646,72 +647,88 @@ static inline void radeon_emit(struct radeon_winsys_cs 
*cs, uint32_t value)
 
 static inline void radeon_emit_array(struct radeon_winsys_cs *cs,
 const uint32_t *values, unsigned count)
 {
 memcpy(cs->current.buf + cs->current.cdw, values, count * 4);
 cs->current.cdw += count;
 }
 
 enum radeon_heap {
 RADEON_HEAP_VRAM_NO_CPU_ACCESS,
+RADEON_HEAP_VRAM_READ_ONLY,
 RADEON_HEAP_VRAM,
 RADEON_HEAP_GTT_WC,
+RADEON_HEAP_GTT_WC_READ_ONLY,
 RADEON_HEAP_GTT,
 RADEON_MAX_SLAB_HEAPS,
 RADEON_MAX_CACHED_HEAPS = RADEON_MAX_SLAB_HEAPS,
 };
 
 static inline enum radeon_bo_domain radeon_domain_from_heap(enum radeon_heap 
heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
+case RADEON_HEAP_VRAM_READ_ONLY:
 case RADEON_HEAP_VRAM:
 return RADEON_DOMAIN_VRAM;
 case RADEON_HEAP_GTT_WC:
+case RADEON_HEAP_GTT_WC_READ_ONLY:
 case RADEON_HEAP_GTT:
 return RADEON_DOMAIN_GTT;
 default:
 assert(0);
 return (enum radeon_bo_domain)0;
 }
 }
 
 static inline unsigned radeon_flags_from_heap(enum radeon_heap heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 return RADEON_FLAG_GTT_WC |
RADEON_FLAG_NO_CPU_ACCESS |
RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
+case RADEON_HEAP_VRAM_READ_ONLY:
+return RADEON_FLAG_GTT_WC |
+   RADEON_FLAG_NO_INTERPROCESS_SHARING |
+   RADEON_FLAG_READ_ONLY;
+
 case RADEON_HEAP_VRAM:
 case RADEON_HEAP_GTT_WC:
 return RADEON_FLAG_GTT_WC |
RADEON_FLAG_NO_INTERPROCESS_SHARING;
 
+case RADEON_HEAP_GTT_WC_READ_ONLY:
+return RADEON_FLAG_GTT_WC |
+   RADEON_FLAG_NO_INTERPROCESS_SHARING |
+   RADEON_FLAG_READ_ONLY;
+
 case RADEON_HEAP_GTT:
 default:
 return RADEON_FLAG_NO_INTERPROCESS_SHARING;
 }
 }
 
 /* The pb cache bucket is chosen to minimize pb_cache misses.
  * It must be between 0 and 3 inclusive.
  */
 static inline unsigned radeon_get_pb_cache_bucket_index(enum radeon_heap heap)
 {
 switch (heap) {
 case RADEON_HEAP_VRAM_NO_CPU_ACCESS:
 return 0;
+case RADEON_HEAP_VRAM_READ_ONLY:
 case RADEON_HEAP_VRAM:
 return 1;
 case RADEON_HEAP_GTT_WC:
+case RADEON_HEAP_GTT_WC_READ_ONLY:
 return 2;
 case RADEON_HEAP_GTT:
 default:
 return 3;
 }
 }
 
 /* Return the heap index for winsys allocators, or -1 on failure. */
 static inline int radeon_get_heap_index(enum radeon_bo_domain domain,
 enum radeon_bo_flag flags)
@@ -721,30 +738,48 @@ static inline int radeon_get_heap_index(enum 
radeon_bo_domain domain,
 /* NO_CPU_ACCESS implies VRAM only. */
 assert(!(flags & RADEON_FLAG_NO_CPU_ACCESS) || domain == 
RADEON_DOMAIN_VRAM);
 
 /* Resources with interprocess sharing don't use any winsys allocators. */
 if (!(flags & RADEON_FLAG_NO_INTERPROCESS_SHARING))
 return -1;
 
 /* Unsupported flags: NO_SUBALLOC, SPARSE. */
 if (flags & ~(RADEON_FLAG_GTT_WC |
   RADEON_FLAG_NO_CPU_ACCESS |
-  RADEON_FLAG_NO_INTERPROCESS_SHARING))
+  RADEON_FLAG_NO_INTERPROCESS_SHARING |
+  RADEON_FLAG_READ_ONLY))
 return -1;
 
 switch (domain) {
 case RADEON_DOMAIN_VRAM:
-if (flags & RADEON_FLAG_NO_CPU_ACCESS)
+switch (flags & (RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY)) {
+case RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_READ_ONLY:
+assert(!"NO_CPU_ACCESS | READ_ONLY doesn't make sense");
+

[Mesa-dev] [PATCH 2/8] radeonsi: allow DMABUF exports for local buffers

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

Cc: 17.3 
---
 src/gallium/drivers/radeon/r600_texture.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 2aa47b5..7a5d704 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -740,21 +740,23 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
 rtex->surface.bpe;
slice_size = rtex->surface.u.gfx9.surf_slice_size;
} else {
offset = rtex->surface.u.legacy.level[0].offset;
stride = rtex->surface.u.legacy.level[0].nblk_x *
 rtex->surface.bpe;
slice_size = 
(uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
}
} else {
/* Move a suballocated buffer into a non-suballocated 
allocation. */
-   if (sscreen->ws->buffer_is_suballocated(res->buf)) {
+   if (sscreen->ws->buffer_is_suballocated(res->buf) ||
+   (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING 
&&
+whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);
 
/* Allocate a new buffer with PIPE_BIND_SHARED. */
struct pipe_resource templ = res->b.b;
templ.bind |= PIPE_BIND_SHARED;
 
struct pipe_resource *newb =
screen->resource_create(screen, );
if (!newb)
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/8] radeonsi: flush the context after resource_copy_region for buffer exports

2017-12-01 Thread Marek Olšák
From: Marek Olšák 

Cc: 17.2 17.3 
---
 src/gallium/drivers/radeon/r600_texture.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 86a2e1b..2aa47b5 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -666,59 +666,65 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
   struct winsys_handle *whandle,
unsigned usage)
 {
struct si_screen *sscreen = (struct si_screen*)screen;
struct r600_common_context *rctx;
struct r600_resource *res = (struct r600_resource*)resource;
struct r600_texture *rtex = (struct r600_texture*)resource;
struct radeon_bo_metadata metadata;
bool update_metadata = false;
unsigned stride, offset, slice_size;
+   bool flush = false;
 
ctx = threaded_context_unwrap_sync(ctx);
rctx = (struct r600_common_context*)(ctx ? ctx : sscreen->aux_context);
 
if (resource->target != PIPE_BUFFER) {
/* This is not supported now, but it might be required for 
OpenCL
 * interop in the future.
 */
if (resource->nr_samples > 1 || rtex->is_depth)
return false;
 
/* Move a suballocated texture into a non-suballocated 
allocation. */
if (sscreen->ws->buffer_is_suballocated(res->buf) ||
rtex->surface.tile_swizzle ||
(rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING 
&&
 whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
assert(!res->b.is_shared);
r600_reallocate_texture_inplace(rctx, rtex,
PIPE_BIND_SHARED, 
false);
-   rctx->b.flush(>b, NULL, 0);
+   flush = true;
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
assert(!(res->flags & 
RADEON_FLAG_NO_INTERPROCESS_SHARING));
assert(rtex->surface.tile_swizzle == 0);
}
 
/* Since shader image stores don't support DCC on VI,
 * disable it for external clients that want write
 * access.
 */
if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
-   if (si_texture_disable_dcc(rctx, rtex))
+   if (si_texture_disable_dcc(rctx, rtex)) {
update_metadata = true;
+   /* si_texture_disable_dcc flushes the context */
+   flush = false;
+   }
}
 
if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
(rtex->cmask.size || rtex->dcc_offset)) {
/* Eliminate fast clear (both CMASK and DCC) */
r600_eliminate_fast_color_clear(rctx, rtex);
+   /* eliminate_fast_color_clear flushes the context */
+   flush = false;
 
/* Disable CMASK if flush_resource isn't going
 * to be called.
 */
if (rtex->cmask.size)
r600_texture_discard_cmask(sscreen, rtex);
}
 
/* Set metadata. */
if (!res->b.is_shared || update_metadata) {
@@ -751,34 +757,38 @@ static boolean r600_texture_get_handle(struct 
pipe_screen* screen,
struct pipe_resource *newb =
screen->resource_create(screen, );
if (!newb)
return false;
 
/* Copy the old buffer contents to the new one. */
struct pipe_box box;
u_box_1d(0, newb->width0, );
rctx->b.resource_copy_region(>b, newb, 0, 0, 0, 0,
 >b.b, 0, );
+   flush = true;
/* Move the new buffer storage to the old 
pipe_resource. */
si_replace_buffer_storage(>b, >b.b, newb);
pipe_resource_reference(, NULL);
 
assert(res->b.b.bind & PIPE_BIND_SHARED);
assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
}
 
/* Buffers */
offset = 0;
stride = 0;
slice_size = 0;
}
 
+   if (flush)
+   

[Mesa-dev] [Bug 103814] incorrect dust rendering in hl2 without sisched

2017-12-01 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103814

--- Comment #5 from Hleb Valoshka <375...@gmail.com> ---
I have checked a mesa snapshot build against llvm6 and am no longer able to
reproduce this error.

Debian's 17.1.5 is linked against llvm4, while 17.2.5 is linked against llvm5,
so it looks like llvm5 causes issues, but it's too big to bisect.

I currently have no possibility to check 17.3 linked against llvm6 but if
nobody does this I'll do it closer to x-mas.

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-01 Thread Rob Herring
On Fri, Dec 1, 2017 at 8:44 AM, Tomasz Figa  wrote:
> On Fri, Dec 1, 2017 at 11:20 PM, Rob Herring  wrote:
>> On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss  
>> wrote:
>>> On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
 On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli  wrote:
 >
 >
 > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
 > >
 > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss  > ra.com>
 > > wrote:

[...]

 > > (As a side note, I had an idea to create a new interface,
 > > standardized
 > > by Mesa, let's say libdri_android, completely free of any
 > > gralloc-internals. It would have to be exposed additionally by
 > > any
 > > Android that intends to run Mesa. Given the need to deal with 3
 > > different gralloc versions already, it could be something easier
 > > to
 > > manage.)
 >
 >
 > Makes sense, it is a bit messy and we have a few too many patches in
 > our tree
 > because of these differences.

 Seems overly complicated to me. The information needed is within the
 ints in the native_handle in most/all implementations. I don't think
 there's another way to globally store dmabuf metadata unless you have
 a custom interface in your DRM driver. So standardizing to a common
 library implies a common handle struct here. I think the options are:

 - common struct definition (native_handle_t + dmabuf fd(s) + width,
 height, stride, format, usage, etc.)
 - common struct plus inline accessor functions
 - common opaque struct plus accessor library
>>>
>>> So these common parts would be much like what currently exists in
>>> minigbm/cros_gralloc_handle.h and gbm_gralloc/gralloc_drm_handle.h
>>> then, but extended with the above suggestions?
>>
>> Yes, but which part do you think needs to be extended?
>>
>> As we discussed on IRC, I think perhaps we just need to change the
>> handle format field in gralloc_drm_handle.h to use fourcc (aka DRM and
>> GBM formats) instead of the Android format. I think all the users just
>> end up converting it to their own internal format anyway.
>
> We keep the handle opaque for consumers and even minigbm dereferences
> it only when creating/registering the buffer, further using the handle
> pointer only as a key to internal bookkeeping map.

What you say implies that you don't need any metadata in the handle,
but you do have pretty much all the same data. Whether you

> Relying on the struct itself is not only error prone, as there is no
> way to check if the struct on gralloc implementation side matches what
> we expect, but also makes it difficult to change the handle struct at
> our convenience.

How does a library solve this?

Everything in Android gets built together and the handle pretty much
has to stay the same across components in any implementation I've
seen. Maybe someday that will change and we'll need versioning and
backwards compatibility, but for now that's unnecessary complexity.
We'd have to get to a single, well controlled and defined handle first
anyway before we start versioning.

Anyone is still free to change things downstream however they want.
We're only talking about what does mainline/upstream do.

 Also, I don't think whatever is standardized should live in Mesa.
 There's a need to support drm_hwcomposer (which has the same
 dependencies as mesa) with non-Mesa GL implementations (yes, vendor
 binaries).
>>>
>>> Excluding Mesa and the composer leaves us with the allocator or
>>> creating a new library.
>>> I would assume that creating a new library is the worse option.
>>
>> Why excluding the composer? If we have to pick, I'd put it there or
>> perhaps libdrm?
>
> There is no single central composer, nor is libdrm used on every
> system... (The latter is not even used by the Intel driver in Mesa
> anymore.)

I think you are confusing libdrm_intel which was removed with libdrm
(the ioctl wrappers) which is still a dependency. I don't think there
is any plan to remove libdrm completely.

For cases where a user has different components, then they have to
copy the struct.

> However I fully agree that there are other upstream components (e.g.
> drm_hwcomposer), which would benefit from it and nobody wants to
> include Mesa in the build just for one header. Should we have a
> separate freedesktop project for it?

I'm still going to say libdrm. If that's really a problem, then we can
split it out later.

Rob
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] docs/release-calendar: update and extend

2017-12-01 Thread Emil Velikov
On 30 November 2017 at 15:36, Andres Gomez  wrote:
> On Wed, 2017-11-29 at 18:20 +, Emil Velikov wrote:
>> From: Emil Velikov 
>>
>> Cc: Juan A. Suárez 
>> Cc: Andres Gomez 
>> Signed-off-by: Emil Velikov 
>> ---
>> Gents, I have no objections if you want to do any of the releases listed
>> ;-)
>> ---
>>  docs/release-calendar.html | 26 --
>>  1 file changed, 12 insertions(+), 14 deletions(-)
>>
>> diff --git a/docs/release-calendar.html b/docs/release-calendar.html
>> index 2d39e3b39e8..0ba59f520ea 100644
>> --- a/docs/release-calendar.html
>> +++ b/docs/release-calendar.html
>> @@ -39,39 +39,37 @@ if you'd like to nominate a patch in the next stable 
>> release.
>>  Notes
>>  
>>  
>> -17.2
>> +17.2
>>  2017-12-08
>>  17.2.7
>>  Emil Velikov
>> -Final planned release for the 17.2 series
>
> Missing: 
>
Arrh, I always forget these yet the w3c validator is kind enough to not warn :-\


>>  
>>  
>> -17.3
>> -2017-11-03
>> -17.3.0-rc3
>> +2017-12-22
>> +17.2.8
>>  Emil Velikov
>
> I can pick 17.2.8
>
>> -
>> +Final planned release for the 17.2 series
>>  
>>  
>> -2017-11-10
>> -17.3.0-rc4
>> +17.3
>> +2017-12-02
>> +17.3.0 final
>>  Emil Velikov
>> -May be promoted to 17.3.0 final
>> +
>>  
>>  
>> -2017-11-24
>> +2017-12-15
>>  17.3.1
>> -Andres Gomez
>> +Emil Velikov
>>  
>>  
>>  
>> -2017-12-08
>> +2017-12-29
>>  17.3.2
>>  Emil Velikov
>>  
>> -
>>  
>> -2017-12-22
>> +2018-01-12
>>  17.3.3
>>  Emil Velikov
>
> Juan can pick 17.3.3
>
>>  
>
> Other than those small changes, this is:
>
> Reviewed-by: Andres Gomez 
>
Thanks Andres, tweaked and pushed.

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 14/32] vulkan/wsi: Do image creation in common code

2017-12-01 Thread Chad Versace
On Tue 28 Nov 2017, Jason Ekstrand wrote:
> This uses the mock extension created in a previous commit to tell the
> driver that the image it's just been asked to create is, in fact, a
> window system image with whatever assumptions that implies.  There was a
> lot of redundant code between the two drivers to do basically exactly
> the same thing.
> ---
>  src/amd/vulkan/radv_wsi.c   | 124 
> +---
>  src/intel/vulkan/anv_wsi.c  | 122 +--
>  src/vulkan/wsi/wsi_common.c | 117 +-
>  src/vulkan/wsi/wsi_common.h |  28 +---
>  src/vulkan/wsi/wsi_common_private.h |  25 +++-
>  src/vulkan/wsi/wsi_common_wayland.c |  13 +---
>  src/vulkan/wsi/wsi_common_x11.c |  20 +-
>  7 files changed, 146 insertions(+), 303 deletions(-)

This patch is
Reviewed-by: Chad Versace 

This series is a good improvement over the old code :)

But I found some nits:

- The Vulkan spec, iirc, requires that vkCreateFoo not modify its
  output parameter on failure. The memset in this patch breaks that
  requirement. I'm sure we're breaking that elsewhere too. We should
  probably do a cleanup one day to fix that.

- wsi_destroy_image neither asserts that image->fd == -1 nor closes
  it. I believe there is no fd leakage because anv_wsi* does the
  right thing. But that's a small cleanup we could do later.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread Rogovin, Kevin
Hi,

 For ANV I do not know as I have not really poked into its code. For i965, this 
patch series handles the situation as to what to do if a draw or dispatch 
compute accesses both an ASTC5x5 texture and a texture with an auxiliary 
buffer. It does this by checking if there are both such textures and ASTC5x5 
textures in the list of currently bound textures. If the answer is yes, then it 
resolves all bound textures that use an auxiliary buffer so 
that the sampler does not need the auxiliary buffers when it reads from the surfaces. The 
resolve stuff is handled in the function brw_astc5x5_perform_wa() in 
brw_context.c of the first patch; the checking is handled in the 3rd patch by 
modifying brw_tex_validate() in intel_tex_validate.c. The 4th and 5th patches 
are deceptively small since all they do is add a call to 
brw_astc5x5_perform_wa() in brw_draw.c and brw_compute.c respectively. The 4th 
patch also has a small addition to prevent the surface state used for sampling 
from having the auxiliary surface given in the call.

As to how to do similar auto-resolve and tweak of state on ANV, I need to dive 
quite deep into the code to see how to do it.

-Kevin

-Original Message-
From: Matt Turner [mailto:matts...@gmail.com] 
Sent: Friday, December 1, 2017 8:25 PM
To: Rogovin, Kevin 
Cc: Ilia Mirkin ; mesa-dev@lists.freedesktop.org
Subject: Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

On Fri, Dec 1, 2017 at 10:06 AM, Rogovin, Kevin  wrote:
> Hi,
>
>  Yes ANV will need something like this as well. If the GPU samples from both 
> an ASTC5x5 texture and one with an aux buffer without a texture cache 
> invalidate between such accesses, then the GPU hangs, which in turn makes the 
> system unresponsive for a few seconds until the kernel resets the GPU; then 
> an ioctl will fail in i965 which means things are very bad usually and (for 
> me at least on my system with how I build mesa) the application crashes.

I think his question is -- is there anything we can do about the case where a 
single shader program samples ASTC5x5 and a texture with an aux buffer? 
Presumably there's no way to invalidate the texture cache during a shader 
program, so what can you do?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] i965/gen6-7/sol: Bump primitive counter BO size.

2017-12-01 Thread Francisco Jerez
Eero Tamminen  writes:

> Hi,
>
> Tested-By: Eero Tamminen 
>
> On 18.11.2017 00:28, Francisco Jerez wrote:
>> Improves performance of SynMark2 OglGSCloth by a further 9.65%±0.59%
>> due to the reduction in overwraps of the primitive count buffer that
>> lead to a CPU stall on previous rendering.  Cumulative performance
>> improvement from the series 81.50% ±0.96% (data gathered on VLV).
>
> I tested the patch series with transform feedback using tests on SNB 
> GT2, BYT, HSW GT2 and BSW, using git versions of Mesa, drm-tip kernel 
> and X server.
>
>
> SNB GT2:
> * No noticeable perf impact on GfxBench Manhattan
> * Mesa unfortunately renders GSCloth incorrectly on SNB,
>but that happens also without this patch series:
>   https://bugs.freedesktop.org/show_bug.cgi?id=103824
>
> BYT:
> * 1-2% perf improvement in GfxBench Manhattan 3.0 & 3.1
> * 30% perf improvement in GSCloth
>- Device is single channel one, was your VLV 2-channel one?
>

I don't have access to the VLV system today to verify, but your system
is likely hitting the bandwidth limits of the system sooner than mine
(either because of slower memory clocks or because of single- vs
dual-channel), after which point performance doesn't improve further
for you because it's fully bandwidth-bound.

> HSW GT2:
> * No noticeable perf impact
>

This is also expected, HSW uses the hsw_sol.c XFB implementation which
this patch doesn't have any effect on.

> BSW:
> * No noticeable perf impact (as expected)
>
>
>   - Eero
>
>> ---
>>   src/mesa/drivers/dri/i965/gen6_sol.c | 5 +++--
>>   1 file changed, 3 insertions(+), 2 deletions(-)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
>> b/src/mesa/drivers/dri/i965/gen6_sol.c
>> index b1baf01bcd9..355acd42189 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
>> @@ -197,7 +197,7 @@ brw_new_transform_feedback(struct gl_context *ctx, 
>> GLuint name)
>>  brw_obj->offset_bo =
>> brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64);
>>  brw_obj->prim_count_bo =
>> -  brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64);
>> +  brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384, 64);
>>   
>>  return _obj->base;
>>   }
>> @@ -287,7 +287,8 @@ brw_save_primitives_written_counters(struct brw_context 
>> *brw,
>>  assert(obj->prim_count_bo != NULL);
>>   
>>  /* Check if there's enough space for a new pair of four values. */
>> -   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) {
>> +   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
>> +   obj->prim_count_bo->size) {
>> aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
>>  >previous_counter);
>> aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
>> 
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GBM and the Device Memory Allocator Proposals

2017-12-01 Thread Rob Clark
On Fri, Dec 1, 2017 at 12:09 PM, Nicolai Hähnle  wrote:
> On 01.12.2017 16:06, Rob Clark wrote:
>>
>> On Thu, Nov 30, 2017 at 5:43 PM, Nicolai Hähnle 
>> wrote:
>>>
>>> Hi,
>>>
>>> I've had a chance to look a bit more closely at the allocator prototype
>>> repository now. There's a whole bunch of low-level API design feedback,
>>> but
>>> for now let's focus on the high-level stuff first.
>>>
>>> Going by the 4.5 major object types (as also seen on slide 5 of your
>>> presentation [0]), assertions and usages make sense to me.
>>>
>>> Capabilities and capability sets should be cleaned up in my opinion, as
>>> the
>>> status quo is overly obfuscating things. What capability sets really
>>> represent, as far as I understand them, is *memory layouts*, and so
>>> that's
>>> what they should be called.
>>>
>>> This conceptually simplifies `derive_capabilities` significantly without
>>> any
>>> loss of expressiveness as far as I can see. Given two lists of memory
>>> layouts, we simply look for which memory layouts appear in both lists,
>>> and
>>> then merge their constraints and capabilities.
>>>
>>> Merging constraints looks good to me.
>>>
>>> Capabilities need some more thought. The prototype removes capabilities
>>> when
>>> merging layouts, but I'd argue that that is often undesirable. (In fact,
>>> I
>>> cannot think of capabilities which we'd always want to remove.)
>>>
>>> A typical example for this is compression (i.e. DCC in our case). For
>>> rendering usage, we'd return something like:
>>>
>>> Memory layout: AMD/tiled; constraints(alignment=64k); caps(AMD/DCC)
>>>
>>> For display usage, we might return (depending on hardware):
>>>
>>> Memory layout: AMD/tiled; constraints(alignment=64k); caps(none)
>>>
>>> Merging these in the prototype would remove the DCC capability, even
>>> though
>>> it might well make sense to keep it there for rendering. Dealing with the
>>> fact that display usage does not have this capability is precisely one of
>>> the two things that transitions are about! The other thing that
>>> transitions
>>> are about is caches.
>>>
>>> I think this is kind of what Rob was saying in one of his mails.
>>
>>
>> Perhaps "layout" is a better name than "caps".. either way I think of
>> both AMD/tiled and AMD/DCC as the same type of "thing".. the
>> difference between AMD/tiled and AMD/DCC is that a transition can be
>> provided for AMD/DCC.  Other than that they are both things describing
>> the layout.
>
>
> The reason that a transition can be provided is that they aren't quite the
> same thing, though. In a very real sense, AMD/DCC is a "child" property of
> AMD/tiled: DCC is implemented as a meta surface whose memory layout depends
> on the layout of the main surface.

I suppose this is six-of-one, half-dozen of the other..

what you are calling a layout is what I'm calling a cap that just
happens not to have an associated transition

> Although, if there are GPUs that can do an in-place "transition" between
> different tiling layouts, then the distinction is perhaps really not as
> clear-cut. I guess that would only apply to tiled renderers.

I suppose the advantage of just calling both layout and caps the same
thing, and just saying that a "cap" (or "layout" if you prefer that
name) can optionally have one or more associated transitions, is that
you can deal with cases where sometimes a tiled format might actually
have an in-place transition ;-)

>
>> So let's say you have a setup where both display and GPU supported
>> FOO/tiled, but only GPU supported compressed (FOO/CC) and cached
>> (FOO/cached).  But the GPU supported the following transitions:
>>
>>trans_a: FOO/CC -> null
>>trans_b: FOO/cached -> null
>>
>> Then the sets for each device (in order of preference):
>>
>> GPU:
>>1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=32k)
>>2: caps(FOO/tiled, FOO/CC); constraints(alignment=32k)
>>3: caps(FOO/tiled); constraints(alignment=32k)
>>
>> Display:
>>1: caps(FOO/tiled); constraints(alignment=64k)
>>
>> Merged Result:
>>1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=64k);
>>   transition(GPU->display: trans_a, trans_b; display->GPU: none)
>>2: caps(FOO/tiled, FOO/CC); constraints(alignment=64k);
>>   transition(GPU->display: trans_a; display->GPU: none)
>>3: caps(FOO/tiled); constraints(alignment=64k);
>>   transition(GPU->display: none; display->GPU: none)
>
>
> We definitely don't want to expose a way of getting uncached rendering
> surfaces for radeonsi. I mean, I think we are supposed to be able to program
> our hardware so that the backend bypasses all caches, but (a) nobody
> validates that and (b) it's basically suicide in terms of performance. Let's
> build fewer footguns :)

sure, this was just a hypothetical example.  But to take this case as
another example, if you didn't want to expose uncached rendering (or
cached w/ cache flushes after each draw), you 

Re: [Mesa-dev] GBM and the Device Memory Allocator Proposals

2017-12-01 Thread Nicolai Hähnle

On 01.12.2017 18:09, Nicolai Hähnle wrote:
[snip]

As for the actual transition API, I accept that some metadata may be
required, and the metadata probably needs to depend on the memory 
layout,

which is often vendor-specific. But even linear layouts need some
transitions for caches. We probably need at least some generic 
"off-device

usage" bit.


I've started thinking of cached as a capability with a transition.. I
think that helps.  Maybe it needs to somehow be more specific (ie. if
you have two devices both with their own cache with no coherency
between the two)


As I wrote above, I'd prefer not to think of "cached" as a capability at 
least for radeonsi.


 From the desktop perspective, I would say let's ignore caches, the 
drivers know which caches they need to flush to make data visible to 
other devices on the system.


On the other hand, there are probably SoC cases where non-coherent 
caches are shared between some but not all devices, and in that case 
perhaps we do need to communicate this.


So perhaps we should have two kinds of "capabilities".

The first, like framebuffer compression, is a capability of the 
allocated memory layout (because the compression requires a meta 
surface), and devices that expose it may opportunistically use it.


The second, like caches, is a capability that the device/driver will use 
and you don't get a say in it, but other devices/drivers also don't need 
to be aware of them.


So then you could theoretically have a system that gives you:

GPU: FOO/tiled(layout-caps=FOO/cc, dev-caps=FOO/gpu-cache)
Display: FOO/tiled(layout-caps=FOO/cc)
Video:   FOO/tiled(dev-caps=FOO/vid-cache)
Camera:  FOO/tiled(dev-caps=FOO/vid-cache)

[snip]

FWIW, I think all that stuff about different caches quite likely 
over-complicates things. At the end of each "command submission" of 
whichever type of engine, the buffer must be in a state where the kernel 
is free to move it around for memory management purposes. This already 
puts a big constraint on the kind of (non-coherent) caches that can be 
supported anyway, so I wouldn't be surprised if we could get away with a 
*much* simpler approach.


Cheers,
Nicolai

--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread Matt Turner
On Fri, Dec 1, 2017 at 10:06 AM, Rogovin, Kevin  wrote:
> Hi,
>
>  Yes ANV will need something like this as well. If the GPU samples from both 
> an ASTC5x5 texture and one with an aux buffer without a texture cache 
> invalidate between such accesses, then the GPU hangs, which in turn makes the 
> system unresponsive for a few seconds until the kernel resets the GPU; then 
> an ioctl will fail in i965 which means things are very bad usually and (for 
> me at least on my system with how I build mesa) the application crashes.

I think his question is -- is there anything we can do about the case
where a single shader program samples ASTC5x5 and a texture with an
aux buffer? Presumably there's no way to invalidate the texture cache
during a shader program, so what can you do?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread Rogovin, Kevin
Hi,

 Yes ANV will need something like this as well. If the GPU samples from both an 
ASTC5x5 texture and one with an aux buffer without a texture cache invalidate 
between such accesses, then the GPU hangs, which in turn makes the system 
unresponsive for a few seconds until the kernel resets the GPU; then an ioctl 
will fail in i965 which means things are very bad usually and (for me at least 
on my system with how I build mesa) the application crashes.

 -Kevin

-Original Message-
From: ibmir...@gmail.com [mailto:ibmir...@gmail.com] On Behalf Of Ilia Mirkin
Sent: Friday, December 1, 2017 7:38 PM
To: Rogovin, Kevin 
Cc: mesa-dev@lists.freedesktop.org
Subject: Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

On Fri, Dec 1, 2017 at 12:19 PM,   wrote:
> From: Kevin Rogovin 
>
> This patch series implements a needed workaround for Gen9 for ASTC5x5 
> sampler reads. The crux of the work around is to make sure that the 
> sampler does not read an ASTC5x5 texture and a surface with an 
> auxiliary buffer without having a texture cache invalidate between such 
> accesses.

Presumably anv needs something like this too? What happens if you have a single 
draw which samples from both an ASTC5x5 texture and one with an aux buffer? 
[Just curious.]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Disable regular fast-clears (CCS_D) on gen9+

2017-12-01 Thread Jason Ekstrand
This partially reverts commit 3e57e9494c2279580ad6a83ab8c065d01e7e634e
which caused a bunch of GPU hangs on several Source titles.  To date, we
have no clue why these hangs are actually happening.  This undoes the
final effect of 3e57e9494c227 and gets us back to not hanging.  Tested
with Team Fortress 2.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435
Fixes: 3e57e9494c2279580ad6a83ab8c065d01e7e634e
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_meta_util.c | 11 ++
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 55 +++
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_meta_util.c 
b/src/mesa/drivers/dri/i965/brw_meta_util.c
index d292f5a..ba92168 100644
--- a/src/mesa/drivers/dri/i965/brw_meta_util.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_util.c
@@ -293,6 +293,17 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw,
brw->mesa_to_isl_render_format[mt->format])
   return false;
 
+   /* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When
+* GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be
+* resolved in intel_update_state. In that case it's pointless to do a
+* fast clear because it's very likely to be immediately resolved.
+*/
+   if (devinfo->gen >= 9 &&
+   mt->surf.samples == 1 &&
+   ctx->Color.sRGBEnabled &&
+   _mesa_get_srgb_format_linear(mt->format) != mt->format)
+  return false;
+
const mesa_format format = _mesa_get_render_format(ctx, mt->format);
if (_mesa_is_format_integer_color(format)) {
   if (devinfo->gen >= 8) {
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b87d356..041abb2 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -207,7 +207,13 @@ intel_miptree_supports_ccs(struct brw_context *brw,
if (!brw->mesa_format_supports_render[mt->format])
   return false;
 
-   return true;
+   if (devinfo->gen >= 9) {
+  mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
+  const enum isl_format isl_format =
+ brw_isl_format_for_mesa_format(linear_format);
+  return isl_format_supports_ccs_e(>screen->devinfo, isl_format);
+   } else
+  return true;
 }
 
 static bool
@@ -284,13 +290,12 @@ intel_miptree_supports_ccs_e(struct brw_context *brw,
if (!intel_miptree_supports_ccs(brw, mt))
   return false;
 
-   /* Many window system buffers are sRGB even if they are never rendered as
-* sRGB.  For those, we want CCS_E for when sRGBEncode is false.  When the
-* surface is used as sRGB, we fall back to CCS_D.
+   /* Fast clear can be also used to clear srgb surfaces by using equivalent
+* linear format. This trick, however, can't be extended to be used with
+* lossless compression and therefore a check is needed to see if the format
+* really is linear.
 */
-   mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
-   enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
-   return isl_format_supports_ccs_e(>screen->devinfo, isl_format);
+   return _mesa_get_srgb_format_linear(mt->format) == mt->format;
 }
 
 /**
@@ -2681,27 +2686,29 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
   return ISL_AUX_USAGE_MCS;
 
case ISL_AUX_USAGE_CCS_D:
-  return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;
-
-   case ISL_AUX_USAGE_CCS_E: {
-  /* If the format supports CCS_E and is compatible with the miptree,
-   * then we can use it.
+  /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
+   * the single-sampled color renderbuffers because the CCS buffer isn't
+   * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
+   * enabled because otherwise the surface state will be programmed with
+   * the linear equivalent format anyway.
*/
-  if (format_ccs_e_compat_with_miptree(>screen->devinfo,
-   mt, render_format))
- return ISL_AUX_USAGE_CCS_E;
-
-  /* Otherwise, we have to fall back to CCS_D */
+  if (isl_format_is_srgb(render_format) &&
+  _mesa_get_srgb_format_linear(mt->format) != mt->format) {
+ return ISL_AUX_USAGE_NONE;
+  } else if (!mt->mcs_buf) {
+ return ISL_AUX_USAGE_NONE;
+  } else {
+ return ISL_AUX_USAGE_CCS_D;
+  }
 
-  /* gen9 hardware technically supports non-0/1 clear colors with sRGB
-   * formats.  However, there are issues with blending where it doesn't
-   * properly apply the sRGB curve to the clear color when blending.
+   case ISL_AUX_USAGE_CCS_E: {
+  /* Lossless compression is not supported for SRGB formats, it
+   * should be impossible to get here with such surfaces.
*/
-  if (blend_enabled && 

Re: [Mesa-dev] [PATCH 4/5] xlib: remove dummy GLX_MESA_set_3dfx_mode implementation

2017-12-01 Thread Brian Paul

On 11/30/2017 12:21 PM, Emil Velikov wrote:

On 30 November 2017 at 19:09, Ian Romanick  wrote:

Is xmesa.h something that apps could see?  Removing stuff could,
hypothetically, cause compilation problems... but also, app developers,
fix your old crap. :)


Some digging showed:
  - the header was never installed
  - seemingly no external users of XMesa
  - used for mesa <> xserver see commit 50aaabc248c9823106ff772873cbf2631d4dadcd

Brian, any recollection of whether XMesa has any actual audience?


Not in a very long time.  I think it's OK to remove that old stuff.

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread Ilia Mirkin
On Fri, Dec 1, 2017 at 12:19 PM,   wrote:
> From: Kevin Rogovin 
>
> This patch series implements a needed workaround for Gen9 for ASTC5x5
> sampler reads. The crux of the work around is to make sure that the
> sampler does not read an ASTC5x5 texture and a surface with an auxiliary
> buffer without having a texture cache invalidate between such accesses.

Presumably anv needs something like this too? What happens if you have
a single draw which samples from both an ASTC5x5 texture and one with
an aux buffer? [Just curious.]
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] Implement WaClearTDRRegBeforeEOTForNonPS.

2017-12-01 Thread Rafael Antognolli
On Fri, Dec 01, 2017 at 10:33:26AM +0200, Pohjolainen, Topi wrote:
> On Thu, Nov 30, 2017 at 04:50:19PM -0800, Rafael Antognolli wrote:
> > It looks like I forgot to prefix the subject with "intel/compiler:".
> > Fixed locally.
> > 
> > On Thu, Nov 30, 2017 at 04:42:48PM -0800, Rafael Antognolli wrote:
> > > The bspec describes:
> > > 
> > >"WA: Clear tdr register before send EOT in all non-PS shader kernels
> > > 
> > >mov(8) tdr0:ud 0x0:ud {NoMask}"
> > > 
> > > Signed-off-by: Rafael Antognolli 
> > > ---
> > >  src/intel/compiler/brw_fs_generator.cpp | 7 +++
> > >  src/intel/compiler/brw_reg.h| 6 ++
> > >  2 files changed, 13 insertions(+)
> > > 
> > > diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> > > b/src/intel/compiler/brw_fs_generator.cpp
> > > index 28790c86a64..78aa764fe73 100644
> > > --- a/src/intel/compiler/brw_fs_generator.cpp
> > > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > > @@ -573,6 +573,13 @@ fs_generator::generate_urb_write(fs_inst *inst, 
> > > struct brw_reg payload)
> > >  {
> > > brw_inst *insn;
> > >  
> 
> I think it would be nice to have the bspec quote here.

Good point, will do that.

> > > +   if (inst->eot) {
> > > +  brw_push_insn_state(p);
> > > +  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> > > +  brw_MOV(p, brw_tdr_reg(), brw_imm_uw(0));
> > > +  brw_pop_insn_state(p);
> > > +   }
> > > +
> > > insn = brw_next_insn(p, BRW_OPCODE_SEND);
> > >  
> > > brw_set_dest(p, insn, brw_null_reg());
> > > diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
> > > index ec1045b612a..a039c6f676c 100644
> > > --- a/src/intel/compiler/brw_reg.h
> > > +++ b/src/intel/compiler/brw_reg.h
> > > @@ -774,6 +774,12 @@ brw_address_reg(unsigned subnr)
> > > return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, 
> > > subnr);
> > >  }
> > >  
> > > +static inline struct brw_reg
> > > +brw_tdr_reg(void)
> > > +{
> > > +   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0);
> > > +}
> > > +
> > >  /* If/else instructions break in align16 mode if writemask & swizzle
> > >   * aren't xyzw.  This goes against the convention for other scalar
> > >   * regs:
> > > -- 
> > > 2.13.6
> > > 
> > > ___
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] i965: set ASTC5x5 workaround texture type tracking on texture validate

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

One of the presteps in each draw (and compute) call is to validate
the textures. This is the perfect place (since all texture units
are looped through) to see if ASTC5x5 and/or textures with an
auxiliary surface are accessed by the GPU.

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/intel_tex_validate.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c 
b/src/mesa/drivers/dri/i965/intel_tex_validate.c
index 2b7798c940..812c0c7793 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -188,11 +188,24 @@ brw_validate_textures(struct brw_context *brw)
struct gl_context *ctx = >ctx;
const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
 
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
for (int unit = 0; unit <= max_enabled_unit; unit++) {
   struct gl_texture_unit *tex_unit = >Texture.Unit[unit];
 
   if (tex_unit->_Current) {
+ struct intel_texture_object *tex =
+intel_texture_object(tex_unit->_Current);
+ struct intel_mipmap_tree *mt = tex->mt;
+
  intel_finalize_mipmap_tree(brw, unit);
+ if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+brw->astc5x5_wa.texture_with_auxilary_present = true;
+ }
+ if (tex->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+ tex->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5) {
+brw->astc5x5_wa.texture_astc5x5_present = true;
+ }
   }
}
 }
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] i965: define astc5x5 workaround infrastructure

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

Some GEN's have a bug in the sampler where if the sampler accesses
a texture with an auxiliary surface and an ASTC5x5 texture without
having the texture cache invalidated between such accesses, then
the GPU will hang. This patch defines the infrastructure to
implement the needed workaround for such hardware.

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/brw_context.c   | 63 +++
 src/mesa/drivers/dri/i965/brw_context.h   | 23 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  1 +
 3 files changed, 87 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index dd55b43669..f2e9b9779a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1060,6 +1060,12 @@ brwCreateContext(gl_api api,
if (ctx->Extensions.INTEL_performance_query)
   brw_init_performance_queries(brw);
 
+   brw->astc5x5_wa.required = (devinfo->gen == 9);
+   brw->astc5x5_wa.mode = BRW_ASTC5x5_WA_MODE_NONE;
+   brw->astc5x5_wa.texture_astc5x5_present = false;
+   brw->astc5x5_wa.texture_with_auxilary_present = false;
+   brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+
vbo_use_buffer_objects(ctx);
vbo_always_unmap_buffers(ctx);
 
@@ -1134,6 +1140,63 @@ intelDestroyContext(__DRIcontext * driContextPriv)
driContextPriv->driverPrivate = NULL;
 }
 
+void
+brw_set_astc5x5_wa_mode(struct brw_context *brw,
+enum brw_astc5x5_wa_mode_t mode)
+{
+   if (!brw->astc5x5_wa.required ||
+   mode == BRW_ASTC5x5_WA_MODE_NONE ||
+   brw->astc5x5_wa.mode == mode) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.mode != BRW_ASTC5x5_WA_MODE_NONE) {
+  brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   brw->astc5x5_wa.mode = mode;
+}
+
+static void
+resolve_to_disable_aux_on_samplers(struct brw_context *brw)
+{
+   struct gl_context *ctx = >ctx;
+   const int max_enabled_unit = ctx->Texture._MaxEnabledTexImageUnit;
+
+   for (int unit = 0; unit <= max_enabled_unit; unit++) {
+  struct gl_texture_unit *tex_unit = >Texture.Unit[unit];
+  struct gl_texture_object *tex_obj = tex_unit->_Current;
+  if (tex_obj) {
+ struct intel_mipmap_tree *mt = intel_texture_object(tex_obj)->mt;
+ if (mt && mt->aux_usage != ISL_AUX_USAGE_NONE) {
+intel_miptree_prepare_access(brw, mt,
+ 0, INTEL_REMAINING_LEVELS,
+ 0, INTEL_REMAINING_LAYERS,
+ ISL_AUX_USAGE_NONE, false);
+ }
+  }
+   }
+}
+
+void
+brw_astc5x5_perform_wa(struct brw_context *brw)
+{
+   if (!brw->astc5x5_wa.required) {
+  return;
+   }
+
+   if (brw->astc5x5_wa.texture_astc5x5_present) {
+  if (brw->astc5x5_wa.texture_with_auxilary_present) {
+ /* resolve so that auxilary buffers are not needed
+  * by any sampler */
+ resolve_to_disable_aux_on_samplers(brw);
+  }
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else if (brw->astc5x5_wa.texture_with_auxilary_present) {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
+}
+
 GLboolean
 intelUnbindContext(__DRIcontext * driContextPriv)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index b3d7c6baf8..37dfe45592 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,12 @@ enum brw_cache_id {
BRW_MAX_CACHE
 };
 
+enum brw_astc5x5_wa_mode_t {
+   BRW_ASTC5x5_WA_MODE_NONE,
+   BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5,
+   BRW_ASTC5x5_WA_MODE_HAS_AUX,
+};
+
 enum brw_state_id {
/* brw_cache_ids must come first - see brw_program_cache.c */
BRW_STATE_URB_FENCE = BRW_MAX_CACHE,
@@ -1230,6 +1236,18 @@ struct brw_context
 */
bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
 
+   /* Certain GEN's have a hardware bug where the sampler hangs if it attempts
+* to access auxilary buffers and an ASTC5x5 compressed buffer. The 
workaround
+* is to invalidate the texture cache between such access.
+*/
+   struct {
+  bool required;
+  enum brw_astc5x5_wa_mode_t mode;
+  bool texture_astc5x5_present;
+  bool texture_with_auxilary_present;
+  bool blorp_sampling_from_astc5x5;
+   } astc5x5_wa;
+
__DRIcontext *driContext;
struct intel_screen *screen;
 };
@@ -1663,6 +1681,11 @@ void brw_query_internal_format(struct gl_context *ctx, 
GLenum target,
GLenum internalFormat, GLenum pname,
GLint *params);
 
+/* brw_context::astc5x5_wa */
+void brw_set_astc5x5_wa_mode(struct brw_context *brw,
+ enum brw_astc5x5_wa_mode_t mode);
+void brw_astc5x5_perform_wa(struct brw_context 

[Mesa-dev] [PATCH 4/5] i965: use ASTC5x5 workaround in brw_draw

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

Perform the ASTC5x5 workaround tasks for drawing; note that
the function does not do anything and immediately returns
if the bug is not present on the hardware.

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/brw_draw.c | 6 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 5 +
 2 files changed, 11 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..f335c2bd64 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -684,6 +684,12 @@ brw_prepare_drawing(struct gl_context *ctx,
brw_predraw_resolve_inputs(brw, true);
brw_predraw_resolve_framebuffer(brw);
 
+   /* if necessary, perform astc5x5 workarounds to make sure any sampler does
+* not sample sample from a surface using an auxilary buffer within the
+* same batch of sampling from a surface with an ASTC5x5 format
+*/
+   brw_astc5x5_perform_wa(brw);
+
/* Bind all inputs, derive varying and size information:
 */
brw_merge_inputs(brw, arrays);
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index adf60a840b..ccdb537227 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -447,6 +447,11 @@ brw_aux_surface_disabled(const struct brw_context *brw,
 {
const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
 
+   if (brw->astc5x5_wa.required &&
+   brw->astc5x5_wa.texture_astc5x5_present) {
+  return true;
+   }
+
for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
   const struct intel_renderbuffer *irb =
  intel_renderbuffer(fb->_ColorDrawBuffers[i]);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] i965: use ASTC5x5 workaround in brw_compute

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

Perform the ASTC5x5 workaround tasks for compute; note that
the function does not do anything and immediately returns
if the bug is not present on the hardware.

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/brw_compute.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
b/src/mesa/drivers/dri/i965/brw_compute.c
index 9be7523bab..e338539dc1 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -179,6 +179,12 @@ brw_dispatch_compute_common(struct gl_context *ctx)
 
brw_predraw_resolve_inputs(brw, false);
 
+   /* if necessary, perform astc5x5 workarounds to make sure any sampler does
+* not sample sample from a surface using an auxilary buffer within the
+* same batch of sampling from a surface with an ASTC5x5 format
+*/
+   brw_astc5x5_perform_wa(brw);
+
/* Flush the batch if the batch/state buffers are nearly full.  We can
 * grow them if needed, but this is not free, so we'd like to avoid it.
 */
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] i965: ASTC5x5 workaround

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

This patch series implements a needed workaround for Gen9 for ASTC5x5
sampler reads. The crux of the work around is to make sure that the
sampler does not read an ASTC5x5 texture and a surface with an auxiliary
buffer without having a texture cache invalidate between such accesses.


Kevin Rogovin (5):
  i965: define astc5x5 workaround infrastructure
  i965: ASTC5x5 workaround logic for blorp
  i965: set ASTC5x5 workaround texture type tracking on texture validate
  i965: use ASTC5x5 workaround in brw_draw
  i965: use ASTC5x5 workaround in brw_compute

 src/mesa/drivers/dri/i965/brw_compute.c  |  6 +++
 src/mesa/drivers/dri/i965/brw_context.c  | 63 
 src/mesa/drivers/dri/i965/brw_context.h  | 23 +
 src/mesa/drivers/dri/i965/brw_draw.c |  6 +++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  5 ++
 src/mesa/drivers/dri/i965/genX_blorp_exec.c  |  5 ++
 src/mesa/drivers/dri/i965/intel_batchbuffer.c|  1 +
 src/mesa/drivers/dri/i965/intel_tex_image.c  | 16 --
 src/mesa/drivers/dri/i965/intel_tex_validate.c   | 13 +
 9 files changed, 134 insertions(+), 4 deletions(-)

-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] i965: ASTC5x5 workaround logic for blorp

2017-12-01 Thread kevin . rogovin
From: Kevin Rogovin 

Blorp will only read from an ASTC5x5 texture if it copies from
such a surface, which can only happen if an application is fetching
such pixels. Because an ASTC5x5 texture can never be a render
target, we do not need to worry about blorp reading such surfaces
on framebuffer blits, or any other copying from a framebuffer.

Signed-off-by: Kevin Rogovin 
---
 src/mesa/drivers/dri/i965/genX_blorp_exec.c |  5 +
 src/mesa/drivers/dri/i965/intel_tex_image.c | 16 
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c 
b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 87e90fde91..73f72d2603 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -230,6 +230,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
struct gl_context *ctx = >ctx;
bool check_aperture_failed_once = false;
 
+   if (brw->astc5x5_wa.blorp_sampling_from_astc5x5) {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_ASTC5x5);
+   } else {
+  brw_set_astc5x5_wa_mode(brw, BRW_ASTC5x5_WA_MODE_HAS_AUX);
+   }
/* Flush the sampler and render caches.  We definitely need to flush the
 * sampler cache so that we get updated contents from the render cache for
 * the glBlitFramebuffer() source.  Also, we are sometimes warned in the
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24f03..60028bb67a 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -759,10 +759,18 @@ intel_get_tex_sub_image(struct gl_context *ctx,
DBG("%s\n", __func__);
 
if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-  if (intel_gettexsubimage_blorp(brw, texImage,
- xoffset, yoffset, zoffset,
- width, height, depth, format, type,
- pixels, &ctx->Pack))
+  bool blorp_success;
+
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 =
+ (texImage->TexFormat == MESA_FORMAT_RGBA_ASTC_5x5 ||
+  texImage->TexFormat == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5);
+  blorp_success = intel_gettexsubimage_blorp(brw, texImage,
+ xoffset, yoffset, zoffset,
+ width, height, depth,
+ format, type, pixels,
+ &ctx->Pack);
+  brw->astc5x5_wa.blorp_sampling_from_astc5x5 = false;
+  if (blorp_success)
  return;
 
   perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
-- 
2.14.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GBM and the Device Memory Allocator Proposals

2017-12-01 Thread Nicolai Hähnle

On 01.12.2017 16:06, Rob Clark wrote:

On Thu, Nov 30, 2017 at 5:43 PM, Nicolai Hähnle  wrote:

Hi,

I've had a chance to look a bit more closely at the allocator prototype
repository now. There's a whole bunch of low-level API design feedback, but
for now let's focus on the high-level stuff first.

Going by the 4.5 major object types (as also seen on slide 5 of your
presentation [0]), assertions and usages make sense to me.

Capabilities and capability sets should be cleaned up in my opinion, as the
status quo is overly obfuscating things. What capability sets really
represent, as far as I understand them, is *memory layouts*, and so that's
what they should be called.

This conceptually simplifies `derive_capabilities` significantly without any
loss of expressiveness as far as I can see. Given two lists of memory
layouts, we simply look for which memory layouts appear in both lists, and
then merge their constraints and capabilities.

Merging constraints looks good to me.

Capabilities need some more thought. The prototype removes capabilities when
merging layouts, but I'd argue that that is often undesirable. (In fact, I
cannot think of capabilities which we'd always want to remove.)

A typical example for this is compression (i.e. DCC in our case). For
rendering usage, we'd return something like:

Memory layout: AMD/tiled; constraints(alignment=64k); caps(AMD/DCC)

For display usage, we might return (depending on hardware):

Memory layout: AMD/tiled; constraints(alignment=64k); caps(none)

Merging these in the prototype would remove the DCC capability, even though
it might well make sense to keep it there for rendering. Dealing with the
fact that display usage does not have this capability is precisely one of
the two things that transitions are about! The other thing that transitions
are about is caches.

I think this is kind of what Rob was saying in one of his mails.


Perhaps "layout" is a better name than "caps".. either way I think of
both AMD/tiled and AMD/DCC as the same type of "thing".. the
difference between AMD/tiled and AMD/DCC is that a transition can be
provided for AMD/DCC.  Other than that they are both things describing
the layout.


The reason that a transition can be provided is that they aren't quite 
the same thing, though. In a very real sense, AMD/DCC is a "child" 
property of AMD/tiled: DCC is implemented as a meta surface whose memory 
layout depends on the layout of the main surface.


Although, if there are GPUs that can do an in-place "transition" between 
different tiling layouts, then the distinction is perhaps really not as 
clear-cut. I guess that would only apply to tiled renderers.




So lets say you have a setup where both display and GPU supported
FOO/tiled, but only GPU supported compressed (FOO/CC) and cached
(FOO/cached).  But the GPU supported the following transitions:

   trans_a: FOO/CC -> null
   trans_b: FOO/cached -> null

Then the sets for each device (in order of preference):

GPU:
   1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=32k)
   2: caps(FOO/tiled, FOO/CC); constraints(alignment=32k)
   3: caps(FOO/tiled); constraints(alignment=32k)

Display:
   1: caps(FOO/tiled); constraints(alignment=64k)

Merged Result:
   1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=64k);
  transition(GPU->display: trans_a, trans_b; display->GPU: none)
   2: caps(FOO/tiled, FOO/CC); constraints(alignment=64k);
  transition(GPU->display: trans_a; display->GPU: none)
   3: caps(FOO/tiled); constraints(alignment=64k);
  transition(GPU->display: none; display->GPU: none)


We definitely don't want to expose a way of getting uncached rendering 
surfaces for radeonsi. I mean, I think we are supposed to be able to 
program our hardware so that the backend bypasses all caches, but (a) 
nobody validates that and (b) it's basically suicide in terms of 
performance. Let's build fewer footguns :)


So at least for radeonsi, we wouldn't want to have an AMD/cached bit, 
but we'd still want to have a transition between the GPU and display 
precisely to flush caches.




Two interesting questions:

1. If we query for multiple usages on the same device, can we get a
capability which can only be used for a subset of those usages?


I think the original idea was, "no"..  perhaps that restriction
could be lifted if transitions were part of the result.  Or maybe you
just query independently the same device for multiple different
usages, and then merge that cap-set.

(Do we need to care about intra-device transitions?  Or can we just
let the driver care about that, same as it always has?)


2. What happens when we merge memory layouts with sets of capabilities where
neither is a subset of the other?


I think this is a case where no zero-copy sharing is possible, right?


Not necessarily. Let's say we have some industry-standard tiling layout 
foo, and vendors support their own proprietary framebuffer compression 
on top of 

Re: [Mesa-dev] V2 Initial GS NIR support for radeonsi

2017-12-01 Thread Marek Olšák
Commit "radeonsi: enable gs support for nir backend" also modifies st/mesa.

If there are no piglit regressions for TGSI, all ac and radeonsi patches are:

Reviewed-by: Marek Olšák 

Marek

On Thu, Nov 30, 2017 at 5:47 AM, Timothy Arceri  wrote:
> On 30/11/17 14:00, Dieter Nützel wrote:
>>
>> Hello Timo,
>>
>> do you have a V3 handy...? ;-)
>
>
> I haven't run piglit yet after rebasing so run at your own risk.
>
> https://github.com/tarceri/Mesa.git radeonsi_nir_final
>
>
>
>>
>> Greetings,
>> Dieter
>>
>> Am 23.11.2017 06:31, schrieb Timothy Arceri:
>>>
>>> On 23/11/17 15:09, Dieter Nützel wrote:

 Am 22.11.2017 10:29, schrieb Timothy Arceri:
>
> This series depends on [1] and [2].
>
> V2
>  - use driver_location as per Nicolais suggestion
>  - tidy ups as per Mareks suggestions
>  - bug fixes (many more piglit tests now passing)
>
> [1] https://patchwork.freedesktop.org/series/34131/
> [2] https://patchwork.freedesktop.org/series/34132/


 Hello Timothy,

 I could run Unigine_Heaven-4.0 (with tess disabled of course) and
 Unigine_Valley-1.0 with all 3 together on my RX580.
 If I try to switch to wireframe, the 'game' window disappears (as
 expected, too).

 SOURCE/Unigine_Valley-1.0> echo $R600_DEBUG
 nir

 So here is my

 Tested-by: Dieter Nützel 

 on all _3_ series.
>>>
>>>
>>> Cool. Thanks for testing.
>>>

 GREAT work!
 Dieter
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/x11: Remove unneeded free() on always null string

2017-12-01 Thread Vadym Shovkoplias
Hi Eric,

Mostly by a static analysis tool. It found at least 7 issues with useless
free() calls and other problems that probably should be fixed.
Could you please suggest whether I should create one cumulative commit for
this, or separate commits?

On Fri, Dec 1, 2017 at 5:41 PM, Eric Engestrom 
wrote:

> On Friday, 2017-12-01 17:08:53 +0200, vadim.shovkopl...@gmail.com wrote:
> > From: Vadym Shovkoplias 
> >
> > In this condition dri2_dpy->driver_name string always equals
> > NULL, so call to free() is useless
> >
> > Signed-off-by: Vadym Shovkoplias 
>
> Reviewed and pushed :)
>
> Are you finding all of these by inspection, or are you using a tool?
>
> > ---
> >  src/egl/drivers/dri2/platform_x11.c | 1 -
> >  1 file changed, 1 deletion(-)
> >
> > diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/
> platform_x11.c
> > index c49cb1f..8ede590b 100644
> > --- a/src/egl/drivers/dri2/platform_x11.c
> > +++ b/src/egl/drivers/dri2/platform_x11.c
> > @@ -704,7 +704,6 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
> >
> > if (dri2_dpy->driver_name == NULL) {
> >close(dri2_dpy->fd);
> > -  free(dri2_dpy->driver_name);
> >free(connect);
> >return EGL_FALSE;
> > }
> > --
> > 2.7.4
> >
>



-- 

Vadym Shovkoplias | Software engineer
GlobalLogic
P +x.xxx.xxx.  M +3.8050.931.7304  S vadym.shovkoplias
www.globallogic.com

http://www.globallogic.com/email_disclaimer.txt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/x11: Remove unneeded free() on always null string

2017-12-01 Thread Eric Engestrom
On Friday, 2017-12-01 17:08:53 +0200, vadim.shovkopl...@gmail.com wrote:
> From: Vadym Shovkoplias 
> 
> In this condition dri2_dpy->driver_name string always equals
> NULL, so call to free() is useless
> 
> Signed-off-by: Vadym Shovkoplias 

Reviewed and pushed :)

Are you finding all of these by inspection, or are you using a tool?

> ---
>  src/egl/drivers/dri2/platform_x11.c | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/src/egl/drivers/dri2/platform_x11.c 
> b/src/egl/drivers/dri2/platform_x11.c
> index c49cb1f..8ede590b 100644
> --- a/src/egl/drivers/dri2/platform_x11.c
> +++ b/src/egl/drivers/dri2/platform_x11.c
> @@ -704,7 +704,6 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
>  
> if (dri2_dpy->driver_name == NULL) {
>close(dri2_dpy->fd);
> -  free(dri2_dpy->driver_name);
>free(connect);
>return EGL_FALSE;
> }
> -- 
> 2.7.4
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radv: do not print ASM to stderr when dumping shaders

2017-12-01 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index cdf8e7a114..c8794d06ea 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -508,7 +508,7 @@ radv_dump_shader(struct radv_pipeline *pipeline,
nir_print_shader(shader->nir, f);
}
 
-   fprintf(stderr, "DISASM:\n%s\n", shader->disasm_string);
+   fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
 
radv_shader_dump_stats(pipeline->device, shader, stage, f);
 }
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] egl/x11: Remove unneeded free() on always null string

2017-12-01 Thread vadim . shovkoplias
From: Vadym Shovkoplias 

In this condition dri2_dpy->driver_name string always equals
NULL, so call to free() is useless

Signed-off-by: Vadym Shovkoplias 

---
 src/egl/drivers/dri2/platform_x11.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index c49cb1f..8ede590b 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -704,7 +704,6 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
 
if (dri2_dpy->driver_name == NULL) {
   close(dri2_dpy->fd);
-  free(dri2_dpy->driver_name);
   free(connect);
   return EGL_FALSE;
}
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] GBM and the Device Memory Allocator Proposals

2017-12-01 Thread Rob Clark
On Thu, Nov 30, 2017 at 5:43 PM, Nicolai Hähnle  wrote:
> Hi,
>
> I've had a chance to look a bit more closely at the allocator prototype
> repository now. There's a whole bunch of low-level API design feedback, but
> for now let's focus on the high-level stuff first.
>
> Going by the 4.5 major object types (as also seen on slide 5 of your
> presentation [0]), assertions and usages make sense to me.
>
> Capabilities and capability sets should be cleaned up in my opinion, as the
> status quo is overly obfuscating things. What capability sets really
> represent, as far as I understand them, is *memory layouts*, and so that's
> what they should be called.
>
> This conceptually simplifies `derive_capabilities` significantly without any
> loss of expressiveness as far as I can see. Given two lists of memory
> layouts, we simply look for which memory layouts appear in both lists, and
> then merge their constraints and capabilities.
>
> Merging constraints looks good to me.
>
> Capabilities need some more thought. The prototype removes capabilities when
> merging layouts, but I'd argue that that is often undesirable. (In fact, I
> cannot think of capabilities which we'd always want to remove.)
>
> A typical example for this is compression (i.e. DCC in our case). For
> rendering usage, we'd return something like:
>
> Memory layout: AMD/tiled; constraints(alignment=64k); caps(AMD/DCC)
>
> For display usage, we might return (depending on hardware):
>
> Memory layout: AMD/tiled; constraints(alignment=64k); caps(none)
>
> Merging these in the prototype would remove the DCC capability, even though
> it might well make sense to keep it there for rendering. Dealing with the
> fact that display usage does not have this capability is precisely one of
> the two things that transitions are about! The other thing that transitions
> are about is caches.
>
> I think this is kind of what Rob was saying in one of his mails.

Perhaps "layout" is a better name than "caps".. either way I think of
both AMD/tiled and AMD/DCC as the same type of "thing".. the
difference between AMD/tiled and AMD/DCC is that a transition can be
provided for AMD/DCC.  Other than that they are both things describing
the layout.

So lets say you have a setup where both display and GPU supported
FOO/tiled, but only GPU supported compressed (FOO/CC) and cached
(FOO/cached).  But the GPU supported the following transitions:

  trans_a: FOO/CC -> null
  trans_b: FOO/cached -> null

Then the sets for each device (in order of preference):

GPU:
  1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=32k)
  2: caps(FOO/tiled, FOO/CC); constraints(alignment=32k)
  3: caps(FOO/tiled); constraints(alignment=32k)

Display:
  1: caps(FOO/tiled); constraints(alignment=64k)

Merged Result:
  1: caps(FOO/tiled, FOO/CC, FOO/cached); constraints(alignment=64k);
 transition(GPU->display: trans_a, trans_b; display->GPU: none)
  2: caps(FOO/tiled, FOO/CC); constraints(alignment=64k);
 transition(GPU->display: trans_a; display->GPU: none)
  3: caps(FOO/tiled); constraints(alignment=64k);
 transition(GPU->display: none; display->GPU: none)

> Two interesting questions:
>
> 1. If we query for multiple usages on the same device, can we get a
> capability which can only be used for a subset of those usages?

I think the original idea was, "no"..  perhaps that restriction
could be lifted if transitions were part of the result.  Or maybe you
just query independently the same device for multiple different
usages, and then merge that cap-set.

(Do we need to care about intra-device transitions?  Or can we just
let the driver care about that, same as it always has?)

> 2. What happens when we merge memory layouts with sets of capabilities where
> neither is a subset of the other?

I think this is a case where no zero-copy sharing is possible, right?

> As for the actual transition API, I accept that some metadata may be
> required, and the metadata probably needs to depend on the memory layout,
> which is often vendor-specific. But even linear layouts need some
> transitions for caches. We probably need at least some generic "off-device
> usage" bit.

I've started thinking of cached as a capability with a transition.. I
think that helps.  Maybe it needs to somehow be more specific (ie. if
you have two devices both with their own cache with no coherency
between the two)

BR,
-R

>
> Cheers,
> Nicolai
>
> [0] https://www.x.org/wiki/Events/XDC2017/jones_allocator.pdf
>
>
> On 21.11.2017 02:11, James Jones wrote:
>>
>> As many here know at this point, I've been working on solving issues
>> related to DMA-capable memory allocation for various devices for some time
>> now.  I'd like to take this opportunity to apologize for the way I handled
>> the EGL stream proposals.  I understand now that the development process
>> followed there was unacceptable to the community and likely offended many
>> great engineers.
>>
>> Moving forward, I attempted 

Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-01 Thread Tomasz Figa
On Fri, Dec 1, 2017 at 11:20 PM, Rob Herring  wrote:
> On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss  wrote:
>> On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
>>> On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli >> m> wrote:
>>> >
>>> >
>>> > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
>>> > >
>>> > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss >> > > ra.com>
>>> > > wrote:
>>> > > >
>>> > > > Hey,
>>> > > >
>>> > > > On Tue, 2017-11-28 at 11:49 +, Emil Velikov wrote:
>>> > > > >
>>> > > > > On 28 November 2017 at 10:45, Tapani Pälli >> > > > > l.com>
>>> > > > > wrote:
>>> > > > > >
>>> > > > > > Hi;
>>> > > > > >
>>> > > > > >
>>> > > > > > On 11/27/2017 04:14 PM, Robert Foss wrote:
>>> > > > > > >
>>> > > > > > >
>>> > > > > > > From: Tomasz Figa 
>>> > > > > > >
>>> > > > > > > There is no API available to properly query the
>>> > > > > > > IMPLEMENTATION_DEFINED
>>> > > > > > > format. As a workaround we rely here on gralloc
>>> > > > > > > allocating either
>>> > > > > > > > an arbitrary YCbCr 4:2:0 or RGBX_8888, with the latter
>>> > > > > > > being
>>> > > > > > > recognized
>>> > > > > > > by lock_ycbcr failing.
>>> > > > > > >
>>> > > > > > > Reviewed-on: https://chromium-review.googlesource.com/566
>>> > > > > > > 793
>>> > > > > > >
>>> > > > > > > Signed-off-by: Tomasz Figa 
>>> > > > > > > Reviewed-by: Chad Versace 
>>> > > > > > > Signed-off-by: Robert Foss 
>>> > > > > > > ---
>>> > > > > > >src/egl/drivers/dri2/platform_android.c | 39
>>> > > > > > > +++--
>>> > > > > > >1 file changed, 37 insertions(+), 2 deletions(-)
>>> > > > > > >
>>> > > > > > > diff --git a/src/egl/drivers/dri2/platform_android.c
>>> > > > > > > b/src/egl/drivers/dri2/platform_android.c
>>> > > > > > > index 63223e9a69..ae914d79c1 100644
>>> > > > > > > --- a/src/egl/drivers/dri2/platform_android.c
>>> > > > > > > +++ b/src/egl/drivers/dri2/platform_android.c
>>> > > > > > > @@ -59,6 +59,10 @@ static const struct droid_yuv_format
>>> > > > > > > droid_yuv_formats[] = {
>>> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   0, 1,
>>> > > > > > > __DRI_IMAGE_FOURCC_YUV420
>>> > > > > > > },
>>> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   1, 1,
>>> > > > > > > __DRI_IMAGE_FOURCC_YVU420
>>> > > > > > > },
>>> > > > > > >   { HAL_PIXEL_FORMAT_YV12,1, 1,
>>> > > > > > > __DRI_IMAGE_FOURCC_YVU420
>>> > > > > > > },
>>> > > > > > > +   /* HACK: See droid_create_image_from_prime_fd() and
>>> > > > > > > b/32077885. */
>>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 2,
>>> > > > > > > __DRI_IMAGE_FOURCC_NV12 },
>>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 1,
>>> > > > > > > __DRI_IMAGE_FOURCC_YUV420 },
>>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   1, 1,
>>> > > > > > > __DRI_IMAGE_FOURCC_YVU420 },
>>> > > > > >
>>> > > > > >
>>> > > > > >
>>> > > > > > One alternative way would be to ask gralloc about these
>>> > > > > > formats. On
>>> > > > > > gralloc0
>>> > > > > > this would need a perform() hook and gralloc1 has
>>> > > > > > getFormat(). This
>>> > > > > > is how
>>> > > > > > it is done currently on Android-IA, see following commits:
>>> > > > > >
>>> > > > > > https://github.com/intel/external-mesa/commit/deb323eafa321
>>> > > > > > c725805a
>>> > > > > > 702ed19cb4983346b60
>>> > > > > >
>>> > > > > > https://github.com/intel/external-mesa/commit/7cc01beaf540e
>>> > > > > > 29862853
>>> > > > > > 561ef93c6c4e86c4c1a
>>> > > > > >
>>> > > > > > Do you think this approach would work with Chromium as
>>> > > > > > well?
>>> > > > > >
>>> > > > >
>>> > > > > i think the Android-IA approach looks good, although it
>>> > > > > depends on
>>> > > > > local gralloc0 changes. With gralloc1 on the horizon, I don't
>>> > > > > know
>>> > > > > how
>>> > > > > much sense it makes to extend the predecessor.
>>> > > > > AFAICT the patch should not cause any issues and it's nicely
>>> > > > > documented.
>>> > > >
>>> > > >
>>> > > > I had a look at the chromiumos/minigbm implementation, and it
>>> > > > does not
>>> > > > contain a gralloc1 implementation as far as I can see. I assume
>>> > > > that it
>>> > > > is available somewhere, but maybe not on a public branch.
>>> > > >
>>> > > > Would it be possible to make the minigbm gralloc1 impl. public?
>>> > > > That
>>> > > > way I could submit a patch mirroring what intel/minigbm does.
>>> > > >
>>> > > > If you fine folks at Google prefer to roll it yourselves,
>>> > > > just give
>>> > > > me a poke.
>>> > >
>>> > >
>>> > > There is no gralloc1 implementation for ChromiumOS minigbm and
>>> > > AFAIK
>>> > > we don't have any plans of adding one. AFAICT there is nothing we
>>> > > would gain with it over gralloc0.
>>> > >
>>> > > >
>>> > > > 

Re: [Mesa-dev] [PATCH 7/7] meson: fix deps and underlinkage of libGL

2017-12-01 Thread Emil Velikov
On 1 December 2017 at 12:50, Jon Turney  wrote:
> On 30/11/2017 16:46, Emil Velikov wrote:
>>
>> On 30 November 2017 at 15:13, Jon Turney wrote:
>>>
>>> On 29/11/2017 17:34, Dylan Baker wrote:
>
> [...]
>>>
>>>
>>> Maybe it's me that's missing something...
>>>
>>> There are references to functions provided by these libraries (xcb_glx,
>>> xcb,
>>> x11_xcb) in common code.
>>>
>> Having a reference to those (as printed by the linker as it errors) should
>> help.
>>
>> There's a lot of macros guarding the different parts of GLX - the
>> obvious ones GLX_ seem there.
>>
>> Another thing that comes to mind is the libloader_dri3_helper.
>> It should be a noop/empty for non-Linux/BSD but something is going wrong?
>
>
> Yes, as I wrote in the bit of my email you snipped, these dependencies come
> via libloader_dri3_helper.
>
The train of thought had left the station, apologies.

> When configured -Ddri3=false (which is always the case on non-linux
> targets), libGL is underlinked, so this patch is correct.
>
> Even with this patch applied, mesa fails to build for linux when configured
> with -Ddri3=false, due to various undefined references to libXext:
>
>> [3/3] Linking target src/glx/libGL.so.1.2.0.
>> FAILED: src/glx/libGL.so.1.2.0
>> cc  -o src/glx/libGL.so.1.2.0 'src/glx/GL@sha/src_glx_dummy.c.o'
>> -Wl,--no-undefined -Wl,--as-needed -shared -fPIC -Wl,--start-group
>> -Wl,-soname,libGL.so.1 -Wl,--whole-archive src/glx/libglx.a
>> -Wl,--no-whole-archive src/mapi/glapi/libglapi_static.a
>> src/mapi/shared-glapi/libglapi.so.0.0.0 src/loader/libloader.a
>> src/util/libmesa_util.a src/util/libxmlconfig.a -pthread -Wl,-Bsymbolic
>> -Wl,--gc-sections -ldrm -ldl -lm -Wl,--end-group -lX11 -lxcb-glx -lxcb
>> -lX11-xcb -lX11 -lxcb -lxcb-dri2 -ldrm -lX11 -ldrm -lz -lexpat -lm
>> '-Wl,-rpath,$ORIGIN/:$ORIGIN/../mapi/shared-glapi'
>> -Wl,-rpath-link,/home/jon/src/mesa/build/src/glx:/home/jon/src/mesa/build/src/mapi/shared-glapi
Hmm the command line seems rather iffy.

The LDFLAGS/linker flags should not be mixed with LIBADD/library dependencies.
In the above example -Wl,-Bsymbolic and -Wl,--gc-sections is thrown in
between the libraries while the whole thing is wrapped in
--start/stop-group.
Even if that doesn't cause a problem now, it's something to clean up.

Unrelated nitpick - there is a double "deps_xcb_dri3 = []" in meson.build

>> src/glx/libglx.a(dri2.c.o): In function `DRI2CloseDisplay':
>> /home/jon/src/mesa/build/../src/glx/dri2.c:58: undefined reference to
>> `XextRemoveDisplay'

After a lot of head scratching I noticed what's wrong.
The dependencies of loader_dri3 are off:
Meson:
dep_xshmfence, dep_xcb_present, dep_xcb_dri3, dep_xcb_sync, dep_x11_xcb,
dep_xext, dep_xdamage, dep_xcb_glx, dep_libdrm,

Autotools:
x11-xcb xcb xcb-dri3 xcb-xfixes xcb-present xcb-sync xshmfence libdrm

So one should really move the following to glx and (ideally) sort the
remaining more like autotools.
dep_xext, dep_xdamage, dep_xcb_glx,

HTH
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-01 Thread Rob Herring
On Fri, Dec 1, 2017 at 7:30 AM, Robert Foss  wrote:
> On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
>> On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli > m> wrote:
>> >
>> >
>> > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
>> > >
>> > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss > > > ra.com>
>> > > wrote:
>> > > >
>> > > > Hey,
>> > > >
>> > > > On Tue, 2017-11-28 at 11:49 +, Emil Velikov wrote:
>> > > > >
>> > > > > On 28 November 2017 at 10:45, Tapani Pälli > > > > > l.com>
>> > > > > wrote:
>> > > > > >
>> > > > > > Hi;
>> > > > > >
>> > > > > >
>> > > > > > On 11/27/2017 04:14 PM, Robert Foss wrote:
>> > > > > > >
>> > > > > > >
>> > > > > > > From: Tomasz Figa 
>> > > > > > >
>> > > > > > > There is no API available to properly query the
>> > > > > > > IMPLEMENTATION_DEFINED
>> > > > > > > format. As a workaround we rely here on gralloc
>> > > > > > > allocating either
>> > > > > > > an arbitrary YCbCr 4:2:0 or RGBX_8888, with the latter
>> > > > > > > being
>> > > > > > > recognized
>> > > > > > > by lock_ycbcr failing.
>> > > > > > >
>> > > > > > > Reviewed-on: https://chromium-review.googlesource.com/566
>> > > > > > > 793
>> > > > > > >
>> > > > > > > Signed-off-by: Tomasz Figa 
>> > > > > > > Reviewed-by: Chad Versace 
>> > > > > > > Signed-off-by: Robert Foss 
>> > > > > > > ---
>> > > > > > >src/egl/drivers/dri2/platform_android.c | 39
>> > > > > > > +++--
>> > > > > > >1 file changed, 37 insertions(+), 2 deletions(-)
>> > > > > > >
>> > > > > > > diff --git a/src/egl/drivers/dri2/platform_android.c
>> > > > > > > b/src/egl/drivers/dri2/platform_android.c
>> > > > > > > index 63223e9a69..ae914d79c1 100644
>> > > > > > > --- a/src/egl/drivers/dri2/platform_android.c
>> > > > > > > +++ b/src/egl/drivers/dri2/platform_android.c
>> > > > > > > @@ -59,6 +59,10 @@ static const struct droid_yuv_format
>> > > > > > > droid_yuv_formats[] = {
>> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   0, 1,
>> > > > > > > __DRI_IMAGE_FOURCC_YUV420
>> > > > > > > },
>> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   1, 1,
>> > > > > > > __DRI_IMAGE_FOURCC_YVU420
>> > > > > > > },
>> > > > > > >   { HAL_PIXEL_FORMAT_YV12,1, 1,
>> > > > > > > __DRI_IMAGE_FOURCC_YVU420
>> > > > > > > },
>> > > > > > > +   /* HACK: See droid_create_image_from_prime_fd() and
>> > > > > > > b/32077885. */
>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 2,
>> > > > > > > __DRI_IMAGE_FOURCC_NV12 },
>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 1,
>> > > > > > > __DRI_IMAGE_FOURCC_YUV420 },
>> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   1, 1,
>> > > > > > > __DRI_IMAGE_FOURCC_YVU420 },
>> > > > > >
>> > > > > >
>> > > > > >
>> > > > > > One alternative way would be to ask gralloc about these
>> > > > > > formats. On
>> > > > > > gralloc0
>> > > > > > this would need a perform() hook and gralloc1 has
>> > > > > > getFormat(). This
>> > > > > > is how
>> > > > > > it is done currently on Android-IA, see following commits:
>> > > > > >
>> > > > > > https://github.com/intel/external-mesa/commit/deb323eafa321
>> > > > > > c725805a
>> > > > > > 702ed19cb4983346b60
>> > > > > >
>> > > > > > https://github.com/intel/external-mesa/commit/7cc01beaf540e
>> > > > > > 29862853
>> > > > > > 561ef93c6c4e86c4c1a
>> > > > > >
>> > > > > > Do you think this approach would work with Chromium as
>> > > > > > well?
>> > > > > >
>> > > > >
>> > > > > i think the Android-IA approach looks good, although it
>> > > > > depends on
>> > > > > local gralloc0 changes. With gralloc1 on the horizon, I don't
>> > > > > know
>> > > > > how
>> > > > > much sense it makes to extend the predecessor.
>> > > > > AFAICT the patch should not cause any issues and it's nicely
>> > > > > documented.
>> > > >
>> > > >
>> > > > I had a look at the chromiumos/minigbm implementation, and it
>> > > > does not
>> > > > contain a gralloc1 implementation as far as I can see. I assume
>> > > > that it
>> > > > is available somewhere, but maybe not on a public branch.
>> > > >
>> > > > Would it be possible to make the minigbm gralloc1 impl. public?
>> > > > That
>> > > > way I could submit a patch mirroring what intel/minigbm does.
>> > > >
>> > > > If you fine folks at Google prefer to roll it yourselves,
>> > > > just give
>> > > > me a poke.
>> > >
>> > >
>> > > There is no gralloc1 implementation for ChromiumOS minigbm and
>> > > AFAIK
>> > > we don't have any plans of adding one. AFAICT there is nothing we
>> > > would gain with it over gralloc0.
>> > >
>> > > >
>> > > > Those are the two options I'm seeing.
>> > > >
>> > > > As for gralloc0 support, would it be needed?
>> > > >
>> > > > >
>> > > > > Perhaps someone from the Google/CrOS team can assist in
>> > > > > 

Re: [Mesa-dev] [PATCH 2/2] glsl: don't run intrastage array validation when the interface type is not an array

2017-12-01 Thread Nicolai Hähnle

On 01.12.2017 08:20, Samuel Iglesias Gonsálvez wrote:

On Thu, 2017-11-30 at 15:47 +0100, Nicolai Hähnle wrote:

Can you add an explanation / spec quote for this?



Yes.

"We validate that the interface block array type's definition matches.
However, the function could be previously called if a non-array
interface block has different type definitions -for example, when the
precision qualifier differs in a GLSL ES shader, we would create two
different types-, and it would return invalid as both definitions are
non-arrays.

We fix this by specifying that at least one definition should be an
array to call the validation."

Does it sound good to you?


Yes, thanks. R-b with that.

Cheers,
Nicolai



Sam


On 09.11.2017 12:48, Samuel Iglesias Gonsálvez wrote:

Signed-off-by: Samuel Iglesias Gonsálvez 
---
   src/compiler/glsl/link_interface_blocks.cpp | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/link_interface_blocks.cpp
b/src/compiler/glsl/link_interface_blocks.cpp
index c2c3b58f821..ce90d916075 100644
--- a/src/compiler/glsl/link_interface_blocks.cpp
+++ b/src/compiler/glsl/link_interface_blocks.cpp
@@ -137,7 +137,7 @@ intrastage_match(ir_variable *a,
  /* If a block is an array then it must match across the
shader.
   * Unsized arrays are also processed and matched agaist sized
arrays.
   */
-   if (b->type != a->type &&
+   if (b->type != a->type && (b->type->is_array() || a->type-

is_array()) &&

  (b->is_interface_instance() || a->is_interface_instance())
&&
  !validate_intrastage_arrays(prog, b, a))
 return false;







--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glx/dri3: Remove unused deviceName variable

2017-12-01 Thread Eric Engestrom
On Friday, 2017-12-01 13:23:02 +0200, vadim.shovkopl...@gmail.com wrote:
> From: Vadym Shovkoplias 
> 
> deviceName string is declared, assigned and freed but actually
> never used in dri3_create_screen() function.
> 
> Fixes: 2d94601582e ("Add DRI3+Present loader")

Indeed, been like this since creation, never changed.
Reviewed-by: Eric Engestrom 

and pushed, thanks :)

> 
> Signed-off-by: Vadym Shovkoplias 
> 
> ---
>  src/glx/dri3_glx.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
> index a10306f..f280a8c 100644
> --- a/src/glx/dri3_glx.c
> +++ b/src/glx/dri3_glx.c
> @@ -800,7 +800,7 @@ dri3_create_screen(int screen, struct glx_display * priv)
> struct dri3_screen *psc;
> __GLXDRIscreen *psp;
> struct glx_config *configs = NULL, *visuals = NULL;
> -   char *driverName, *deviceName, *tmp;
> +   char *driverName, *tmp;
> int i;
> unsigned char disable;
>  
> @@ -830,7 +830,6 @@ dri3_create_screen(int screen, struct glx_display * priv)
> }
>  
> psc->fd = loader_get_user_preferred_fd(psc->fd, >is_different_gpu);
> -   deviceName = NULL;
>  
> driverName = loader_get_driver_for_fd(psc->fd);
> if (!driverName) {
> @@ -956,7 +955,6 @@ dri3_create_screen(int screen, struct glx_display * priv)
>__glXEnableDirectExtension(>base, "GLX_EXT_buffer_age");
>  
> free(driverName);
> -   free(deviceName);
>  
> tmp = getenv("LIBGL_SHOW_FPS");
> psc->show_fps_interval = tmp ? atoi(tmp) : 0;
> @@ -983,7 +981,6 @@ handle_error:
>dlclose(psc->driver);
>  
> free(driverName);
> -   free(deviceName);
> glx_screen_cleanup(>base);
> free(psc);
>  
> -- 
> 2.7.4
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 14/29] anv/cmd_buffer: Apply subpass flushes before set_subpass

2017-12-01 Thread Pohjolainen, Topi
On Mon, Nov 27, 2017 at 07:06:04PM -0800, Jason Ekstrand wrote:
> This seems slightly more correct because it means that the flushes
> happen before any clears or resolves implied by the subpass transition.

After reading the next patch this patch seems incomplete both before
and after. Next patch seems to explicitly consider that flushes are
needed before and after whereas at this point it would be only
before (when this patch is applied) or after (without this patch).

I guess something else holds things together, I'm just not seeing
it?

> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 2d47179..bbe97f5 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -3197,10 +3197,10 @@ void genX(CmdBeginRenderPass)(
>  
> genX(flush_pipeline_select_3d)(cmd_buffer);
>  
> -   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
> -
> cmd_buffer->state.pending_pipe_bits |=
>cmd_buffer->state.pass->subpass_flushes[0];
> +
> +   genX(cmd_buffer_set_subpass)(cmd_buffer, pass->subpasses);
>  }
>  
>  void genX(CmdNextSubpass)(
> @@ -3220,11 +3220,11 @@ void genX(CmdNextSubpass)(
>  */
> cmd_buffer_subpass_transition_layouts(cmd_buffer, true);
>  
> -   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
> -
> uint32_t subpass_id = anv_get_subpass_id(_buffer->state);
> cmd_buffer->state.pending_pipe_bits |=
>cmd_buffer->state.pass->subpass_flushes[subpass_id];
> +
> +   genX(cmd_buffer_set_subpass)(cmd_buffer, cmd_buffer->state.subpass + 1);
>  }
>  
>  void genX(CmdEndRenderPass)(
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium/util: add u_transfer_helper

2017-12-01 Thread Rob Clark
On Thu, Nov 30, 2017 at 10:33 AM, Rob Clark  wrote:
> On Thu, Nov 30, 2017 at 10:04 AM, Nicolai Hähnle  wrote:
>> On 29.11.2017 14:48, Rob Clark wrote:
>>>
>>> Add a new helper that drivers can use to emulate various things that
>>> need special handling in particular in transfer_map:
>>>
>>>   1) z32_s8x24.. gl/gallium treats this as a single buffer with depth
>>>  and stencil interleaved but hardware frequently treats this as
>>>  separate z32 and s8 buffers.  Special pack/unpack handling is
>>>  needed in transfer_map/unmap to pack/unpack the exposed buffer
>>>
>>>   2) fake RGTC.. GPUs designed with GLES in mind, but which can other-
>>>  wise do GL3, if native RGTC is not supported it can be emulated
>>>  by converting to uncompressed internally, but needs pack/unpack
>>>  in transfer_map/unmap
>>>
>>> This could be possibly extended to handle MSAA resolve in map/unmap.
>>
>>
>> This looks mostly fine (though the MSAA thing needs to be resolved one way
>
> I was hoping if Eric was interested in using this, that maybe he could
> add in the MSAA resolve bits.. otherwise that might have to wait until
> I implement non-fake MSAA.
>
>> or the other), except there's a clash if you ever want to use threaded
>> contexts, which have their own threaded_resource and threaded_transfer.
>>
>> I haven't really given any thought yet to how to reconcile those.
>
> hmm, ok, I see the issue.. maybe it is ok though, if the driver
> installs the u_transfer_helper_* pipe fxns in the threaded_context
> that wraps the real context, and the vtbl used by u_transfer_helper
> has the threaded_context fxns instead of actual driver fxns.  (Since
> the transfer_helper is installed into the pipe_screen, it means you
> couldn't mix threaded and non-threaded contexts under a single
> screen.. not sure if there is ever a reason to do that?)

hmm, so the issue actually is there but not w/ threaded_transfer vs
u_transfer.. instead with threaded_resource vs u_transfer_resource.
:-/

u_transfer_helper really only needs to track the additional stencil
buffer and internal_format (although maybe adding MSAA resolves could
add more to that).. possibly I could track that a different way
(setter/getter fxns in vtbl?)

BR,
-R

> There are two cases with the transfer-helper:
>
>  1) the transfer is passed directly through to the driver and not
> intercepted.  In which case the pipe_transfer returned from
> ->transfer_map() is the driver's transfer_map (which doesn't subclass
> u_transfer.. which I should probably move to the .c file)
>
>  2) the transfer is handled by the helper, in which case the helper
> calls the driver's original ->transfer_map() via vtbl.  In this case
> the pipe_transfer passed back to st is u_transfer, but it has pointers
> to the driver's real pipe_transfer(s).  The pointers to the real
> pipe_transfer's could just as well be threaded_transfer's.
>
> In either case, the driver doesn't need to subclass u_transfer.. which
> I should move to .c to make it more obvious, I guess.
>
> So possibly threaded_context_create() might need to gain a 'bool
> use_transfer_helper' arg, or something like that.  But doesn't seem
> like a major problem.
>
> BR,
> -R
>
>> Cheers,
>> Nicolai
>>
>>
>>
>>>
>>> Signed-off-by: Rob Clark 
>>> ---
>>>   src/gallium/auxiliary/Makefile.sources |   2 +
>>>   src/gallium/auxiliary/meson.build  |   2 +
>>>   src/gallium/auxiliary/util/u_transfer_helper.c | 364
>>> +
>>>   src/gallium/auxiliary/util/u_transfer_helper.h | 121 
>>>   src/gallium/include/pipe/p_screen.h|   8 +-
>>>   5 files changed, 496 insertions(+), 1 deletion(-)
>>>   create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.c
>>>   create mode 100644 src/gallium/auxiliary/util/u_transfer_helper.h
>>>
>>> diff --git a/src/gallium/auxiliary/Makefile.sources
>>> b/src/gallium/auxiliary/Makefile.sources
>>> index f40c4723fae..a2dae04698c 100644
>>> --- a/src/gallium/auxiliary/Makefile.sources
>>> +++ b/src/gallium/auxiliary/Makefile.sources
>>> @@ -304,6 +304,8 @@ C_SOURCES := \
>>> util/u_tile.h \
>>> util/u_transfer.c \
>>> util/u_transfer.h \
>>> +   util/u_transfer_helper.c \
>>> +   util/u_transfer_helper.h \
>>> util/u_threaded_context.c \
>>> util/u_threaded_context.h \
>>> util/u_threaded_context_calls.h \
>>> diff --git a/src/gallium/auxiliary/meson.build
>>> b/src/gallium/auxiliary/meson.build
>>> index 3e623fd099f..8c242ec1a05 100644
>>> --- a/src/gallium/auxiliary/meson.build
>>> +++ b/src/gallium/auxiliary/meson.build
>>> @@ -324,6 +324,8 @@ files_libgallium = files(
>>> 'util/u_tile.h',
>>> 'util/u_transfer.c',
>>> 'util/u_transfer.h',
>>> +  'util/u_transfer_helper.c',
>>> +  'util/u_transfer_helper.h',
>>> 'util/u_threaded_context.c',
>>> 'util/u_threaded_context.h',
>>> 

Re: [Mesa-dev] [PATCH] egl/android: Partially handle HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED

2017-12-01 Thread Robert Foss
On Thu, 2017-11-30 at 11:14 -0600, Rob Herring wrote:
> On Thu, Nov 30, 2017 at 12:11 AM, Tapani Pälli  m> wrote:
> > 
> > 
> > On 11/30/2017 06:13 AM, Tomasz Figa wrote:
> > > 
> > > On Thu, Nov 30, 2017 at 3:43 AM, Robert Foss  > > ra.com>
> > > wrote:
> > > > 
> > > > Hey,
> > > > 
> > > > On Tue, 2017-11-28 at 11:49 +, Emil Velikov wrote:
> > > > > 
> > > > > On 28 November 2017 at 10:45, Tapani Pälli  > > > > l.com>
> > > > > wrote:
> > > > > > 
> > > > > > Hi;
> > > > > > 
> > > > > > 
> > > > > > On 11/27/2017 04:14 PM, Robert Foss wrote:
> > > > > > > 
> > > > > > > 
> > > > > > > From: Tomasz Figa 
> > > > > > > 
> > > > > > > There is no API available to properly query the
> > > > > > > IMPLEMENTATION_DEFINED
> > > > > > > format. As a workaround we rely here on gralloc
> > > > > > > allocating either
> > > > > > > > an arbitrary YCbCr 4:2:0 or RGBX_8888, with the latter
> > > > > > > being
> > > > > > > recognized
> > > > > > > by lock_ycbcr failing.
> > > > > > > 
> > > > > > > Reviewed-on: https://chromium-review.googlesource.com/566
> > > > > > > 793
> > > > > > > 
> > > > > > > Signed-off-by: Tomasz Figa 
> > > > > > > Reviewed-by: Chad Versace 
> > > > > > > Signed-off-by: Robert Foss 
> > > > > > > ---
> > > > > > >src/egl/drivers/dri2/platform_android.c | 39
> > > > > > > +++--
> > > > > > >1 file changed, 37 insertions(+), 2 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/src/egl/drivers/dri2/platform_android.c
> > > > > > > b/src/egl/drivers/dri2/platform_android.c
> > > > > > > index 63223e9a69..ae914d79c1 100644
> > > > > > > --- a/src/egl/drivers/dri2/platform_android.c
> > > > > > > +++ b/src/egl/drivers/dri2/platform_android.c
> > > > > > > @@ -59,6 +59,10 @@ static const struct droid_yuv_format
> > > > > > > droid_yuv_formats[] = {
> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   0, 1,
> > > > > > > __DRI_IMAGE_FOURCC_YUV420
> > > > > > > },
> > > > > > >   { HAL_PIXEL_FORMAT_YCbCr_420_888,   1, 1,
> > > > > > > __DRI_IMAGE_FOURCC_YVU420
> > > > > > > },
> > > > > > >   { HAL_PIXEL_FORMAT_YV12,1, 1,
> > > > > > > __DRI_IMAGE_FOURCC_YVU420
> > > > > > > },
> > > > > > > +   /* HACK: See droid_create_image_from_prime_fd() and
> > > > > > > b/32077885. */
> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 2,
> > > > > > > __DRI_IMAGE_FOURCC_NV12 },
> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   0, 1,
> > > > > > > __DRI_IMAGE_FOURCC_YUV420 },
> > > > > > > +   { HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED,   1, 1,
> > > > > > > __DRI_IMAGE_FOURCC_YVU420 },
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > One alternative way would be to ask gralloc about these
> > > > > > formats. On
> > > > > > gralloc0
> > > > > > this would need a perform() hook and gralloc1 has
> > > > > > getFormat(). This
> > > > > > is how
> > > > > > it is done currently on Android-IA, see following commits:
> > > > > > 
> > > > > > https://github.com/intel/external-mesa/commit/deb323eafa321
> > > > > > c725805a
> > > > > > 702ed19cb4983346b60
> > > > > > 
> > > > > > https://github.com/intel/external-mesa/commit/7cc01beaf540e
> > > > > > 29862853
> > > > > > 561ef93c6c4e86c4c1a
> > > > > > 
> > > > > > Do you think this approach would work with Chromium as
> > > > > > well?
> > > > > > 
> > > > > 
> > > > > i think the Android-IA approach looks good, although it
> > > > > depends on
> > > > > local gralloc0 changes. With gralloc1 on the horizon, I don't
> > > > > know
> > > > > how
> > > > > much sense it makes to extend the predecessor.
> > > > > AFAICT the patch should not cause any issues and it's nicely
> > > > > documented.
> > > > 
> > > > 
> > > > I had a look at the chromiumos/minigbm implementation, and it
> > > > does not
> > > > contain a gralloc1 implementation as far as I can see. I assume
> > > > that it
> > > > is available somewhere, but maybe not on a public branch.
> > > > 
> > > > Would it be possible to make the minigbm gralloc1 impl. public?
> > > > That
> > > > way I could submit a patch mirroring what intel/minigbm does.
> > > > 
> > > > > If you fine folks at Google prefer to roll it yourselves,
> > > > just give
> > > > me a poke.
> > > 
> > > 
> > > There is no gralloc1 implementation for ChromiumOS minigbm and
> > > AFAIK
> > > we don't have any plans of adding one. AFAICT there is nothing we
> > > would gain with it over gralloc0.
> > > 
> > > > 
> > > > Those are the two options I'm seeing.
> > > > 
> > > > As for gralloc0 support, would it be needed?
> > > > 
> > > > > 
> > > > > Perhaps someone from the Google/CrOS team can assist in
> > > > > making the
> > > > > bug
> > > > > public, although even then it might be better to focus on a
> > > > > 'perfect'
> > > > > gralloc1?
> > > > > 
> > > > > IMHO the patch looks 

Re: [Mesa-dev] [PATCH 1/2] meson: install dri internal headers.

2017-12-01 Thread Eric Engestrom
On Thursday, 2017-11-30 10:40:11 -0800, Dylan Baker wrote:
> Reported-by: Marc Dietrich 
> Signed-off-by: Dylan Baker 

Both patches are:
Reviewed-by: Eric Engestrom 

> ---
>  include/meson.build | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/include/meson.build b/include/meson.build
> index 35e7791507c..bae6742c4d6 100644
> --- a/include/meson.build
> +++ b/include/meson.build
> @@ -66,3 +66,7 @@ if with_egl
>  subdir : 'EGL',
>)
>  endif
> +
> +if with_dri
> +  install_headers('GL/internal/dri_interface.h', subdir : 'GL/internal')
> +endif
> -- 
> 2.15.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/va: Enable vaExportSurfaceHandle()

2017-12-01 Thread Eric Engestrom
On Friday, 2017-12-01 00:31:48 +, Mark Thompson wrote:
> It will be present from libva 2.1 (VAAPI 1.1.0 or higher).
> 
> Signed-off-by: Mark Thompson 
> ---
> See:
> 
> 
> 
> Also enabled in mpv:
> 
> 
> There are some other driver functions added in this new version:
> * MFContext (multi-frame) stuff exists for lock-step processing of multiple 
> streams.  As far as I can tell, it is only of value for server transcode 
> setups, and probably has little benefit when encode is already asynchronous 
> (which it isn't in the Intel driver).
> * CreateBuffer2 is for passing 2D buffers to/from the driver.  Nothing uses 
> it yet.
> * QueryProcessingRate is for querying expected performance.  It might be 
> sensible to implement, but would need more hardware information than I have 
> to make the necessary tables.
> All of them are left as NULL.
> 
> Thanks,
> 
> - Mark
> 
> 
>  src/gallium/state_trackers/va/context.c | 8 +++-
>  src/gallium/state_trackers/va/surface.c | 2 +-
>  2 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/state_trackers/va/context.c 
> b/src/gallium/state_trackers/va/context.c
> index 78e1f19ab7..c4abe77cf7 100644
> --- a/src/gallium/state_trackers/va/context.c
> +++ b/src/gallium/state_trackers/va/context.c
> @@ -89,7 +89,13 @@ static struct VADriverVTable vtable =
> ,
> ,
> ,
> -#if 0
> +#if VA_CHECK_VERSION(1, 1, 0)
> +   NULL, /* vaCreateMFContext */
> +   NULL, /* vaMFAddContext */
> +   NULL, /* vaMFReleaseContext */
> +   NULL, /* vaMFSubmit */
> +   NULL, /* vaCreateBuffer2 */
> +   NULL, /* vaQueryProcessingRate */

Might be time to use designated initialisers?

> ,
>  #endif
>  };
> diff --git a/src/gallium/state_trackers/va/surface.c 
> b/src/gallium/state_trackers/va/surface.c
> index 636505b720..f9412ce52e 100644
> --- a/src/gallium/state_trackers/va/surface.c
> +++ b/src/gallium/state_trackers/va/surface.c
> @@ -923,7 +923,7 @@ vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, 
> VAContextID context,
> return VA_STATUS_SUCCESS;
>  }
>  
> -#if 0
> +#if VA_CHECK_VERSION(1, 1, 0)
>  VAStatus
>  vlVaExportSurfaceHandle(VADriverContextP ctx,
>  VASurfaceID surface_id,
> -- 
> 2.11.0
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 20/22] nv50, nvc0: Support BGRX1010102 format for visuals.

2017-12-01 Thread Ilia Mirkin
On Fri, Dec 1, 2017 at 12:54 AM, Mario Kleiner
 wrote:
> On 11/29/2017 04:38 PM, Ilia Mirkin wrote:
>>
>> Why is this required? Can't you just use the BGR10_A2 format directly?
>>
>
> If i don't define this PIPE_FORMAT_B10G10R10X2_UNORM as "TD" = displayable,
> then it doesn't get exposed by the state tracker as a visual/fbconfig with
> RGBA = 10 10 10 0 under nouveau.
>
> Wayland's Weston doesn't like that at all and gives a screen with pixel
> trash instead of a proper desktop, probably because it falls back to a
> BGRA1010102 format with alpha channel, that might indeed be zero.
>
> On X11, all redirected/composited rendering only gives a black window client
> area, e.g., glxgears ends up as a black rectangle.
>
> With the patch i get proper Weston display, and proper composited X11.
> "Proper" within the limitations imposed by my hacks + tbd work on the ddx
> and kms driver.
>
>> The problem with exposing these sorts of formats is that blending with
>> DST_ALPHA would be incorrect -- it should get read in as 1.0, but will
>> end up with bogus values.
>
>
> Hm. My own application uses DST_ALPHA and ONE_MINUS_DST_ALPHA blending on
> the window system backbuffer in some demos and it seems to work fine on
> nouveau in depth 30 from what i can see. Not sure if this is due to the way
> my demos handle this though and there might be other cases that misbehave
> like you describe.
>
> Unfortunately nv50/g80_defs.xml.h doesn't define a BGR10 surface format
> without alpha channel.

Right. There is no such format - it's not supported. Normally the
state tracker works around hardware limitations like that rather than
the driver exposing things it can't do. OTOH we could have a fixup for
blending in nv50 and nvc0 to support this (we probably should anyways
for RGB10 support, which suffers from a similar issue, and is not
worked around by st/mesa's blending logic... this presents various
difficulties for hardware that can't do independent blend but can do
MRTs like the first half of the tesla generation).

My guess is that your application will run into trouble if you start
outputting non-1.0 alpha values from your shader - instead of just
blending, it'll get stored, and then on the next blend it'll come out
as that value rather than 1.0.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] meson: fix deps and underlinkage of libGL

2017-12-01 Thread Jon Turney

On 30/11/2017 16:46, Emil Velikov wrote:

On 30 November 2017 at 15:13, Jon Turney wrote:

On 29/11/2017 17:34, Dylan Baker wrote:

[...]


Maybe it's me that's missing something...

There are references to functions provided by these libraries (xcb_glx, xcb,
x11_xcb) in common code.


Having a reference to those (as printed by the linker as it errors) should help.

There's a lot of macros guarding the different parts of GLX - the
obvious ones GLX_ seem there.

Another thing that comes to mind is the libloader_dri3_helper.
It should be a noop/empty for non-Linux/BSD but something is going wrong?


Yes, as I wrote in the bit of my email you snipped, these dependencies 
come via libloader_dri3_helper.


When configured -Ddri3=false (which is always the case on non-linux 
targets), libGL is underlinked, so this patch is correct.


Even with this patch applied, mesa fails to build for linux when 
configured with -Ddri3=false, due to various undefined references to 
libXext:



[3/3] Linking target src/glx/libGL.so.1.2.0.
FAILED: src/glx/libGL.so.1.2.0
cc  -o src/glx/libGL.so.1.2.0 'src/glx/GL@sha/src_glx_dummy.c.o' 
-Wl,--no-undefined -Wl,--as-needed -shared -fPIC -Wl,--start-group 
-Wl,-soname,libGL.so.1 -Wl,--whole-archive src/glx/libglx.a 
-Wl,--no-whole-archive src/mapi/glapi/libglapi_static.a 
src/mapi/shared-glapi/libglapi.so.0.0.0 src/loader/libloader.a 
src/util/libmesa_util.a src/util/libxmlconfig.a -pthread -Wl,-Bsymbolic 
-Wl,--gc-sections -ldrm -ldl -lm -Wl,--end-group -lX11 -lxcb-glx -lxcb 
-lX11-xcb -lX11 -lxcb -lxcb-dri2 -ldrm -lX11 -ldrm -lz -lexpat -lm 
'-Wl,-rpath,$ORIGIN/:$ORIGIN/../mapi/shared-glapi' 
-Wl,-rpath-link,/home/jon/src/mesa/build/src/glx:/home/jon/src/mesa/build/src/mapi/shared-glapi
src/glx/libglx.a(dri2.c.o): In function `DRI2CloseDisplay':
/home/jon/src/mesa/build/../src/glx/dri2.c:58: undefined reference to 
`XextRemoveDisplay'
src/glx/libglx.a(dri2.c.o): In function `DRI2FindDisplay':
/home/jon/src/mesa/build/../src/glx/dri2.c:81: undefined reference to 
`XextFindDisplay'
/home/jon/src/mesa/build/../src/glx/dri2.c:81: undefined reference to 
`XextCreateExtension'
/home/jon/src/mesa/build/../src/glx/dri2.c:81: undefined reference to 
`XextAddDisplay'
src/glx/libglx.a(dri2.c.o): In function `DRI2EventToWire':
/home/jon/src/mesa/build/../src/glx/dri2.c:168: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(dri2.c.o): In function `DRI2WireToEvent':
/home/jon/src/mesa/build/../src/glx/dri2.c:93: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(dri2.c.o): In function `DRI2QueryVersion':
/home/jon/src/mesa/build/../src/glx/dri2.c:229: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(dri2.c.o): In function `DRI2Connect':
/home/jon/src/mesa/build/../src/glx/dri2.c:275: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(dri2.c.o): In function `DRI2Authenticate':
/home/jon/src/mesa/build/../src/glx/dri2.c:344: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(dri2.c.o):/home/jon/src/mesa/build/../src/glx/dri2.c:371: more 
undefined references to `XMissingExtension' follow
src/glx/libglx.a(dri2_glx.c.o): In function `dri2_copy_drawable':
/home/jon/src/mesa/build/../src/glx/dri2_glx.c:635: undefined reference to 
`XFixesCreateRegion'
/home/jon/src/mesa/build/../src/glx/dri2_glx.c:637: undefined reference to 
`XFixesDestroyRegion'
src/glx/libglx.a(dri2_glx.c.o): In function `__dri2CopySubBuffer':
/home/jon/src/mesa/build/../src/glx/dri2_glx.c:597: undefined reference to 
`XFixesCreateRegion'
/home/jon/src/mesa/build/../src/glx/dri2_glx.c:608: undefined reference to 
`XFixesDestroyRegion'
src/glx/libglx.a(dri_glx.c.o): In function `__glXReportDamage':
/home/jon/src/mesa/build/../src/glx/dri_glx.c:348: undefined reference to 
`XFixesCreateRegion'
/home/jon/src/mesa/build/../src/glx/dri_glx.c:350: undefined reference to 
`XDamageAdd'
/home/jon/src/mesa/build/../src/glx/dri_glx.c:351: undefined reference to 
`XFixesDestroyRegion'
src/glx/libglx.a(dri_glx.c.o): In function `has_damage_post':
/home/jon/src/mesa/build/../src/glx/dri_glx.c:296: undefined reference to 
`XDamageQueryVersion'
src/glx/libglx.a(XF86dri.c.o): In function `close_display':
/home/jon/src/mesa/build/../src/glx/XF86dri.c:82: undefined reference to 
`XextRemoveDisplay'
src/glx/libglx.a(XF86dri.c.o): In function `find_display':
/home/jon/src/mesa/build/../src/glx/XF86dri.c:77: undefined reference to 
`XextFindDisplay'
/home/jon/src/mesa/build/../src/glx/XF86dri.c:77: undefined reference to 
`XextCreateExtension'
/home/jon/src/mesa/build/../src/glx/XF86dri.c:77: undefined reference to 
`XextAddDisplay'
src/glx/libglx.a(XF86dri.c.o): In function `XF86DRIQueryVersion':
/home/jon/src/mesa/build/../src/glx/XF86dri.c:125: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(XF86dri.c.o): In function `XF86DRIQueryDirectRenderingCapable':
/home/jon/src/mesa/build/../src/glx/XF86dri.c:155: undefined reference to 
`XMissingExtension'
src/glx/libglx.a(XF86dri.c.o): In 

[Mesa-dev] [PATCH] i965/nir: do int64 lowering before optimization

2017-12-01 Thread Iago Toral Quiroga
Otherwise loop unrolling will fail to see the actual cost of
the unrolling operations when the loop body contains 64-bit integer
instructions, and most especially when the divmod64 lowering applies,
since its lowering is quite expensive.

Without this change, some in-development CTS tests for int64
get stuck forever trying to register allocate a shader with
over 50K SSA values. The large number of SSA values is the result
of NIR first unrolling multiple seemingly simple loops that involve
int64 instructions, only to then lower these instructions to produce
a massive pile of code (due to the divmod64 lowering in the unrolled
instructions).

With this change, loop unrolling will see the loops with the int64
code already lowered and will realize that it is too expensive to
unroll.
---
 src/intel/compiler/brw_nir.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 8f3f77f89a..ef12cdfff8 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -636,6 +636,10 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
 
OPT(nir_split_var_copies);
 
+   nir_lower_int64(nir, nir_lower_imul64 |
+nir_lower_isign64 |
+nir_lower_divmod64);
+
nir = brw_nir_optimize(nir, compiler, is_scalar);
 
if (is_scalar) {
@@ -663,10 +667,6 @@ brw_preprocess_nir(const struct brw_compiler *compiler, 
nir_shader *nir)
   brw_nir_no_indirect_mask(compiler, nir->info.stage);
nir_lower_indirect_derefs(nir, indirect_mask);
 
-   nir_lower_int64(nir, nir_lower_imul64 |
-nir_lower_isign64 |
-nir_lower_divmod64);
-
/* Get rid of split copies */
nir = brw_nir_optimize(nir, compiler, is_scalar);
 
-- 
2.11.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/6] st/omx/tizonia: Add --enable-omx-tizonia flag and build files

2017-12-01 Thread Gurkirpal Singh
Allow only one of bellagio or tizonia to be enabled at a time.
Detect tizonia package config file
Generate libomx_mesa.so and install it to libtizcore.pc::pluginsdir
Only compile empty source (target.c) for now.

GSoC Project link: 
https://summerofcode.withgoogle.com/projects/#4737166321123328
---
 configure.ac   | 47 +-
 src/gallium/Makefile.am|  4 ++
 src/gallium/state_trackers/omx/Makefile.am | 13 ++
 src/gallium/state_trackers/omx/tizonia/Makefile.am | 36 +
 .../state_trackers/omx/tizonia/Makefile.sources|  1 +
 src/gallium/targets/omx/Makefile.am| 19 -
 src/gallium/targets/omx/omx.sym|  1 +
 7 files changed, 117 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/state_trackers/omx/tizonia/Makefile.am
 create mode 100644 src/gallium/state_trackers/omx/tizonia/Makefile.sources

diff --git a/configure.ac b/configure.ac
index f127d24..5145818 100644
--- a/configure.ac
+++ b/configure.ac
@@ -85,6 +85,7 @@ dnl Versions for external dependencies
 DRI2PROTO_REQUIRED=2.8
 GLPROTO_REQUIRED=1.4.14
 LIBOMXIL_BELLAGIO_REQUIRED=0.0
+LIBOMXIL_TIZONIA_REQUIRED=0.9.0
 LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.11
@@ -1294,14 +1295,19 @@ AC_ARG_ENABLE([vdpau],
[enable_vdpau=auto])
 AC_ARG_ENABLE([omx],
[AS_HELP_STRING([--enable-omx],
- [DEPRECATED: Use --enable-omx-bellagio instead 
@<:@default=auto@:>@])],
-   [AC_MSG_ERROR([--enable-omx is deprecated. Use --enable-omx-bellagio 
instead.])],
+ [DEPRECATED: Use --enable-omx-bellagio or --enable-omx-tizonia 
instead @<:@default=auto@:>@])],
+   [AC_MSG_ERROR([--enable-omx is deprecated. Use --enable-omx-bellagio or 
--enable-omx-tizonia instead.])],
[])
 AC_ARG_ENABLE([omx-bellagio],
[AS_HELP_STRING([--enable-omx-bellagio],
  [enable OpenMAX Bellagio library @<:@default=disabled@:>@])],
[enable_omx_bellagio="$enableval"],
[enable_omx_bellagio=no])
+AC_ARG_ENABLE([omx-tizonia],
+   [AS_HELP_STRING([--enable-omx-tizonia],
+ [enable OpenMAX Tizonia library @<:@default=disabled@:>@])],
+   [enable_omx_tizonia="$enableval"],
+   [enable_omx_tizonia=no])
 AC_ARG_ENABLE([va],
[AS_HELP_STRING([--enable-va],
  [enable va library @<:@default=auto@:>@])],
@@ -1353,11 +1359,17 @@ if test "x$enable_opengl" = xno -a \
 "x$enable_xvmc" = xno -a \
 "x$enable_vdpau" = xno -a \
 "x$enable_omx_bellagio" = xno -a \
+"x$enable_omx_tizonia" = xno -a \
 "x$enable_va" = xno -a \
 "x$enable_opencl" = xno; then
 AC_MSG_ERROR([at least one API should be enabled])
 fi
 
+if test "x$enable_omx_bellagio" = xyes -a \
+"x$enable_omx_tizonia" = xyes; then
+   AC_MSG_ERROR([Can't enable both bellagio and tizonia at same time])
+fi
+
 # Building OpenGL ES1 and/or ES2 without OpenGL is not supported on mesa 9.0.x
 if test "x$enable_opengl" = xno -a \
 "x$enable_gles1" = xyes; then
@@ -2198,6 +2210,10 @@ if test -n "$with_gallium_drivers" -a 
"x$with_gallium_drivers" != xswrast; then
 PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], 
[enable_omx_bellagio=yes], [enable_omx_bellagio=no])
 fi
 
+if test "x$enable_omx_tizonia" = xauto -a "x$have_omx_platform" = xyes; 
then
+   PKG_CHECK_EXISTS([libtizonia >= $LIBOMXIL_TIZONIA_REQUIRED], 
[enable_omx_tizonia=yes], [enable_omx_tizonia=no])
+fi
+
 if test "x$enable_va" = xauto -a "x$have_va_platform" = xyes; then
 PKG_CHECK_EXISTS([libva >= $LIBVA_REQUIRED], [enable_va=yes], 
[enable_va=no])
 fi
@@ -2207,6 +2223,7 @@ if test "x$enable_dri" = xyes -o \
 "x$enable_xvmc" = xyes -o \
 "x$enable_vdpau" = xyes -o \
 "x$enable_omx_bellagio" = xyes -o \
+"x$enable_omx_tizonia" = xyes -o \
 "x$enable_va" = xyes; then
 need_gallium_vl=yes
 fi
@@ -2215,6 +2232,7 @@ AM_CONDITIONAL(NEED_GALLIUM_VL, test "x$need_gallium_vl" 
= xyes)
 if test "x$enable_xvmc" = xyes -o \
 "x$enable_vdpau" = xyes -o \
 "x$enable_omx_bellagio" = xyes -o \
+"x$enable_omx_tizonia" = xyes -o \
 "x$enable_va" = xyes; then
 if echo $platforms | grep -q "x11"; then
 PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
@@ -2248,9 +2266,23 @@ if test "x$enable_omx_bellagio" = xyes; then
 fi
 PKG_CHECK_MODULES([OMX_BELLAGIO], [libomxil-bellagio >= 
$LIBOMXIL_BELLAGIO_REQUIRED])
 gallium_st="$gallium_st omx_bellagio"
+AC_DEFINE([ENABLE_ST_OMX_BELLAGIO], 1, [Use Bellagio for OMX IL])
 fi
 AM_CONDITIONAL(HAVE_ST_OMX_BELLAGIO, test "x$enable_omx_bellagio" = xyes)
 
+if test "x$enable_omx_tizonia" = xyes; then
+if test "x$have_omx_platform" != xyes; then
+AC_MSG_ERROR([OMX requires at least one of the x11 or drm platforms])
+fi
+PKG_CHECK_MODULES([OMX_TIZONIA],
+  [libtizonia 

[Mesa-dev] [PATCH v2 3/6] st/omx/tizonia: Add entrypoint

2017-12-01 Thread Gurkirpal Singh
Adds base files for adding components
---
 .../state_trackers/omx/tizonia/Makefile.sources|  4 ++-
 .../state_trackers/omx/tizonia/entrypoint.c| 37 ++
 .../state_trackers/omx/tizonia/entrypoint.h| 35 
 3 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/state_trackers/omx/tizonia/entrypoint.c
 create mode 100644 src/gallium/state_trackers/omx/tizonia/entrypoint.h

diff --git a/src/gallium/state_trackers/omx/tizonia/Makefile.sources 
b/src/gallium/state_trackers/omx/tizonia/Makefile.sources
index 9103ca8..de6fcf9 100644
--- a/src/gallium/state_trackers/omx/tizonia/Makefile.sources
+++ b/src/gallium/state_trackers/omx/tizonia/Makefile.sources
@@ -1 +1,3 @@
-C_SOURCES :=
+C_SOURCES := \
+   entrypoint.c \
+   entrypoint.h
diff --git a/src/gallium/state_trackers/omx/tizonia/entrypoint.c 
b/src/gallium/state_trackers/omx/tizonia/entrypoint.c
new file mode 100644
index 000..c89bdfd
--- /dev/null
+++ b/src/gallium/state_trackers/omx/tizonia/entrypoint.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#include 
+#include 
+#include 
+
+#include "entrypoint.h"
+
+OMX_ERRORTYPE OMX_ComponentInit (OMX_HANDLETYPE ap_hdl)
+{
+   return OMX_ErrorNone;
+}
diff --git a/src/gallium/state_trackers/omx/tizonia/entrypoint.h 
b/src/gallium/state_trackers/omx/tizonia/entrypoint.h
new file mode 100644
index 000..740a89d
--- /dev/null
+++ b/src/gallium/state_trackers/omx/tizonia/entrypoint.h
@@ -0,0 +1,35 @@
+/**
+ *
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **/
+
+#ifndef OMX_TIZ_ENTRYPOINT_H
+#define OMX_TIZ_ENTRYPOINT_H
+
+#include "vl/vl_winsys.h"
+
+PUBLIC OMX_ERRORTYPE OMX_ComponentInit(OMX_HANDLETYPE ap_hdl);
+
+#endif
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/6] st/omx/tizonia/h264d: Add EGLImage support

2017-12-01 Thread Gurkirpal Singh
Example Gstreamer pipeline :
MESA_ENABLE_OMX_EGLIMAGE=1 GST_GL_API=gles2 GST_GL_PLATFORM=egl gst-launch-1.0 
filesrc location=movie.mp4 ! qtdemux ! h264parse ! omxh264dec ! glimagesink
---
 src/gallium/state_trackers/omx/Makefile.am |   1 +
 src/gallium/state_trackers/omx/tizonia/Makefile.am |  13 +++
 .../state_trackers/omx/tizonia/entrypoint.c|  44 
 src/gallium/state_trackers/omx/tizonia/h264dprc.c  | 116 -
 src/gallium/state_trackers/omx/vid_dec_common.c|  38 +++
 src/gallium/state_trackers/omx/vid_dec_common.h|   3 +
 6 files changed, 211 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/omx/Makefile.am 
b/src/gallium/state_trackers/omx/Makefile.am
index de6eb64..fbde8e0 100644
--- a/src/gallium/state_trackers/omx/Makefile.am
+++ b/src/gallium/state_trackers/omx/Makefile.am
@@ -37,6 +37,7 @@ SUBDIRS = tizonia
 
 AM_CFLAGS = \
$(GALLIUM_CFLAGS) \
+   $(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
$(VL_CFLAGS) \
$(XCB_DRI3_CFLAGS) \
diff --git a/src/gallium/state_trackers/omx/tizonia/Makefile.am 
b/src/gallium/state_trackers/omx/tizonia/Makefile.am
index 0f72e39..3149afa 100644
--- a/src/gallium/state_trackers/omx/tizonia/Makefile.am
+++ b/src/gallium/state_trackers/omx/tizonia/Makefile.am
@@ -22,8 +22,21 @@ include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc
 
 AM_CFLAGS = \
+   -I$(top_srcdir)/include \
+   -I$(top_srcdir)/src/mapi \
+   -I$(top_srcdir)/src/mesa \
+   -I$(top_builddir)/src/mesa/drivers/dri/common \
+   -I$(top_srcdir)/src/mesa/drivers/dri/common \
+   -I$(top_srcdir)/src/egl/drivers/dri2 \
+   -I$(top_srcdir)/src/egl/wayland/wayland-egl \
+   -I$(top_srcdir)/src/egl/main \
+   -I$(top_srcdir)/src/gbm/main \
+   -I$(top_srcdir)/src/loader \
+   -I$(top_srcdir)/src/gbm/backends/dri \
+   -I$(top_srcdir)/src/gallium/state_trackers/dri \
-I$(top_srcdir)/src/gallium/state_trackers/omx \
$(GALLIUM_CFLAGS) \
+   $(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
$(VL_CFLAGS) \
$(XCB_DRI3_CFLAGS) \
diff --git a/src/gallium/state_trackers/omx/tizonia/entrypoint.c 
b/src/gallium/state_trackers/omx/tizonia/entrypoint.c
index 72d955b..b896447 100644
--- a/src/gallium/state_trackers/omx/tizonia/entrypoint.c
+++ b/src/gallium/state_trackers/omx/tizonia/entrypoint.c
@@ -44,6 +44,40 @@
 #include "h264eoutport.h"
 #include "names.h"
 
+#include "util/u_debug.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(mesa_enable_omx_eglimage,
+   "MESA_ENABLE_OMX_EGLIMAGE",
+   false)
+
+static OMX_BOOL egl_image_validation_hook(const OMX_HANDLETYPE ap_hdl,
+  OMX_U32 pid, OMX_PTR ap_eglimage,
+  void *ap_args)
+{
+   const void * p_krn = NULL;
+   const tiz_port_t * p_port = NULL;
+
+   assert(ap_hdl);
+   assert(ap_eglimage);
+   assert(!ap_args);
+
+   if (!debug_get_option_mesa_enable_omx_eglimage()) {
+  return OMX_FALSE;
+   }
+
+   p_krn = tiz_get_krn(ap_hdl);
+   p_port = tiz_krn_get_port(p_krn, pid);
+
+   const OMX_VIDEO_PORTDEFINITIONTYPE * p_video_portdef
+  = &(p_port->portdef_.format.video);
+
+   if (!p_video_portdef->pNativeWindow) {
+  return OMX_FALSE;
+   }
+
+   return OMX_TRUE;
+}
+
 OMX_ERRORTYPE OMX_ComponentInit (OMX_HANDLETYPE ap_hdl)
 {
tiz_role_factory_t h264d_role;
@@ -57,6 +91,11 @@ OMX_ERRORTYPE OMX_ComponentInit (OMX_HANDLETYPE ap_hdl)
const tiz_type_factory_t * tf_list[] = {_inport_type, 
_outport_type,
_type, _type,
_inport_type};
+   const tiz_eglimage_hook_t egl_validation_hook = {
+  OMX_VID_DEC_AVC_OUTPUT_PORT_INDEX,
+  egl_image_validation_hook,
+  NULL
+   };
 
/* Settings for roles */
strcpy ((OMX_STRING) h264d_role.role, OMX_VID_DEC_AVC_ROLE);
@@ -108,5 +147,10 @@ OMX_ERRORTYPE OMX_ComponentInit (OMX_HANDLETYPE ap_hdl)
/* Register the component roles */
tiz_comp_register_roles (ap_hdl, rf_list, 2);
 
+   /* Register egl image validation hook for the decoder */
+   tiz_check_omx (tiz_comp_register_role_eglimage_hook
+ (ap_hdl, (const OMX_U8 *) OMX_VID_DEC_AVC_ROLE,
+  _validation_hook));
+
return OMX_ErrorNone;
 }
diff --git a/src/gallium/state_trackers/omx/tizonia/h264dprc.c 
b/src/gallium/state_trackers/omx/tizonia/h264dprc.c
index f5c5d659..96f95b9 100644
--- a/src/gallium/state_trackers/omx/tizonia/h264dprc.c
+++ b/src/gallium/state_trackers/omx/tizonia/h264dprc.c
@@ -38,10 +38,33 @@
 
 #include "vl/vl_video_buffer.h"
 #include "vl/vl_compositor.h"
+#include "util/u_hash_table.h"
 #include "util/u_surface.h"
 
+#include "dri_screen.h"
+#include "egl_dri2.h"
+
 unsigned dec_frame_delta;
 
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void 

[Mesa-dev] [PATCH v2 5/6] st/omx/tizonia: Add H.264 encoder

2017-12-01 Thread Gurkirpal Singh
v2: Refactor out screen functions to st/omx

Example Gstreamer pipeline :
gst-launch-1.0 filesrc location=movie.mp4 ! qtdemux ! h264parse ! avdec_h264 ! 
videoconvert ! omxh264enc ! h264parse ! avdec_h264 ! videoconvert ! ximagesink
---
 src/gallium/state_trackers/omx/Makefile.sources|   4 +-
 src/gallium/state_trackers/omx/bellagio/vid_enc.c  | 355 +--
 src/gallium/state_trackers/omx/bellagio/vid_enc.h  |  48 --
 .../state_trackers/omx/tizonia/Makefile.sources|  11 +
 .../state_trackers/omx/tizonia/entrypoint.c|  40 +-
 src/gallium/state_trackers/omx/tizonia/h264e.c | 184 ++
 src/gallium/state_trackers/omx/tizonia/h264e.h |  55 ++
 .../state_trackers/omx/tizonia/h264einport.c   | 216 +++
 .../state_trackers/omx/tizonia/h264einport.h   |  31 +
 .../state_trackers/omx/tizonia/h264einport_decls.h |  48 ++
 .../state_trackers/omx/tizonia/h264eoutport.c  | 143 +
 .../state_trackers/omx/tizonia/h264eoutport.h  |  31 +
 .../omx/tizonia/h264eoutport_decls.h   |  48 ++
 src/gallium/state_trackers/omx/tizonia/h264eprc.c  | 689 +
 src/gallium/state_trackers/omx/tizonia/h264eprc.h  |  31 +
 src/gallium/state_trackers/omx/vid_enc_common.c| 387 
 src/gallium/state_trackers/omx/vid_enc_common.h| 175 ++
 17 files changed, 2094 insertions(+), 402 deletions(-)
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264e.c
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264e.h
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264einport.c
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264einport.h
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264einport_decls.h
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264eoutport.c
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264eoutport.h
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264eoutport_decls.h
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264eprc.c
 create mode 100644 src/gallium/state_trackers/omx/tizonia/h264eprc.h
 create mode 100644 src/gallium/state_trackers/omx/vid_enc_common.c
 create mode 100644 src/gallium/state_trackers/omx/vid_enc_common.h

diff --git a/src/gallium/state_trackers/omx/Makefile.sources 
b/src/gallium/state_trackers/omx/Makefile.sources
index ba9b0be..0fb1d9a 100644
--- a/src/gallium/state_trackers/omx/Makefile.sources
+++ b/src/gallium/state_trackers/omx/Makefile.sources
@@ -4,4 +4,6 @@ C_SOURCES := \
vid_dec_h264_common.c \
vid_dec_h264_common.h \
vid_omx_common.c \
-   vid_omx_common.h
+   vid_omx_common.h \
+   vid_enc_common.c \
+   vid_enc_common.h
diff --git a/src/gallium/state_trackers/omx/bellagio/vid_enc.c 
b/src/gallium/state_trackers/omx/bellagio/vid_enc.c
index 64ff624..9f94a1c 100644
--- a/src/gallium/state_trackers/omx/bellagio/vid_enc.c
+++ b/src/gallium/state_trackers/omx/bellagio/vid_enc.c
@@ -49,32 +49,11 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_video_codec.h"
 #include "util/u_memory.h"
-#include "vl/vl_video_buffer.h"
 
 #include "entrypoint.h"
 #include "vid_enc.h"
 #include "vid_omx_common.h"
-
-struct encode_task {
-   struct list_head list;
-
-   struct pipe_video_buffer *buf;
-   unsigned pic_order_cnt;
-   struct pipe_resource *bitstream;
-   void *feedback;
-};
-
-struct input_buf_private {
-   struct list_head tasks;
-
-   struct pipe_resource *resource;
-   struct pipe_transfer *transfer;
-};
-
-struct output_buf_private {
-   struct pipe_resource *bitstream;
-   struct pipe_transfer *transfer;
-};
+#include "vid_enc_common.h"
 
 static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING 
name);
 static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp);
@@ -94,8 +73,6 @@ static OMX_ERRORTYPE 
vid_enc_AllocateOutBuffer(omx_base_PortType *comp, OMX_INOU
 static OMX_ERRORTYPE vid_enc_FreeOutBuffer(omx_base_PortType *port, OMX_U32 
idx, OMX_BUFFERHEADERTYPE *buf);
 static void vid_enc_BufferEncoded(OMX_COMPONENTTYPE *comp, 
OMX_BUFFERHEADERTYPE* input, OMX_BUFFERHEADERTYPE* output);
 
-static void enc_ReleaseTasks(struct list_head *head);
-
 OMX_ERRORTYPE vid_enc_LoaderComponent(stLoaderComponentType *comp)
 {
comp->componentVersion.s.nVersionMajor = 0;
@@ -616,66 +593,6 @@ static OMX_ERRORTYPE vid_enc_GetConfig(OMX_HANDLETYPE 
handle, OMX_INDEXTYPE idx,
return OMX_ErrorNone;
 }
 
-static enum pipe_video_profile enc_TranslateOMXProfileToPipe(unsigned 
omx_profile)
-{
-   switch (omx_profile) {
-   case OMX_VIDEO_AVCProfileBaseline:
-  return PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE;
-   case OMX_VIDEO_AVCProfileMain:
-  return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN;
-   case OMX_VIDEO_AVCProfileExtended:
-  return PIPE_VIDEO_PROFILE_MPEG4_AVC_EXTENDED;
-   case OMX_VIDEO_AVCProfileHigh:
-  return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
-   case OMX_VIDEO_AVCProfileHigh10:
-  return 

Re: [Mesa-dev] [PATCH 12/22] egl/x11: Handle depth 30 drawables for EGL_KHR_image_pixmap.

2017-12-01 Thread Tapani Pälli

Patches 9,10,11,12 LGTM

Reviewed-by: Tapani Pälli 

On 29.11.2017 06:20, Mario Kleiner wrote:

Enables eglCreateImageKHR() with target set to
EGL_NATIVE_PIXMAP_KHR to handle color depth 30
X11 drawables.

Note that in theory the drawable depth 32 case in the
current implementation is ambiguous: A depth 32 drawable
could be of format ARGB8888 or ARGB2101010, therefore an
assignment of __DRI_IMAGE_FORMAT_ARGB8888 for a pixmap of
ARGB2101010 format would be wrong. In practice however, the
X-Server (as of v1.19) does not provide any depth 32 visuals
for ARGB2101010 EGL/GLX configs. Those are associated with
depth 30 visuals without an alpha channel instead. Therefore
the switch-case depth 32 branch is only executed for ARGB8888
pixmaps and we get away with this.

Tested with KDE Plasma 5 under X11, DRI2 and DRI3/Present,
selecting EGL + OpenGL compositing and different fbconfigs
with/without 2 bit alpha channel. glxinfo confirms use of
depth 30 visuals for ARGB2101010 only.

Suggested-by: Eric Engestrom 
Signed-off-by: Mario Kleiner 
---
  src/egl/drivers/dri2/platform_x11.c  | 3 +++
  src/egl/drivers/dri2/platform_x11_dri3.c | 3 +++
  2 files changed, 6 insertions(+)

diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index 8e48376..61c842d 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1050,6 +1050,9 @@ dri2_create_image_khr_pixmap(_EGLDisplay *disp, 
_EGLContext *ctx,
 case 24:
format = __DRI_IMAGE_FORMAT_XRGB;
break;
+   case 30:
+  format = __DRI_IMAGE_FORMAT_XRGB2101010;
+  break;
 case 32:
format = __DRI_IMAGE_FORMAT_ARGB;
break;
diff --git a/src/egl/drivers/dri2/platform_x11_dri3.c 
b/src/egl/drivers/dri2/platform_x11_dri3.c
index eadd371..6e40eaa 100644
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -269,6 +269,9 @@ dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext 
*ctx,
 case 24:
format = __DRI_IMAGE_FORMAT_XRGB;
break;
+   case 30:
+  format = __DRI_IMAGE_FORMAT_XRGB2101010;
+  break;
 case 32:
format = __DRI_IMAGE_FORMAT_ARGB;
break;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/22] i965/screen: Honor 'allow_rgb10_configs' option. (v2)

2017-12-01 Thread Tapani Pälli
Not sure of the end result of the discussion on this drirc option, but 
personally I think it is good to have as a temporary workaround.


Patches 7,8

Reviewed-by: Tapani Pälli 


On 29.11.2017 06:20, Mario Kleiner wrote:

Allows to prevent exposing RGB10 configs and visuals to
clients.

v2: Rename expose_rgb10_configs to allow_rgb10_configs,
 as suggested by Emil.

Signed-off-by: Mario Kleiner 
---
  src/mesa/drivers/dri/i965/intel_screen.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 455a13c..f6853a8 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2092,11 +2092,20 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 else
num_formats = ARRAY_SIZE(formats) - 2; /* all - RGBA_ORDERING formats */
  
+   /* Shall we expose 10 bpc formats? */

+   bool allow_rgb10_configs = driQueryOptionb(_screen->optionCache,
+  "allow_rgb10_configs");
+
 /* Generate singlesample configs without accumulation buffer. */
 for (unsigned i = 0; i < num_formats; i++) {
__DRIconfig **new_configs;
int num_depth_stencil_bits = 2;
  
+  if (!allow_rgb10_configs &&

+  (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM ||
+   formats[i] == MESA_FORMAT_B10G10R10X2_UNORM))
+ continue;
+
/* Starting with DRI2 protocol version 1.1 we can request a 
depth/stencil
 * buffer that has a different number of bits per pixel than the color
 * buffer, gen >= 6 supports this.
@@ -2133,6 +2142,11 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 for (unsigned i = 0; i < num_formats; i++) {
__DRIconfig **new_configs;
  
+  if (!allow_rgb10_configs &&

+  (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM ||
+  formats[i] == MESA_FORMAT_B10G10R10X2_UNORM))
+ continue;
+
if (formats[i] == MESA_FORMAT_B5G6R5_UNORM) {
   depth_bits[0] = 16;
   stencil_bits[0] = 0;
@@ -2166,6 +2180,11 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
if (devinfo->gen < 6)
   break;
  
+  if (!allow_rgb10_configs &&

+  (formats[i] == MESA_FORMAT_B10G10R10A2_UNORM ||
+  formats[i] == MESA_FORMAT_B10G10R10X2_UNORM))
+ continue;
+
__DRIconfig **new_configs;
const int num_depth_stencil_bits = 2;
int num_msaa_modes = 0;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/22] i965/screen: Add XRGB2101010 and ARGB2101010 support for DRI3.

2017-12-01 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 29.11.2017 06:20, Mario Kleiner wrote:

Allow DRI3/Present buffer sharing for 10 bpc buffers.
Otherwise composited desktops under DRI3 will only display
black client areas for redirected windows.

Signed-off-by: Mario Kleiner 
---
  src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 39efc1c..455a13c 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -181,6 +181,12 @@ static const struct __DRI2flushExtensionRec 
intelFlushExtension = {
  };
  
  static const struct intel_image_format intel_image_formats[] = {

+   { __DRI_IMAGE_FOURCC_ARGB2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB2101010, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } },
+
 { __DRI_IMAGE_FOURCC_ARGB, __DRI_IMAGE_COMPONENTS_RGBA, 1,
   { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB, 4 } } },
  


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/22] i965: Support accelerated blit for depth 30 formats. (v2)

2017-12-01 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 29.11.2017 06:20, Mario Kleiner wrote:

Extend intel_miptree_blit() to handle at least
ARGB2101010 -> XRGB2101010, ARGB2101010 -> ARGB2101010,
and XRGB2101010 -> XRGB2101010 via the BLT engine,
but not XRGB2101010 -> ARGB2101010 yet.

This works as tested under Compiz, KDE-5, Gnome-Shell.

v2: Restrict BLT fast path to exclude XRGB2101010 -> ARGB2101010,
 as intel_miptree_set_alpha_to_one() isn't ready to set 2 bit
 alpha channels to 1.0 yet. However, couldn't find a test case
 where this specific blit would be needed, so maybe not much
 of a point to improve here.

Signed-off-by: Mario Kleiner 
---
  src/mesa/drivers/dri/i965/intel_blit.c | 20 +++-
  1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
b/src/mesa/drivers/dri/i965/intel_blit.c
index 5f25bfa..46945b2 100644
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -170,6 +170,19 @@ intel_miptree_blit_compatible_formats(mesa_format src, 
mesa_format dst)
return (dst == MESA_FORMAT_R8G8B8A8_UNORM ||
dst == MESA_FORMAT_R8G8B8X8_UNORM);
  
+   /* We can also discard alpha when going from A2->X2 for 2 bit alpha,

+* however we can't fill the alpha channel with two 1 bits when going
+* from X2->A2, because intel_miptree_set_alpha_to_one() is not yet
+* ready for this / can only handle 8 bit alpha.
+*/
+   if (src == MESA_FORMAT_B10G10R10A2_UNORM)
+  return (dst == MESA_FORMAT_B10G10R10A2_UNORM ||
+  dst == MESA_FORMAT_B10G10R10X2_UNORM);
+
+   if (src == MESA_FORMAT_R10G10B10A2_UNORM)
+  return (dst == MESA_FORMAT_R10G10B10A2_UNORM ||
+  dst == MESA_FORMAT_R10G10B10X2_UNORM);
+
 return false;
  }
  
@@ -322,7 +335,8 @@ intel_miptree_blit(struct brw_context *brw,

 /* The blitter doesn't support doing any format conversions.  We do also
  * support blitting ARGB to XRGB (trivial, the values dropped into
  * the X channel don't matter), and XRGB to ARGB by setting the A
-* channel to 1.0 at the end.
+* channel to 1.0 at the end. Also trivially ARGB2101010 to XRGB2101010,
+* but not XRGB2101010 to ARGB2101010 yet.
  */
 if (!intel_miptree_blit_compatible_formats(src_format, dst_format)) {
perf_debug("%s: Can't use hardware blitter from %s to %s, "
@@ -789,6 +803,10 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
 DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
 __func__, mt->bo, pitch, x, y, width, height);
  
+   /* Note: Currently only handles 8 bit alpha channel. Extension to < 8 Bit

+* alpha channel would be likely possible via ROP code 0xfa instead of 0xf0
+* and writing a suitable bit-mask instead of 0x.
+*/
 BR13 = br13_for_cpp(cpp) | 0xf0 << 16;
 CMD = XY_COLOR_BLT_CMD;
 CMD |= XY_BLT_WRITE_ALPHA;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 02/22] i965: Support xrgb/argb2101010 formats for glx_texture_from_pixmap.

2017-12-01 Thread Tapani Pälli

Reviewed-by: Tapani Pälli 

On 29.11.2017 06:20, Mario Kleiner wrote:

Makes compositing under X11/GLX work.

Signed-off-by: Mario Kleiner 
---
  src/mesa/drivers/dri/i965/intel_tex_image.c | 12 ++--
  1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 37c8e24..2ee3658 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -464,11 +464,19 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
 if (rb->mt->cpp == 4) {
if (texture_format == __DRI_TEXTURE_FORMAT_RGB) {
   internal_format = GL_RGB;
- texFormat = MESA_FORMAT_B8G8R8X8_UNORM;
+ if (rb->mt->format == MESA_FORMAT_B10G10R10X2_UNORM ||
+ rb->mt->format == MESA_FORMAT_B10G10R10A2_UNORM)
+texFormat = MESA_FORMAT_B10G10R10X2_UNORM;
+ else
+texFormat = MESA_FORMAT_B8G8R8X8_UNORM;
}
else {
   internal_format = GL_RGBA;
- texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
+ if (rb->mt->format == MESA_FORMAT_B10G10R10X2_UNORM ||
+ rb->mt->format == MESA_FORMAT_B10G10R10A2_UNORM)
+texFormat = MESA_FORMAT_B10G10R10A2_UNORM;
+ else
+texFormat = MESA_FORMAT_B8G8R8A8_UNORM;
}
 } else if (rb->mt->cpp == 2) {
internal_format = GL_RGB;


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/22] i965/screen: Add basic support for rendering 10 bpc/depth 30 framebuffers. (v3)

2017-12-01 Thread Tapani Pälli
IMO this patch should be moved (when committing) to around patch 6, so that 
we would have the dri enablers in place. Otherwise, when bisecting, 
these configs will fail to initialize.



On 29.11.2017 06:20, Mario Kleiner wrote:

Expose formats which are supported at least back to Gen 5 Ironlake,
possibly further. Allow creation of 10 bpc winsys buffers for drawables.

glxinfo now lists new RGBA 10 10 10 2/0 formats.
Works correctly under DRI2 without compositing.

v2: Move the BGRA/BGRX1010102 formats before the RGBA/RGBX
 32 bit formats, as the code comments require. Thanks Emil!
 Update num_formats from 3 to 5, to keep the special Android
 handling intact.

v3: Use num_formats = ARRAY_SIZE(formats) - 2 as suggested by Tapani,
 to only exclude the last 2 Android formats, add Tapani's r-b.

Signed-off-by: Mario Kleiner 
Reviewed-by: Tapani Pälli 
---
  src/mesa/drivers/dri/i965/intel_screen.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index b56a61b..39efc1c 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1646,7 +1646,13 @@ intelCreateBuffer(__DRIscreen *dri_screen,
fb->Visual.samples = num_samples;
 }
  
-   if (mesaVis->redBits == 5) {

+   if (mesaVis->redBits == 10 && mesaVis->alphaBits > 0) {
+  rgbFormat = mesaVis->redMask == 0x3ff0 ? 
MESA_FORMAT_B10G10R10A2_UNORM
+ : 
MESA_FORMAT_R10G10B10A2_UNORM;
+   } else if (mesaVis->redBits == 10) {
+  rgbFormat = mesaVis->redMask == 0x3ff0 ? 
MESA_FORMAT_B10G10R10X2_UNORM
+ : 
MESA_FORMAT_R10G10B10X2_UNORM;
+   } else if (mesaVis->redBits == 5) {
rgbFormat = mesaVis->redMask == 0x1f ? MESA_FORMAT_R5G6B5_UNORM
 : MESA_FORMAT_B5G6R5_UNORM;
 } else if (mesaVis->sRGBCapable) {
@@ -2035,6 +2041,10 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
  
MESA_FORMAT_B8G8R8A8_SRGB,
  
+  /* For 10 bpc, 30 bit depth framebuffers. */

+  MESA_FORMAT_B10G10R10A2_UNORM,
+  MESA_FORMAT_B10G10R10X2_UNORM,
+
/* The 32-bit RGBA format must not precede the 32-bit BGRA format.
 * Likewise for RGBX and BGRX.  Otherwise, the GLX client and the GLX
 * server may disagree on which format the GLXFBConfig represents,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx/dri3: Remove unused deviceName variable

2017-12-01 Thread vadim . shovkoplias
From: Vadym Shovkoplias 

deviceName string is declared, assigned and freed but actually
never used in dri3_create_screen() function.

Fixes: 2d94601582e ("Add DRI3+Present loader")

Signed-off-by: Vadym Shovkoplias 

---
 src/glx/dri3_glx.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index a10306f..f280a8c 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -800,7 +800,7 @@ dri3_create_screen(int screen, struct glx_display * priv)
struct dri3_screen *psc;
__GLXDRIscreen *psp;
struct glx_config *configs = NULL, *visuals = NULL;
-   char *driverName, *deviceName, *tmp;
+   char *driverName, *tmp;
int i;
unsigned char disable;
 
@@ -830,7 +830,6 @@ dri3_create_screen(int screen, struct glx_display * priv)
}
 
psc->fd = loader_get_user_preferred_fd(psc->fd, >is_different_gpu);
-   deviceName = NULL;
 
driverName = loader_get_driver_for_fd(psc->fd);
if (!driverName) {
@@ -956,7 +955,6 @@ dri3_create_screen(int screen, struct glx_display * priv)
   __glXEnableDirectExtension(>base, "GLX_EXT_buffer_age");
 
free(driverName);
-   free(deviceName);
 
tmp = getenv("LIBGL_SHOW_FPS");
psc->show_fps_interval = tmp ? atoi(tmp) : 0;
@@ -983,7 +981,6 @@ handle_error:
   dlclose(psc->driver);
 
free(driverName);
-   free(deviceName);
glx_screen_cleanup(>base);
free(psc);
 
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 28/44] i965/fs: Use untyped_surface_read for 16-bit load_ssbo

2017-12-01 Thread Chema Casanova
On 01/12/17 11:49, Jason Ekstrand wrote:
> On Wed, Nov 29, 2017 at 6:57 PM, Jose Maria Casanova Crespo
> > wrote:
> 
> SSBO loads were using byte_scattered read messages as they allow
> reading 16-bit size components. byte_scattered messages can only
> operate one component at a time so we needed to emit as many messages
> as components.
> 
> But for vec2 and vec4 of 16-bit, being multiple of 32-bit we can use the
> untyped_surface_read message to read pairs of 16-bit components
> using only
> one message. Once each pair is read it is unshuffled to return the
> proper
> 16-bit components.
> 
> On 16-bit scalar and vec3 16-bit the not paired component is read using
> only one byte_scattered_read message.
> 
> 
> My gut tells me that, for vec3's, we'd be better off with a single
> untyped read than one untyped read and one byte scattered read.  Also,
> are there alignment issues with untyped surface reads/writes that might
> cause us problems on vec3's?  I don't know what the alignment rules are
> for 16-bit vec3's in Vulkan.

I think that untyped_read will work perfectly fine with vec3 as there
are no special rules for 16-bit types. The only issue is that we would
always be writing the unused 4th component, so we decided to play it safe and
just modify what was expected, and only the scattered write allowed that with
that approach:

"* A three- or four-component vector, with components of size N, has a
base alignment of 4 N."

I was trying for this V4 of the series, to use untyped_surface_read for
all the cases, but I focused on scalar ones, without success. But for
vec3 it should be easy to do if we can assume to write random data at
the 4th component.

>  
> 
> v2: Removed use of stride = 2 on sources (Jason Ekstrand)
>     Rework optimization using unshuffle 16 reads (Chema Casanova)
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 43
> ++-
>  1 file changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index fa7aa9c247..57e79853ef 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -2354,16 +2354,39 @@ do_untyped_vector_read(const fs_builder ,
>           bld.ADD(read_offset, read_offset, brw_imm_ud(16));
>        }
>     } else if (type_sz(dest.type) == 2) {
> -      fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
> -      bld.MOV(read_offset, offset_reg);
> -      for (unsigned i = 0; i < num_components; i++) {
> -         fs_reg read_reg = emit_byte_scattered_read(bld,
> surf_index, read_offset,
> -                                                    1 /* dims */,
> -                                                    1,
> -                                                    16 /*bit_size */,
> -                                                   
> BRW_PREDICATE_NONE);
> -         bld.MOV(offset(dest,bld,i), subscript(read_reg, dest.type,
> 0));
> -         bld.ADD(read_offset, read_offset,
> brw_imm_ud(type_sz(dest.type)));
> +      assert(dest.stride == 1);
> +
> +      int component_pairs = num_components / 2;
> +      /* Pairs of 16-bit components can be read with untyped read */
> +      if (component_pairs > 0) {
> +         fs_reg read_result = emit_untyped_read(bld, surf_index,
> +                                                offset_reg,
> +                                                1 /* dims */,
> +                                                component_pairs,
> +                                                BRW_PREDICATE_NONE);
> +         shuffle_32bit_load_result_to_16bit_data(bld,
> +               retype(dest, BRW_REGISTER_TYPE_HF),
> +               retype(read_result, BRW_REGISTER_TYPE_F),
> 
> 
> I'd rather we use W and D rather than HF and F.  Rounding errors scare me.

Ok.

Thanks for the review.

Chema

> +               component_pairs * 2);
> +      }
> +      /* Last component of vec3 and scalar 16-bit read needs to be read
> +       * using one byte_scattered_read message
> +       */
> +      if (num_components % 2) {
> +         fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
> +         bld.ADD(read_offset,
> +                 offset_reg,
> +                 brw_imm_ud((num_components - 1) *
> type_sz(dest.type)));
> +         fs_reg read_result = emit_byte_scattered_read(bld, surf_index,
> +                                                       read_offset,
> +                                                       1 /* dims */,
> +                                                       1,
> +                                                       16 /*
> bit_size */,
> +     

[Mesa-dev] [PATCH] radv: fix a crash in radv_can_dump_shader()

2017-12-01 Thread Samuel Pitoiset
module can be NULL, oops.

Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/radv_shader.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 91f2e7f2a1..f404c49ad5 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -118,8 +118,8 @@ radv_can_dump_shader(struct radv_device *device,
 struct radv_shader_module *module)
 {
/* Only dump non-meta shaders, useful for debugging purposes. */
-   return !module->nir &&
-  device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS;
+   return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS &&
+  !module->nir;
 }
 
 #endif
-- 
2.15.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v4 19/44] i965/fs: Use byte_scattered_write on 16-bit store_ssbo

2017-12-01 Thread Jason Ekstrand
On Fri, Dec 1, 2017 at 2:46 AM, Chema Casanova 
wrote:

> On 01/12/17 11:12, Jason Ekstrand wrote:
> > I've left some comments below that I think clean things up and make this
> > better, but I believe it is correct as-is.
> >
> > Reviewed-by: Jason Ekstrand  > >
> >
> > On Wed, Nov 29, 2017 at 6:42 PM, Jose Maria Casanova Crespo
> > > wrote:
> >
> > From: Alejandro Piñeiro  > >
> >
> > We need to rely on byte scattered writes, as untyped writes are 32 bits
> > in size. We could try to keep using 32-bit messages when we have two or
> > four 16-bit elements, but for simplicity's sake we use the same message
> > for any number of components. We revisit this approach in the following
> > patches.
> >
> > v2: Removed use of stride = 2 on 16-bit sources (Jason Ekstrand)
> >
> > v3: (Jason Ekstrand)
> > - Include bit_size to scattered write message and remove
> namespace
> > - specific for scattered messages.
> > - Move comment to proper place.
> > - Squashed with i965/fs: Adjust type_size/type_slots on
> store_ssbo.
> > (Jose Maria Casanova)
> > - Take into account that get_nir_src returns now WORD types for
> >   16-bit sources instead of DWORD.
> >
> > Signed-off-by: Jose Maria Casanova Crespo  > >
> > Signed-off-by: Alejandro Piñeiro  > >
> > ---
> >  src/intel/compiler/brw_fs_nir.cpp | 51
> > ---
> >  1 file changed, 37 insertions(+), 14 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index d6ab286147..ff04e2468b 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -4075,14 +4075,15 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder , nir_intrinsic_instr *instr
> > * Also, we have to suffle 64-bit data to be in the
> > appropriate layout
> > * expected by our 32-bit write messages.
> > */
> > -  unsigned type_size = 4;
> > -  if (nir_src_bit_size(instr->src[0]) == 64) {
> > - type_size = 8;
> > +  unsigned bit_size = nir_src_bit_size(instr->src[0]);
> > +  unsigned type_size = bit_size / 8;
> > +  if (bit_size == 64) {
> >   val_reg = shuffle_64bit_data_for_32bit_write(bld,
> >  val_reg, instr->num_components);
> >}
> >
> > -  unsigned type_slots = type_size / 4;
> > +  /* 16-bit types would use a minimum of 1 slot */
> > +  unsigned type_slots = MAX2(type_size / 4, 1);
> >
> >
> > Given that this is only used for emit_typed_write, maybe we should just
> > move it next to the emit_typed_write call and just get rid of the
> > MAX2().  More on that later.
>
> It makes sense; I partially follow this approach in "[PATCH v4 26/44]
> i965/fs: Optimize 16-bit SSBO stores by packing two into a 32-bit reg"
> using a slots_per_component that is just 2 for 64-bit and 1 for the
> other bit sizes. But I like your approach.
>
> >/* Combine groups of consecutive enabled channels in one write
> > * message. We use ffs to find the first enabled channel and
> > then ffs on
> > @@ -4093,12 +4094,19 @@ fs_visitor::nir_emit_intrinsic(const
> > fs_builder , nir_intrinsic_instr *instr
> >   unsigned first_component = ffs(writemask) - 1;
> >   unsigned length = ffs(~(writemask >> first_component)) - 1;
> >
> >
> > If the one above is first_component, num_components would be a better
> > name for this one.  It's very confusing to have something generically
> > named "length" in a piece of code with so many different possible units.
>
> It was also confusing to me. What about renaming it to
> num_consecutive_components, as that is what it is really calculating? That
> way we don't confuse it with the num_components of instr.
>

Hrm... That would work I suppose.  Not a huge deal in any case.


> > - /* We can't write more than 2 64-bit components at once.
> > Limit the
> > -  * length of the write to what we can do and let the next
> > iteration
> > -  * handle the rest
> > -  */
> > - if (type_size > 4)
> > + if (type_size > 4) {
> > +/* We can't write more than 2 64-bit components at
> > once. Limit
> > + * the length of the write to what we can do and let
> > the next
> > + * iteration handle the rest.
> > + */
> >  length = MIN2(2, length);
> > + } else if (type_size == 2) {
> >
> >
> 

Re: [Mesa-dev] [PATCH 2/2] r600/atomic: add cayman version of atomic save/restore from GDS

2017-12-01 Thread Nicolai Hähnle

On 01.12.2017 06:06, Dave Airlie wrote:

From: Dave Airlie 

On Cayman we don't use the append/consume counters (fglrx doesn't)
and they don't seem to work well with compute shaders.

This just uses GDS instead to do the atomic operations.


Interesting. This is kind of what I'd have expected to be used from the 
beginning at least for GCN.


Don't you still need to use an EOS event for proper synchronization? I 
mean, I guess you looked at fglrx traces, but still... CP_DMA definitely 
isn't waiting for shaders on newer hardware, and I don't know why it 
would do that on older hardware.


FWIW, I don't have the packet specification for pre-GCN hardware here, 
but on GCN it should be:


radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, 0) | pkt_flags);
radeon_emit(cs, EVENT_TYPE(event) | EVENT_INDEX(6));
radeon_emit(cs, (dst_offset) & 0x);
radeon_emit(cs, (1 << 29) | ((dst_offset >> 32) & 0x));
radeon_emit(cs, (gds_index & 0x) | (num_dwords << 16));

to copy GDS data to memory at EOS.

Cheers,
Nicolai



Signed-off-by: Dave Airlie 
---
  src/gallium/drivers/r600/evergreen_state.c | 60 +++-
  src/gallium/drivers/r600/r600_shader.c | 91 +++---
  2 files changed, 129 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 850165b30b..c44ed27b2c 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2659,6 +2659,7 @@ static void cayman_init_atom_start_cs(struct r600_context 
*rctx)
r600_store_value(cb, 0x76543210); /* 
CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 */
r600_store_value(cb, 0xfedcba98); /* 
CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 */
  
+	r600_store_context_reg(cb, R_028724_GDS_ADDR_SIZE, 0x3fff);

r600_store_context_reg_seq(cb, R_0288E8_SQ_LDS_ALLOC, 2);
r600_store_value(cb, 0); /* R_0288E8_SQ_LDS_ALLOC */
r600_store_value(cb, 0); /* R_0288EC_SQ_LDS_ALLOC_PS */
@@ -4502,6 +4503,51 @@ static void evergreen_emit_event_write_eos(struct 
r600_context *rctx,
radeon_emit(cs, reloc);
  }
  
+/* writes count from a buffer into GDS */

+static void cayman_write_count_to_gds(struct r600_context *rctx,
+ struct r600_shader_atomic *atomic,
+ struct r600_resource *resource,
+ uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   unsigned reloc = radeon_add_to_buffer_list(>b, >b.gfx,
+  resource,
+  RADEON_USAGE_READ,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
+   radeon_emit(cs, dst_offset & 0x);
+   radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_DST_SEL(1) | ((dst_offset 
>> 32) & 0xff));// GDS
+   radeon_emit(cs, atomic->hw_idx * 4);
+   radeon_emit(cs, 0);
+   radeon_emit(cs, PKT3_CP_DMA_CMD_DAS | 4);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
+static void cayman_read_count_from_gds(struct r600_context *rctx,
+   struct r600_shader_atomic *atomic,
+   struct r600_resource *resource,
+   uint32_t pkt_flags)
+{
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
+   unsigned reloc = radeon_add_to_buffer_list(>b, >b.gfx,
+  resource,
+  RADEON_USAGE_WRITE,
+  
RADEON_PRIO_SHADER_RW_BUFFER);
+   uint64_t dst_offset = resource->gpu_address + (atomic->start * 4);
+
+   radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0) | pkt_flags);
+   radeon_emit(cs, atomic->hw_idx * 4);
+   radeon_emit(cs, PKT3_CP_DMA_CP_SYNC | PKT3_CP_DMA_SRC_SEL(1));// GDS
+   radeon_emit(cs, dst_offset & 0x);
+   radeon_emit(cs, (dst_offset >> 32) & 0xff);
+   radeon_emit(cs, PKT3_CP_DMA_CMD_SAS | 4);
+   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+   radeon_emit(cs, reloc);
+}
+
  bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
struct r600_shader_atomic 
*combined_atomics,
uint8_t *atomic_used_mask_p)
@@ -4549,7 +4595,10 @@ bool evergreen_emit_atomic_buffer_setup(struct 
r600_context *rctx,
struct r600_resource *resource = 
r600_resource(astate->buffer[atomic->buffer_id].buffer);
assert(resource);
  
-		evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);


Re: [Mesa-dev] [PATCH v4 28/44] i965/fs: Use untyped_surface_read for 16-bit load_ssbo

2017-12-01 Thread Jason Ekstrand
On Wed, Nov 29, 2017 at 6:57 PM, Jose Maria Casanova Crespo <
jmcasan...@igalia.com> wrote:

> SSBO loads were using byte_scattered read messages as they allow
> reading 16-bit size components. byte_scattered messages can only
> operate one component at a time so we needed to emit as many messages
> as components.
>
> But for 16-bit vec2 and vec4, whose size is a multiple of 32 bits, we can
> use the untyped_surface_read message to read pairs of 16-bit components
> using only one message. Once each pair is read, it is unshuffled to return
> the proper 16-bit components.
>
> For 16-bit scalars and 16-bit vec3s, the unpaired component is read using
> only one byte_scattered_read message.
>

My gut tells me that, for vec3's, we'd be better off with a single untyped
read than one untyped read and one byte scattered read.  Also, are there
alignment issues with untyped surface reads/writes that might cause us
problems on vec3's?  I don't know what the alignment rules are for 16-bit
vec3's in Vulkan.


> v2: Removed use of stride = 2 on sources (Jason Ekstrand)
> Rework optimization using unshuffle 16 reads (Chema Casanova)
> ---
>  src/intel/compiler/brw_fs_nir.cpp | 43 ++
> -
>  1 file changed, 33 insertions(+), 10 deletions(-)
>
> diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> index fa7aa9c247..57e79853ef 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -2354,16 +2354,39 @@ do_untyped_vector_read(const fs_builder ,
>   bld.ADD(read_offset, read_offset, brw_imm_ud(16));
>}
> } else if (type_sz(dest.type) == 2) {
> -  fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
> -  bld.MOV(read_offset, offset_reg);
> -  for (unsigned i = 0; i < num_components; i++) {
> - fs_reg read_reg = emit_byte_scattered_read(bld, surf_index,
> read_offset,
> -1 /* dims */,
> -1,
> -16 /*bit_size */,
> -BRW_PREDICATE_NONE);
> - bld.MOV(offset(dest,bld,i), subscript(read_reg, dest.type, 0));
> - bld.ADD(read_offset, read_offset, brw_imm_ud(type_sz(dest.type))
> );
> +  assert(dest.stride == 1);
> +
> +  int component_pairs = num_components / 2;
> +  /* Pairs of 16-bit components can be read with untyped read */
> +  if (component_pairs > 0) {
> + fs_reg read_result = emit_untyped_read(bld, surf_index,
> +offset_reg,
> +1 /* dims */,
> +component_pairs,
> +BRW_PREDICATE_NONE);
> + shuffle_32bit_load_result_to_16bit_data(bld,
> +   retype(dest, BRW_REGISTER_TYPE_HF),
> +   retype(read_result, BRW_REGISTER_TYPE_F),
>

I'd rather we use W and D rather than HF and F.  Rounding errors scare me.


> +   component_pairs * 2);
> +  }
> +  /* Last component of vec3 and scalar 16-bit read needs to be read
> +   * using one byte_scattered_read message
> +   */
> +  if (num_components % 2) {
> + fs_reg read_offset = bld.vgrf(BRW_REGISTER_TYPE_UD);
> + bld.ADD(read_offset,
> + offset_reg,
> + brw_imm_ud((num_components - 1) * type_sz(dest.type)));
> + fs_reg read_result = emit_byte_scattered_read(bld, surf_index,
> +   read_offset,
> +   1 /* dims */,
> +   1,
> +   16 /* bit_size */,
> +
>  BRW_PREDICATE_NONE);
> + read_result.type = dest.type;
> + read_result.stride = 2;
> +
> + bld.MOV(offset(dest, bld, num_components - 1), read_result);
>}
> } else {
>unreachable("Unsupported type");
> --
> 2.14.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >