Re: [Mesa-dev] [PATCH 6/6] radeonsi: add glClearBufferSubData acceleration

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:44, Marek Olšák wrote:

From: Marek Olšák 

Unaligned 8-bit and 16-bit clears are done in software.


I found this confusing at first. I think a better phrasing is something 
along the lines of:


8-bit and 16-bit clears which are not aligned to dwords are done in 
software.


With this, the whole series is

Reviewed-by: Nicolai Hähnle 


---
  src/gallium/drivers/radeonsi/si_blit.c | 60 ++
  1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index d320ac4..31f22c4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -737,9 +737,69 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value_ptr,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   uint32_t dword_value;
+   unsigned i;
+
+   assert(offset % clear_value_size == 0);
+   assert(size % clear_value_size == 0);
+
+   if (clear_value_size > 4) {
+   const uint32_t *u32 = clear_value_ptr;
+   bool clear_dword_duplicated = true;
+
+   /* See if we can lower large fills to dword fills. */
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_dword_duplicated = false;
+   break;
+   }
+
+   if (!clear_dword_duplicated) {
+   /* Use transform feedback for 64-bit, 96-bit, and
+* 128-bit fills.
+*/
+   union pipe_color_union clear_value;
+
+   memcpy(&clear_value, clear_value_ptr, clear_value_size);
+   si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+   util_blitter_clear_buffer(sctx->blitter, dst, offset,
+ size, clear_value_size / 4,
+ &clear_value);
+   si_blitter_end(ctx);
+   return;
+   }
+   }
+
+   /* Expand the clear value to a dword. */
+   switch (clear_value_size) {
+   case 1:
+   dword_value = *(uint8_t*)clear_value_ptr;
+   dword_value |= (dword_value << 8) |
+  (dword_value << 16) |
+  (dword_value << 24);
+   break;
+   case 2:
+   dword_value = *(uint16_t*)clear_value_ptr;
+   dword_value |= dword_value << 16;
+   break;
+   default:
+   dword_value = *(uint32_t*)clear_value_ptr;
+   }
+
+   sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/6] radeonsi: add glClearBufferSubData acceleration

2015-11-08 Thread Marek Olšák
From: Marek Olšák 

Unaligned 8-bit and 16-bit clears are done in software.
---
 src/gallium/drivers/radeonsi/si_blit.c | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index d320ac4..31f22c4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -737,9 +737,69 @@ static void si_flush_resource(struct pipe_context *ctx,
}
 }
 
+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value_ptr,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   uint32_t dword_value;
+   unsigned i;
+
+   assert(offset % clear_value_size == 0);
+   assert(size % clear_value_size == 0);
+
+   if (clear_value_size > 4) {
+   const uint32_t *u32 = clear_value_ptr;
+   bool clear_dword_duplicated = true;
+
+   /* See if we can lower large fills to dword fills. */
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_dword_duplicated = false;
+   break;
+   }
+
+   if (!clear_dword_duplicated) {
+   /* Use transform feedback for 64-bit, 96-bit, and
+* 128-bit fills.
+*/
+   union pipe_color_union clear_value;
+
+   memcpy(&clear_value, clear_value_ptr, clear_value_size);
+   si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+   util_blitter_clear_buffer(sctx->blitter, dst, offset,
+ size, clear_value_size / 4,
+ &clear_value);
+   si_blitter_end(ctx);
+   return;
+   }
+   }
+
+   /* Expand the clear value to a dword. */
+   switch (clear_value_size) {
+   case 1:
+   dword_value = *(uint8_t*)clear_value_ptr;
+   dword_value |= (dword_value << 8) |
+  (dword_value << 16) |
+  (dword_value << 24);
+   break;
+   case 2:
+   dword_value = *(uint16_t*)clear_value_ptr;
+   dword_value |= dword_value << 16;
+   break;
+   default:
+   dword_value = *(uint32_t*)clear_value_ptr;
+   }
+
+   sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
 void si_init_blit_functions(struct si_context *sctx)
 {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev