Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-05 Thread Nicolai Hähnle

On 04.11.2015 00:47, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_blit.c | 55 ++
  1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a..e934146 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   const uint32_t *u32 = clear_value;
+   unsigned i;
+   bool clear_value_fits_dword = true;
+   uint8_t *map;
+
+   if (clear_value_size > 4)
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_value_fits_dword = false;
+   break;
+   }
+
+   /* Use CP DMA for the simple case. */
+   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
+   uint32_t value = u32[0];
+
+   switch (clear_value_size) {
+   case 1:
+   value &= 0xff;
+   value |= (value << 8) | (value << 16) | (value << 24);
+   break;
+   case 2:
+   value &= 0x;
+   value |= value << 16;
+   break;
+   }


To reduce the chance of complaints by valgrind et al:

switch (clear_value_size) {
case 1:
value = *(uint8_t *)u32;
value |= (value << 8) | (value << 16) | (value << 24);
break;
case 2:
value = *(uint16_t *)u32;
value |= value << 16;
break;
default:
value = *u32;
break;
}

Cheers,
Nicolai


+
+   sctx->b.clear_buffer(ctx, dst, offset, size, value, false);
+   return;
+   }
+
+   /* TODO: use a compute shader for other cases. */
+
+   /* Software fallback. */
+   map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst),
+ PIPE_TRANSFER_WRITE);
+   if (!map)
+   return;
+
+   map += offset;
+   size /= clear_value_size;
+   for (i = 0; i < size; i++) {
+   memcpy(map, clear_value, clear_value_size);
+   map += clear_value_size;
+   }
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-05 Thread Marek Olšák
On Thu, Nov 5, 2015 at 7:58 PM, Nicolai Hähnle  wrote:
> On 04.11.2015 00:47, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_blit.c | 55
>> ++
>>   1 file changed, 55 insertions(+)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> b/src/gallium/drivers/radeonsi/si_blit.c
>> index fce014a..e934146 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context
>> *ctx,
>> }
>>   }
>>
>> +static void si_pipe_clear_buffer(struct pipe_context *ctx,
>> +struct pipe_resource *dst,
>> +unsigned offset, unsigned size,
>> +const void *clear_value,
>> +int clear_value_size)
>> +{
>> +   struct si_context *sctx = (struct si_context*)ctx;
>> +   const uint32_t *u32 = clear_value;
>> +   unsigned i;
>> +   bool clear_value_fits_dword = true;
>> +   uint8_t *map;
>> +
>> +   if (clear_value_size > 4)
>> +   for (i = 1; i < clear_value_size / 4; i++)
>> +   if (u32[0] != u32[i]) {
>> +   clear_value_fits_dword = false;
>> +   break;
>> +   }
>> +
>> +   /* Use CP DMA for the simple case. */
>> +   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
>> +   uint32_t value = u32[0];
>> +
>> +   switch (clear_value_size) {
>> +   case 1:
>> +   value &= 0xff;
>> +   value |= (value << 8) | (value << 16) | (value <<
>> 24);
>> +   break;
>> +   case 2:
>> +   value &= 0x;
>> +   value |= value << 16;
>> +   break;
>> +   }
>
>
> To reduce the chance of complaints by valgrind et al:
>
> switch (clear_value_size) {
> case 1:
> value = *(uint8_t *)u32;
> value |= (value << 8) | (value << 16) | (value << 24);
> break;
> case 2:
> value = *(uint16_t *)u32;
> value |= value << 16;
> break;
> default:
> value = *u32;
> break;
> }

Thanks.

The preliminary plan is to use transform feedback for fills>=64 bits
(already implemented by u_blitter), and CP DMA should be used for
32-bit fills and any fills that can be lowered to 32-bit.

Unaligned 8-bit and 16-bit fills should fill the largest aligned
subrange using CP DMA. Then, the unaligned beginning and ending dwords
will be filled separately using:
COPY_DATA from mem to reg
REG_RMW to fill the requested bytes
COPY_DATA from reg to mem

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-04 Thread Ilia Mirkin
On Wed, Nov 4, 2015 at 4:08 AM, Marek Olšák  wrote:
> On Nov 4, 2015 4:02 AM, "Alex Deucher"  wrote:
>>
>> On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšák  wrote:
>> > From: Marek Olšák 
>> >
>> > ---
>> >  src/gallium/drivers/radeonsi/si_blit.c | 55
>> > ++
>> >  1 file changed, 55 insertions(+)
>> >
>> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> > b/src/gallium/drivers/radeonsi/si_blit.c
>> > index fce014a..e934146 100644
>> > --- a/src/gallium/drivers/radeonsi/si_blit.c
>> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> > @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context
>> > *ctx,
>> > }
>> >  }
>> >
>> > +static void si_pipe_clear_buffer(struct pipe_context *ctx,
>> > +struct pipe_resource *dst,
>> > +unsigned offset, unsigned size,
>> > +const void *clear_value,
>> > +int clear_value_size)
>> > +{
>> > +   struct si_context *sctx = (struct si_context*)ctx;
>> > +   const uint32_t *u32 = clear_value;
>> > +   unsigned i;
>> > +   bool clear_value_fits_dword = true;
>> > +   uint8_t *map;
>> > +
>> > +   if (clear_value_size > 4)
>> > +   for (i = 1; i < clear_value_size / 4; i++)
>> > +   if (u32[0] != u32[i]) {
>> > +   clear_value_fits_dword = false;
>> > +   break;
>> > +   }
>> > +
>> > +   /* Use CP DMA for the simple case. */
>> > +   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword)
>> > {
>> > +   uint32_t value = u32[0];
>> > +
>> > +   switch (clear_value_size) {
>> > +   case 1:
>> > +   value &= 0xff;
>> > +   value |= (value << 8) | (value << 16) | (value
>> > << 24);
>> > +   break;
>> > +   case 2:
>> > +   value &= 0x;
>> > +   value |= value << 16;
>> > +   break;
>> > +   }
>> > +
>> > +   sctx->b.clear_buffer(ctx, dst, offset, size, value,
>> > false);
>> > +   return;
>> > +   }
>> > +
>> > +   /* TODO: use a compute shader for other cases. */
>>
>> What about using SDMA?  It supports byte aligned constant fills at
>> least on CIK+.
>
> I think CP DMA supports byte aligned constant fills as well, I just need to
> test it. The bigger problem is 64 and 128 bit fills. Those can only be done
> with a shader AFAIK.

Just to briefly interject, don't forget abotu 96-bit fills for
tbo_rgb32 buffers. I assume this won't really matter if you're using a
shader.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-04 Thread Marek Olšák
On Nov 4, 2015 4:02 AM, "Alex Deucher"  wrote:
>
> On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšák  wrote:
> > From: Marek Olšák 
> >
> > ---
> >  src/gallium/drivers/radeonsi/si_blit.c | 55
++
> >  1 file changed, 55 insertions(+)
> >
> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c
b/src/gallium/drivers/radeonsi/si_blit.c
> > index fce014a..e934146 100644
> > --- a/src/gallium/drivers/radeonsi/si_blit.c
> > +++ b/src/gallium/drivers/radeonsi/si_blit.c
> > @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context
*ctx,
> > }
> >  }
> >
> > +static void si_pipe_clear_buffer(struct pipe_context *ctx,
> > +struct pipe_resource *dst,
> > +unsigned offset, unsigned size,
> > +const void *clear_value,
> > +int clear_value_size)
> > +{
> > +   struct si_context *sctx = (struct si_context*)ctx;
> > +   const uint32_t *u32 = clear_value;
> > +   unsigned i;
> > +   bool clear_value_fits_dword = true;
> > +   uint8_t *map;
> > +
> > +   if (clear_value_size > 4)
> > +   for (i = 1; i < clear_value_size / 4; i++)
> > +   if (u32[0] != u32[i]) {
> > +   clear_value_fits_dword = false;
> > +   break;
> > +   }
> > +
> > +   /* Use CP DMA for the simple case. */
> > +   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword)
{
> > +   uint32_t value = u32[0];
> > +
> > +   switch (clear_value_size) {
> > +   case 1:
> > +   value &= 0xff;
> > +   value |= (value << 8) | (value << 16) | (value
<< 24);
> > +   break;
> > +   case 2:
> > +   value &= 0x;
> > +   value |= value << 16;
> > +   break;
> > +   }
> > +
> > +   sctx->b.clear_buffer(ctx, dst, offset, size, value,
false);
> > +   return;
> > +   }
> > +
> > +   /* TODO: use a compute shader for other cases. */
>
> What about using SDMA?  It supports byte aligned constant fills at
> least on CIK+.

I think CP DMA supports byte aligned constant fills as well, I just need to
test it. The bigger problem is 64 and 128 bit fills. Those can only be done
with a shader AFAIK.

>
>
> > +
> > +   /* Software fallback. */
> > +   map = r600_buffer_map_sync_with_rings(>b,
r600_resource(dst),
> > + PIPE_TRANSFER_WRITE);
> > +   if (!map)
> > +   return;
> > +
> > +   map += offset;
> > +   size /= clear_value_size;
> > +   for (i = 0; i < size; i++) {
> > +   memcpy(map, clear_value, clear_value_size);
> > +   map += clear_value_size;
> > +   }
> > +}
> > +
> >  void si_init_blit_functions(struct si_context *sctx)
> >  {
> > sctx->b.b.clear = si_clear;
> > +   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
> > sctx->b.b.clear_render_target = si_clear_render_target;
> > sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
> > sctx->b.b.resource_copy_region = si_resource_copy_region;
> > --
> > 2.1.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-03 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_blit.c | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a..e934146 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx,
}
 }
 
+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   const uint32_t *u32 = clear_value;
+   unsigned i;
+   bool clear_value_fits_dword = true;
+   uint8_t *map;
+
+   if (clear_value_size > 4)
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_value_fits_dword = false;
+   break;
+   }
+
+   /* Use CP DMA for the simple case. */
+   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
+   uint32_t value = u32[0];
+
+   switch (clear_value_size) {
+   case 1:
+   value &= 0xff;
+   value |= (value << 8) | (value << 16) | (value << 24);
+   break;
+   case 2:
+   value &= 0x;
+   value |= value << 16;
+   break;
+   }
+
+   sctx->b.clear_buffer(ctx, dst, offset, size, value, false);
+   return;
+   }
+
+   /* TODO: use a compute shader for other cases. */
+
+   /* Software fallback. */
+   map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst),
+ PIPE_TRANSFER_WRITE);
+   if (!map)
+   return;
+
+   map += offset;
+   size /= clear_value_size;
+   for (i = 0; i < size; i++) {
+   memcpy(map, clear_value, clear_value_size);
+   map += clear_value_size;
+   }
+}
+
 void si_init_blit_functions(struct si_context *sctx)
 {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-03 Thread Alex Deucher
On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> ---
>  src/gallium/drivers/radeonsi/si_blit.c | 55 
> ++
>  1 file changed, 55 insertions(+)
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
> b/src/gallium/drivers/radeonsi/si_blit.c
> index fce014a..e934146 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx,
> }
>  }
>
> +static void si_pipe_clear_buffer(struct pipe_context *ctx,
> +struct pipe_resource *dst,
> +unsigned offset, unsigned size,
> +const void *clear_value,
> +int clear_value_size)
> +{
> +   struct si_context *sctx = (struct si_context*)ctx;
> +   const uint32_t *u32 = clear_value;
> +   unsigned i;
> +   bool clear_value_fits_dword = true;
> +   uint8_t *map;
> +
> +   if (clear_value_size > 4)
> +   for (i = 1; i < clear_value_size / 4; i++)
> +   if (u32[0] != u32[i]) {
> +   clear_value_fits_dword = false;
> +   break;
> +   }
> +
> +   /* Use CP DMA for the simple case. */
> +   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
> +   uint32_t value = u32[0];
> +
> +   switch (clear_value_size) {
> +   case 1:
> +   value &= 0xff;
> +   value |= (value << 8) | (value << 16) | (value << 24);
> +   break;
> +   case 2:
> +   value &= 0x;
> +   value |= value << 16;
> +   break;
> +   }
> +
> +   sctx->b.clear_buffer(ctx, dst, offset, size, value, false);
> +   return;
> +   }
> +
> +   /* TODO: use a compute shader for other cases. */

What about using SDMA?  It supports byte aligned constant fills at
least on CIK+.

Alex


> +
> +   /* Software fallback. */
> +   map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst),
> + PIPE_TRANSFER_WRITE);
> +   if (!map)
> +   return;
> +
> +   map += offset;
> +   size /= clear_value_size;
> +   for (i = 0; i < size; i++) {
> +   memcpy(map, clear_value, clear_value_size);
> +   map += clear_value_size;
> +   }
> +}
> +
>  void si_init_blit_functions(struct si_context *sctx)
>  {
> sctx->b.b.clear = si_clear;
> +   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
> sctx->b.b.clear_render_target = si_clear_render_target;
> sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
> sctx->b.b.resource_copy_region = si_resource_copy_region;
> --
> 2.1.4
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev