Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
On 04.11.2015 00:47, Marek Olšák wrote: From: Marek Olšák--- src/gallium/drivers/radeonsi/si_blit.c | 55 ++ 1 file changed, 55 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index fce014a..e934146 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx, } } +static void si_pipe_clear_buffer(struct pipe_context *ctx, +struct pipe_resource *dst, +unsigned offset, unsigned size, +const void *clear_value, +int clear_value_size) +{ + struct si_context *sctx = (struct si_context*)ctx; + const uint32_t *u32 = clear_value; + unsigned i; + bool clear_value_fits_dword = true; + uint8_t *map; + + if (clear_value_size > 4) + for (i = 1; i < clear_value_size / 4; i++) + if (u32[0] != u32[i]) { + clear_value_fits_dword = false; + break; + } + + /* Use CP DMA for the simple case. */ + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) { + uint32_t value = u32[0]; + + switch (clear_value_size) { + case 1: + value &= 0xff; + value |= (value << 8) | (value << 16) | (value << 24); + break; + case 2: + value &= 0x; + value |= value << 16; + break; + } To reduce the chance of complaints by valgrind et al: switch (clear_value_size) { case 1: value = *(uint8_t *)u32; value |= (value << 8) | (value << 16) | (value << 24); break; case 2: value = *(uint16_t *)u32; value |= value << 16; break; default: value = *u32; break; } Cheers, Nicolai + + sctx->b.clear_buffer(ctx, dst, offset, size, value, false); + return; + } + + /* TODO: use a compute shader for other cases. */ + + /* Software fallback. */ + map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst), + PIPE_TRANSFER_WRITE); + if (!map) + return; + + map += offset; + size /= clear_value_size; + for (i = 0; i < size; i++) { + memcpy(map, clear_value, clear_value_size); + map += clear_value_size; + } +} + void si_init_blit_functions(struct si_context *sctx) { sctx->b.b.clear = si_clear; + sctx->b.b.clear_buffer = si_pipe_clear_buffer; sctx->b.b.clear_render_target = si_clear_render_target; sctx->b.b.clear_depth_stencil = si_clear_depth_stencil; sctx->b.b.resource_copy_region = si_resource_copy_region; ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
On Thu, Nov 5, 2015 at 7:58 PM, Nicolai Hähnlewrote: > On 04.11.2015 00:47, Marek Olšák wrote: >> >> From: Marek Olšák >> >> --- >> src/gallium/drivers/radeonsi/si_blit.c | 55 >> ++ >> 1 file changed, 55 insertions(+) >> >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c >> b/src/gallium/drivers/radeonsi/si_blit.c >> index fce014a..e934146 100644 >> --- a/src/gallium/drivers/radeonsi/si_blit.c >> +++ b/src/gallium/drivers/radeonsi/si_blit.c >> @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context >> *ctx, >> } >> } >> >> +static void si_pipe_clear_buffer(struct pipe_context *ctx, >> +struct pipe_resource *dst, >> +unsigned offset, unsigned size, >> +const void *clear_value, >> +int clear_value_size) >> +{ >> + struct si_context *sctx = (struct si_context*)ctx; >> + const uint32_t *u32 = clear_value; >> + unsigned i; >> + bool clear_value_fits_dword = true; >> + uint8_t *map; >> + >> + if (clear_value_size > 4) >> + for (i = 1; i < clear_value_size / 4; i++) >> + if (u32[0] != u32[i]) { >> + clear_value_fits_dword = false; >> + break; >> + } >> + >> + /* Use CP DMA for the simple case. */ >> + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) { >> + uint32_t value = u32[0]; >> + >> + switch (clear_value_size) { >> + case 1: >> + value &= 0xff; >> + value |= (value << 8) | (value << 16) | (value << >> 24); >> + break; >> + case 2: >> + value &= 0x; >> + value |= value << 16; >> + break; >> + } > > > To reduce the chance of complaints by valgrind et al: > > switch (clear_value_size) { > case 1: > value = *(uint8_t *)u32; > value |= (value << 8) | (value << 16) | (value << 24); > break; > case 2: > value = *(uint16_t *)u32; > value |= value << 16; > break; > default: > value = *u32; > break; > } Thanks. The preliminary plan is to use transform feedback for fills>=64 bits (already implemented by u_blitter), and CP DMA should be used for 32-bit fills and any fills that can be lowered to 32-bit. Unaligned 8-bit and 16-bit fills should fill the largest aligned subrange using CP DMA. Then, the unaligned beginning and ending dwords will be filled separately using: COPY_DATA from mem to reg REG_RMW to fill the requested bytes COPY_DATA from reg to mem Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
On Wed, Nov 4, 2015 at 4:08 AM, Marek Olšákwrote: > On Nov 4, 2015 4:02 AM, "Alex Deucher" wrote: >> >> On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšák wrote: >> > From: Marek Olšák >> > >> > --- >> > src/gallium/drivers/radeonsi/si_blit.c | 55 >> > ++ >> > 1 file changed, 55 insertions(+) >> > >> > diff --git a/src/gallium/drivers/radeonsi/si_blit.c >> > b/src/gallium/drivers/radeonsi/si_blit.c >> > index fce014a..e934146 100644 >> > --- a/src/gallium/drivers/radeonsi/si_blit.c >> > +++ b/src/gallium/drivers/radeonsi/si_blit.c >> > @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context >> > *ctx, >> > } >> > } >> > >> > +static void si_pipe_clear_buffer(struct pipe_context *ctx, >> > +struct pipe_resource *dst, >> > +unsigned offset, unsigned size, >> > +const void *clear_value, >> > +int clear_value_size) >> > +{ >> > + struct si_context *sctx = (struct si_context*)ctx; >> > + const uint32_t *u32 = clear_value; >> > + unsigned i; >> > + bool clear_value_fits_dword = true; >> > + uint8_t *map; >> > + >> > + if (clear_value_size > 4) >> > + for (i = 1; i < clear_value_size / 4; i++) >> > + if (u32[0] != u32[i]) { >> > + clear_value_fits_dword = false; >> > + break; >> > + } >> > + >> > + /* Use CP DMA for the simple case. */ >> > + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) >> > { >> > + uint32_t value = u32[0]; >> > + >> > + switch (clear_value_size) { >> > + case 1: >> > + value &= 0xff; >> > + value |= (value << 8) | (value << 16) | (value >> > << 24); >> > + break; >> > + case 2: >> > + value &= 0x; >> > + value |= value << 16; >> > + break; >> > + } >> > + >> > + sctx->b.clear_buffer(ctx, dst, offset, size, value, >> > false); >> > + return; >> > + } >> > + >> > + /* TODO: use a compute shader for other cases. */ >> >> What about using SDMA? It supports byte aligned constant fills at >> least on CIK+. > > I think CP DMA supports byte aligned constant fills as well, I just need to > test it. The bigger problem is 64 and 128 bit fills. Those can only be done > with a shader AFAIK. Just to briefly interject, don't forget abotu 96-bit fills for tbo_rgb32 buffers. I assume this won't really matter if you're using a shader. Cheers, -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
On Nov 4, 2015 4:02 AM, "Alex Deucher"wrote: > > On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšák wrote: > > From: Marek Olšák > > > > --- > > src/gallium/drivers/radeonsi/si_blit.c | 55 ++ > > 1 file changed, 55 insertions(+) > > > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c > > index fce014a..e934146 100644 > > --- a/src/gallium/drivers/radeonsi/si_blit.c > > +++ b/src/gallium/drivers/radeonsi/si_blit.c > > @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx, > > } > > } > > > > +static void si_pipe_clear_buffer(struct pipe_context *ctx, > > +struct pipe_resource *dst, > > +unsigned offset, unsigned size, > > +const void *clear_value, > > +int clear_value_size) > > +{ > > + struct si_context *sctx = (struct si_context*)ctx; > > + const uint32_t *u32 = clear_value; > > + unsigned i; > > + bool clear_value_fits_dword = true; > > + uint8_t *map; > > + > > + if (clear_value_size > 4) > > + for (i = 1; i < clear_value_size / 4; i++) > > + if (u32[0] != u32[i]) { > > + clear_value_fits_dword = false; > > + break; > > + } > > + > > + /* Use CP DMA for the simple case. */ > > + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) { > > + uint32_t value = u32[0]; > > + > > + switch (clear_value_size) { > > + case 1: > > + value &= 0xff; > > + value |= (value << 8) | (value << 16) | (value << 24); > > + break; > > + case 2: > > + value &= 0x; > > + value |= value << 16; > > + break; > > + } > > + > > + sctx->b.clear_buffer(ctx, dst, offset, size, value, false); > > + return; > > + } > > + > > + /* TODO: use a compute shader for other cases. */ > > What about using SDMA? It supports byte aligned constant fills at > least on CIK+. I think CP DMA supports byte aligned constant fills as well, I just need to test it. The bigger problem is 64 and 128 bit fills. Those can only be done with a shader AFAIK. > > > > + > > + /* Software fallback. */ > > + map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst), > > + PIPE_TRANSFER_WRITE); > > + if (!map) > > + return; > > + > > + map += offset; > > + size /= clear_value_size; > > + for (i = 0; i < size; i++) { > > + memcpy(map, clear_value, clear_value_size); > > + map += clear_value_size; > > + } > > +} > > + > > void si_init_blit_functions(struct si_context *sctx) > > { > > sctx->b.b.clear = si_clear; > > + sctx->b.b.clear_buffer = si_pipe_clear_buffer; > > sctx->b.b.clear_render_target = si_clear_render_target; > > sctx->b.b.clear_depth_stencil = si_clear_depth_stencil; > > sctx->b.b.resource_copy_region = si_resource_copy_region; > > -- > > 2.1.4 > > > > ___ > > mesa-dev mailing list > > mesa-dev@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
From: Marek Olšák--- src/gallium/drivers/radeonsi/si_blit.c | 55 ++ 1 file changed, 55 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index fce014a..e934146 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx, } } +static void si_pipe_clear_buffer(struct pipe_context *ctx, +struct pipe_resource *dst, +unsigned offset, unsigned size, +const void *clear_value, +int clear_value_size) +{ + struct si_context *sctx = (struct si_context*)ctx; + const uint32_t *u32 = clear_value; + unsigned i; + bool clear_value_fits_dword = true; + uint8_t *map; + + if (clear_value_size > 4) + for (i = 1; i < clear_value_size / 4; i++) + if (u32[0] != u32[i]) { + clear_value_fits_dword = false; + break; + } + + /* Use CP DMA for the simple case. */ + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) { + uint32_t value = u32[0]; + + switch (clear_value_size) { + case 1: + value &= 0xff; + value |= (value << 8) | (value << 16) | (value << 24); + break; + case 2: + value &= 0x; + value |= value << 16; + break; + } + + sctx->b.clear_buffer(ctx, dst, offset, size, value, false); + return; + } + + /* TODO: use a compute shader for other cases. */ + + /* Software fallback. */ + map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst), + PIPE_TRANSFER_WRITE); + if (!map) + return; + + map += offset; + size /= clear_value_size; + for (i = 0; i < size; i++) { + memcpy(map, clear_value, clear_value_size); + map += clear_value_size; + } +} + void si_init_blit_functions(struct si_context *sctx) { sctx->b.b.clear = si_clear; + sctx->b.b.clear_buffer = si_pipe_clear_buffer; sctx->b.b.clear_render_target = si_clear_render_target; sctx->b.b.clear_depth_stencil = si_clear_depth_stencil; sctx->b.b.resource_copy_region = si_resource_copy_region; -- 2.1.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration
On Tue, Nov 3, 2015 at 6:47 PM, Marek Olšákwrote: > From: Marek Olšák > > --- > src/gallium/drivers/radeonsi/si_blit.c | 55 > ++ > 1 file changed, 55 insertions(+) > > diff --git a/src/gallium/drivers/radeonsi/si_blit.c > b/src/gallium/drivers/radeonsi/si_blit.c > index fce014a..e934146 100644 > --- a/src/gallium/drivers/radeonsi/si_blit.c > +++ b/src/gallium/drivers/radeonsi/si_blit.c > @@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx, > } > } > > +static void si_pipe_clear_buffer(struct pipe_context *ctx, > +struct pipe_resource *dst, > +unsigned offset, unsigned size, > +const void *clear_value, > +int clear_value_size) > +{ > + struct si_context *sctx = (struct si_context*)ctx; > + const uint32_t *u32 = clear_value; > + unsigned i; > + bool clear_value_fits_dword = true; > + uint8_t *map; > + > + if (clear_value_size > 4) > + for (i = 1; i < clear_value_size / 4; i++) > + if (u32[0] != u32[i]) { > + clear_value_fits_dword = false; > + break; > + } > + > + /* Use CP DMA for the simple case. */ > + if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) { > + uint32_t value = u32[0]; > + > + switch (clear_value_size) { > + case 1: > + value &= 0xff; > + value |= (value << 8) | (value << 16) | (value << 24); > + break; > + case 2: > + value &= 0x; > + value |= value << 16; > + break; > + } > + > + sctx->b.clear_buffer(ctx, dst, offset, size, value, false); > + return; > + } > + > + /* TODO: use a compute shader for other cases. */ What about using SDMA? It supports byte aligned constant fills at least on CIK+. Alex > + > + /* Software fallback. */ > + map = r600_buffer_map_sync_with_rings(>b, r600_resource(dst), > + PIPE_TRANSFER_WRITE); > + if (!map) > + return; > + > + map += offset; > + size /= clear_value_size; > + for (i = 0; i < size; i++) { > + memcpy(map, clear_value, clear_value_size); > + map += clear_value_size; > + } > +} > + > void si_init_blit_functions(struct si_context *sctx) > { > sctx->b.b.clear = si_clear; > + sctx->b.b.clear_buffer = si_pipe_clear_buffer; > sctx->b.b.clear_render_target = si_clear_render_target; > sctx->b.b.clear_depth_stencil = si_clear_depth_stencil; > sctx->b.b.resource_copy_region = si_resource_copy_region; > -- > 2.1.4 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev