It's pushed already. Marek
On Mon, Feb 11, 2019, 11:41 PM Dieter Nützel <[email protected]> wrote: > Maybe rebase? > > Dieter > > Am 24.01.2019 00:28, schrieb Marek Olšák: > > From: Sonny Jiang <[email protected]> > > > > Signed-off-by: Sonny Jiang <[email protected]> > > Signed-off-by: Marek Olšák <[email protected]> > > --- > > src/gallium/drivers/radeonsi/si_clear.c | 6 ++ > > .../drivers/radeonsi/si_compute_blit.c | 96 +++++++++++++++++++ > > src/gallium/drivers/radeonsi/si_pipe.c | 4 + > > src/gallium/drivers/radeonsi/si_pipe.h | 9 ++ > > .../drivers/radeonsi/si_shaderlib_tgsi.c | 69 +++++++++++++ > > 5 files changed, 184 insertions(+) > > > > diff --git a/src/gallium/drivers/radeonsi/si_clear.c > > b/src/gallium/drivers/radeonsi/si_clear.c > > index b3910a4651c..8afc01f2ccc 100644 > > --- a/src/gallium/drivers/radeonsi/si_clear.c > > +++ b/src/gallium/drivers/radeonsi/si_clear.c > > @@ -664,20 +664,26 @@ static void si_clear(struct pipe_context *ctx, > > unsigned buffers, > > } > > > > static void si_clear_render_target(struct pipe_context *ctx, > > struct pipe_surface *dst, > > const union pipe_color_union *color, > > unsigned dstx, unsigned dsty, > > unsigned width, unsigned height, > > bool render_condition_enabled) > > { > > struct si_context *sctx = (struct si_context *)ctx; > > + struct si_texture *sdst = (struct si_texture*)dst->texture; > > + > > + if (dst->texture->nr_samples <= 1 && !sdst->dcc_offset) { > > + si_compute_clear_render_target(ctx, dst, color, dstx, > dsty, width, > > height); > > + return; > > + } > > > > si_blitter_begin(sctx, SI_CLEAR_SURFACE | > > (render_condition_enabled ? 
0 : > SI_DISABLE_RENDER_COND)); > > util_blitter_clear_render_target(sctx->blitter, dst, color, > > dstx, dsty, width, height); > > si_blitter_end(sctx); > > } > > > > static void si_clear_depth_stencil(struct pipe_context *ctx, > > struct pipe_surface *dst, > > diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c > > b/src/gallium/drivers/radeonsi/si_compute_blit.c > > index 38c48c30be9..f06497f4dac 100644 > > --- a/src/gallium/drivers/radeonsi/si_compute_blit.c > > +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c > > @@ -18,20 +18,21 @@ > > * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT > > SHALL > > * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, > > * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT > > OR > > * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE > > OR THE > > * USE OR OTHER DEALINGS IN THE SOFTWARE. > > * > > */ > > > > #include "si_pipe.h" > > #include "util/u_format.h" > > +#include "util/format_srgb.h" > > > > /* Note: Compute shaders always use SI_COMPUTE_DST_CACHE_POLICY for > > dst > > * and L2_STREAM for src. > > */ > > static enum si_cache_policy get_cache_policy(struct si_context *sctx, > > enum si_coherency coher, > > uint64_t size) > > { > > if ((sctx->chip_class >= GFX9 && (coher == SI_COHERENCY_CB_META || > > coher == SI_COHERENCY_CP)) || > > @@ -418,10 +419,105 @@ void si_compute_copy_image(struct si_context > > *sctx, > > ctx->bind_compute_state(ctx, saved_cs); > > ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, > saved_image); > > ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); > > si_compute_internal_end(sctx); > > } > > > > void si_init_compute_blit_functions(struct si_context *sctx) > > { > > sctx->b.clear_buffer = si_pipe_clear_buffer; > > } > > + > > +/* Clear a region of a color surface to a constant value. 
*/ > > +void si_compute_clear_render_target(struct pipe_context *ctx, > > + struct pipe_surface *dstsurf, > > + const union pipe_color_union *color, > > + unsigned dstx, unsigned dsty, > > + unsigned width, unsigned height) > > +{ > > + struct si_context *sctx = (struct si_context *)ctx; > > + unsigned num_layers = dstsurf->u.tex.last_layer - > > dstsurf->u.tex.first_layer + 1; > > + unsigned data[4 + sizeof(color->ui)] = {dstx, dsty, > > dstsurf->u.tex.first_layer, 0}; > > + > > + if (width == 0 || height == 0) > > + return; > > + > > + if (util_format_is_srgb(dstsurf->format)) { > > + union pipe_color_union color_srgb; > > + for (int i = 0; i < 3; i++) > > + color_srgb.f[i] = > util_format_linear_to_srgb_float(color->f[i]); > > + color_srgb.f[3] = color->f[3]; > > + memcpy(data + 4, color_srgb.ui, sizeof(color->ui)); > > + } else { > > + memcpy(data + 4, color->ui, sizeof(color->ui)); > > + } > > + > > + si_compute_internal_begin(sctx); > > + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | > > + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, > L2_STREAM); > > + si_make_CB_shader_coherent(sctx, dstsurf->texture->nr_samples, > true); > > + > > + struct pipe_constant_buffer saved_cb = {}; > > + si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, > &saved_cb); > > + > > + struct si_images *images = &sctx->images[PIPE_SHADER_COMPUTE]; > > + struct pipe_image_view saved_image = {0}; > > + util_copy_image_view(&saved_image, &images->views[0]); > > + > > + void *saved_cs = sctx->cs_shader_state.program; > > + > > + struct pipe_constant_buffer cb = {}; > > + cb.buffer_size = sizeof(data); > > + cb.user_buffer = data; > > + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &cb); > > + > > + struct pipe_image_view image = {0}; > > + image.resource = dstsurf->texture; > > + image.shader_access = image.access = PIPE_IMAGE_ACCESS_WRITE; > > + image.format = util_format_linear(dstsurf->format); > > + image.u.tex.level = dstsurf->u.tex.level; > > + image.u.tex.first_layer 
= 0; /* 3D images ignore first_layer > > (BASE_ARRAY) */ > > + image.u.tex.last_layer = dstsurf->u.tex.last_layer; > > + > > + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image); > > + > > + struct pipe_grid_info info = {0}; > > + > > + if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) { > > + if (!sctx->cs_clear_render_target) > > + sctx->cs_clear_render_target = > si_clear_render_target_shader(ctx); > > + ctx->bind_compute_state(ctx, sctx->cs_clear_render_target); > > + info.block[0] = 8; > > + sctx->compute_last_block[0] = width % 8; > > + info.block[1] = 8; > > + sctx->compute_last_block[1] = height % 8; > > + info.block[2] = 1; > > + info.grid[0] = DIV_ROUND_UP(width, 8); > > + info.grid[1] = DIV_ROUND_UP(height, 8); > > + info.grid[2] = num_layers; > > + } else { > > + if (!sctx->cs_clear_render_target_1d_array) > > + sctx->cs_clear_render_target_1d_array = > > + > si_clear_render_target_shader_1d_array(ctx); > > + ctx->bind_compute_state(ctx, > sctx->cs_clear_render_target_1d_array); > > + info.block[0] = 64; > > + sctx->compute_last_block[0] = width % 64; > > + info.block[1] = 1; > > + info.block[2] = 1; > > + info.grid[0] = DIV_ROUND_UP(width, 64); > > + info.grid[1] = num_layers; > > + info.grid[2] = 1; > > + } > > + > > + ctx->launch_grid(ctx, &info); > > + > > + sctx->compute_last_block[0] = 0; > > + sctx->compute_last_block[1] = 0; > > + > > + sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | > > + (sctx->chip_class <= VI ? 
> SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) > > | > > + si_get_flush_flags(sctx, SI_COHERENCY_SHADER, > L2_STREAM); > > + ctx->bind_compute_state(ctx, saved_cs); > > + ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, > &saved_image); > > + ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); > > + si_compute_internal_end(sctx); > > +} > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > > b/src/gallium/drivers/radeonsi/si_pipe.c > > index 41d395d7d3f..439b550c4cf 100644 > > --- a/src/gallium/drivers/radeonsi/si_pipe.c > > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > > @@ -198,20 +198,24 @@ static void si_destroy_context(struct > > pipe_context *context) > > if (sctx->vs_blit_texcoord) > > sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord); > > if (sctx->cs_clear_buffer) > > sctx->b.delete_compute_state(&sctx->b, > sctx->cs_clear_buffer); > > if (sctx->cs_copy_buffer) > > sctx->b.delete_compute_state(&sctx->b, > sctx->cs_copy_buffer); > > if (sctx->cs_copy_image) > > sctx->b.delete_compute_state(&sctx->b, > sctx->cs_copy_image); > > if (sctx->cs_copy_image_1d_array) > > sctx->b.delete_compute_state(&sctx->b, > > sctx->cs_copy_image_1d_array); > > + if (sctx->cs_clear_render_target) > > + sctx->b.delete_compute_state(&sctx->b, > > sctx->cs_clear_render_target); > > + if (sctx->cs_clear_render_target_1d_array) > > + sctx->b.delete_compute_state(&sctx->b, > > sctx->cs_clear_render_target_1d_array); > > > > if (sctx->blitter) > > util_blitter_destroy(sctx->blitter); > > > > /* Release DCC stats. 
*/ > > for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) { > > assert(!sctx->dcc_stats[i].query_active); > > > > for (int j = 0; j < > ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++) > > if (sctx->dcc_stats[i].ps_stats[j]) > > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > > b/src/gallium/drivers/radeonsi/si_pipe.h > > index 3a5d9d2fbd2..437144316d0 100644 > > --- a/src/gallium/drivers/radeonsi/si_pipe.h > > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > > @@ -805,20 +805,22 @@ struct si_context { > > void *custom_blend_dcc_decompress; > > void *vs_blit_pos; > > void *vs_blit_pos_layered; > > void *vs_blit_color; > > void *vs_blit_color_layered; > > void *vs_blit_texcoord; > > void *cs_clear_buffer; > > void *cs_copy_buffer; > > void *cs_copy_image; > > void *cs_copy_image_1d_array; > > + void *cs_clear_render_target; > > + void *cs_clear_render_target_1d_array; > > struct si_screen *screen; > > struct pipe_debug_callback debug; > > struct ac_llvm_compiler compiler; /* only non-threaded > compilation > > */ > > struct si_shader_ctx_state fixed_func_tcs_shader; > > struct si_resource *wait_mem_scratch; > > unsigned wait_mem_number; > > uint16_t prefetch_L2_mask; > > > > bool gfx_flush_in_progress:1; > > bool gfx_last_ib_is_busy:1; > > @@ -1182,20 +1184,25 @@ void si_clear_buffer(struct si_context *sctx, > > struct pipe_resource *dst, > > void si_copy_buffer(struct si_context *sctx, > > struct pipe_resource *dst, struct pipe_resource *src, > > uint64_t dst_offset, uint64_t src_offset, unsigned > size); > > void si_compute_copy_image(struct si_context *sctx, > > struct pipe_resource *dst, > > unsigned dst_level, > > struct pipe_resource *src, > > unsigned src_level, > > unsigned dstx, unsigned dsty, unsigned dstz, > > const struct pipe_box *src_box); > > +void si_compute_clear_render_target(struct pipe_context *ctx, > > + struct pipe_surface *dstsurf, > > + const union pipe_color_union > > *color, > > + unsigned dstx, unsigned dsty, > > + unsigned width, 
unsigned height); > > void si_init_compute_blit_functions(struct si_context *sctx); > > > > /* si_cp_dma.c */ > > #define SI_CPDMA_SKIP_CHECK_CS_SPACE (1 << 0) /* don't call > > need_cs_space */ > > #define SI_CPDMA_SKIP_SYNC_AFTER (1 << 1) /* don't wait for DMA > after > > the copy */ > > #define SI_CPDMA_SKIP_SYNC_BEFORE (1 << 2) /* don't wait for DMA > > before the copy (RAW hazards) */ > > #define SI_CPDMA_SKIP_GFX_SYNC (1 << 3) /* don't flush > caches and > > don't wait for PS/CS */ > > #define SI_CPDMA_SKIP_BO_LIST_UPDATE (1 << 4) /* don't update the BO > > list */ > > #define SI_CPDMA_SKIP_ALL (SI_CPDMA_SKIP_CHECK_CS_SPACE | \ > > SI_CPDMA_SKIP_SYNC_AFTER | \ > > @@ -1297,20 +1304,22 @@ void si_resume_queries(struct si_context > > *sctx); > > > > /* si_shaderlib_tgsi.c */ > > void *si_get_blitter_vs(struct si_context *sctx, enum > > blitter_attrib_type type, > > unsigned num_layers); > > void *si_create_fixed_func_tcs(struct si_context *sctx); > > void *si_create_dma_compute_shader(struct pipe_context *ctx, > > unsigned num_dwords_per_thread, > > bool dst_stream_cache_policy, bool > is_copy); > > void *si_create_copy_image_compute_shader(struct pipe_context *ctx); > > void *si_create_copy_image_compute_shader_1d_array(struct pipe_context > > *ctx); > > +void *si_clear_render_target_shader(struct pipe_context *ctx); > > +void *si_clear_render_target_shader_1d_array(struct pipe_context > > *ctx); > > void *si_create_query_result_cs(struct si_context *sctx); > > > > /* si_test_dma.c */ > > void si_test_dma(struct si_screen *sscreen); > > > > /* si_test_clearbuffer.c */ > > void si_test_dma_perf(struct si_screen *sscreen); > > > > /* si_uvd.c */ > > struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context > > *context, > > diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c > > b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c > > index 55f96b3a25e..91a23b1d7ed 100644 > > --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c > > +++ 
b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c > > @@ -509,10 +509,79 @@ void > > *si_create_copy_image_compute_shader_1d_array(struct pipe_context > > *ctx) > > if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { > > assert(false); > > return NULL; > > } > > > > state.ir_type = PIPE_SHADER_IR_TGSI; > > state.prog = tokens; > > > > return ctx->create_compute_state(ctx, &state); > > } > > + > > +void *si_clear_render_target_shader(struct pipe_context *ctx) > > +{ > > + static const char text[] = > > + "COMP\n" > > + "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" > > + "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" > > + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" > > + "DCL SV[0], THREAD_ID\n" > > + "DCL SV[1], BLOCK_ID\n" > > + "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, > WR\n" > > + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw > > + "DCL TEMP[0..3], LOCAL\n" > > + "IMM[0] UINT32 {8, 1, 0, 0}\n" > > + "MOV TEMP[0].xyz, CONST[0][0].xyzw\n" > > + "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n" > > + "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n" > > + "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n" > > + "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 2D_ARRAY, > > PIPE_FORMAT_R32G32B32A32_FLOAT\n" > > + "END\n"; > > + > > + struct tgsi_token tokens[1024]; > > + struct pipe_compute_state state = {0}; > > + > > + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { > > + assert(false); > > + return NULL; > > + } > > + > > + state.ir_type = PIPE_SHADER_IR_TGSI; > > + state.prog = tokens; > > + > > + return ctx->create_compute_state(ctx, &state); > > +} > > + > > +/* TODO: Didn't really test 1D_ARRAY */ > > +void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx) > > +{ > > + static const char text[] = > > + "COMP\n" > > + "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n" > > + "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n" > > + "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" > > + "DCL SV[0], THREAD_ID\n" > > + "DCL SV[1], BLOCK_ID\n" > > + "DCL IMAGE[0], 1D_ARRAY, 
PIPE_FORMAT_R32G32B32A32_FLOAT, > WR\n" > > + "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw > > + "DCL TEMP[0..3], LOCAL\n" > > + "IMM[0] UINT32 {64, 1, 0, 0}\n" > > + "MOV TEMP[0].xy, CONST[0][0].xzzw\n" > > + "UMAD TEMP[1].xy, SV[1].xyzz, IMM[0].xyyy, SV[0].xyzz\n" > > + "UADD TEMP[2].xy, TEMP[1].xyzx, TEMP[0].xyzx\n" > > + "MOV TEMP[3].xyzw, CONST[0][1].xyzw\n" > > + "STORE IMAGE[0], TEMP[2].xyzz, TEMP[3], 1D_ARRAY, > > PIPE_FORMAT_R32G32B32A32_FLOAT\n" > > + "END\n"; > > + > > + struct tgsi_token tokens[1024]; > > + struct pipe_compute_state state = {0}; > > + > > + if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) { > > + assert(false); > > + return NULL; > > + } > > + > > + state.ir_type = PIPE_SHADER_IR_TGSI; > > + state.prog = tokens; > > + > > + return ctx->create_compute_state(ctx, &state); > > +} >
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
