Mesa (master): radeonsi: Reinitialize all descriptors in CE preamble.
Module: Mesa Branch: master Commit: 54f755fa0fda14c578022767bcef2f27b2e89707 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=54f755fa0fda14c578022767bcef2f27b2e89707 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Jun 6 22:36:35 2016 +0200 radeonsi: Reinitialize all descriptors in CE preamble. This fixes a problem with the CE preamble and restoring only stuff in the preamble when needed. To illustrate suppose we have two graphics IB's 1 and 2, which are submitted in that order. Furthermore suppose IB 1 does not use CE RAM, but IB 2 does, and we have a context switch at the start of IB 1, but not between IB 1 and IB 2. The old code put the CE RAM loads in the preamble of IB 2. As the preamble of IB 1 does not have the loads and the preamble of IB 2 does not get executed, the old values are not loaded into CE RAM. Fix this by always restoring the entire CE RAM. v2: - Just load all descriptor set buffers instead of loading and storing the entire CE RAM. - Leave the ce_ram_dirty tracking in place for the non-preamble case. v3: - Fixed parameter alignment. - Rebased to master (Nicolai's descriptor series). 
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_descriptors.c | 14 +++--- src/gallium/drivers/radeonsi/si_hw_context.c | 3 +++ src/gallium/drivers/radeonsi/si_state.h | 1 + 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index e80db39..2d780e6 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -160,8 +160,8 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s return true; } -static void si_reinitialize_ce_ram(struct si_context *sctx, -struct si_descriptors *desc) +static void si_ce_reinitialize_descriptors(struct si_context *sctx, + struct si_descriptors *desc) { if (desc->buffer) { struct r600_resource *buffer = (struct r600_resource*)desc->buffer; @@ -186,6 +186,14 @@ static void si_reinitialize_ce_ram(struct si_context *sctx, desc->ce_ram_dirty = false; } +void si_ce_reinitialize_all_descriptors(struct si_context *sctx) +{ + int i; + + for (i = 0; i < SI_NUM_DESCS; ++i) + si_ce_reinitialize_descriptors(sctx, >descriptors[i]); +} + void si_ce_enable_loads(struct radeon_winsys_cs *ib) { radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); @@ -207,7 +215,7 @@ static bool si_upload_descriptors(struct si_context *sctx, uint32_t const* list = (uint32_t const*)desc->list; if (desc->ce_ram_dirty) - si_reinitialize_ce_ram(sctx, desc); + si_ce_reinitialize_descriptors(sctx, desc); while(desc->dirty_mask) { int begin, count; diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index fa6a2cb..d1b9851 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -213,6 +213,9 @@ void si_begin_new_cs(struct si_context *ctx) else if (ctx->ce_ib) si_ce_enable_loads(ctx->ce_ib); + 
if (ctx->ce_preamble_ib) + si_ce_reinitialize_all_descriptors(ctx); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, >framebuffer.atom); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index a4a58bb..ab34fec 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -276,6 +276,7 @@ struct si_buffer_resources { } while(0) /* si_descriptors.c */ +void si_ce_reinitialize_all_descriptors(struct si_context *sctx); void si_ce_enable_loads(struct radeon_winsys_cs *ib); void si_set_mutable_tex_desc_fields(struct r600_texture *tex, const struct radeon_surf_level *base_level_info, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Don't offset OFFCHIP_BUFFERING on pre-VI cards.
Module: Mesa Branch: master Commit: e9d3246a7a74282fbbf95ed077e3c3b7750c8ddd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9d3246a7a74282fbbf95ed077e3c3b7750c8ddd Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun May 29 18:35:22 2016 +0200 radeonsi: Don't offset OFFCHIP_BUFFERING on pre-VI cards. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96239 Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state_shaders.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8ec0470..a7af76d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1819,12 +1819,16 @@ static void si_init_tess_factor_ring(struct si_context *sctx) /* Append these registers to the init config state. */ if (sctx->b.chip_class >= CIK) { + unsigned offchip_buffering = offchip_blocks; + if(sctx->b.chip_class >= VI) + --offchip_buffering; + si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(sctx->tf_ring->width0 / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM, -S_03093C_OFFCHIP_BUFFERING(offchip_blocks - 1) | +S_03093C_OFFCHIP_BUFFERING(offchip_buffering) | S_03093C_OFFCHIP_GRANULARITY(V_03093C_X_8K_DWORDS)); } else { si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE, @@ -1832,7 +1836,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx) si_pm4_set_reg(sctx->init_config, R_0089B8_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); si_pm4_set_reg(sctx->init_config, R_0089B0_VGT_HS_OFFCHIP_PARAM, - S_0089B0_OFFCHIP_BUFFERING(offchip_blocks - 1)); + S_0089B0_OFFCHIP_BUFFERING(offchip_blocks)); } /* Flush the 
context to re-emit the init_config state. ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Decompress DCC textures in a render feedback loop.
Module: Mesa Branch: master Commit: 35818129a676502415a5f502ccd2759646066921 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=35818129a676502415a5f502ccd2759646066921 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 31 14:11:49 2016 +0200 radeonsi: Decompress DCC textures in a render feedback loop. By using a counter to quickly reject textures that are not bound to a framebuffer, the performance impact when binding sampler_views/images is not too large. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_blit.c| 100 ++ src/gallium/drivers/radeonsi/si_descriptors.c | 8 +++ src/gallium/drivers/radeonsi/si_pipe.h| 3 + src/gallium/drivers/radeonsi/si_state.c | 2 + 4 files changed, 113 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 6f3199c..3748a59 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -412,6 +412,104 @@ si_decompress_image_color_textures(struct si_context *sctx, } } +static void si_check_render_feedback_textures(struct si_context *sctx, + struct si_textures_info *textures) +{ + uint32_t mask = textures->views.desc.enabled_mask; + + while (mask) { + const struct pipe_sampler_view *view; + struct r600_texture *tex; + bool render_feedback = false; + + unsigned i = u_bit_scan(); + + view = textures->views.views[i]; + if(view->texture->target == PIPE_BUFFER) + continue; + + tex = (struct r600_texture *)view->texture; + if (!tex->dcc_offset) + continue; + + for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { + struct r600_surface * surf; + + if (!sctx->framebuffer.state.cbufs[j]) + continue; + + surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j]; + + if (tex == (struct r600_texture*)surf->base.texture && + surf->base.u.tex.level >= view->u.tex.first_level && + surf->base.u.tex.level <= view->u.tex.last_level && + 
surf->base.u.tex.first_layer <= view->u.tex.last_layer && + surf->base.u.tex.last_layer >= view->u.tex.first_layer) + render_feedback = true; + } + + if (render_feedback) { + struct si_screen *screen = sctx->screen; + r600_texture_disable_dcc(>b, tex); + } + } +} + +static void si_check_render_feedback_images(struct si_context *sctx, +struct si_images_info *images) +{ + uint32_t mask = images->desc.enabled_mask; + + while (mask) { + const struct pipe_image_view *view; + struct r600_texture *tex; + bool render_feedback = false; + + unsigned i = u_bit_scan(); + + view = >views[i]; + if (view->resource->target == PIPE_BUFFER) + continue; + + tex = (struct r600_texture *)view->resource; + if (!tex->dcc_offset) + continue; + + for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) { + struct r600_surface * surf; + + if (!sctx->framebuffer.state.cbufs[j]) + continue; + + surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j]; + + if (tex == (struct r600_texture*)surf->base.texture && + surf->base.u.tex.level == view->u.tex.level && + surf->base.u.tex.first_layer <= view->u.tex.last_layer && + surf->base.u.tex.last_layer >= view->u.tex.first_layer) + render_feedback = true; + } + + if (render_feedback) { + struct si_screen *screen = sctx->screen; + r600_texture_disable_dcc(>b, tex); + } + } +} + +static void si_check_render_feedback(struct si_context *sctx) +{ + + if (!sctx->need_check_render_feedback) + return; + + for (int i = 0; i < SI_NUM_SHADERS; ++i) { + si_check_render_feedback_images(s
Mesa (master): radeonsi: Add counter to check if a texture is bound to a framebuffer.
Module: Mesa Branch: master Commit: cbe3421f05b1a99df6df0fc93d7ce7d5071af02f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cbe3421f05b1a99df6df0fc93d7ce7d5071af02f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 31 13:44:03 2016 +0200 radeonsi: Add counter to check if a texture is bound to a framebuffer. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeon/r600_pipe_common.h | 5 + src/gallium/drivers/radeonsi/si_pipe.c| 2 ++ src/gallium/drivers/radeonsi/si_state.c | 18 ++ src/gallium/drivers/radeonsi/si_state.h | 1 + 4 files changed, 26 insertions(+) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 3e54534..084e3fb 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -260,6 +260,11 @@ struct r600_texture { uint8_t stencil_clear_value; boolnon_disp_tiling; /* R600-Cayman only */ + + /* Counter that should be non-zero if the texture is bound to a +* framebuffer. Implemented in radeonsi only. 
+*/ + uint32_tframebuffers_bound; }; struct r600_surface { diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 88f4f20..0987baf 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -40,6 +40,8 @@ static void si_destroy_context(struct pipe_context *context) struct si_context *sctx = (struct si_context *)context; int i; + si_dec_framebuffer_counters(>framebuffer.state); + si_release_all_descriptors(sctx); if (sctx->ce_suballocator) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ab321ef..ed62710 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2268,6 +2268,21 @@ static void si_init_depth_surface(struct si_context *sctx, surf->depth_initialized = true; } +void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) +{ + for (int i = 0; i < state->nr_cbufs; ++i) { + struct r600_surface *surf = NULL; + struct r600_texture *rtex; + + if (!state->cbufs[i]) + continue; + surf = (struct r600_surface*)state->cbufs[i]; + rtex = (struct r600_texture*)surf->base.texture; + + p_atomic_dec(>framebuffers_bound); + } +} + static void si_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { @@ -2298,6 +2313,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; + si_dec_framebuffer_counters(>framebuffer.state); util_copy_framebuffer_state(>framebuffer.state, state); sctx->framebuffer.spi_shader_col_format = 0; @@ -2342,6 +2358,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.compressed_cb_mask |= 1 << i; } r600_context_add_resource_size(ctx, surf->base.texture); + + p_atomic_inc(>framebuffers_bound); } /* Set the second SPI format for 
possible dual-src blending. */ if (i == 1 && surf) { diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index a3589d4..01b73f6 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -308,6 +308,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, const struct pipe_sampler_view *state, unsigned width0, unsigned height0, unsigned force_level); +void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state); /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/ddebug: Add passthrough for query_memory_info.
Module: Mesa Branch: master Commit: 4717d5a2d33cf61e65573339ba78b4016203021e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4717d5a2d33cf61e65573339ba78b4016203021e Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri May 27 13:55:56 2016 +0200 gallium/ddebug: Add passthrough for query_memory_info. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/ddebug/dd_screen.c | 9 + 1 file changed, 9 insertions(+) diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c index ebe090b..5a883bd 100644 --- a/src/gallium/drivers/ddebug/dd_screen.c +++ b/src/gallium/drivers/ddebug/dd_screen.c @@ -101,6 +101,14 @@ dd_screen_get_timestamp(struct pipe_screen *_screen) return screen->get_timestamp(screen); } +static void dd_screen_query_memory_info(struct pipe_screen *_screen, +struct pipe_memory_info *info) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->query_memory_info(screen, info); +} + static struct pipe_context * dd_screen_context_create(struct pipe_screen *_screen, void *priv, unsigned flags) @@ -332,6 +340,7 @@ ddebug_screen_create(struct pipe_screen *screen) dscreen->base.get_paramf = dd_screen_get_paramf; dscreen->base.get_compute_param = dd_screen_get_compute_param; dscreen->base.get_shader_param = dd_screen_get_shader_param; + dscreen->base.query_memory_info = dd_screen_query_memory_info; /* get_video_param */ /* get_compute_param */ SCR_INIT(get_timestamp); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: enable OpenGL 4.3
Module: Mesa Branch: master Commit: 2cee0d0f9c9e9e269885b1d943ff123e033d9b52 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2cee0d0f9c9e9e269885b1d943ff123e033d9b52 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 19 00:47:49 2016 +0200 radeonsi: enable OpenGL 4.3 Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_pipe.c | 4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index eefc68a..88f4f20 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -369,6 +369,10 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return HAVE_LLVM >= 0x0309 ? 4 : 0; case PIPE_CAP_GLSL_FEATURE_LEVEL: + if (pscreen->get_shader_param(pscreen, PIPE_SHADER_COMPUTE, + PIPE_SHADER_CAP_SUPPORTED_IRS) & + (1 << PIPE_SHADER_IR_TGSI)) + return 430; return HAVE_LLVM >= 0x0309 ? 420 : HAVE_LLVM >= 0x0307 ? 410 : 330; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Allow TES distribution between shader engines.
Module: Mesa Branch: master Commit: 43d7305a405c82e81c9b7b3cc4958169b13777bb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43d7305a405c82e81c9b7b3cc4958169b13777bb Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 12 20:28:46 2016 +0200 radeonsi: Allow TES distribution between shader engines. The R_028B50_VGT_TESS_DISTRIBUTION value is copied from amdgpu-pro. Smaller values in the ACCUM fields seem to decrease the performance advantage from this patch, higher values don't seem to matter. v2: Add distribution mode field enums. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state.c | 5 src/gallium/drivers/radeonsi/si_state_draw.c| 8 + src/gallium/drivers/radeonsi/si_state_shaders.c | 39 +++-- src/gallium/drivers/radeonsi/sid.h | 3 ++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index aefa336..ab321ef 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3791,6 +3791,11 @@ static void si_init_config(struct si_context *sctx) S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); + si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, + S_028B50_ACCUM_ISOLINE(32) | + S_028B50_ACCUM_TRI(11) | + S_028B50_ACCUM_QUAD(11) | + S_028B50_DONUT_SPLIT(16)); } if (sctx->b.family == CHIP_STONEY) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c8b87a9..788869e 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -279,6 +279,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, sctx->b.family == CHIP_BONAIRE) && 
sctx->gs_shader.cso) partial_vs_wave = true; + + /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ + if (sctx->b.chip_class >= VI) { + if (sctx->gs_shader.cso) + partial_es_wave = true; + else + partial_vs_wave = true; + } } /* This is a hardware requirement. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 116bf27..c6f51ea 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -249,7 +249,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen) /* SHADER STATES */ -static void si_set_tesseval_regs(struct si_shader *shader, +static void si_set_tesseval_regs(struct si_screen *sscreen, +struct si_shader *shader, struct si_pm4_state *pm4) { struct tgsi_shader_info *info = >selector->info; @@ -257,7 +258,7 @@ static void si_set_tesseval_regs(struct si_shader *shader, unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; - unsigned type, partitioning, topology; + unsigned type, partitioning, topology, distribution_mode; switch (tes_prim_mode) { case PIPE_PRIM_LINES: @@ -299,10 +300,16 @@ static void si_set_tesseval_regs(struct si_shader *shader, else topology = V_028B6C_OUTPUT_TRIANGLE_CW; + if (sscreen->b.chip_class >= VI) + distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS; + else + distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST; + si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM, S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | - S_028B6C_TOPOLOGY(topology)); + S_028B6C_TOPOLOGY(topology) | + S_028B6C_DISTRIBUTION_MODE(distribution_mode)); } static void si_shader_ls(struct si_shader *shader) @@ -359,7 +366,7 @@ static void si_shader_hs(struct si_shader *shader) S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } -static void 
si_shader_es(struct si_shader *shader) +static void si_shader_es(struct si_s
Mesa (si-offchip-tess-v3): radeonsi: Allow TES distribution between shader engines.
Module: Mesa Branch: si-offchip-tess-v3 Commit: 9df32d1e509c7ce49ea9a89aaaff1488d4650395 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9df32d1e509c7ce49ea9a89aaaff1488d4650395 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 12 20:28:46 2016 +0200 radeonsi: Allow TES distribution between shader engines. The R_028B50_VGT_TESS_DISTRIBUTION value is copied from amdgpu-pro. Smaller values in the ACCUM fields seem to decrease the performance advantage from this patch, higher values don't seem to matter. v2: Add distribution mode field enums. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state.c | 5 src/gallium/drivers/radeonsi/si_state_draw.c| 8 + src/gallium/drivers/radeonsi/si_state_shaders.c | 39 +++-- src/gallium/drivers/radeonsi/sid.h | 3 ++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index aefa336..ab321ef 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3791,6 +3791,11 @@ static void si_init_config(struct si_context *sctx) S_028424_OVERWRITE_COMBINER_WATERMARK(4)); si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); + si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, + S_028B50_ACCUM_ISOLINE(32) | + S_028B50_ACCUM_TRI(11) | + S_028B50_ACCUM_QUAD(11) | + S_028B50_DONUT_SPLIT(16)); } if (sctx->b.family == CHIP_STONEY) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index c8b87a9..788869e 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -279,6 +279,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, sctx->b.family == CHIP_BONAIRE) && 
sctx->gs_shader.cso) partial_vs_wave = true; + + /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ + if (sctx->b.chip_class >= VI) { + if (sctx->gs_shader.cso) + partial_es_wave = true; + else + partial_vs_wave = true; + } } /* This is a hardware requirement. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 116bf27..c6f51ea 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -249,7 +249,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen) /* SHADER STATES */ -static void si_set_tesseval_regs(struct si_shader *shader, +static void si_set_tesseval_regs(struct si_screen *sscreen, +struct si_shader *shader, struct si_pm4_state *pm4) { struct tgsi_shader_info *info = >selector->info; @@ -257,7 +258,7 @@ static void si_set_tesseval_regs(struct si_shader *shader, unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING]; bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW]; bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE]; - unsigned type, partitioning, topology; + unsigned type, partitioning, topology, distribution_mode; switch (tes_prim_mode) { case PIPE_PRIM_LINES: @@ -299,10 +300,16 @@ static void si_set_tesseval_regs(struct si_shader *shader, else topology = V_028B6C_OUTPUT_TRIANGLE_CW; + if (sscreen->b.chip_class >= VI) + distribution_mode = V_028B6C_DISTRIBUTION_MODE_DONUTS; + else + distribution_mode = V_028B6C_DISTRIBUTION_MODE_NO_DIST; + si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM, S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | - S_028B6C_TOPOLOGY(topology)); + S_028B6C_TOPOLOGY(topology) | + S_028B6C_DISTRIBUTION_MODE(distribution_mode)); } static void si_shader_ls(struct si_shader *shader) @@ -359,7 +366,7 @@ static void si_shader_hs(struct si_shader *shader) S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } -static void 
si_shader_es(struct si_shader *shader) +static void si_shader_es(struct
Mesa (master): radeonsi: Store inputs to memory when not using a TCS.
Module: Mesa Branch: master Commit: 6217716e8f6091f7bbacbb9fa2f52997d3ac5d03 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6217716e8f6091f7bbacbb9fa2f52997d3ac5d03 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 3 21:31:00 2016 +0200 radeonsi: Store inputs to memory when not using a TCS. We need to copy the VS outputs to memory. I decided to do this using a shader key, as the value depends on other shaders. I also switch the fixed function TCS over to monolithic, as otherwise many of the user SGPR's need to be passed to the epilog, which increases register pressure, or complexity to avoid that. The main body of the fixed function TCS is not that interesting to precompile anyway, since we do it on demand and it is very small. v2: Use u_bit_scan64. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c| 45 + src/gallium/drivers/radeonsi/si_shader.h| 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++ 3 files changed, 49 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 83bcf5e..b04d0f7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2444,6 +2444,48 @@ handle_semantic: } } +static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset; + LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base; + uint64_t inputs; + + invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5); + + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_RW_BUFFERS); + buffer = build_indexed_load_const(ctx, rw_buffers, + lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); + + buffer_offset = LLVMGetParam(ctx->radeon_bld.main_fn, 
ctx->param_oc_lds); + + lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8); + lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id, +lds_vertex_stride, ""); + lds_base = get_tcs_in_current_patch_offset(ctx); + lds_base = LLVMBuildAdd(gallivm->builder, lds_base, lds_vertex_offset, ""); + + inputs = ctx->shader->key.tcs.epilog.inputs_to_copy; + while (inputs) { + unsigned i = u_bit_scan64(); + + LLVMValueRef lds_ptr = LLVMBuildAdd(gallivm->builder, lds_base, + lp_build_const_int32(gallivm, 4 * i), +""); + + LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(ctx, + invocation_id, + lp_build_const_int32(gallivm, i)); + + LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0, + lds_ptr); + + build_tbuffer_store_dwords(ctx, buffer, value, 4, buffer_addr, + buffer_offset, 0); + } +} + static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, @@ -2585,6 +2627,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) return; } + si_copy_tcs_inputs(bld_base); si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset); } @@ -7426,6 +7469,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, shader->key.vs.as_ls != mainp->key.vs.as_ls)) || (shader->selector->type == PIPE_SHADER_TESS_EVAL && shader->key.tes.as_es != mainp->key.tes.as_es) || + (shader->selector->type == PIPE_SHADER_TESS_CTRL && +shader->key.tcs.epilog.inputs_to_copy) || shader->selector->type == PIPE_SHADER_COMPUTE) { /* Monolithic shader (compiled as a whole, has many variants, * may take a long time to compile). diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 26be25e..67b457b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -304,6 +304,7 @@ struct si_vs_epilog_bits { /* Common TCS bits between the shader key and the epilog key. 
*/ struct si_tcs_epilog_bits { unsignedprim_mode:3; + uint64_t
Mesa (master): radeonsi: Enable dynamic HS.
Module: Mesa Branch: master Commit: fee3160af9c8c9594e7d452cf3035b03a8f4153a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fee3160af9c8c9594e7d452cf3035b03a8f4153a Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 14:59:43 2016 +0200 radeonsi: Enable dynamic HS. This allows running the TES on different CU's than the TCS which results in performance improvements. v2: Only write the control word from one invocation. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c| 19 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 11c7c38..166b2e8 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2532,7 +2532,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; LLVMValueRef out[6], vec0, vec1, rw_buffers, tf_base; unsigned stride, outer_comps, inner_comps, i; - struct lp_build_if_state if_ctx; + struct lp_build_if_state if_ctx, inner_if_ctx; /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. @@ -2604,12 +2604,23 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, byteoffset = LLVMBuildMul(gallivm->builder, rel_patch_id, lp_build_const_int32(gallivm, 4 * stride), ""); - /* Store the outputs. */ + lp_build_if(_if_ctx, gallivm, + LLVMBuildICmp(gallivm->builder, LLVMIntEQ, + rel_patch_id, bld_base->uint_bld.zero, "")); + + /* Store the dynamic HS control word. */ + build_tbuffer_store_dwords(ctx, buffer, + lp_build_const_int32(gallivm, 0x8000), + 1, lp_build_const_int32(gallivm, 0), tf_base, 0); + + lp_build_endif(_if_ctx); + + /* Store the tessellation factors. 
*/ build_tbuffer_store_dwords(ctx, buffer, vec0, - MIN2(stride, 4), byteoffset, tf_base, 0); + MIN2(stride, 4), byteoffset, tf_base, 4); if (vec1) build_tbuffer_store_dwords(ctx, buffer, vec1, - stride - 4, byteoffset, tf_base, 16); + stride - 4, byteoffset, tf_base, 20); lp_build_endif(_ctx); } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 2aecfa3..116bf27 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1882,7 +1882,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx) if (sctx->tes_shader.cso) { stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | - S_028B54_HS_EN(1); + S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); if (sctx->gs_shader.cso) stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Add offchip tessellation parameters.
Module: Mesa Branch: master Commit: 5c34562d7ce5d278c2948b4f27d16fcb3e4fd22d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c34562d7ce5d278c2948b4f27d16fcb3e4fd22d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 13:20:43 2016 +0200 radeonsi: Add offchip tessellation parameters. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c| 28 - src/gallium/drivers/radeonsi/si_shader.h| 3 ++- src/gallium/drivers/radeonsi/si_state_shaders.c | 9 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3df7820..1f162b5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -91,6 +91,12 @@ struct si_shader_context int param_tes_rel_patch_id; int param_tes_patch_id; int param_es2gs_offset; + int param_oc_lds; + + /* Sets a bit if the dynamic HS control word was 0x8000. The bit is +* 0x80 for VS, 0x1 for ES. 
+*/ + int param_tess_offchip; LLVMTargetMachineRef tm; @@ -2326,14 +2332,14 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_TESS_FACTOR_OFFSET); ret = LLVMBuildInsertValue(builder, ret, tf_soffset, - SI_TCS_NUM_USER_SGPR, ""); + SI_TCS_NUM_USER_SGPR + 1, ""); /* VGPRs */ rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id); invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id); tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset); - vgpr = SI_TCS_NUM_USER_SGPR + 1; + vgpr = SI_TCS_NUM_USER_SGPR + 2; ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); @@ -4945,7 +4951,11 @@ static void declare_streamout_params(struct si_shader_context *ctx, /* Streamout SGPRs. */ if (so->num_outputs) { - params[ctx->param_streamout_config = (*num_params)++] = i32; + if (ctx->type != PIPE_SHADER_TESS_EVAL) + params[ctx->param_streamout_config = (*num_params)++] = i32; + else + ctx->param_streamout_config = ctx->param_tess_offchip; + params[ctx->param_streamout_write_index = (*num_params)++] = i32; } /* A streamout buffer offset is loaded if the stride is non-zero. */ @@ -5065,6 +5075,7 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32; + params[ctx->param_oc_lds = SI_PARAM_TCS_OC_LDS] = ctx->i32; params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx->i32; last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; @@ -5074,8 +5085,10 @@ static void create_function(struct si_shader_context *ctx) num_params = SI_PARAM_REL_IDS+1; if (!ctx->is_monolithic) { - /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. 
*/ - for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++) + /* SI_PARAM_TCS_OC_LDS and PARAM_TESS_FACTOR_OFFSET are +* placed after the user SGPRs. +*/ + for (i = 0; i < SI_TCS_NUM_USER_SGPR + 2; i++) returns[num_returns++] = ctx->i32; /* SGPRs */ for (i = 0; i < 3; i++) @@ -5089,10 +5102,14 @@ static void create_function(struct si_shader_context *ctx) num_params = SI_PARAM_TCS_OUT_LAYOUT+1; if (shader->key.tes.as_es) { + params[ctx->param_oc_lds = num_params++] = ctx->i32; + params[ctx->param_tess_offchip = num_params++] = ctx->i32; params[ctx->param_es2gs_offset = num_params++] = ctx->i32; } else { + params[ctx->param_tess_offchip = num_params++] = ctx->i32; declare_streamout_params(ctx, >selec
Mesa (master): radeonsi: Add offchip buffer address calculation.
Module: Mesa Branch: master Commit: 7846fa876820fde373b4402e5d1cf3d24f06d11f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7846fa876820fde373b4402e5d1cf3d24f06d11f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 10 00:49:39 2016 +0200 radeonsi: Add offchip buffer address calculation. Instead of creating a memory area per patch and per vertex, we put the same attribute of every vertex & patch together. Most loads and stores access the same attribute across all lanes, only for different patches and vertices. For the TCS this results in tightly packed data for 4-component stores. For the TES this is not the case as within a patch the loads often also access the same vertex. However if there are < 4 vertices/patch, this still results in a reduction of the number of cache lines. In the LDS situation we only do better than worst case if the data per patch < 64 bytes, which due to the tessellation factors is pretty much never. We do not use hardware swizzling for this. It would slightly reduce the number of executed VALU instructions, but I had issues with increased wait times that I haven't been able to solve yet. Furthermore, the tbuffer_store intrinsic does not support both VGPR offset and an index, so we have a problem storing indirectly indexed outputs. This can be solved by temporarily storing arrays in LDS and then copying them, but I don't think that is worth the effort. The difference in VALU cycles hardware swizzling gives is about 0.2% of total busy cycles. That is without handling the array case. I chose for attributes instead of components as they are often accessed together, and the software swizzling takes VALU cycles for calculating offsets. v2: - Rename functions to get_tcs_tes_buffer_address. - multiply by 16 as late as possible. - Use tgsi_full_src_register_from_dst. - Remove some bad comments. 
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 124 +++ 1 file changed, 124 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ac42721..83bcf5e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -671,6 +671,130 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, lp_build_const_int32(gallivm, param * 4), ""); } +/* The offchip buffer layout for TCS->TES is + * + * - attribute 0 of patch 0 vertex 0 + * - attribute 0 of patch 0 vertex 1 + * - attribute 0 of patch 0 vertex 2 + * ... + * - attribute 0 of patch 1 vertex 0 + * - attribute 0 of patch 1 vertex 1 + * ... + * - attribute 1 of patch 0 vertex 0 + * - attribute 1 of patch 0 vertex 1 + * ... + * - per patch attribute 0 of patch 0 + * - per patch attribute 0 of patch 1 + * ... + * + * Note that every attribute has 4 components. 
+ */ +static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, + LLVMValueRef vertex_index, + LLVMValueRef param_index) +{ + struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; + LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; + LLVMValueRef param_stride, constant16; + + vertices_per_patch = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 6); + num_patches = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 0, 9); + total_vertices = LLVMBuildMul(gallivm->builder, vertices_per_patch, + num_patches, ""); + + constant16 = lp_build_const_int32(gallivm, 16); + if (vertex_index) { + base_addr = LLVMBuildMul(gallivm->builder, get_rel_patch_id(ctx), +vertices_per_patch, ""); + + base_addr = LLVMBuildAdd(gallivm->builder, base_addr, +vertex_index, ""); + + param_stride = total_vertices; + } else { + base_addr = get_rel_patch_id(ctx); + param_stride = num_patches; + } + + base_addr = LLVMBuildAdd(gallivm->builder, base_addr, +LLVMBuildMul(gallivm->builder, param_index, + param_stride, ""), ""); + + base_addr = LLVMBuildMul(gallivm->builder, base_addr, constant16, ""); + + if (!vertex_index) { + LLVMValueRef patch_data_offset = + unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 16, 16); + + base_addr = LLVMBuildAdd(gallivm->builder, base_addr, +
Mesa (master): radeonsi: Add buffer for offchip storage between TCS and TES.
Module: Mesa Branch: master Commit: d27ff7d6838fef2419ffb05798967e785e196afb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d27ff7d6838fef2419ffb05798967e785e196afb Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 09:54:11 2016 +0200 radeonsi: Add buffer for offchip storage between TCS and TES. The buffer is quite large, but should only be allocated if the application uses tessellation. Most non-games don't. v2: - Use the correct register for SI. - Add define for block size. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.h | 3 +++ src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++ 4 files changed, 23 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6700590..eefc68a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -48,6 +48,7 @@ static void si_destroy_context(struct pipe_context *context) pipe_resource_reference(>esgs_ring, NULL); pipe_resource_reference(>gsvs_ring, NULL); pipe_resource_reference(>tf_ring, NULL); + pipe_resource_reference(>tess_offchip_ring, NULL); pipe_resource_reference(>null_const_buf.buffer, NULL); r600_resource_reference(>border_color_buffer, NULL); free(sctx->border_color_table); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 33d3d25..e5b88c7 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -256,6 +256,7 @@ struct si_context { struct pipe_resource*esgs_ring; struct pipe_resource*gsvs_ring; struct pipe_resource*tf_ring; + struct pipe_resource*tess_offchip_ring; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ struct r600_resource*border_color_buffer; union pipe_color_union 
*border_color_map; /* in VRAM (slow access), little endian */ diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f2a3b03..a3589d4 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -40,6 +40,8 @@ #define SI_NUM_IMAGES 16 #define SI_NUM_SHADER_BUFFERS 16 +#define SI_TESS_OFFCHIP_BLOCK_SIZE (8192 * 4) + struct si_screen; struct si_shader; @@ -155,6 +157,7 @@ struct si_shader_data { /* Private read-write buffer slots. */ enum { SI_HS_RING_TESS_FACTOR, + SI_HS_RING_TESS_OFFCHIP, SI_ES_RING_ESGS, SI_GS_RING_ESGS, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 13066ff..d8ae2b2 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1770,6 +1770,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) static void si_init_tess_factor_ring(struct si_context *sctx) { + unsigned offchip_blocks = sctx->b.chip_class >= CIK ? 256 : 64; assert(!sctx->tf_ring); sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM, @@ -1780,6 +1781,14 @@ static void si_init_tess_factor_ring(struct si_context *sctx) assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0); + sctx->tess_offchip_ring = pipe_buffer_create(sctx->b.b.screen, +PIPE_BIND_CUSTOM, +PIPE_USAGE_DEFAULT, +offchip_blocks * + SI_TESS_OFFCHIP_BLOCK_SIZE); + if (!sctx->tess_offchip_ring) + return; + si_init_config_add_vgt_flush(sctx); /* Append these registers to the init config state. 
*/ @@ -1788,11 +1797,16 @@ static void si_init_tess_factor_ring(struct si_context *sctx) S_030938_SIZE(sctx->tf_ring->width0 / 4)); si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE, r600_resource(sctx->tf_ring)->gpu_address >> 8); + si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM, +S_03093C_OFFCHIP_BUFFERING(offchip_blocks - 1) | + S_03093C_OFFCHIP_GRANULARITY(V_03093C_X_8K_DWORDS)); } else { si_pm4_set_reg(sctx->init_config, R_008988_VGT_TF_RING_SIZE,
Mesa (master): radeonsi: Define build_tbuffer_store_dwords earlier to support new users.
Module: Mesa Branch: master Commit: 9fdb7787023e6032cf263d282669b31291ddb6ce URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9fdb7787023e6032cf263d282669b31291ddb6ce Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 14:20:19 2016 +0200 radeonsi: Define build_tbuffer_store_dwords earlier to support new users. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 138 +++ 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 1f162b5..6690f05 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -671,6 +671,75 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx, lp_build_const_int32(gallivm, param * 4), ""); } +/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. + * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), + * or v4i32 (num_channels=3,4). 
*/ +static void build_tbuffer_store(struct si_shader_context *ctx, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset, + unsigned dfmt, + unsigned nfmt, + unsigned offen, + unsigned idxen, + unsigned glc, + unsigned slc, + unsigned tfe) +{ + struct gallivm_state *gallivm = >radeon_bld.gallivm; + LLVMValueRef args[] = { + rsrc, + vdata, + LLVMConstInt(ctx->i32, num_channels, 0), + vaddr, + soffset, + LLVMConstInt(ctx->i32, inst_offset, 0), + LLVMConstInt(ctx->i32, dfmt, 0), + LLVMConstInt(ctx->i32, nfmt, 0), + LLVMConstInt(ctx->i32, offen, 0), + LLVMConstInt(ctx->i32, idxen, 0), + LLVMConstInt(ctx->i32, glc, 0), + LLVMConstInt(ctx->i32, slc, 0), + LLVMConstInt(ctx->i32, tfe, 0) + }; + + /* The instruction offset field has 12 bits */ + assert(offen || inst_offset < (1 << 12)); + + /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ + unsigned func = CLAMP(num_channels, 1, 3) - 1; + const char *types[] = {"i32", "v2i32", "v4i32"}; + char name[256]; + snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); + + lp_build_intrinsic(gallivm->builder, name, ctx->voidt, + args, ARRAY_SIZE(args), 0); +} + +static void build_tbuffer_store_dwords(struct si_shader_context *ctx, +LLVMValueRef rsrc, +LLVMValueRef vdata, +unsigned num_channels, +LLVMValueRef vaddr, +LLVMValueRef soffset, +unsigned inst_offset) +{ + static unsigned dfmt[] = { + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_DATA_FORMAT_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32_32 + }; + assert(num_channels >= 1 && num_channels <= 4); + + build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset, + inst_offset, dfmt[num_channels-1], + V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); +} + /** * Load from LDS. 
* @@ -1844,75 +1913,6 @@ static void si_dump_streamout(struct pipe_stream_output_info *so) } } -/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. - * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), - * or v4i32 (num_channels=3,4). */ -static void build_tbuffer_store(struct si_shader_context *ctx, - LLVMValueRef rsrc, - LLVMValueRef vdata, - unsigned num_channels, - LLVMValueRef vaddr, - LLVMValueRef soffset, - unsigned inst_offset, -
Mesa (master): radeonsi: Add buffer load functions.
Module: Mesa Branch: master Commit: 3e7a7a9a65ebc4add06a3786d641a0eff7d5068f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e7a7a9a65ebc4add06a3786d641a0eff7d5068f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 14:39:56 2016 +0200 radeonsi: Add buffer load functions. v2: - Use llvm.amdgcn.buffer.load intrinsics for new LLVM. - Code style fixes. v3: - Code style fix. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 114 +++ 1 file changed, 114 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6690f05..eb57345 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -740,6 +740,120 @@ static void build_tbuffer_store_dwords(struct si_shader_context *ctx, V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); } +static LLVMValueRef build_buffer_load(struct si_shader_context *ctx, + LLVMValueRef rsrc, + int num_channels, + LLVMValueRef vindex, + LLVMValueRef voffset, + LLVMValueRef soffset, + unsigned inst_offset, + unsigned glc, + unsigned slc) +{ + struct gallivm_state *gallivm = >radeon_bld.gallivm; + unsigned func = CLAMP(num_channels, 1, 3) - 1; + + if (HAVE_LLVM >= 0x309) { + LLVMValueRef args[] = { + LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, ""), + vindex ? 
vindex : LLVMConstInt(ctx->i32, 0, 0), + LLVMConstInt(ctx->i32, inst_offset, 0), + LLVMConstInt(ctx->i1, glc, 0), + LLVMConstInt(ctx->i1, slc, 0) + }; + + LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2), + ctx->v4f32}; + const char *type_names[] = {"f32", "v2f32", "v4f32"}; + char name[256]; + + if (voffset) { + args[2] = LLVMBuildAdd(gallivm->builder, args[2], voffset, + ""); + } + + if (soffset) { + args[2] = LLVMBuildAdd(gallivm->builder, args[2], soffset, + ""); + } + + snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s", +type_names[func]); + + return lp_build_intrinsic(gallivm->builder, name, types[func], args, + ARRAY_SIZE(args), LLVMReadOnlyAttribute | + LLVMNoUnwindAttribute); + } else { + LLVMValueRef args[] = { + LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v16i8, ""), + voffset ? voffset : vindex, + soffset, + LLVMConstInt(ctx->i32, inst_offset, 0), + LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen + LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen + LLVMConstInt(ctx->i32, glc, 0), + LLVMConstInt(ctx->i32, slc, 0), + LLVMConstInt(ctx->i32, 0, 0), // TFE + }; + + LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2), + ctx->v4i32}; + const char *type_names[] = {"i32", "v2i32", "v4i32"}; + const char *arg_type = "i32"; + char name[256]; + + if (voffset && vindex) { + LLVMValueRef vaddr[] = {vindex, voffset}; + + arg_type = "v2i32"; + args[1] = lp_build_gather_values(gallivm, vaddr, 2); + } + + snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s", +type_names[func], arg_type); + + return lp_build_intrinsic(gallivm->builder, name, types[func], args, + ARRAY_SIZE(args), LLVMReadOnlyAttribute | + LLVMNoUnwindAttribute); + } +} + +static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, +
Mesa (master): radeonsi: Remove LDS layout user SGPR's from TES.
Module: Mesa Branch: master Commit: 26f436132bbeebb7ec5efd56c1473a13719daccf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26f436132bbeebb7ec5efd56c1473a13719daccf Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 10 01:05:32 2016 +0200 radeonsi: Remove LDS layout user SGPR's from TES. They are unused. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 4 +--- src/gallium/drivers/radeonsi/si_shader.h | 15 --- src/gallium/drivers/radeonsi/si_state_draw.c | 4 +--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6694f00..11c7c38 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5414,9 +5414,7 @@ static void create_function(struct si_shader_context *ctx) case PIPE_SHADER_TESS_EVAL: params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; - params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; - params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; - num_params = SI_PARAM_TCS_OUT_LAYOUT+1; + num_params = SI_PARAM_TCS_OFFCHIP_LAYOUT+1; if (shader->key.tes.as_es) { params[ctx->param_oc_lds = num_params++] = ctx->i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 67b457b..9425b1e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -108,12 +108,12 @@ enum { /* both TCS and TES */ SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, - SI_SGPR_TCS_OUT_OFFSETS, - SI_SGPR_TCS_OUT_LAYOUT, SI_TES_NUM_USER_SGPR, /* TCS only */ - SI_SGPR_TCS_IN_LAYOUT = SI_TES_NUM_USER_SGPR, + SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR, + SI_SGPR_TCS_OUT_LAYOUT, + SI_SGPR_TCS_IN_LAYOUT, SI_TCS_NUM_USER_SGPR, /* GS limits */ @@ -155,26 +155,27 @@ enum { */ 
SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ + /* TCS only parameters. */ + /* Offsets where TCS outputs and TCS patch outputs live in LDS: * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 */ - SI_PARAM_TCS_OUT_OFFSETS, /* for TCS & TES */ + SI_PARAM_TCS_OUT_OFFSETS, /* Layout of TCS outputs / TES inputs: * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 * [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4 * [26:31] = gl_PatchVerticesIn, max = 32 */ - SI_PARAM_TCS_OUT_LAYOUT, /* for TCS & TES */ + SI_PARAM_TCS_OUT_LAYOUT, /* Layout of LS outputs / TCS inputs * [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4 * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4 */ - SI_PARAM_TCS_IN_LAYOUT, /* TCS only */ + SI_PARAM_TCS_IN_LAYOUT, - /* TCS only parameters. */ SI_PARAM_TCS_OC_LDS, SI_PARAM_TESS_FACTOR_OFFSET, SI_PARAM_PATCH_ID, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e14a1c9..6fe2619 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -201,10 +201,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, radeon_emit(cs, tcs_in_layout); /* Set them for TES. */ - radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 3); + radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1); radeon_emit(cs, offchip_layout); - radeon_emit(cs, tcs_out_offsets); - radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26)); } static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Use buffer loads and stores for passing data from TCS to TES.
Module: Mesa Branch: master Commit: a4e2146a9d24592ed7e3bf778e3c21c6cfb89330 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a4e2146a9d24592ed7e3bf778e3c21c6cfb89330 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon May 2 14:55:52 2016 +0200 radeonsi: Use buffer loads and stores for passing data from TCS to TES. We always try to use 4-component loads, as LLVM does not combine loads and they bypass the L1 cache. We can't use a similar strategy for stores and this is especially notable with the tess factors, as they are often set with separate MOV's per component in the TGSI. We keep storing to LDS and the LDS space, so we can load the outputs later, either due to the shader, or for writing the tess factors. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 66 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b04d0f7..6694f00 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1082,18 +1082,18 @@ static LLVMValueRef fetch_input_tes( enum tgsi_opcode_type type, unsigned swizzle) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef dw_addr, stride; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMValueRef rw_buffers, buffer, base, addr; - if (reg->Register.Dimension) { - stride = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 13, 8); - dw_addr = get_tcs_out_current_patch_offset(ctx); - dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr); - } + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); + buffer = build_indexed_load_const(ctx, rw_buffers, + 
lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); - return lds_load(bld_base, type, swizzle, dw_addr); + base = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds); + addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg); + + return buffer_load(bld_base, type, swizzle, buffer, base, addr); } static void store_output_tcs(struct lp_build_tgsi_context *bld_base, @@ -1102,9 +1102,12 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, LLVMValueRef dst[4]) { struct si_shader_context *ctx = si_shader_context(bld_base); + struct gallivm_state *gallivm = bld_base->base.gallivm; const struct tgsi_full_dst_register *reg = >Dst[0]; unsigned chan_index; LLVMValueRef dw_addr, stride; + LLVMValueRef rw_buffers, buffer, base, buf_addr; + LLVMValueRef values[4]; /* Only handle per-patch and per-vertex outputs here. * Vectors will be lowered to scalars and this function will be called again. @@ -1124,6 +1127,15 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr); } + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); + buffer = build_indexed_load_const(ctx, rw_buffers, + lp_build_const_int32(gallivm, SI_HS_RING_TESS_OFFCHIP)); + + base = LLVMGetParam(ctx->radeon_bld.main_fn, ctx->param_oc_lds); + buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL); + + TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) { LLVMValueRef value = dst[chan_index]; @@ -1131,6 +1143,22 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base, value = radeon_llvm_saturate(bld_base, value); lds_store(bld_base, chan_index, dw_addr, value); + + value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, ""); + values[chan_index] = value; + + if (inst->Dst[0].Register.WriteMask != 0xF) { + build_tbuffer_store_dwords(ctx, buffer, value, 1, + buf_addr, base, + 4 * chan_index); + } + } + + if (inst->Dst[0].Register.WriteMask == 0xF) { + LLVMValueRef 
value = lp_build_gather_values(bld_base->base.gallivm, + values, 4); + build_tbuffer_store_dwords(ctx,
Mesa (master): radeonsi: Add user SGPR for the layout of the offchip buffer.
Module: Mesa Branch: master Commit: c49e68dc4bcc14cac529d1e3be5fe0090ed4d146 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c49e68dc4bcc14cac529d1e3be5fe0090ed4d146 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue May 10 00:48:55 2016 +0200 radeonsi: Add user SGPR for the layout of the offchip buffer. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 3 +++ src/gallium/drivers/radeonsi/si_shader.h | 12 ++-- src/gallium/drivers/radeonsi/si_state_draw.c | 9 +++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index eb57345..ac42721 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5186,6 +5186,7 @@ static void create_function(struct si_shader_context *ctx) break; case PIPE_SHADER_TESS_CTRL: + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx->i32; @@ -5211,6 +5212,7 @@ static void create_function(struct si_shader_context *ctx) break; case PIPE_SHADER_TESS_EVAL: + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx->i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx->i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx->i32; num_params = SI_PARAM_TCS_OUT_LAYOUT+1; @@ -6768,6 +6770,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen, params[SI_PARAM_SAMPLERS] = ctx.i64; params[SI_PARAM_IMAGES] = ctx.i64; params[SI_PARAM_SHADER_BUFFERS] = ctx.i64; + params[SI_PARAM_TCS_OFFCHIP_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 
7b1cbf9..26be25e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -107,7 +107,8 @@ enum { SI_LS_NUM_USER_SGPR, /* both TCS and TES */ - SI_SGPR_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_SGPRS, + SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS, + SI_SGPR_TCS_OUT_OFFSETS, SI_SGPR_TCS_OUT_LAYOUT, SI_TES_NUM_USER_SGPR, @@ -147,11 +148,18 @@ enum { SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_START_INSTANCE + 1, /* the other VS parameters are assigned dynamically */ + /* Layout of TCS outputs in the offchip buffer +* [0:8] = the number of patches per threadgroup. +* [9:15] = the number of output vertices per patch. +* [16:31] = the offset of per patch attributes in the buffer in bytes. +*/ + SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ + /* Offsets where TCS outputs and TCS patch outputs live in LDS: * [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32 * [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32* + 32*32 */ - SI_PARAM_TCS_OUT_OFFSETS = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */ + SI_PARAM_TCS_OUT_OFFSETS, /* for TCS & TES */ /* Layout of TCS outputs / TES inputs: * [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4 diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index dab0dcc..e14a1c9 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -108,6 +108,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned input_patch_size, output_patch_size, output_patch0_offset; unsigned perpatch_output_offset, lds_size, ls_rsrc2; unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; + unsigned offchip_layout; *num_patches = 1; /* TODO: calculate this */ @@ -183,6 +184,8 @@ static void si_emit_derived_tess_state(struct si_context *sctx, ((output_vertex_size / 4) << 13); 
tcs_out_offsets = (output_patch0_offset / 16) | ((perpatch_output_offset / 16) << 16); + offchip_layout = (pervertex_output_patch_size * *num_patches << 16) | +(num_tcs_output_cp << 9) | *num_patches; /* Set them for LS. */ radeon_set_sh_reg(cs, @@ -191,13 +194,15 @@ static void si_emit_derived_tess_state(struct si_c
Mesa (master): radeonsi: Use correct parameter index for LS_OUT_LAYOUT.
Module: Mesa Branch: master Commit: d9a0c54f6f9811cfe6411a0ed4af8a1086b01b9d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9a0c54f6f9811cfe6411a0ed4af8a1086b01b9d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun May 1 20:35:40 2016 +0200 radeonsi: Use correct parameter index for LS_OUT_LAYOUT. This happens to be in the right position, but that changes when TCS/TES get new parameters. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.h | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 61ddcd1..7b1cbf9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -141,8 +141,10 @@ enum { SI_PARAM_VERTEX_BUFFERS = SI_NUM_RESOURCE_PARAMS, SI_PARAM_BASE_VERTEX, SI_PARAM_START_INSTANCE, - /* [0] = clamp vertex color */ + /* [0] = clamp vertex color, VS as VS only */ SI_PARAM_VS_STATE_BITS, + /* same value as TCS_IN_LAYOUT, VS as LS only */ + SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_START_INSTANCE + 1, /* the other VS parameters are assigned dynamically */ /* Offsets where TCS outputs and TCS patch outputs live in LDS: @@ -163,10 +165,9 @@ enum { * [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4 */ SI_PARAM_TCS_IN_LAYOUT, /* TCS only */ - SI_PARAM_LS_OUT_LAYOUT, /* same value as TCS_IN_LAYOUT, LS only */ /* TCS only parameters. */ - SI_PARAM_TCS_OC_LDS = SI_PARAM_TCS_IN_LAYOUT + 1, + SI_PARAM_TCS_OC_LDS, SI_PARAM_TESS_FACTOR_OFFSET, SI_PARAM_PATCH_ID, SI_PARAM_REL_IDS, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Add barrier before writing the tess factors.
Module: Mesa Branch: master Commit: fd0a7a382f2accea67396584826f5f8e40239ef3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd0a7a382f2accea67396584826f5f8e40239ef3 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu May 26 14:09:43 2016 +0200 radeonsi: Add barrier before writing the tess factors. The factors may be stored to LDS by another invocation than the invocation for vertex 0. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 166b2e8..5e5bf68 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -144,6 +144,10 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_shader *shader, LLVMTargetMachineRef tm); +static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, +struct lp_build_tgsi_context *bld_base, +struct lp_build_emit_data *emit_data); + /* Ideally pass the sample mask input to the PS epilog as v13, which * is its usual location, so that the shader doesn't have to add v_mov. */ @@ -2534,6 +2538,8 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, unsigned stride, outer_comps, inner_comps, i; struct lp_build_if_state if_ctx, inner_if_ctx; + si_llvm_emit_barrier(NULL, bld_base, NULL); + /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. * ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): docs: Mention GL4.3 and ES3.1 support for nvc0 and radeonsi
Module: Mesa Branch: master Commit: 65d4ba6f20a70eb741318ef0c712c834adc233e1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=65d4ba6f20a70eb741318ef0c712c834adc233e1 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat May 28 00:57:31 2016 +0200 docs: Mention GL4.3 and ES3.1 support for nvc0 and radeonsi v2: also update the introductory text. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> --- docs/relnotes/11.3.0.html | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index 0ae1a72..e16ef7d 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -22,11 +22,11 @@ People who are concerned with stability and reliability should stick with a previous release or wait for Mesa 11.3.1. -Mesa 11.3.0 implements the OpenGL 4.2 API, but the version reported by +Mesa 11.3.0 implements the OpenGL 4.3 API, but the version reported by glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. -Some drivers don't support all the features required in OpenGL 4.2. OpenGL -4.2 is only available if requested at context creation +Some drivers don't support all the features required in OpenGL 4.3. OpenGL +4.3 is only available if requested at context creation because compatibility contexts are not supported. @@ -44,7 +44,9 @@ Note: some of the new features are only available with certain drivers. -OpenGL 4.2 on nvc0, radeonsi, i965 (Gen8+) +OpenGL 4.2 on i965 (Gen8+) +OpenGL 4.3 on nvc0, radeonsi +OpenGL ES 3.1 on nvc0, radeonsi GL_ARB_ES3_1_compatibility on nvc0, radeonsi GL_ARB_compute_shader on nvc0, radeonsi, softpipe GL_ARB_cull_distance on i965/gen6+, nv50, nvc0, llvmpipe, softpipe ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: set shader calling conventions
Module: Mesa Branch: master Commit: 339335811580c522d6ff66878bc40e662739c47b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=339335811580c522d6ff66878bc40e662739c47b Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Mar 31 11:58:26 2016 +0200 radeonsi: set shader calling conventions Note that old mesa + new LLVM or new mesa + old LLVM breaks with this change and the corresponding LLVM change (D18559). For LLVM version <= 3.8 we use the old method, but we can't detect people using a post 3.8 svn version that is still too old. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Tom Stellard <thomas.stell...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 17 - 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 474154e..7174132 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -55,6 +55,13 @@ enum radeon_llvm_shader_type { RADEON_LLVM_SHADER_CS = 3, }; +enum radeon_llvm_calling_convention { + RADEON_LLVM_AMDGPU_VS = 87, + RADEON_LLVM_AMDGPU_GS = 88, + RADEON_LLVM_AMDGPU_PS = 89, + RADEON_LLVM_AMDGPU_CS = 90, +}; + void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) { char str[16]; @@ -71,27 +78,35 @@ void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) { enum radeon_llvm_shader_type llvm_type; + enum radeon_llvm_calling_convention calling_conv; switch (type) { case TGSI_PROCESSOR_VERTEX: case TGSI_PROCESSOR_TESS_CTRL: case TGSI_PROCESSOR_TESS_EVAL: llvm_type = RADEON_LLVM_SHADER_VS; + calling_conv = RADEON_LLVM_AMDGPU_VS; break; case TGSI_PROCESSOR_GEOMETRY: llvm_type = RADEON_LLVM_SHADER_GS; + calling_conv = RADEON_LLVM_AMDGPU_GS; break; 
case TGSI_PROCESSOR_FRAGMENT: llvm_type = RADEON_LLVM_SHADER_PS; + calling_conv = RADEON_LLVM_AMDGPU_PS; break; case TGSI_PROCESSOR_COMPUTE: llvm_type = RADEON_LLVM_SHADER_CS; + calling_conv = RADEON_LLVM_AMDGPU_CS; break; default: assert(0); } - radeon_llvm_add_attribute(F, "ShaderType", llvm_type); + if (HAVE_LLVM >= 0x309) + LLVMSetFunctionCallConv(F, calling_conv); + else + radeon_llvm_add_attribute(F, "ShaderType", llvm_type); } static void init_r600_target() ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: use bounded indexing for constant buffers
Module: Mesa Branch: master Commit: 713353db182dbf5d9be802aa2c1ec7d6debd07a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=713353db182dbf5d9be802aa2c1ec7d6debd07a9 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Apr 3 11:39:52 2016 +0200 radeonsi: use bounded indexing for constant buffers Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 56c5759..ca2ff4d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1328,8 +1328,9 @@ static LLVMValueRef fetch_constant( if (reg->Register.Dimension && reg->Dimension.Indirect) { LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS); LLVMValueRef index; - index = get_indirect_index(ctx, &reg->DimIndirect, - reg->Dimension.Index); + index = get_bounded_indirect_index(ctx, &reg->DimIndirect, + reg->Dimension.Index, + SI_NUM_USER_CONST_BUFFERS); bufp = build_indexed_load_const(ctx, ptr, index); } else bufp = ctx->const_buffers[buf]; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: use bounded indexing for samplers
Module: Mesa Branch: master Commit: 799789ba99f4bd27119cf46cc0e7f5384ec3d01e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=799789ba99f4bd27119cf46cc0e7f5384ec3d01e Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Apr 3 11:45:02 2016 +0200 radeonsi: use bounded indexing for samplers Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ca2ff4d..bf3f008 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3357,7 +3357,10 @@ static void tex_fetch_ptrs( const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src]; LLVMValueRef ind_index; - ind_index = get_indirect_index(ctx, &reg->Indirect, reg->Register.Index); + ind_index = get_bounded_indirect_index(ctx, + &reg->Indirect, + reg->Register.Index, + SI_NUM_USER_SAMPLERS); *res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: Add capability for ARB_robust_buffer_access_behavior.
Module: Mesa Branch: master Commit: 70dcd841f7d94a7b44b294d5264324fc5905aae8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=70dcd841f7d94a7b44b294d5264324fc5905aae8 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 12 15:00:31 2016 +0200 gallium: Add capability for ARB_robust_buffer_access_behavior. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> --- src/gallium/docs/source/screen.rst | 5 + src/gallium/drivers/freedreno/freedreno_screen.c | 1 + src/gallium/drivers/i915/i915_screen.c | 1 + src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/llvmpipe/lp_screen.c | 1 + src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r300/r300_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 1 + src/gallium/drivers/softpipe/sp_screen.c | 1 + src/gallium/drivers/svga/svga_screen.c | 1 + src/gallium/drivers/swr/swr_screen.cpp | 1 + src/gallium/drivers/vc4/vc4_screen.c | 1 + src/gallium/drivers/virgl/virgl_screen.c | 1 + src/gallium/include/pipe/p_defines.h | 1 + src/mesa/state_tracker/st_extensions.c | 1 + 18 files changed, 22 insertions(+) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 824f580..9451075 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -331,6 +331,11 @@ The integer capabilities: primitive on a layer is obtained from ``PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS`` even though it can be larger than the number of layers supported by either rendering or textures. +* ``PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR``: Implementation uses bounds + checking on resource accesses by shader if the context is created with + PIPE_CONTEXT_ROBUST_BUFFER_ACCESS. 
See the ARB_robust_buffer_access_behavior + extension for information on the required behavior for out of bounds accesses + and accesses to unbound resources. .. _pipe_capf: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 707be17..37a72f2 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -256,6 +256,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index 68e32e5..9b6a660 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -270,6 +270,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 142d6f1..538f817 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -499,6 +499,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 6a5f906..cb681ba 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -320,6 +320,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case 
PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index ece8af7..400e9f5 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -193,6 +193,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUS
Mesa (master): radeonsi: Mark ARB_robust_buffer_access_behavior as supported.
Module: Mesa Branch: master Commit: 126da23d70dccd9eb5ebe7bf26cb113193f882a4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=126da23d70dccd9eb5ebe7bf26cb113193f882a4 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Apr 3 21:49:44 2016 +0200 radeonsi: Mark ARB_robust_buffer_access_behavior as supported. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- docs/GL3.txt | 2 +- docs/relnotes/11.3.0.html | 1 + src/gallium/drivers/radeonsi/si_pipe.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/GL3.txt b/docs/GL3.txt index 066889a..423cafa 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -177,7 +177,7 @@ GL 4.3, GLSL 4.30: GL_ARB_invalidate_subdata DONE (all drivers) GL_ARB_multi_draw_indirectDONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe) GL_ARB_program_interface_queryDONE (all drivers) - GL_ARB_robust_buffer_access_behavior not started + GL_ARB_robust_buffer_access_behavior DONE (radeonsi) GL_ARB_shader_image_size DONE (i965, radeonsi, softpipe) GL_ARB_shader_storage_buffer_object DONE (i965, nvc0, softpipe) GL_ARB_stencil_texturing DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe) diff --git a/docs/relnotes/11.3.0.html b/docs/relnotes/11.3.0.html index f1d958d..f46616d 100644 --- a/docs/relnotes/11.3.0.html +++ b/docs/relnotes/11.3.0.html @@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers. 
GL_ARB_framebuffer_no_attachments on nvc0, r600, radeonsi GL_ARB_internalformat_query2 on all drivers +GL_ARB_robust_buffer_access_behavior on radeonsi GL_ARB_shader_atomic_counters on softpipe GL_ARB_shader_atomic_counter_ops on nvc0, softpipe GL_ARB_shader_image_load_store on radeonsi, softpipe diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9cfb11c..971359c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -308,6 +308,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: + case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -357,7 +358,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_STRING_MARKER: case PIPE_CAP_QUERY_BUFFER_OBJECT: - case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: return 0; case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Expose the ARB_robust_buffer_access_behavior extension.
Module: Mesa Branch: master Commit: 285dc05055f2f98137188692d4c924605e5a942d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=285dc05055f2f98137188692d4c924605e5a942d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 12 14:57:07 2016 +0200 mesa: Expose the ARB_robust_buffer_access_behavior extension. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/mesa/main/extensions_table.h | 1 + src/mesa/main/mtypes.h | 1 + src/mesa/main/version.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index ddc25d8..78899ec 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -91,6 +91,7 @@ EXT(ARB_point_sprite, ARB_point_sprite EXT(ARB_program_interface_query , dummy_true , GLL, GLC, x , x , 2012) EXT(ARB_provoking_vertex, EXT_provoking_vertex , GLL, GLC, x , x , 2009) EXT(ARB_query_buffer_object , ARB_query_buffer_object , GLL, GLC, x , x , 2013) +EXT(ARB_robust_buffer_access_behavior , ARB_robust_buffer_access_behavior , GLL, GLC, x , x , 2012) EXT(ARB_robustness , dummy_true , GLL, GLC, x , x , 2010) EXT(ARB_sample_shading , ARB_sample_shading , GLL, GLC, x , x , 2009) EXT(ARB_sampler_objects , dummy_true , GLL, GLC, x , x , 2009) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 36c6e20..e4a3036 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3766,6 +3766,7 @@ struct gl_extensions GLboolean ARB_pipeline_statistics_query; GLboolean ARB_point_sprite; GLboolean ARB_query_buffer_object; + GLboolean ARB_robust_buffer_access_behavior; GLboolean ARB_sample_shading; GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_atomic_counter_ops; diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 2af3653..b9c1bcb 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -361,7 +361,7 @@ compute_version(const struct 
gl_extensions *extensions, extensions->ARB_fragment_layer_viewport && extensions->ARB_framebuffer_no_attachments && extensions->ARB_internalformat_query2 && - /* extensions->ARB_robust_buffer_access_behavior */ 0 && + extensions->ARB_robust_buffer_access_behavior && extensions->ARB_shader_image_size && extensions->ARB_shader_storage_buffer_object && extensions->ARB_stencil_texturing && ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Synchronize a streamout write after read hazard.
Module: Mesa Branch: master Commit: fc67375379ec26eef63f8e530724cd53c97bc3d0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc67375379ec26eef63f8e530724cd53c97bc3d0 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Apr 11 15:53:43 2016 +0200 radeonsi: Synchronize a streamout write after read hazard. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_descriptors.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 6dd2e4f..b3792c2 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -883,6 +883,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, SI_CONTEXT_VS_PARTIAL_FLUSH; } + /* All readers of the streamout targets need to be finished before we can +* start writing to the targets. +*/ + if (num_targets) + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + /* Streamout buffers must be bound in 2 places: * 1) in VGT by setting the VGT_STRMOUT registers * 2) as shader resources ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: distinguish between shader IR in get_compute_param
Module: Mesa Branch: master Commit: 1a5c8c24b5791efa02a7beefa4ba1c49ae033c73 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1a5c8c24b5791efa02a7beefa4ba1c49ae033c73 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri Mar 25 02:06:50 2016 +0100 gallium: distinguish between shader IR in get_compute_param For radeonsi, native and TGSI use different compilers and this results in different limits for different IR's. The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE and MAX_THREADS_PER_BLOCK params, but I added a few others as shader related that seemed like they would also typically depend on the compiler. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/gallium/docs/source/screen.rst| 18 ++--- src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c| 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c| 1 + src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++- src/gallium/drivers/radeonsi/si_pipe.c| 2 +- src/gallium/drivers/trace/tr_screen.c | 4 ++- src/gallium/include/pipe/p_screen.h | 13 ++ src/gallium/state_trackers/clover/core/device.cpp | 31 +-- src/gallium/tests/trivial/compute.c | 4 ++- src/mesa/state_tracker/st_extensions.c| 13 +- 12 files changed, 54 insertions(+), 39 deletions(-) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 46ec381..47a19de 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -436,26 +436,26 @@ pipe_screen::get_compute_param. ``processor-arch-manufacturer-os`` that will be passed on to the compiler. This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM or PIPE_SHADER_IR_NATIVE for their preferred IR. - Value type: null-terminated string. + Value type: null-terminated string. Shader IR type dependent. 
* ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions - for grid and block coordinates. Value type: ``uint64_t``. + for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block - units. Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread - units. Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that - a single block can contain. Value type: ``uint64_t``. + a single block can contain. Value type: ``uint64_t``. Shader IR type dependent. This may be less than the product of the components of MAX_BLOCK_SIZE and is usually limited by the number of threads that can be resident simultaneously on a compute unit. * ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object allocation in bytes. Value type: ``uint64_t``. 
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 548d215..7812c82 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen, static int ilo_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 57e2899..ba5e500 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) static int nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type,
Mesa (master): gallium: add compute shader IR type
Module: Mesa Branch: master Commit: ea8f4a6b13b94eb060bff4ccc6c13efc01d2b682 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ea8f4a6b13b94eb060bff4ccc6c13efc01d2b682 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Mar 17 14:15:39 2016 +0100 gallium: add compute shader IR type Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/gallium/drivers/trace/tr_dump_state.c | 4 +++- src/gallium/include/pipe/p_state.h| 1 + src/gallium/state_trackers/clover/core/kernel.cpp | 1 + src/gallium/tests/trivial/compute.c | 1 + src/mesa/state_tracker/st_program.c | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 0627e5a..b53d7db 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -317,8 +317,10 @@ void trace_dump_compute_state(const struct pipe_compute_state *state) trace_dump_struct_begin("pipe_compute_state"); + trace_dump_member(uint, state, ir_type); + trace_dump_member_begin("prog"); - if (state->prog) { + if (state->prog && state->ir_type == PIPE_SHADER_IR_TGSI) { static char str[64 * 1024]; tgsi_dump_str(state->prog, 0, str, sizeof(str)); trace_dump_string(str); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 2e720ce..5ab5372 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -727,6 +727,7 @@ struct pipe_llvm_program_header struct pipe_compute_state { + enum pipe_shader_ir ir_type; /**< IR type contained in prog. */ const void *prog; /**< Compute program to be executed. */ unsigned req_local_mem; /**< Required size of the LOCAL resource. */ unsigned req_private_mem; /**< Required size of the PRIVATE resource. 
*/ diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index c12755b..bce3b52 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -223,6 +223,7 @@ kernel::exec_context::bind(intrusive_ptr _q, if (st) _q->pipe->delete_compute_state(_q->pipe, st); + cs.ir_type = q->device().ir_format(); cs.prog = &(msec.data[0]); cs.req_local_mem = mem_local; cs.req_input_mem = input.size(); diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index 5d5e0b0..2ddfc42 100644 --- a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -144,6 +144,7 @@ static void init_prog(struct context *ctx, unsigned local_sz, struct pipe_context *pipe = ctx->pipe; struct tgsi_token prog[1024]; struct pipe_compute_state cs = { +.ir_type = PIPE_SHADER_IR_TGSI, .prog = prog, .req_local_mem = local_sz, .req_private_mem = private_sz, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 94dc489..d2d68ac 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1463,6 +1463,7 @@ st_translate_compute_program(struct st_context *st, st_translate_program_common(st, >Base.Base, stcp->glsl_to_tgsi, ureg, TGSI_PROCESSOR_COMPUTE, ); + stcp->tgsi.ir_type = PIPE_SHADER_IR_TGSI; stcp->tgsi.prog = prog.tokens; stcp->tgsi.req_local_mem = stcp->Base.SharedSize; stcp->tgsi.req_private_mem = 0; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: add global buffer memory barrier bit
Module: Mesa Branch: master Commit: be5899dcf9a337548d8095a00060d4451b0df222 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=be5899dcf9a337548d8095a00060d4451b0df222 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Mar 24 23:11:03 2016 +0100 gallium: add global buffer memory barrier bit Currently radeonsi synchronizes after every dispatch and Clover does nothing to synchronize. This is overzealous, especially with GL compute, so add a barrier for global buffers. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/gallium/include/pipe/p_defines.h | 1 + src/gallium/state_trackers/clover/core/kernel.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 8257b4a..6f30f9e 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -368,6 +368,7 @@ enum pipe_flush_flags #define PIPE_BARRIER_IMAGE (1 << 8) #define PIPE_BARRIER_FRAMEBUFFER (1 << 9) #define PIPE_BARRIER_STREAMOUT_BUFFER (1 << 10) +#define PIPE_BARRIER_GLOBAL_BUFFER (1 << 11) /** * Resource binding flags -- state tracker must specify in advance all diff --git a/src/gallium/state_trackers/clover/core/kernel.cpp b/src/gallium/state_trackers/clover/core/kernel.cpp index bce3b52..266d50e 100644 --- a/src/gallium/state_trackers/clover/core/kernel.cpp +++ b/src/gallium/state_trackers/clover/core/kernel.cpp @@ -89,6 +89,8 @@ kernel::launch(command_queue , exec.sviews.size(), NULL); q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, exec.samplers.size(), NULL); + + q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER); exec.unbind(); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: add threads per block TGSI property
Module: Mesa Branch: master Commit: 01f993a21f859d372d68c2818d845ebf47d70492 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=01f993a21f859d372d68c2818d845ebf47d70492 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Mar 28 02:40:03 2016 +0200 gallium: add threads per block TGSI property The value 0 for unknown has been chosen so that drivers using tgsi_scan_shader do not need to detect missing properties if they zero-initialize the struct. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Ilia Mirkin <imir...@alum.mit.edu> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 3 +++ src/gallium/docs/source/tgsi.rst | 6 ++ src/gallium/include/pipe/p_shader_tokens.h | 5 - src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 ++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index ae779a8..d613f5e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -146,6 +146,9 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "NUM_CULLDIST_ENABLED", "FS_EARLY_DEPTH_STENCIL", "NEXT_SHADER", + "CS_FIXED_BLOCK_WIDTH", + "CS_FIXED_BLOCK_HEIGHT", + "CS_FIXED_BLOCK_DEPTH" }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 3ac6ba3..ac6052a 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -3220,6 +3220,12 @@ Which shader stage will MOST LIKELY follow after this shader when the shader is bound. This is only a hint to the driver and doesn't have to be precise. Only set for VS and TES. +TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH / HEIGHT / DEPTH +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Threads per block in each dimension, if known at compile time. If the block size +is known all three should be at least 1.
If it is unknown they should all be set +to 0 or not set. Texture Sampling and Texture Formats diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 5cc18a2..c25786e 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -276,7 +276,10 @@ union tgsi_immediate_data #define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16 #define TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL 17 #define TGSI_PROPERTY_NEXT_SHADER18 -#define TGSI_PROPERTY_COUNT 19 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH 19 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT 20 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH 21 +#define TGSI_PROPERTY_COUNT 22 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 23786b8..cd481c1 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5935,6 +5935,20 @@ find_array(unsigned attr, struct array_decl *arrays, unsigned count, return false; } +static void +emit_compute_block_size(const struct gl_program *program, +struct ureg_program *ureg) { + const struct gl_compute_program *cp = + (const struct gl_compute_program *)program; + + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, + cp->LocalSize[0]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, + cp->LocalSize[1]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, + cp->LocalSize[2]); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -6180,6 +6194,10 @@ st_translate_program( } } + if (procType == TGSI_PROCESSOR_COMPUTE) { + emit_compute_block_size(proginfo, ureg); + } + /* Declare address register. 
*/ if (program->num_address_regs > 0) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Add config parameter to si_shader_apply_scratch_relocs.
Module: Mesa Branch: master Commit: 38f4cee3ff36970254ddce238638c4b9724669e3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=38f4cee3ff36970254ddce238638c4b9724669e3 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 18:12:48 2016 +0200 radeonsi: Add config parameter to si_shader_apply_scratch_relocs. shader->config is not updated for compute kernels. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Tom Stellard <thomas.stell...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c| 3 ++- src/gallium/drivers/radeonsi/si_shader.h| 1 + src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 905c169..7e05be5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -221,7 +221,7 @@ static bool si_setup_compute_scratch_buffer(struct si_context *sctx, if (sctx->compute_scratch_buffer != shader->scratch_bo && scratch_needed) { uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address; - si_shader_apply_scratch_relocs(sctx, shader, scratch_va); + si_shader_apply_scratch_relocs(sctx, shader, config, scratch_va); if (si_shader_binary_upload(sctx->screen, shader)) return false; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3bf68eb..c48ae3b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5394,13 +5394,14 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary, void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, + struct si_shader_config *config, uint64_t scratch_va) { unsigned i; uint32_t scratch_rsrc_dword0 = scratch_va; uint32_t scratch_rsrc_dword1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) - | 
S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64); + | S_008F04_STRIDE(config->scratch_bytes_per_wave / 64); for (i = 0 ; i < shader->binary.reloc_count; i++) { const struct radeon_shader_reloc *reloc = diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 6ea849d..857a682 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -478,6 +478,7 @@ void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, FILE *f); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, + struct si_shader_config *config, uint64_t scratch_va); void si_shader_binary_read_config(struct radeon_shader_binary *binary, struct si_shader_config *conf, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index d560aae..49e688a 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1634,7 +1634,7 @@ static int si_update_scratch_buffer(struct si_context *sctx, assert(sctx->scratch_buffer); - si_shader_apply_scratch_relocs(sctx, shader, scratch_va); + si_shader_apply_scratch_relocs(sctx, shader, >config, scratch_va); /* Replace the shader bo with a new bo that has the relocs applied. */ r = si_shader_binary_upload(sctx->screen, shader); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Fix memory leak in error path.
Module: Mesa Branch: master Commit: 31631d8515924df0dbaefd8bea3530dc5a4d02c8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=31631d8515924df0dbaefd8bea3530dc5a4d02c8 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 20:34:04 2016 +0200 radeonsi: Fix memory leak in error path. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 7e05be5..09cbe13 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -70,6 +70,7 @@ static void *si_create_compute_state( sel.tokens = tgsi_dup_tokens(cso->prog); if (!sel.tokens) { + FREE(program); return NULL; } @@ -84,6 +85,7 @@ static void *si_create_compute_state( if (si_shader_create(sscreen, sctx->tm, >shader, >b.debug)) { FREE(sel.tokens); + FREE(program); return NULL; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/ddebug: Support compute states.
Module: Mesa Branch: master Commit: 22b35122faae0c6907a29876c33240afb345edf1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=22b35122faae0c6907a29876c33240afb345edf1 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 17:07:01 2016 +0200 gallium/ddebug: Support compute states. v2: Reuse the macro for bind & delete. Note that may not be able to share the delete long-term as pipe_compute_state contains members not in pipe_shader_state, and we need to distinguish the pointer location if we add that struct to the union. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/ddebug/dd_context.c | 56 +++-- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c index d06efbc..0f8ef18 100644 --- a/src/gallium/drivers/ddebug/dd_context.c +++ b/src/gallium/drivers/ddebug/dd_context.c @@ -250,22 +250,7 @@ DD_CSO_DELETE(vertex_elements) * shaders */ -#define DD_SHADER(NAME, name) \ - static void * \ - dd_context_create_##name##_state(struct pipe_context *_pipe, \ -const struct pipe_shader_state *state) \ - { \ - struct pipe_context *pipe = dd_context(_pipe)->pipe; \ - struct dd_state *hstate = CALLOC_STRUCT(dd_state); \ - \ - if (!hstate) \ - return NULL; \ - hstate->cso = pipe->create_##name##_state(pipe, state); \ - hstate->state.shader = *state; \ - hstate->state.shader.tokens = tgsi_dup_tokens(state->tokens); \ - return hstate; \ - } \ -\ +#define DD_SHADER_NOCREATE(NAME, name) \ static void \ dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \ { \ @@ -289,12 +274,48 @@ DD_CSO_DELETE(vertex_elements) FREE(hstate); \ } +#define DD_SHADER(NAME, name) \ + static void * \ + dd_context_create_##name##_state(struct pipe_context *_pipe, \ +const struct pipe_shader_state *state) \ + { \ + struct pipe_context *pipe = dd_context(_pipe)->pipe; \ + struct dd_state 
*hstate = CALLOC_STRUCT(dd_state); \ + \ + if (!hstate) \ + return NULL; \ + hstate->cso = pipe->create_##name##_state(pipe, state); \ + hstate->state.shader = *state; \ + hstate->state.shader.tokens = tgsi_dup_tokens(state->tokens); \ + return hstate; \ + } \ +\ + DD_SHADER_NOCREATE(NAME, name) + DD_SHADER(FRAGMENT, fs) DD_SHADER(VERTEX, vs) DD_SHADER(GEOMETRY, gs) DD_SHADER(TESS_CTRL, tcs) DD_SHADER(TESS_EVAL, tes) +static void * \ +dd_context_create_compute_state(struct pipe_context *_pipe, + const struct pipe_compute_state *state) +{ + struct pipe_context *pipe = dd_context(_pipe)->pipe; + struct dd_state *hstate = CALLOC_STRUCT(dd_state); + + if (!hstate) + return NULL; + hstate->cso = pipe->create_compute_state(pipe, state); + + if (state->ir_type == PIPE_SHADER_IR_TGSI) + hstate->state.shader.tokens = tgsi_dup_tokens(state->prog); + + return hstate; +} + +DD_SHADER_NOCREATE(COMPUTE, compute) / * immediate states @@ -703,6 +724,9 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe) CTX_INIT(create_tes_state); CTX_INIT(bind_tes_state); CTX_INIT(delete_tes_state); + CTX_INIT(create_compute_state); + CTX_INIT(bind_compute_state); + CTX_INIT(delete_compute_state); CTX_INIT(create_vertex_elements_state); CTX_INIT(bind_vertex_elements_state); CTX_INIT(delete_vertex_elements_state); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/ddebug: Implement launch_grid.
Module: Mesa Branch: master Commit: ac77fb74a018c37bbc0d42d9d4fafc1b8511ad3f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ac77fb74a018c37bbc0d42d9d4fafc1b8511ad3f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 17:37:40 2016 +0200 gallium/ddebug: Implement launch_grid. Does not implement dumping info. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/ddebug/dd_draw.c | 29 + 1 file changed, 29 insertions(+) diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c index 45e4e10..f0c8887 100644 --- a/src/gallium/drivers/ddebug/dd_draw.c +++ b/src/gallium/drivers/ddebug/dd_draw.c @@ -35,6 +35,7 @@ enum call_type { CALL_DRAW_VBO, + CALL_LAUNCH_GRID, CALL_RESOURCE_COPY_REGION, CALL_BLIT, CALL_FLUSH_RESOURCE, @@ -77,6 +78,7 @@ struct dd_call union { struct pipe_draw_info draw_vbo; + struct pipe_grid_info launch_grid; struct call_resource_copy_region resource_copy_region; struct pipe_blit_info blit; struct pipe_resource *flush_resource; @@ -372,6 +374,13 @@ dd_dump_draw_vbo(struct dd_context *dctx, struct pipe_draw_info *info, FILE *f) } static void +dd_dump_launch_grid(struct dd_context *dctx, struct pipe_grid_info *info, FILE *f) +{ + fprintf(f, "%s:\n", __func__+8); + /* TODO */ +} + +static void dd_dump_resource_copy_region(struct dd_context *dctx, struct call_resource_copy_region *info, FILE *f) @@ -485,6 +494,9 @@ dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags) case CALL_DRAW_VBO: dd_dump_draw_vbo(dctx, >info.draw_vbo, f); break; + case CALL_LAUNCH_GRID: + dd_dump_launch_grid(dctx, >info.launch_grid, f); + break; case CALL_RESOURCE_COPY_REGION: dd_dump_resource_copy_region(dctx, >info.resource_copy_region, f); break; @@ -649,6 +661,22 @@ dd_context_draw_vbo(struct pipe_context *_pipe, } static void +dd_context_launch_grid(struct pipe_context *_pipe, + const struct pipe_grid_info *info) +{ + struct 
dd_context *dctx = dd_context(_pipe); + struct pipe_context *pipe = dctx->pipe; + struct dd_call call; + + call.type = CALL_LAUNCH_GRID; + call.info.launch_grid = *info; + + dd_before_draw(dctx); + pipe->launch_grid(pipe, info); + dd_after_draw(dctx, ); +} + +static void dd_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz, @@ -789,6 +817,7 @@ dd_init_draw_functions(struct dd_context *dctx) { CTX_INIT(flush); CTX_INIT(draw_vbo); + CTX_INIT(launch_grid); CTX_INIT(resource_copy_region); CTX_INIT(blit); CTX_INIT(clear); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/ddebug: Add passthrough for get_compute_param.
Module: Mesa Branch: master Commit: 5efe477b13d2bb7e40af2b7ce67c87546bf4ea60 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5efe477b13d2bb7e40af2b7ce67c87546bf4ea60 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 17:05:19 2016 +0200 gallium/ddebug: Add passthrough for get_compute_param. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/ddebug/dd_screen.c | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c index fbc0bec..ebe090b 100644 --- a/src/gallium/drivers/ddebug/dd_screen.c +++ b/src/gallium/drivers/ddebug/dd_screen.c @@ -74,6 +74,17 @@ dd_screen_get_paramf(struct pipe_screen *_screen, } static int +dd_screen_get_compute_param(struct pipe_screen *_screen, +enum pipe_shader_ir ir_type, +enum pipe_compute_cap param, +void *ret) +{ + struct pipe_screen *screen = dd_screen(_screen)->screen; + + return screen->get_compute_param(screen, ir_type, param, ret); +} + +static int dd_screen_get_shader_param(struct pipe_screen *_screen, unsigned shader, enum pipe_shader_cap param) { @@ -319,6 +330,7 @@ ddebug_screen_create(struct pipe_screen *screen) dscreen->base.get_device_vendor = dd_screen_get_device_vendor; dscreen->base.get_param = dd_screen_get_param; dscreen->base.get_paramf = dd_screen_get_paramf; + dscreen->base.get_compute_param = dd_screen_get_compute_param; dscreen->base.get_shader_param = dd_screen_get_shader_param; /* get_video_param */ /* get_compute_param */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Compute correct LDS size for fragment shaders.
Module: Mesa Branch: master Commit: 6291f19f71d660b82cc16ca6af9da66f8fa33956 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6291f19f71d660b82cc16ca6af9da66f8fa33956 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu May 5 22:25:01 2016 +0200 radeonsi: Compute correct LDS size for fragment shaders. Not sure where the 36 came from, but we clearly need at least 48 bytes per attribute per primitive. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 49c498d..211db9f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5640,15 +5640,18 @@ static void si_shader_dump_stats(struct si_screen *sscreen, /* Compute LDS usage for PS. */ if (processor == PIPE_SHADER_FRAGMENT) { - /* The minimum usage per wave is (num_inputs * 36). The maximum -* usage is (num_inputs * 36 * 16). + /* The minimum usage per wave is (num_inputs * 48). The maximum +* usage is (num_inputs * 48 * 16). * We can get anything in between and it varies between waves. * +* The 48 bytes per input for a single primitive is equal to +* 4 bytes/component * 4 components/input * 3 points. +* * Other stages don't know the size at compile time or don't * allocate LDS per wave, but instead they do it per thread group. */ lds_per_wave = conf->lds_size * lds_increment + - align(num_inputs * 36, lds_increment); + align(num_inputs * 48, lds_increment); } /* Compute the per-SIMD wave counts. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mapi/glapi: Fix dup word typo in glapi_getproc.c
Module: Mesa Branch: master Commit: 23cf24e227cce9d71e2a7d206133d4094b2c2e1f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23cf24e227cce9d71e2a7d206133d4094b2c2e1f Author: Edward O'Callaghan Date: Sun Apr 24 12:40:41 2016 +1000 mapi/glapi: Fix dup word typo in glapi_getproc.c Signed-off-by: Edward O'Callaghan Reviewed-by: Ian Romanick --- src/mapi/glapi/glapi_getproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mapi/glapi/glapi_getproc.c b/src/mapi/glapi/glapi_getproc.c index 8f6f0a4..a50b5b3 100644 --- a/src/mapi/glapi/glapi_getproc.c +++ b/src/mapi/glapi/glapi_getproc.c @@ -325,7 +325,7 @@ set_entry_info( struct _glapi_function * entry, const char * signature, unsigned * Fill-in the dispatch stub for the named function. * * This function is intended to be called by a hardware driver. When called, - * a dispatch stub may be created created for the function. A pointer to this + * a dispatch stub may be created for the function. A pointer to this * dispatch function will be returned by glXGetProcAddress. * * \param function_names Array of pointers to function names that should ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Set declared tessellation LDS size to hardware size.
Module: Mesa Branch: master Commit: 3d21720d31a6d51702411b9aa2c0afc2639867bf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d21720d31a6d51702411b9aa2c0afc2639867bf Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri May 6 21:06:14 2016 +0200 radeonsi: Set declared tessellation LDS size to hardware size. The calculated limit gave problems on SI as it was > 32 KiB and the hardware LDS size on SI is only 32 KiB. It isn't correct anyway when processing multiple patches in a threadgroup. As we potentially have any number of patches such that the used LDS is at most the hardware LDS size, and exact size per patch is not known at compile time, this seems like the only valid bound. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_shader.c | 18 ++ 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 12ccbab..448c145 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4944,27 +4944,13 @@ static void declare_tess_lds(struct si_shader_context *ctx) { struct gallivm_state *gallivm = >radeon_bld.gallivm; LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type; - - /* This is the upper bound, maximum is 32 inputs times 32 vertices */ - unsigned vertex_data_dw_size = 32*32*4; - unsigned patch_data_dw_size = 32*4; - /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ - unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; - unsigned lds_dwords = patch_dw_size; - - if (ctx->screen->b.chip_class <= SI) { - /* This is a horrible temporary workaround to make tesselation -* not be completely broken on SI now that LLVM checks that -* the declared LDS size fits into the device maximum of 32KB. 
-*/ - lds_dwords = 8 * 1024; - } + unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768; /* The actual size is computed outside of the shader to reduce * the number of shader variants. */ ctx->lds = LLVMAddGlobalInAddressSpace(gallivm->module, - LLVMArrayType(i32, lds_dwords), + LLVMArrayType(i32, lds_size / 4), "tess_lds", LOCAL_ADDR_SPACE); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): 34 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=464cef5b06e65aa740704e4adac68b7f5fee1b88 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Mar 19 15:16:50 2016 +0100 radeonsi: enable TGSI support cap for compute shaders v2: Use chip_class instead of family. v3: Check kernel version for SI. v4: Preemptively allow amdgpu winsys for SI. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f32d5d59fff7a4ef42cd2811ef4116c5827b9a0 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 19 14:08:13 2016 +0200 radeonsi: Consider input SGPR count for compute shader SGPR count. si_shader_create corrects the SGPR count with si_fix_num_sgprs. We then recompute the rsrc1 register to use the new SGPR count. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c833ba1ab7ffe615d8c025a7452984083c1143b Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 19 13:52:32 2016 +0200 radeonsi: Add CE synchronization for compute dispatches. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e0b729c544ab0f25cd90af5daffdff0940743e14 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Apr 2 13:39:54 2016 +0200 mesa/st: enable compute shaders if images are also supported v2: Also depend on atomic counters. 
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=41d79bcbfa64f6f72b0090e12838073983ea6e5b Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Apr 2 11:37:06 2016 +0200 radeonsi: clean up compute flush Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a92c0842892bf55a82b7d95ab5a3b7dfbb83407 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Mar 27 11:14:34 2016 +0200 radeonsi: do not do two full flushes on every compute dispatch v2: Add more CS_PARTIAL_FLUSH events. Essentially every place with waits on finishing for pixel shaders also has a write after read hazard with compute shaders. Invalidating L2 waits implicitly on pixel and compute shaders, so, we don't need a CS_PARTIAL_FLUSH for switching FBO. v3: Add CS_PARTIAL_FLUSH events even if we already have INV_GLOBAL_L2. According to Marek the INV_GLOBAL_L2 events don't wait for compute shaders to finish, so wait for them explicitly. 
Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Edward O'Callaghan <eocallag...@alterapraxis.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e764ee13ae21e3c1dbda24daeb2d08c5e7c81871 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Mar 19 13:56:29 2016 +0100 radeonsi: split setting graphics and compute descriptors Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=061ce9399a08f3edd4f5af16afd36bb14d58c864 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Mar 19 18:41:20 2016 +0100 radeonsi: split texture decompression for compute shaders Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e56514f6316e48ee2231841d45695ff2b8f8b4f5 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Apr 5 17:38:38 2016 +0200 radeonsi: update predicate condition for compute dispatches Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Edward O'Callaghan <eocallag...@alterapraxis.com> URL: http://cgit.
Mesa (master): gallium/radeon: Silence possibly uninitialized variable warning.
Module: Mesa Branch: master Commit: 4abe051a3f848af074193140930dab7fdca297bc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4abe051a3f848af074193140930dab7fdca297bc Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 13:23:36 2016 +0200 gallium/radeon: Silence possibly uninitialized variable warning. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 7174132..d3f5ae3 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -100,7 +100,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) calling_conv = RADEON_LLVM_AMDGPU_CS; break; default: - assert(0); + unreachable("Unhandle shader type"); } if (HAVE_LLVM >= 0x309) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): winsys/amdgpu: Silence possibly uninitialized variable warning.
Module: Mesa Branch: master Commit: 51d1551241b8c0e41da810106cf037ce91165719 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=51d1551241b8c0e41da810106cf037ce91165719 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 13:22:08 2016 +0200 winsys/amdgpu: Silence possibly uninitialized variable warning. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index bbd29fc..69fb9bb 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -219,6 +219,9 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_ib *ib, case IB_MAIN: buffer_size = 128 * 1024 * 4; ib_size = 20 * 1024 * 4; + break; + default: + unreachable("unhandled IB type"); } ib->base.cdw = 0; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/mesa: Use correct size for compute CAPs.
Module: Mesa Branch: master Commit: 43ed1f73f8bb351b6eaec66875c51f2bad9db4eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43ed1f73f8bb351b6eaec66875c51f2bad9db4eb Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Apr 20 15:31:22 2016 +0200 st/mesa: Use correct size for compute CAPs. Some CAPs are stored as 64-bit value while Mesa stores the related constant as 32-bit value. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- src/mesa/state_tracker/st_extensions.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 939f15d..3f769b6 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1152,6 +1152,7 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_SHADER_CAP_SUPPORTED_IRS); if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { uint64_t grid_size[3], block_size[3]; + uint64_t max_local_size, max_threads_per_block; screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size); @@ -1159,10 +1160,13 @@ void st_init_extensions(struct pipe_screen *screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size); screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, - >MaxComputeWorkGroupInvocations); + _threads_per_block); screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, - >MaxComputeSharedMemorySize); + _local_size); + + consts->MaxComputeWorkGroupInvocations = max_threads_per_block; + consts->MaxComputeSharedMemorySize = max_local_size; for (i = 0; i < 3; i++) { consts->MaxComputeWorkGroupCount[i] = grid_size[i]; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Use defines for CONTEXT_CONTROL instead of magic values.
Module: Mesa Branch: master Commit: f45f54e14ac54460f2785bc14fc1f9220a0a763d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f45f54e14ac54460f2785bc14fc1f9220a0a763d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 01:19:28 2016 +0200 radeonsi: Use defines for CONTEXT_CONTROL instead of magic values. v2: Use field names provided by Nicolai. v3: Updated to use CONTEXT_CONTROL prefix. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_state.c | 4 ++-- src/gallium/drivers/radeonsi/sid.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 305a70b..8603f3e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3821,8 +3821,8 @@ static void si_init_config(struct si_context *sctx) return; si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); - si_pm4_cmd_add(pm4, 0x8000); - si_pm4_cmd_add(pm4, 0x8000); + si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); + si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); si_pm4_cmd_end(pm4, false); si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 516e114..9daefdb 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -88,6 +88,9 @@ #define PKT3_INDEX_BASE0x26 #define PKT3_DRAW_INDEX_2 0x27 #define PKT3_CONTEXT_CONTROL 0x28 +#define CONTEXT_CONTROL_LOAD_ENABLE(x) (((x) & 0x1) << 31) +#define CONTEXT_CONTROL_LOAD_CE_RAM(x) (((x) & 0x1) << 28) +#define CONTEXT_CONTROL_SHADOW_ENABLE(x) (((x) & 0x1) << 31) #define PKT3_INDEX_TYPE0x2A #define PKT3_DRAW_INDIRECT_MULTI 0x2C #define PKT3_DRAW_INDEX_AUTO 0x2D ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radeonsi: Enable loading into CE RAM.
Module: Mesa Branch: master Commit: 4d13c7c8794082400e383ac6d76eb6ba753dcb0f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d13c7c8794082400e383ac6d76eb6ba753dcb0f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Apr 21 01:22:02 2016 +0200 radeonsi: Enable loading into CE RAM. We need to enable a bit in the CONTEXT_CONTROL packet for the loads to work. v2: Style issues. Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/gallium/drivers/radeonsi/si_descriptors.c | 8 src/gallium/drivers/radeonsi/si_hw_context.c | 5 + src/gallium/drivers/radeonsi/si_state.h | 1 + 3 files changed, 14 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 1580e61..2306a8b 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -185,6 +185,14 @@ static void si_reinitialize_ce_ram(struct si_context *sctx, desc->ce_ram_dirty = false; } +void si_ce_enable_loads(struct radeon_winsys_cs *ib) +{ + radeon_emit(ib, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + radeon_emit(ib, CONTEXT_CONTROL_LOAD_ENABLE(1) | + CONTEXT_CONTROL_LOAD_CE_RAM(1)); + radeon_emit(ib, CONTEXT_CONTROL_SHADOW_ENABLE(1)); +} + static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc, struct r600_atom * atom) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e3abb7f..e6018f3 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -202,6 +202,11 @@ void si_begin_new_cs(struct si_context *ctx) if (ctx->init_config_gs_rings) si_pm4_emit(ctx, ctx->init_config_gs_rings); + if (ctx->ce_preamble_ib) + si_ce_enable_loads(ctx->ce_preamble_ib); + else if (ctx->ce_ib) + si_ce_enable_loads(ctx->ce_ib); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = 
true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index c4b2b45..cbe91dd 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -251,6 +251,7 @@ struct si_buffer_resources { } while(0) /* si_descriptors.c */ +void si_ce_enable_loads(struct radeon_winsys_cs *ib); void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, struct pipe_resource *buffer, unsigned stride, unsigned num_records, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Implement Float64 load/store var.
Module: Mesa Branch: master Commit: 03724af2629e6f67eb684fabb93f086298aeee6f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=03724af2629e6f67eb684fabb93f086298aeee6f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 8 01:31:07 2017 +0100 radv/ac: Implement Float64 load/store var. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 101 +++- 1 file changed, 48 insertions(+), 53 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index bdfad6a..4367cd1 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2305,24 +2305,31 @@ load_gs_input(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef values[4]; + LLVMValueRef values[8]; int idx = instr->variables[0]->var->data.driver_location; int ve = instr->dest.ssa.num_components; LLVMValueRef indir_index; + LLVMValueRef ret; unsigned const_index; + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && +instr->variables[0]->var->data.mode == nir_var_shader_in; + radv_get_deref_offset(ctx, >variables[0]->deref, vs_in, NULL, + _index, _index); + + if (instr->dest.ssa.bit_size == 64) + ve *= 2; + switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: if (ctx->stage == MESA_SHADER_GEOMETRY) { return load_gs_input(ctx, instr); } - radv_get_deref_offset(ctx, >variables[0]->deref, - ctx->stage == MESA_SHADER_VERTEX, NULL, - _index, _index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, ctx->stage == MESA_SHADER_VERTEX); + count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( >ac, ctx->inputs + idx + chan, count, 4, false); @@ -2333,15 +2340,13 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, } else values[chan] = 
ctx->inputs[idx + chan + const_index * 4]; } - return to_integer(ctx, ac_build_gather_values(>ac, values, ve)); break; case nir_var_local: - radv_get_deref_offset(ctx, >variables[0]->deref, false, - NULL, _index, _index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); + count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( >ac, ctx->locals + idx + chan, count, 4, true); @@ -2353,14 +2358,13 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], ""); } } - return to_integer(ctx, ac_build_gather_values(>ac, values, ve)); + break; case nir_var_shader_out: - radv_get_deref_offset(ctx, >variables[0]->deref, false, - NULL, _index, _index); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); + count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( >ac, ctx->outputs + idx + chan, count, 4, true); @@ -2374,10 +2378,8 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
Mesa (master): radv: Enable Float64 support.
Module: Mesa Branch: master Commit: 798ae37cc937c5ea92709fc0cc999590925fca61 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=798ae37cc937c5ea92709fc0cc999590925fca61 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 29 23:07:10 2017 +0100 radv: Enable Float64 support. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 2 +- src/amd/vulkan/radv_pipeline.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index da67b65..0026de5 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -463,7 +463,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderStorageImageWriteWithoutFormat = false, .shaderClipDistance = true, .shaderCullDistance = true, - .shaderFloat64= false, + .shaderFloat64= true, .shaderInt64 = false, .shaderInt16 = false, .alphaToOne = true, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 279a076..bf3007b 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -207,6 +207,7 @@ radv_shader_compile_to_nir(struct radv_device *device, } } const struct nir_spirv_supported_extensions supported_ext = { + .float64 = true }; entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Implement Float64 SSBO stores.
Module: Mesa Branch: master Commit: 91074bb11bdaf58509d95736ac27aba48c1940e9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=91074bb11bdaf58509d95736ac27aba48c1940e9 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Jan 5 01:36:26 2017 +0100 radv/ac: Implement Float64 SSBO stores. No f16 support as I'm not quite sure about alignment yet. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 17 ++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index be31585..bdfad6a 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2007,7 +2007,10 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { const char *store_name; + LLVMValueRef src_data = get_src(ctx, instr->src[0]); LLVMTypeRef data_type = ctx->f32; + int elem_size_mult = get_elem_bits(ctx, LLVMTypeOf(src_data)) / 32; + int components_32bit = elem_size_mult * instr->num_components; unsigned writemask = nir_intrinsic_write_mask(instr); LLVMValueRef base_data, base_offset; LLVMValueRef params[6]; @@ -2020,10 +2023,10 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */ params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */ - if (instr->num_components > 1) - data_type = LLVMVectorType(ctx->f32, instr->num_components); + if (components_32bit > 1) + data_type = LLVMVectorType(ctx->f32, components_32bit); - base_data = to_float(ctx, get_src(ctx, instr->src[0])); + base_data = to_float(ctx, src_data); base_data = trim_vector(ctx, base_data, instr->num_components); base_data = LLVMBuildBitCast(ctx->builder, base_data, data_type, ""); @@ -2042,6 +2045,14 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, count = 2; } + start *= elem_size_mult; + count *= elem_size_mult; + + if (count > 4) { + 
writemask |= ((1u << (count - 4)) - 1u) << (start + 4); + count = 4; + } + if (count == 4) { store_name = "llvm.amdgcn.buffer.store.v4f32"; data = base_data; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Add core Float64 support.
Module: Mesa Branch: master Commit: 29577b21230a588b048b8e445fdf0dfabc695373 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29577b21230a588b048b8e445fdf0dfabc695373 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Jan 5 01:09:12 2017 +0100 radv/ac: Add core Float64 support. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 173 ++-- 1 file changed, 129 insertions(+), 44 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c622c00..be31585 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -119,6 +119,7 @@ struct nir_to_llvm_context { LLVMTypeRef v3i32; LLVMTypeRef v4i32; LLVMTypeRef v8i32; + LLVMTypeRef f64; LLVMTypeRef f32; LLVMTypeRef f16; LLVMTypeRef v2f32; @@ -313,34 +314,78 @@ static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, return ptr; } +static LLVMTypeRef to_integer_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +{ + if (t == ctx->f16 || t == ctx->i16) + return ctx->i16; + else if (t == ctx->f32 || t == ctx->i32) + return ctx->i32; + else if (t == ctx->f64 || t == ctx->i64) + return ctx->i64; + else + unreachable("Unhandled integer size"); +} + +static LLVMTypeRef to_integer_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), + LLVMGetVectorSize(t)); + } + return to_integer_type_scalar(ctx, t); +} + static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v) { LLVMTypeRef type = LLVMTypeOf(v); - if (type == ctx->f32) { - return LLVMBuildBitCast(ctx->builder, v, ctx->i32, ""); - } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - LLVMTypeRef elem_type = LLVMGetElementType(type); - if (elem_type == ctx->f32) { - LLVMTypeRef nt = 
LLVMVectorType(ctx->i32, LLVMGetVectorSize(type)); - return LLVMBuildBitCast(ctx->builder, v, nt, ""); - } + return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), ""); +} + +static LLVMTypeRef to_float_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +{ + if (t == ctx->i16 || t == ctx->f16) + return ctx->f16; + else if (t == ctx->i32 || t == ctx->f32) + return ctx->f32; + else if (t == ctx->i64 || t == ctx->f64) + return ctx->f64; + else + unreachable("Unhandled float size"); +} + +static LLVMTypeRef to_float_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +{ + if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { + LLVMTypeRef elem_type = LLVMGetElementType(t); + return LLVMVectorType(to_float_type_scalar(ctx, elem_type), + LLVMGetVectorSize(t)); } - return v; + return to_float_type_scalar(ctx, t); } static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v) { LLVMTypeRef type = LLVMTypeOf(v); - if (type == ctx->i32) { - return LLVMBuildBitCast(ctx->builder, v, ctx->f32, ""); - } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) { - LLVMTypeRef elem_type = LLVMGetElementType(type); - if (elem_type == ctx->i32) { - LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type)); - return LLVMBuildBitCast(ctx->builder, v, nt, ""); - } - } - return v; + return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), ""); +} + +static int get_elem_bits(struct nir_to_llvm_context *ctx, LLVMTypeRef type) +{ + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + type = LLVMGetElementType(type); + + if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) + return LLVMGetIntTypeWidth(type); + + if (type == ctx->f16) + return 16; + if (type == ctx->f32) + return 32; + if (type == ctx->f64) + return 64; + + unreachable("Unhandled type kind in get_elem_bits"); } static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx, @@ -710,6 +755,7 @@ static void setup_types(struct nir_to_llvm_context *c
Mesa (master): radv/ac: Implement Float64 UBO loads.
Module: Mesa Branch: master Commit: bb1ce630026a6e15b1aaa911d64543341f485a15 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bb1ce630026a6e15b1aaa911d64543341f485a15 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 8 01:36:30 2017 +0100 radv/ac: Implement Float64 UBO loads. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 4367cd1..c50292e 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2185,13 +2185,17 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef results[4], ret; + LLVMValueRef results[8], ret; LLVMValueRef rsrc = get_src(ctx, instr->src[0]); LLVMValueRef offset = get_src(ctx, instr->src[1]); + int num_components = instr->num_components; rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), ""); - for (unsigned i = 0; i < instr->num_components; ++i) { + if (instr->dest.ssa.bit_size == 64) + num_components *= 2; + + for (unsigned i = 0; i < num_components; ++i) { LLVMValueRef params[] = { rsrc, LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Implement Float64 SSBO loads.
Module: Mesa Branch: master Commit: 441ee1e65b041866a37885bd0ed717709ee0be1a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=441ee1e65b041866a37885bd0ed717709ee0be1a Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 8 19:38:28 2017 +0100 radv/ac: Implement Float64 SSBO loads. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 75 +++-- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index c50292e..50ed4d4 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2148,35 +2148,58 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - const char *load_name; - LLVMTypeRef data_type = ctx->f32; - if (instr->num_components == 3) - data_type = LLVMVectorType(ctx->f32, 4); - else if (instr->num_components > 1) - data_type = LLVMVectorType(ctx->f32, instr->num_components); - - if (instr->num_components == 4 || instr->num_components == 3) - load_name = "llvm.amdgcn.buffer.load.v4f32"; - else if (instr->num_components == 2) - load_name = "llvm.amdgcn.buffer.load.v2f32"; - else if (instr->num_components == 1) - load_name = "llvm.amdgcn.buffer.load.f32"; - else - abort(); + LLVMValueRef results[2]; + int load_components; + int num_components = instr->num_components; + if (instr->dest.ssa.bit_size == 64) + num_components *= 2; - LLVMValueRef params[] = { - get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), - get_src(ctx, instr->src[1]), - LLVMConstInt(ctx->i1, 0, false), - LLVMConstInt(ctx->i1, 0, false), - }; + for (int i = 0; i < num_components; i += load_components) { + load_components = MIN2(num_components - i, 4); + const char *load_name; + LLVMTypeRef data_type = ctx->f32; + LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, 
false); + offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, ""); + + if (load_components == 3) + data_type = LLVMVectorType(ctx->f32, 4); + else if (load_components > 1) + data_type = LLVMVectorType(ctx->f32, load_components); + + if (load_components >= 3) + load_name = "llvm.amdgcn.buffer.load.v4f32"; + else if (load_components == 2) + load_name = "llvm.amdgcn.buffer.load.v2f32"; + else if (load_components == 1) + load_name = "llvm.amdgcn.buffer.load.f32"; + else + unreachable("unhandled number of components"); - LLVMValueRef ret = - ac_emit_llvm_intrinsic(>ac, load_name, data_type, params, 5, 0); + LLVMValueRef params[] = { + get_src(ctx, instr->src[0]), + LLVMConstInt(ctx->i32, 0, false), + offset, + LLVMConstInt(ctx->i1, 0, false), + LLVMConstInt(ctx->i1, 0, false), + }; + + results[i] = ac_emit_llvm_intrinsic(>ac, load_name, data_type, params, 5, 0); + + } - if (instr->num_components == 3) - ret = trim_vector(ctx, ret, 3); + LLVMValueRef ret = results[0]; + if (num_components > 4 || num_components == 3) { + LLVMValueRef masks[] = { + LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), + LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false), + LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false), + LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false) + }; + + LLVMValueRef swizzle = LLVMConstVector(masks, num_components); + ret = LLVMBuildShuffleVector(ctx->builder, results[0], +results[num_components > 4 ? 1 : 0], swizzle, ""); + } return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, >dest.ssa), ""); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Pass DCC alignment to application.
Module: Mesa Branch: master Commit: 47ca0f537dfbc03f0eb0cb12fdee06dbe664fbc7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=47ca0f537dfbc03f0eb0cb12fdee06dbe664fbc7 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Feb 7 00:45:11 2017 +0100 radv: Pass DCC alignment to application. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Cc: "17.0" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/radv_image.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 202f460..1581645 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -582,6 +582,7 @@ radv_image_alloc_dcc(struct radv_device *device, /* + 8 for storing the clear values */ image->clear_value_offset = image->dcc_offset + image->surface.dcc_size; image->size = image->dcc_offset + image->surface.dcc_size + 8; + image->alignment = MAX2(image->alignment, image->surface.dcc_alignment); } static unsigned ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Pass CMASK alignment to application.
Module: Mesa Branch: master Commit: eb01b20cc41e9501062eb25034069e484f8b1899 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eb01b20cc41e9501062eb25034069e484f8b1899 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Feb 7 00:24:16 2017 +0100 radv: Pass CMASK alignment to application. CMASK alignment can be greater than image data alignment, so pass it to the app so that it knows what alignment to backing memory should have. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Cc: <mesa-sta...@lists.freedesktop.org> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_image.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index 99d1737..202f460 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -571,6 +571,7 @@ radv_image_alloc_cmask(struct radv_device *device, /* + 8 for storing the clear values */ image->clear_value_offset = image->cmask.offset + image->cmask.size; image->size = image->cmask.offset + image->cmask.size + 8; + image->alignment = MAX2(image->alignment, image->cmask.alignment); } static void ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Enable fast clears by default.
Module: Mesa Branch: master Commit: 0d1283850bef7738b09c23d6a546696d653863ca URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0d1283850bef7738b09c23d6a546696d653863ca Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Feb 7 00:36:41 2017 +0100 radv: Enable fast clears by default. Works for me on dota2 and talos now. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Andres Rodriguez <andre...@gmail.com> --- src/amd/vulkan/radv_device.c | 4 ++-- src/amd/vulkan/radv_meta_clear.c | 2 +- src/amd/vulkan/radv_private.h| 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 16c9c0e..98d4b91 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -294,7 +294,7 @@ static const VkAllocationCallbacks default_alloc = { }; static const struct debug_control radv_debug_options[] = { - {"fastclears", RADV_DEBUG_FAST_CLEARS}, + {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS}, {"nodcc", RADV_DEBUG_NO_DCC}, {"shaders", RADV_DEBUG_DUMP_SHADERS}, {"nocache", RADV_DEBUG_NO_CACHE}, @@ -2157,7 +2157,7 @@ radv_initialise_color_surface(struct radv_device *device, cb->cb_color_info |= S_028C70_COMPRESSION(1); if (iview->image->cmask.size && - (device->debug_flags & RADV_DEBUG_FAST_CLEARS)) + !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)) cb->cb_color_info |= S_028C70_FAST_CLEAR(1); if (iview->image->surface.dcc_size && level_info->dcc_enabled) diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index a42e834..6d02acc 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -841,7 +841,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer, if (!iview->image->cmask.size && !iview->image->surface.dcc_size) return false; - if (!(cmd_buffer->device->debug_flags & RADV_DEBUG_FAST_CLEARS)) + if (cmd_buffer->device->debug_flags & 
RADV_DEBUG_NO_FAST_CLEARS) return false; if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index))) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 69ff357..25ed5de 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -102,7 +102,7 @@ enum radv_mem_type { enum { - RADV_DEBUG_FAST_CLEARS = 0x1, + RADV_DEBUG_NO_FAST_CLEARS= 0x1, RADV_DEBUG_NO_DCC= 0x2, RADV_DEBUG_DUMP_SHADERS = 0x4, RADV_DEBUG_NO_CACHE = 0x8, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Pass draw index to shader.
Module: Mesa Branch: master Commit: cf8a11c1baccebbd6dda38da8639e1a6eeccd7c7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cf8a11c1baccebbd6dda38da8639e1a6eeccd7c7 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Jan 31 21:25:41 2017 +0100 radv: Pass draw index to shader. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> --- src/amd/vulkan/radv_cmd_buffer.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 091d970..97c6b63 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2174,15 +2174,16 @@ void radv_CmdDraw( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); radv_cmd_buffer_flush_state(cmd_buffer); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE); if (loc->sgpr_idx != -1) { uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline)); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2); + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3); radeon_emit(cmd_buffer->cs, firstVertex); radeon_emit(cmd_buffer->cs, firstInstance); + radeon_emit(cmd_buffer->cs, 0); } radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -2225,7 +2226,7 @@ void radv_CmdDrawIndexed( radv_cmd_buffer_flush_state(cmd_buffer); radv_emit_primitive_reset_index(cmd_buffer); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); + MAYBE_UNUSED unsigned 
cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type); @@ -2234,9 +2235,10 @@ void radv_CmdDrawIndexed( AC_UD_VS_BASE_VERTEX_START_INSTANCE); if (loc->sgpr_idx != -1) { uint32_t base_reg = shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(cmd_buffer->state.pipeline)); - radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 2); + radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3); radeon_emit(cmd_buffer->cs, vertexOffset); radeon_emit(cmd_buffer->cs, firstInstance); + radeon_emit(cmd_buffer->cs, 0); } radeon_emit(cmd_buffer->cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cmd_buffer->cs, instanceCount); @@ -2298,7 +2300,9 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, 0); radeon_emit(cs, ((base_reg + loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, ((base_reg + (loc->sgpr_idx + 1) * 4) - SI_SH_REG_OFFSET) >> 2); - radeon_emit(cs, S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); /* draw_index and count_indirect enable */ + radeon_emit(cs, (((base_reg + (loc->sgpr_idx + 2) * 4) - SI_SH_REG_OFFSET) >> 2) | + S_2C3_DRAW_INDEX_ENABLE(1) | + S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); radeon_emit(cs, draw_count); /* count */ radeon_emit(cs, count_va); /* count_addr */ radeon_emit(cs, count_va >> 32); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Enable VK_KHR_shader_draw_parameters.
Module: Mesa Branch: master Commit: f5f8eb2c7cb425f63798daaf151cb517c8bd6a0d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5f8eb2c7cb425f63798daaf151cb517c8bd6a0d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Jan 31 21:37:48 2017 +0100 radv: Enable VK_KHR_shader_draw_parameters. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> --- src/amd/vulkan/radv_device.c | 4 src/amd/vulkan/radv_pipeline.c | 1 + 2 files changed, 5 insertions(+) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 0026de5..16c9c0e 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -124,6 +124,10 @@ static const VkExtensionProperties common_device_extensions[] = { .extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, .specVersion = 1, }, + { + .extensionName = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, + .specVersion = 1, + }, }; static VkResult diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index bf3007b..53f06ac 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -207,6 +207,7 @@ radv_shader_compile_to_nir(struct radv_device *device, } } const struct nir_spirv_supported_extensions supported_ext = { + .draw_parameters = true, .float64 = true }; entry_point = spirv_to_nir(spirv, module->size / 4, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Add draw index support.
Module: Mesa Branch: master Commit: 80f4331ed1314d674e385c8753c970d8fa394a88 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=80f4331ed1314d674e385c8753c970d8fa394a88 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Jan 31 21:21:47 2017 +0100 radv/ac: Add draw index support. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> --- src/amd/common/ac_nir_to_llvm.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 50ed4d4..67c5c06 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -82,6 +82,7 @@ struct nir_to_llvm_context { LLVMValueRef vertex_buffers; LLVMValueRef base_vertex; LLVMValueRef start_instance; + LLVMValueRef draw_index; LLVMValueRef vertex_id; LLVMValueRef rel_auto_id; LLVMValueRef vs_prim_id; @@ -560,6 +561,7 @@ static void create_function(struct nir_to_llvm_context *ctx) arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */ arg_types[arg_idx++] = ctx->i32; // base vertex arg_types[arg_idx++] = ctx->i32; // start instance + arg_types[arg_idx++] = ctx->i32; // draw index } user_sgpr_count = arg_idx; if (ctx->options->key.vs.as_es) @@ -684,10 +686,11 @@ static void create_function(struct nir_to_llvm_context *ctx) set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2); user_sgpr_idx += 2; ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++); - set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2); - user_sgpr_idx += 2; + set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 3); + user_sgpr_idx += 3; ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++); ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++); + ctx->draw_index = LLVMGetParam(ctx->main_function, arg_idx++); 
} if (ctx->options->key.vs.as_es) ctx->es2gs_offset = LLVMGetParam(ctx->main_function, arg_idx++); @@ -3263,6 +3266,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, case nir_intrinsic_load_base_instance: result = ctx->start_instance; break; + case nir_intrinsic_load_draw_id: + result = ctx->draw_index; + break; case nir_intrinsic_load_invocation_id: result = ctx->gs_invocation_id; break; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: don't resubmit the same cs over and over while tracing
Module: Mesa Branch: master Commit: f65b3641c3233f1697b96ea8126b578dae6de4f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f65b3641c3233f1697b96ea8126b578dae6de4f1 Author: Grazvydas Ignotas <nota...@gmail.com> Date: Mon Jan 23 23:16:42 2017 +0200 radv: don't resubmit the same cs over and over while tracing Fixes: 97dfff54 ("radv: Dump command buffer on hang.") Signed-off-by: Grazvydas Ignotas <nota...@gmail.com> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> CC: <mesa-sta...@lists.freedesktop.org> --- src/amd/vulkan/radv_device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 9371536..4aa6af2 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -989,8 +989,7 @@ VkResult radv_QueueSubmit( if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array, - pSubmits[i].commandBufferCount, + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, b ? pSubmits[i].waitSemaphoreCount : 0, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Prevent Coverity warning
Module: Mesa Branch: master Commit: 0b63f47030e63ad061b7716cb6ab963cdae0f14f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0b63f47030e63ad061b7716cb6ab963cdae0f14f Author: Robert Foss <robert.f...@collabora.com> Date: Mon Jan 30 16:26:58 2017 -0500 radv: Prevent Coverity warning Prevent Coverity seeing potential errors when src is not initialized in the switch case. Coverity-Id: 1396397 Signed-off-by: Robert Foss <robert.f...@collabora.com> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_cmd_buffer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 8c0bf8f..afa8452 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -415,6 +415,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, case 16: src = cmd_buffer->device->sample_locations_16x; break; + default: + unreachable("unknown number of samples"); } memcpy(samples_ptr, src, num_samples * 4 * 2); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Use base in push constant loads.
Module: Mesa Branch: master Commit: 96c60b7f07e626d9ca0fc5789117f0c725ba1da2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=96c60b7f07e626d9ca0fc5789117f0c725ba1da2 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Jan 28 01:32:20 2017 +0100 radv/ac: Use base in push constant loads. Apparently the source is not an address but an offset, so we actually need to use the base. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> CC: <mesa-sta...@lists.freedesktop.org> --- src/amd/common/ac_nir_to_llvm.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 26b87e8..72ae6eb 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1745,9 +1745,12 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef ptr; + LLVMValueRef ptr, addr; + + addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0); + addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), ""); - ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0])); + ptr = build_gep0(ctx, ctx->push_constants, addr); ptr = cast_ptr(ctx, ptr, get_def_type(ctx, >dest.ssa)); return LLVMBuildLoad(ctx->builder, ptr, ""); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Add compiler support for spilling.
Module: Mesa Branch: master Commit: 29c1f67e9f166da4393493d213ee06498aecac51 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29c1f67e9f166da4393493d213ee06498aecac51 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Jan 28 23:51:19 2017 +0100 radv/ac: Add compiler support for spilling. Based on code written by Dave Airlie. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_binary.c | 30 +++--- src/amd/common/ac_binary.h | 4 +++- src/amd/common/ac_llvm_util.c | 4 ++-- src/amd/common/ac_llvm_util.h | 2 +- src/amd/common/ac_nir_to_llvm.c | 16 +++- src/amd/common/ac_nir_to_llvm.h | 6 -- src/amd/vulkan/radv_pipeline.c | 3 ++- 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c index 01cf000..9c66a82 100644 --- a/src/amd/common/ac_binary.c +++ b/src/amd/common/ac_binary.c @@ -212,23 +212,28 @@ static const char *scratch_rsrc_dword1_symbol = void ac_shader_binary_read_config(struct ac_shader_binary *binary, struct ac_shader_config *conf, - unsigned symbol_offset) + unsigned symbol_offset, + bool supports_spill) { unsigned i; const unsigned char *config = ac_shader_binary_config_start(binary, symbol_offset); bool really_needs_scratch = false; - + uint32_t wavesize = 0; /* LLVM adds SGPR spills to the scratch size. * Find out if we really need the scratch buffer. 
*/ - for (i = 0; i < binary->reloc_count; i++) { - const struct ac_shader_reloc *reloc = >relocs[i]; + if (supports_spill) { + really_needs_scratch = true; + } else { + for (i = 0; i < binary->reloc_count; i++) { + const struct ac_shader_reloc *reloc = >relocs[i]; - if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || - !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { - really_needs_scratch = true; - break; + if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) || + !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) { + really_needs_scratch = true; + break; + } } } @@ -259,9 +264,7 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary, case R_0286E8_SPI_TMPRING_SIZE: case R_00B860_COMPUTE_TMPRING_SIZE: /* WAVESIZE is in units of 256 dwords. */ - if (really_needs_scratch) - conf->scratch_bytes_per_wave = - G_00B860_WAVESIZE(value) * 256 * 4; + wavesize = value; break; case SPILLED_SGPRS: conf->spilled_sgprs = value; @@ -285,4 +288,9 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary, if (!conf->spi_ps_input_addr) conf->spi_ps_input_addr = conf->spi_ps_input_ena; } + + if (really_needs_scratch) { + /* sgprs spills aren't spilling */ + conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4; + } } diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h index 282f33d..06fd855 100644 --- a/src/amd/common/ac_binary.h +++ b/src/amd/common/ac_binary.h @@ -27,6 +27,7 @@ #pragma once #include +#include struct ac_shader_reloc { char name[32]; @@ -85,4 +86,5 @@ void ac_elf_read(const char *elf_data, unsigned elf_size, void ac_shader_binary_read_config(struct ac_shader_binary *binary, struct ac_shader_config *conf, - unsigned symbol_offset); + unsigned symbol_offset, + bool supports_spill); diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 7317db7..f3cab92 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -126,11 +126,11 @@ static const char 
*ac_get_llvm_processor_name(enum radeon_family family) } } -LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family) +LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill) { assert(family >= CHIP_TAHITI); - const char *triple = "amdgcn--"; + const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn-
Mesa (master): radv/amdgpu: Support a preamble CS.
Module: Mesa Branch: master Commit: d115b67712d6db1eff9d3a4bb57a585c0158be74 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d115b67712d6db1eff9d3a4bb57a585c0158be74 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri Jan 27 00:19:52 2017 +0100 radv/amdgpu: Support a preamble CS. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 6 ++- src/amd/vulkan/radv_radeon_winsys.h | 1 + src/amd/vulkan/radv_wsi.c | 2 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 62 +-- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 1505498..ad83f9f 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1021,7 +1021,8 @@ VkResult radv_QueueSubmit( if (queue->device->trace_bo) *queue->device->trace_id_ptr = 0; - ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, + ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, + advance, NULL, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, b ? 
pSubmits[i].waitSemaphoreCount : 0, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1052,7 +1053,8 @@ VkResult radv_QueueSubmit( if (!submitCount) ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, >device->empty_cs[queue->queue_family_index], - 1, NULL, 0, NULL, 0, false, base_fence); + 1, NULL, NULL, 0, NULL, 0, + false, base_fence); fence->submitted = true; } diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index a0b5092..bdb1439 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -305,6 +305,7 @@ struct radeon_winsys { int queue_index, struct radeon_winsys_cs **cs_array, unsigned cs_count, +struct radeon_winsys_cs *preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index 2f45961..9c9e1bb 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR( struct radeon_winsys_ctx *ctx = queue->hw_ctx; queue->device->ws->cs_submit(ctx, queue->queue_idx, >device->empty_cs[queue->queue_family_index], -1, +1, NULL, (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index f7707f6..b58f5db 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -422,6 +422,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_winsys_cs **cs_array, unsigned count, struct radv_amdgpu_winsys_bo *extra_bo, + struct radeon_winsys_cs *extra_cs, amdgpu_bo_list_handle *bo_list) { int r; @@ -448,7 +449,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, bo_list); free(handles); 
pthread_mutex_unlock(>global_bo_list_lock); - } else if (count == 1 && !extra_bo) { + } else if (count == 1 && !extra_bo && !extra_cs) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; r = amdgpu_bo_list_create(ws->dev, cs->num_buff
Mesa (master): radv: Track scratch usage across pipelines & command buffers.
Module: Mesa Branch: master Commit: ccff93e1387d31243a9c5aede27fbfbe35a03957 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ccff93e1387d31243a9c5aede27fbfbe35a03957 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 29 15:20:03 2017 +0100 radv: Track scratch usage across pipelines & command buffers. Based on code written by Dave Airlie. Signed-off-by: Bas Nieuwenhuizen <ba...@oogle.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_cmd_buffer.c | 22 +++- src/amd/vulkan/radv_device.c | 22 src/amd/vulkan/radv_pipeline.c | 75 src/amd/vulkan/radv_private.h| 8 + 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c62d275..eebfac5 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -627,6 +627,13 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, pipeline->graphics.prim_restart_enable); + cmd_buffer->scratch_size_needed = + MAX2(cmd_buffer->scratch_size_needed, + pipeline->max_waves * pipeline->scratch_bytes_per_wave); + + radeon_set_context_reg(cmd_buffer->cs, R_0286E8_SPI_TMPRING_SIZE, + S_0286E8_WAVES(pipeline->max_waves) | + S_0286E8_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); cmd_buffer->state.emitted_pipeline = pipeline; } @@ -1402,6 +1409,8 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) free(up); } + cmd_buffer->scratch_size_needed = 0; + cmd_buffer->compute_scratch_size_needed = 0; if (cmd_buffer->upload.upload_bo) cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, cmd_buffer->upload.upload_bo, 8); @@ -1629,9 +1638,15 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) radeon_emit(cmd_buffer->cs, compute_shader->rsrc1); radeon_emit(cmd_buffer->cs, compute_shader->rsrc2); + + cmd_buffer->compute_scratch_size_needed = + 
MAX2(cmd_buffer->compute_scratch_size_needed, + pipeline->max_waves * pipeline->scratch_bytes_per_wave); + /* change these once we have scratch support */ radeon_set_sh_reg(cmd_buffer->cs, R_00B860_COMPUTE_TMPRING_SIZE, - S_00B860_WAVES(32) | S_00B860_WAVESIZE(0)); + S_00B860_WAVES(pipeline->max_waves) | + S_00B860_WAVESIZE(pipeline->scratch_bytes_per_wave >> 10)); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); radeon_emit(cmd_buffer->cs, @@ -1821,6 +1836,11 @@ void radv_CmdExecuteCommands( for (uint32_t i = 0; i < commandBufferCount; i++) { RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); + primary->scratch_size_needed = MAX2(primary->scratch_size_needed, + secondary->scratch_size_needed); + primary->compute_scratch_size_needed = MAX2(primary->compute_scratch_size_needed, + secondary->compute_scratch_size_needed); + primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs); } diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index ad83f9f..da65511 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -813,6 +813,28 @@ VkResult radv_CreateDevice( } } +#if HAVE_LLVM < 0x0400 + device->llvm_supports_spill = false; +#else + device->llvm_supports_spill = true; +#endif + + /* The maximum number of scratch waves. Scratch space isn't divided +* evenly between CUs. The number is only a function of the number of CUs. +* We can decrease the constant to decrease the scratch buffer size. +* +* sctx->scratch_waves must be >= the maximum posible size of +* 1 threadgroup, so that the hw doesn't hang from being unable +* to start any. +* +* The recommended value is 4 per CU at most. Higher numbers don't +* bring much benefit, but they still occupy chip resources (think +* async compute). I've seen ~2% performance difference between 4 and 32. +*/ + uint32_t max_threads_per_block = 204
Mesa (master): radv: Handle command buffers that need scratch memory.
Module: Mesa Branch: master Commit: c4d7b9cd290bdedb0e58fa52bf32d39d2411a789 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c4d7b9cd290bdedb0e58fa52bf32d39d2411a789 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 29 13:53:05 2017 +0100 radv: Handle command buffers that need scratch memory. v2: Create the descriptor BO with CPU access. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 186 - src/amd/vulkan/radv_pipeline.c | 11 +-- src/amd/vulkan/radv_private.h | 8 ++ 3 files changed, 199 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index da65511..6071421 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -32,6 +32,7 @@ #include #include #include "radv_private.h" +#include "radv_cs.h" #include "util/strtod.h" #include @@ -752,6 +753,15 @@ radv_queue_finish(struct radv_queue *queue) { if (queue->hw_ctx) queue->device->ws->ctx_destroy(queue->hw_ctx); + + if (queue->preamble_cs) + queue->device->ws->cs_destroy(queue->preamble_cs); + if (queue->descriptor_bo) + queue->device->ws->buffer_destroy(queue->descriptor_bo); + if (queue->scratch_bo) + queue->device->ws->buffer_destroy(queue->scratch_bo); + if (queue->compute_scratch_bo) + queue->device->ws->buffer_destroy(queue->compute_scratch_bo); } VkResult radv_CreateDevice( @@ -1000,6 +1010,159 @@ static void radv_dump_trace(struct radv_device *device, fclose(f); } +static VkResult +radv_get_preamble_cs(struct radv_queue *queue, + uint32_t scratch_size, + uint32_t compute_scratch_size, + struct radeon_winsys_cs **preamble_cs) +{ + struct radeon_winsys_bo *scratch_bo = NULL; + struct radeon_winsys_bo *descriptor_bo = NULL; + struct radeon_winsys_bo *compute_scratch_bo = NULL; + struct radeon_winsys_cs *cs = NULL; + + if (!scratch_size && !compute_scratch_size) { + *preamble_cs = NULL; + return VK_SUCCESS; + } + + if (scratch_size <= 
queue->scratch_size && + compute_scratch_size <= queue->compute_scratch_size) { + *preamble_cs = queue->preamble_cs; + return VK_SUCCESS; + } + + if (scratch_size > queue->scratch_size) { + scratch_bo = queue->device->ws->buffer_create(queue->device->ws, + scratch_size, + 4096, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_CPU_ACCESS); + if (!scratch_bo) + goto fail; + } else + scratch_bo = queue->scratch_bo; + + if (compute_scratch_size > queue->compute_scratch_size) { + compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, + compute_scratch_size, + 4096, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_CPU_ACCESS); + if (!compute_scratch_bo) + goto fail; + + } else + compute_scratch_bo = queue->compute_scratch_bo; + + if (scratch_bo != queue->scratch_bo) { + descriptor_bo = queue->device->ws->buffer_create(queue->device->ws, +8, +4096, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_CPU_ACCESS); + if (!descriptor_bo) + goto fail; + } else + descriptor_bo = queue->descriptor_bo; + + cs = queue->device->ws->cs_create(queue->device->ws, + queue->queue_family_index ? RING_COMPUTE : RING_GFX); + if (!cs) + goto fail; + + + if (scratch_bo) + queue->device->ws->cs_add_buffer(cs, scratch_bo, 8); + + if (descriptor_bo) + queue->device->
Mesa (master): radv: add trim command pool stub
Module: Mesa Branch: master Commit: ec0f5c005ce75ebd94fea8d8e71a6ae852293fce URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec0f5c005ce75ebd94fea8d8e71a6ae852293fce Author: Andres Rodriguez <andre...@gmail.com> Date: Fri Jan 27 00:03:02 2017 -0500 radv: add trim command pool stub Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_cmd_buffer.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index eebfac5..a83090f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1915,6 +1915,13 @@ VkResult radv_ResetCommandPool( return VK_SUCCESS; } +void radv_TrimCommandPoolKHR( +VkDevicedevice, +VkCommandPool commandPool, +VkCommandPoolTrimFlagsKHR flags) +{ +} + void radv_CmdBeginRenderPass( VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo*pRenderPassBegin, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: use new error codes for AllocateDescriptorSets
Module: Mesa Branch: master Commit: f8d5e1ab2dcb7d6d96c47cff7bd174e8e98bbdd5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8d5e1ab2dcb7d6d96c47cff7bd174e8e98bbdd5 Author: Andres Rodriguez <andre...@gmail.com> Date: Fri Jan 27 00:03:04 2017 -0500 radv: use new error codes for AllocateDescriptorSets There is a new error code in Maintenance1 that is more specific to the situation: VK_ERROR_OUT_OF_POOL_MEMORY_KHR Fixes CTS test case: dEQP-VK.api.descriptor_pool.out_of_pool_memory Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_descriptor_set.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index eb8b5d6..6d89d60 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -298,7 +298,7 @@ radv_descriptor_set_create(struct radv_device *device, if (entry < 0) { vk_free2(>alloc, NULL, set); - return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY); + return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); } offset = pool->free_nodes[entry].offset; pool->free_nodes[entry].next = pool->full_list; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: vkAllocateCommandBuffers should NULL all output handles
Module: Mesa Branch: master Commit: e199a993b2eb13319691b690af8f52bf8a0016b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e199a993b2eb13319691b690af8f52bf8a0016b8 Author: Andres Rodriguez <andre...@gmail.com> Date: Fri Jan 27 00:03:03 2017 -0500 radv: vkAllocateCommandBuffers should NULL all output handles This is part of the spec and fixes CTS tests: dEQP-VK.api.object_management.alloc_callback_fail_multiple.command_buffer_* Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_cmd_buffer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index a83090f..c4e8324 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1351,6 +1351,9 @@ VkResult radv_AllocateCommandBuffers( VkResult result = VK_SUCCESS; uint32_t i; + memset(pCommandBuffers, 0, + sizeof(*pCommandBuffers)*pAllocateInfo->commandBufferCount); + for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, [i]); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Fix vkCmdCopyImage for 2d slices into 3d Images
Module: Mesa Branch: master Commit: 7b890a36dfec3cc2249d61f7eb57bba637b3debb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7b890a36dfec3cc2249d61f7eb57bba637b3debb Author: Andres Rodriguez <andre...@gmail.com> Date: Fri Jan 27 00:03:07 2017 -0500 radv: Fix vkCmdCopyImage for 2d slices into 3d Images Previously the z offset of the destination image was being ignored. It should be taken into account when copying into a 3d target. Also, img_extent_el.depth was being incorrectly clamped to 1 due to the source image being VK_IMAGE_TYPE_2D. This would result in the blit failing to iterate over all the 3d slices. Instead we clamp to the destination image type. Fixes failures in CTS tests: dEQP-VK.api.copy_and_blit.image_to_image.3d_images.* Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_meta_copy.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c index 64e0ea8..2bd20b5 100644 --- a/src/amd/vulkan/radv_meta_copy.c +++ b/src/amd/vulkan/radv_meta_copy.c @@ -369,7 +369,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, const VkOffset3D src_offset_el = meta_region_offset_el(src_image, [r].srcOffset); const VkExtent3D img_extent_el = - meta_region_extent_el(src_image, [r].extent); + meta_region_extent_el(dest_image, [r].extent); /* Start creating blit rect */ struct radv_meta_blit2d_rect rect = { @@ -377,6 +377,9 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer, .height = img_extent_el.height, }; + if (dest_image->type == VK_IMAGE_TYPE_3D) + b_dst.layer = dst_offset_el.z; + /* Loop through each 3D or array slice */ unsigned num_slices_3d = img_extent_el.depth; unsigned num_slices_array = pRegions[r].dstSubresource.layerCount; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Don't allow any operations on non-supported depth/stencil formats.
Module: Mesa Branch: master Commit: 34bfe4b1bb68b579b376033ce5ed4b4520d2dc28 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34bfe4b1bb68b579b376033ce5ed4b4520d2dc28 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri Jan 27 00:03:05 2017 -0500 radv: Don't allow any operations on non-supported depth/stencil formats. We really use the depth block for the blits. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_formats.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index 87c28f1..e147f94 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -565,11 +565,12 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical } if (vk_format_is_depth_or_stencil(format)) { - if (radv_is_zs_format_supported(format)) + if (radv_is_zs_format_supported(format)) { tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | - VK_FORMAT_FEATURE_BLIT_DST_BIT; + tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | +VK_FORMAT_FEATURE_BLIT_DST_BIT; + } } else { bool linear_sampling; if (radv_is_sampler_format_supported(format, _sampling)) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Expose transfer format features.
Module: Mesa Branch: master Commit: 4eae3597eb390171f165d2bb62c2d48cc0b18a7d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4eae3597eb390171f165d2bb62c2d48cc0b18a7d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri Jan 27 00:03:06 2017 -0500 radv: Expose transfer format features. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> --- src/amd/vulkan/radv_formats.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index e147f94..f18ecee 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -570,6 +570,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT; + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | +VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; } } else { bool linear_sampling; @@ -591,6 +593,15 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; } } + if (util_is_power_of_two(vk_format_get_blocksize(format))) { + tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | +VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; + } + } + + if (util_is_power_of_two(vk_format_get_blocksize(format))) { + linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR | + VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR; } if (format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: add missing core errors in vk_errorf()
Module: Mesa Branch: master Commit: 43cf96751277f76d726a0829e7fa733f85e70061 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43cf96751277f76d726a0829e7fa733f85e70061 Author: Eric Engestrom <eric.engest...@imgtec.com> Date: Thu Jan 26 14:20:23 2017 +0000 radv: add missing core errors in vk_errorf() Signed-off-by: Eric Engestrom <eric.engest...@imgtec.com> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_util.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/amd/vulkan/radv_util.c b/src/amd/vulkan/radv_util.c index c642bb7..494bf7e 100644 --- a/src/amd/vulkan/radv_util.c +++ b/src/amd/vulkan/radv_util.c @@ -84,7 +84,11 @@ __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) ERROR_CASE(VK_ERROR_MEMORY_MAP_FAILED) ERROR_CASE(VK_ERROR_LAYER_NOT_PRESENT) ERROR_CASE(VK_ERROR_EXTENSION_NOT_PRESENT) + ERROR_CASE(VK_ERROR_FEATURE_NOT_PRESENT) ERROR_CASE(VK_ERROR_INCOMPATIBLE_DRIVER) + ERROR_CASE(VK_ERROR_TOO_MANY_OBJECTS) + ERROR_CASE(VK_ERROR_FORMAT_NOT_SUPPORTED) + ERROR_CASE(VK_ERROR_FRAGMENTED_POOL) /* Extension errors */ ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: add missing extension errors in vk_errorf()
Module: Mesa Branch: master Commit: 06842585df830a710ec59c2385bcef5badd8ecc6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=06842585df830a710ec59c2385bcef5badd8ecc6 Author: Eric Engestrom <eric.engest...@imgtec.com> Date: Thu Jan 26 14:20:24 2017 +0000 radv: add missing extension errors in vk_errorf() v2(Bas): Remove the extra VK_ERROR_FRAGMENTED_POOL cases. Signed-off-by: Eric Engestrom <eric.engest...@imgtec.com> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_util.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/amd/vulkan/radv_util.c b/src/amd/vulkan/radv_util.c index 494bf7e..684956e 100644 --- a/src/amd/vulkan/radv_util.c +++ b/src/amd/vulkan/radv_util.c @@ -91,7 +91,13 @@ __vk_errorf(VkResult error, const char *file, int line, const char *format, ...) ERROR_CASE(VK_ERROR_FRAGMENTED_POOL) /* Extension errors */ + ERROR_CASE(VK_ERROR_SURFACE_LOST_KHR) + ERROR_CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR) ERROR_CASE(VK_ERROR_OUT_OF_DATE_KHR) + ERROR_CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR) + ERROR_CASE(VK_ERROR_VALIDATION_FAILED_EXT) + ERROR_CASE(VK_ERROR_INVALID_SHADER_NV) + ERROR_CASE(VK_ERROR_OUT_OF_POOL_MEMORY_KHR) default: assert(!"Unknown error"); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): various: Fix missing DumpModule with recent LLVM.
Module: Mesa Branch: master Commit: 0fca80b3db64dc1d004f78e22b9de86a07e9de96 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fca80b3db64dc1d004f78e22b9de86a07e9de96 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sat Jan 28 17:32:05 2017 +0100 various: Fix missing DumpModule with recent LLVM. Since LLVM revision 293359, DumpModule is only implemented when either a debug build or LLVM_ENABLE_DUMP is set. This patch adds a direct replacement for the function for radv and radeonsi. However, as I don't know a good place to put common LLVM code for all three, I inlined the implementation for LLVMPipe. v2: Use the new code for LLVM 3.4+ instead of LLVM 5+ & fixed indentation Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> --- src/amd/common/ac_llvm_util.c| 8 src/amd/common/ac_llvm_util.h| 3 +++ src/amd/common/ac_nir_to_llvm.c | 2 +- src/gallium/drivers/llvmpipe/lp_jit.c| 8 +++- src/gallium/drivers/radeonsi/si_shader.c | 6 +++--- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index 770e3bd..7317db7 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -504,3 +504,11 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx, memcpy(coords_arg, coords, sizeof(coords)); } + +void +ac_dump_module(LLVMModuleRef module) +{ + char *str = LLVMPrintModuleToString(module); + fprintf(stderr, "%s", str); + LLVMDisposeMessage(str); +} diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 802c266..2d301c9 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -95,6 +95,9 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx, LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); +void +ac_dump_module(LLVMModuleRef module); + #ifdef __cplusplus } #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 72ae6eb..e83c7a2 100644 
--- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -4569,7 +4569,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm, bool dump_shader) { if (dump_shader) - LLVMDumpModule(llvm_module); + ac_dump_module(llvm_module); memset(binary, 0, sizeof(*binary)); int v = ac_llvm_compile(llvm_module, binary, tm); diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index 2126036..bb2b87f 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -222,7 +222,13 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) } if (gallivm_debug & GALLIVM_DEBUG_IR) { - LLVMDumpModule(gallivm->module); +#if HAVE_LLVM >= 0x304 + char *str = LLVMPrintModuleToString(gallivm->module); + fprintf(stderr, "%s", str); + LLVMDisposeMessage(str); +#else + DumpModule(gallivm->module); +#endif } } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5ca974e..cad7bf7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -6400,7 +6400,7 @@ int si_compile_llvm(struct si_screen *sscreen, if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) { fprintf(stderr, "%s LLVM IR:\n\n", name); - LLVMDumpModule(mod); + ac_dump_module(mod); fprintf(stderr, "\n"); } } @@ -6599,7 +6599,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(>b, PIPE_SHADER_GEOMETRY)) - LLVMDumpModule(bld_base->base.gallivm->module); + ac_dump_module(bld_base->base.gallivm->module); si_llvm_finalize_module(, r600_extra_shader_checks(>b, PIPE_SHADER_GEOMETRY)); @@ -7603,7 +7603,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && r600_can_dump_shader(>b, ctx.type)) - LLVMDumpModule(mod); + 
ac_dump_module(mod); si_llvm_finalize_module(, r600_extra_shader_checks(>b, ctx.type)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): llvmpipe: Use LLVMDumpModule, not DumpModule.
Module: Mesa Branch: master Commit: b8ee45ebdc2408f50bc0fdb4525ab4f16ffe36e3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b8ee45ebdc2408f50bc0fdb4525ab4f16ffe36e3 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 29 17:03:25 2017 +0100 llvmpipe: Use LLVMDumpModule, not DumpModule. Forgot the prefix ... Fixes: 0fca80b3db64dc1d004f78e22b9de86a07e9de96 Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> --- src/gallium/drivers/llvmpipe/lp_jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c index bb2b87f..a2762f3 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.c +++ b/src/gallium/drivers/llvmpipe/lp_jit.c @@ -227,7 +227,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp) fprintf(stderr, "%s", str); LLVMDisposeMessage(str); #else - DumpModule(gallivm->module); + LLVMDumpModule(gallivm->module); #endif } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Handle VK_REMAINING_ARRAY_LAYERS in fast clear eliminate.
Module: Mesa Branch: master Commit: 81b23796648afd1816e6367348ef682660eb630e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=81b23796648afd1816e6367348ef682660eb630e Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 19 14:00:25 2017 +0100 radv: Handle VK_REMAINING_ARRAY_LAYERS in fast clear eliminate. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_meta_fast_clear.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index 0902c45..ffaa9a3 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -419,13 +419,13 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_pass_state saved_pass_state; VkDevice device_h = radv_device_to_handle(cmd_buffer->device); VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer); + uint32_t layer_count = radv_get_layerCount(image, subresourceRange); assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL); radv_meta_save_pass(_pass_state, cmd_buffer); radv_meta_save_graphics_reset_vport_scissor(_state, cmd_buffer); - for (uint32_t layer = 0; layer < subresourceRange->layerCount; -++layer) { + for (uint32_t layer = 0; layer < layer_count; ++layer) { struct radv_image_view iview; radv_image_view_init(, cmd_buffer->device, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Clamp framebuffer dimensions to min. attachment dimensions.
Module: Mesa Branch: master Commit: e12cf3f9bf4d71579360cb6735c1023bf39aaad3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e12cf3f9bf4d71579360cb6735c1023bf39aaad3 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 19 01:16:19 2017 +0100 radv: Clamp framebuffer dimensions to min. attachment dimensions. Even though the preferred stance is not to fix incorrect applications via the driver, this prevents some nasty GPU hangs. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index be0d9d9..6f2fac2 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2107,6 +2107,11 @@ si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil) return image->surface.tiling_index[level]; } +static uint32_t radv_surface_layer_count(struct radv_image_view *iview) +{ + return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count; +} + static void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb, @@ -2138,7 +2143,7 @@ radv_initialise_color_surface(struct radv_device *device, va += iview->image->dcc_offset; cb->cb_dcc_base = va >> 8; - uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : iview->layer_count; + uint32_t max_slice = radv_surface_layer_count(iview); cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1); @@ -2292,7 +2297,7 @@ radv_initialise_ds_surface(struct radv_device *device, z_offs += iview->image->surface.level[level].offset; s_offs += iview->image->surface.stencil_level[level].offset; - uint32_t max_slice = iview->type == VK_IMAGE_VIEW_TYPE_3D ? 
iview->extent.depth : iview->layer_count; + uint32_t max_slice = radv_surface_layer_count(iview); ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(iview->base_layer + max_slice - 1); ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1); @@ -2389,6 +2394,9 @@ VkResult radv_CreateFramebuffer( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); framebuffer->attachment_count = pCreateInfo->attachmentCount; + framebuffer->width = pCreateInfo->width; + framebuffer->height = pCreateInfo->height; + framebuffer->layers = pCreateInfo->layers; for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { VkImageView _iview = pCreateInfo->pAttachments[i]; struct radv_image_view *iview = radv_image_view_from_handle(_iview); @@ -2398,12 +2406,11 @@ VkResult radv_CreateFramebuffer( } else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { radv_initialise_ds_surface(device, >attachments[i].ds, iview); } + framebuffer->width = MIN2(framebuffer->width, iview->extent.width); + framebuffer->height = MIN2(framebuffer->height, iview->extent.height); + framebuffer->layers = MIN2(framebuffer->layers, radv_surface_layer_count(iview)); } - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - *pFramebuffer = radv_framebuffer_to_handle(framebuffer); return VK_SUCCESS; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Flush in the initial preamble CS.
Module: Mesa Branch: master Commit: 5241fb0ffbe302db0835268c911d566fa18665b9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5241fb0ffbe302db0835268c911d566fa18665b9 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Feb 20 09:26:00 2017 +0100 radv: Flush in the initial preamble CS. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 224 + src/amd/vulkan/radv_private.h | 12 ++- src/amd/vulkan/si_cmd_buffer.c | 2 +- 3 files changed, 148 insertions(+), 90 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8b8e4dd..9b58e41 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -792,8 +792,10 @@ radv_queue_finish(struct radv_queue *queue) if (queue->hw_ctx) queue->device->ws->ctx_destroy(queue->hw_ctx); - if (queue->preamble_cs) - queue->device->ws->cs_destroy(queue->preamble_cs); + if (queue->initial_preamble_cs) + queue->device->ws->cs_destroy(queue->initial_preamble_cs); + if (queue->continue_preamble_cs) + queue->device->ws->cs_destroy(queue->continue_preamble_cs); if (queue->descriptor_bo) queue->device->ws->buffer_destroy(queue->descriptor_bo); if (queue->scratch_bo) @@ -939,6 +941,21 @@ VkResult radv_CreateDevice( break; } device->ws->cs_finalize(device->empty_cs[family]); + + device->flush_cs[family] = device->ws->cs_create(device->ws, family); + switch (family) { + case RADV_QUEUE_GENERAL: + case RADV_QUEUE_COMPUTE: + si_cs_emit_cache_flush(device->flush_cs[family], + device->physical_device->rad_info.chip_class, + family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK, + RADV_CMD_FLAG_INV_ICACHE | + RADV_CMD_FLAG_INV_SMEM_L1 | + RADV_CMD_FLAG_INV_VMEM_L1 | + RADV_CMD_FLAG_INV_GLOBAL_L2); + break; + } + device->ws->cs_finalize(device->flush_cs[family]); } if (getenv("RADV_TRACE_FILE")) { @@ -995,6 +1012,8 @@ void radv_DestroyDevice( vk_free(>alloc, device->queues[i]); 
if (device->empty_cs[i]) device->ws->cs_destroy(device->empty_cs[i]); + if (device->flush_cs[i]) + device->ws->cs_destroy(device->flush_cs[i]); } radv_device_finish_meta(device); @@ -1192,25 +1211,25 @@ radv_get_preamble_cs(struct radv_queue *queue, uint32_t compute_scratch_size, uint32_t esgs_ring_size, uint32_t gsvs_ring_size, - struct radeon_winsys_cs **preamble_cs) + struct radeon_winsys_cs **initial_preamble_cs, + struct radeon_winsys_cs **continue_preamble_cs) { struct radeon_winsys_bo *scratch_bo = NULL; struct radeon_winsys_bo *descriptor_bo = NULL; struct radeon_winsys_bo *compute_scratch_bo = NULL; struct radeon_winsys_bo *esgs_ring_bo = NULL; struct radeon_winsys_bo *gsvs_ring_bo = NULL; - struct radeon_winsys_cs *cs = NULL; - - if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) { - *preamble_cs = NULL; - return VK_SUCCESS; - } + struct radeon_winsys_cs *dest_cs[2] = {0}; if (scratch_size <= queue->scratch_size && compute_scratch_size <= queue->compute_scratch_size && esgs_ring_size <= queue->esgs_ring_size && - gsvs_ring_size <= queue->gsvs_ring_size) { - *preamble_cs = queue->preamble_cs; + gsvs_ring_size <= queue->gsvs_ring_size && + queue->initial_preamble_cs) { + *initial_preamble_cs = queue->initial_preamble_cs; + *continue_preamble_cs = queue->continue_preamble_cs; + if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size) + *continue_preamble_cs = NULL; return VK_SUCCESS; } @@ -1282,94 +1301,113 @@ radv_get_preamble_cs(struct
Mesa (master): radv: Special case the initial preamble.
Module: Mesa Branch: master Commit: c121739c4772a9442bc6866abae9306773abe3de URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c121739c4772a9442bc6866abae9306773abe3de Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Feb 20 09:08:31 2017 +0100 radv: Special case the initial preamble. For flushing we don't want to flush every third IB. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 6 +++--- src/amd/vulkan/radv_radeon_winsys.h | 3 ++- src/amd/vulkan/radv_wsi.c | 2 +- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 26 -- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 222fc7a..8b8e4dd 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -1471,7 +1471,7 @@ VkResult radv_QueueSubmit( if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) { ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, >device->empty_cs[queue->queue_family_index], - 1, NULL, + 1, NULL, NULL, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, pSubmits[i].waitSemaphoreCount, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1509,7 +1509,7 @@ VkResult radv_QueueSubmit( *queue->device->trace_id_ptr = 0; ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, - advance, preamble_cs, + advance, preamble_cs, preamble_cs, (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, b ? 
pSubmits[i].waitSemaphoreCount : 0, (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, @@ -1541,7 +1541,7 @@ VkResult radv_QueueSubmit( if (!fence_emitted) ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, >device->empty_cs[queue->queue_family_index], - 1, NULL, NULL, 0, NULL, 0, + 1, NULL, NULL, NULL, 0, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h index bdb1439..8cf29a3 100644 --- a/src/amd/vulkan/radv_radeon_winsys.h +++ b/src/amd/vulkan/radv_radeon_winsys.h @@ -305,7 +305,8 @@ struct radeon_winsys { int queue_index, struct radeon_winsys_cs **cs_array, unsigned cs_count, -struct radeon_winsys_cs *preamble_cs, +struct radeon_winsys_cs *initial_preamble_cs, +struct radeon_winsys_cs *continue_preamble_cs, struct radeon_winsys_sem **wait_sem, unsigned wait_sem_count, struct radeon_winsys_sem **signal_sem, diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index ea8e784..c8a1996 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -367,7 +367,7 @@ VkResult radv_QueuePresentKHR( struct radeon_winsys_ctx *ctx = queue->hw_ctx; queue->device->ws->cs_submit(ctx, queue->queue_idx, >device->empty_cs[queue->queue_family_index], -1, NULL, +1, NULL, NULL, (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores, pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence); fence->submitted = true; diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 9e468bd..80100e6 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/am
Mesa (master): radv: Don't flush at the start of a command buffer.
Module: Mesa Branch: master Commit: 8cff852ae2481245a81854073e366892eb199963 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8cff852ae2481245a81854073e366892eb199963 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Feb 20 09:27:17 2017 +0100 radv: Don't flush at the start of a command buffer. The preamble flushes now and the rest is the responsibility of the app. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_cmd_buffer.c | 15 --- 1 file changed, 15 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 916906f..248e1af 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1693,26 +1693,11 @@ VkResult radv_BeginCommandBuffer( if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { switch (cmd_buffer->queue_family_index) { case RADV_QUEUE_GENERAL: - /* Flush read caches at the beginning of CS not flushed by the kernel. */ - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | - RADV_CMD_FLAG_INV_GLOBAL_L2; emit_gfx_buffer_state(cmd_buffer); radv_set_db_count_control(cmd_buffer); - si_emit_cache_flush(cmd_buffer); break; case RADV_QUEUE_COMPUTE: - cmd_buffer->state.flush_bits = RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2; si_init_compute(cmd_buffer); - si_emit_cache_flush(cmd_buffer); break; case RADV_QUEUE_TRANSFER: default: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Split emitting the cache flush out.
Module: Mesa Branch: master Commit: eac790811b0260c5989b82fe3644bb8ee7c6ddc0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=eac790811b0260c5989b82fe3644bb8ee7c6ddc0 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Feb 20 01:57:46 2017 +0100 radv: Split emitting the cache flush out. So that we can use it without a cmd_buffer. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/si_cmd_buffer.c | 141 ++--- 1 file changed, 77 insertions(+), 64 deletions(-) diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index e2ba413..1091c7b 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -689,37 +689,27 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, } -void -si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) +static void +si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, + enum chip_class chip_class, + bool is_mec, + enum radv_cmd_flush_bits flush_bits) { - enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class; unsigned cp_coher_cntl = 0; - bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE; - if (is_compute) - cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | - RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_VS_PARTIAL_FLUSH | - RADV_CMD_FLAG_VGT_FLUSH); - - radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128); - - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_ICACHE) + if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) + if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) + if (flush_bits & 
RADV_CMD_FLAG_INV_VMEM_L1) cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { + if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); if (chip_class >= VI) cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); } - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { + if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | S_0085F0_CB1_DEST_BASE_ENA(1) | @@ -731,85 +721,108 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) S_0085F0_CB7_DEST_BASE_ENA(1); /* Necessary for DCC */ - if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | + if (chip_class >= VI) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | EVENT_INDEX(5)); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); - radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); + radeon_emit(cs, 0); } } - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { + if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) { cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1); } - if (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); -
Mesa (master): radv: Free empty_cs on device destruction.
Module: Mesa Branch: master Commit: b6e0df2edd71b56aa1c4a6faacfde3f117a76b30 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6e0df2edd71b56aa1c4a6faacfde3f117a76b30 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Feb 20 02:22:39 2017 +0100 radv: Free empty_cs on device destruction. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 3d0e742..222fc7a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -993,6 +993,8 @@ void radv_DestroyDevice( radv_queue_finish(>queues[i][q]); if (device->queue_count[i]) vk_free(>alloc, device->queues[i]); + if (device->empty_cs[i]) + device->ws->cs_destroy(device->empty_cs[i]); } radv_device_finish_meta(device); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Fix radv_GetPhysicalDeviceQueueFamilyProperties2KHR.
Module: Mesa Branch: master Commit: 1811ccf1256662ae5d0c2b6f26916e8369497d1d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1811ccf1256662ae5d0c2b6f26916e8369497d1d Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Fri Feb 10 21:23:04 2017 +0100 radv: Fix radv_GetPhysicalDeviceQueueFamilyProperties2KHR. The structs have different sizes, so the arrays have different strides. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 45 +++- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8a54a2a..fff3125 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -623,12 +623,11 @@ void radv_GetPhysicalDeviceProperties2KHR( return radv_GetPhysicalDeviceProperties(physicalDevice, >properties); } -void radv_GetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevicephysicalDevice, +static void radv_get_physical_device_queue_family_properties( + struct radv_physical_device*pdevice, uint32_t* pCount, - VkQueueFamilyProperties*pQueueFamilyProperties) + VkQueueFamilyProperties**pQueueFamilyProperties) { - RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); int num_queue_families = 1; int idx; if (pdevice->rad_info.compute_rings > 0 && @@ -646,7 +645,7 @@ void radv_GetPhysicalDeviceQueueFamilyProperties( idx = 0; if (*pCount >= 1) { - pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { + *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, @@ -661,7 +660,7 @@ void radv_GetPhysicalDeviceQueueFamilyProperties( pdevice->rad_info.chip_class >= CIK && !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { if (*pCount > idx) { - pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { + *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { .queueFlags = 
VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, .queueCount = pdevice->rad_info.compute_rings, .timestampValidBits = 64, @@ -673,14 +672,42 @@ void radv_GetPhysicalDeviceQueueFamilyProperties( *pCount = idx; } +void radv_GetPhysicalDeviceQueueFamilyProperties( + VkPhysicalDevicephysicalDevice, + uint32_t* pCount, + VkQueueFamilyProperties*pQueueFamilyProperties) +{ + RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); + if (!pQueueFamilyProperties) { + return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + return; + } + VkQueueFamilyProperties *properties[] = { + pQueueFamilyProperties + 0, + pQueueFamilyProperties + 1, + pQueueFamilyProperties + 2, + }; + radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); + assert(*pCount <= 3); +} + void radv_GetPhysicalDeviceQueueFamilyProperties2KHR( VkPhysicalDevicephysicalDevice, uint32_t* pCount, VkQueueFamilyProperties2KHR*pQueueFamilyProperties) { - return radv_GetPhysicalDeviceQueueFamilyProperties(physicalDevice, - pCount, - >queueFamilyProperties); + RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); + if (!pQueueFamilyProperties) { + return radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL); + return; + } + VkQueueFamilyProperties *properties[] = { + [0].queueFamilyProperties, + [1].queueFamilyProperties, + [2].queueFamilyProperties, + }; + radv_get_physical_device_queue_family_properties(pdevice, pCount, properties); + assert(*pCount <= 3); } void radv_GetPhysicalDeviceMemoryProperties( ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Add support for shaderStorageImageWriteWithoutFormat.
Module: Mesa Branch: master Commit: 53873697e4fe85735d18ecfdec52548bb1bc6702 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=53873697e4fe85735d18ecfdec52548bb1bc6702 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Feb 15 00:55:19 2017 +0100 radv: Add support for shaderStorageImageWriteWithoutFormat. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 2 +- src/amd/vulkan/radv_pipeline.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 1132eeb..9185ef1 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -464,7 +464,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderStorageBufferArrayDynamicIndexing = true, .shaderStorageImageArrayDynamicIndexing = true, .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = true, .shaderClipDistance = true, .shaderCullDistance = true, .shaderFloat64= true, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index cbd846a..722e58d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -208,7 +208,8 @@ radv_shader_compile_to_nir(struct radv_device *device, } const struct nir_spirv_supported_extensions supported_ext = { .draw_parameters = true, - .float64 = true + .float64 = true, + .image_write_without_format = true, }; entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): spirv: Add support for SpvCapabilityStorageImageReadWithoutFormat.
Module: Mesa Branch: master Commit: 501a4c0d73b441d40b3a2532b8e8103f268f7996 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=501a4c0d73b441d40b3a2532b8e8103f268f7996 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Feb 15 00:58:41 2017 +0100 spirv: Add support for SpvCapabilityStorageImageReadWithoutFormat. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/compiler/spirv/nir_spirv.h| 1 + src/compiler/spirv/spirv_to_nir.c | 5 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/nir_spirv.h b/src/compiler/spirv/nir_spirv.h index e43e9b5..8864a8f 100644 --- a/src/compiler/spirv/nir_spirv.h +++ b/src/compiler/spirv/nir_spirv.h @@ -49,6 +49,7 @@ struct nir_spirv_supported_extensions { bool image_ms_array; bool tessellation; bool draw_parameters; + bool image_read_without_format; bool image_write_without_format; }; diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c index 7e7874a..8108e10 100644 --- a/src/compiler/spirv/spirv_to_nir.c +++ b/src/compiler/spirv/spirv_to_nir.c @@ -2663,7 +2663,6 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, case SpvCapabilitySparseResidency: case SpvCapabilityMinLod: case SpvCapabilityTransformFeedback: - case SpvCapabilityStorageImageReadWithoutFormat: vtn_warn("Unsupported SPIR-V capability: %s", spirv_capability_to_string(cap)); break; @@ -2699,6 +2698,10 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, spv_check_supported(draw_parameters, cap); break; + case SpvCapabilityStorageImageReadWithoutFormat: + spv_check_supported(image_read_without_format, cap); + break; + case SpvCapabilityStorageImageWriteWithoutFormat: spv_check_supported(image_write_without_format, cap); break; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Add support for shaderStorageImageReadWithoutFormat.
Module: Mesa Branch: master Commit: 4e6095ff61efef9d27323494147c97fc16d61052 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e6095ff61efef9d27323494147c97fc16d61052 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Wed Feb 15 01:00:07 2017 +0100 radv: Add support for shaderStorageImageReadWithoutFormat. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Edward O'Callaghan <funfunc...@folklore1984.net> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 2 +- src/amd/vulkan/radv_pipeline.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 9185ef1..be0d9d9 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -463,7 +463,7 @@ void radv_GetPhysicalDeviceFeatures( .shaderSampledImageArrayDynamicIndexing = true, .shaderStorageBufferArrayDynamicIndexing = true, .shaderStorageImageArrayDynamicIndexing = true, - .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageReadWithoutFormat = true, .shaderStorageImageWriteWithoutFormat = true, .shaderClipDistance = true, .shaderCullDistance = true, diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 722e58d..723c32c 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -209,6 +209,7 @@ radv_shader_compile_to_nir(struct radv_device *device, const struct nir_spirv_supported_extensions supported_ext = { .draw_parameters = true, .float64 = true, + .image_read_without_format = true, .image_write_without_format = true, }; entry_point = spirv_to_nir(spirv, module->size / 4, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Use different allocator for descriptor set vram.
Module: Mesa Branch: master Commit: d5bf4c739462a686fb186f5e804ea9e628431526 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d5bf4c739462a686fb186f5e804ea9e628431526 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Feb 16 21:23:58 2017 +0100 radv: Use different allocator for descriptor set vram. This one only keeps allocated memory in the list, and list nodes in the descriptor sets. This doesn't need messing around with max_sets, and we get automatic merging of free regions. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_descriptor_set.c | 82 +++- src/amd/vulkan/radv_private.h| 18 ++-- 2 files changed, 29 insertions(+), 71 deletions(-) diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 81291d1..e2bd9b9 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -275,39 +275,37 @@ radv_descriptor_set_create(struct radv_device *device, uint32_t layout_size = align_u32(layout->size, 32); set->size = layout->size; if (!cmd_buffer) { - if (pool->current_offset + layout_size <= pool->size && - pool->allocated_sets < pool->max_sets) { + /* try to allocate linearly first, so that we don't spend +* time looking for gaps if the app only allocates & +* resets via the pool. 
*/ + if (pool->current_offset + layout_size <= pool->size) { set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset); set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset; pool->current_offset += layout_size; - ++pool->allocated_sets; + list_addtail(>vram_list, >vram_list); } else { - int entry = pool->free_list, prev_entry = -1; - uint32_t offset; - while (entry >= 0) { - if (pool->free_nodes[entry].size >= layout_size) { - if (prev_entry >= 0) - pool->free_nodes[prev_entry].next = pool->free_nodes[entry].next; - else - pool->free_list = pool->free_nodes[entry].next; + uint64_t offset = 0; + struct list_head *prev = >vram_list; + struct radv_descriptor_set *cur; + LIST_FOR_EACH_ENTRY(cur, >vram_list, vram_list) { + uint64_t start = (uint8_t*)cur->mapped_ptr - pool->mapped_ptr; + if (start - offset >= layout_size) break; - } - prev_entry = entry; - entry = pool->free_nodes[entry].next; + + offset = start + cur->size; + prev = >vram_list; } - if (entry < 0) { + if (pool->size - offset < layout_size) { + vk_free2(>alloc, NULL, set->dynamic_descriptors); vk_free2(>alloc, NULL, set); return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR); } - offset = pool->free_nodes[entry].offset; - pool->free_nodes[entry].next = pool->full_list; - pool->full_list = entry; - set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset); set->va = device->ws->buffer_get_va(set->bo) + offset; + list_add(>vram_list, prev); } } else { unsigned bo_offset; @@ -324,11 +322,6 @@ radv_descriptor_set_create(struct radv_device *device, } } - if (pool) -
Mesa (master): radv: Never try to create more than max_sets descriptor sets.
Module: Mesa Branch: master Commit: f4487016226c3337fa5fb2244c75ce298093c3ce URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f4487016226c3337fa5fb2244c75ce298093c3ce Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Thu Feb 16 20:52:24 2017 +0100 radv: Never try to create more than max_sets descriptor sets. We only use the freed ones after all free space has been used. If the app only allocates small descriptor sets, we might go over max_sets before the memory is full. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> CC: <mesa-sta...@lists.freedesktop.org> Fixes: f4e499ec79147f4172f3669ae9dafd941aaeeb65 --- src/amd/vulkan/radv_descriptor_set.c | 7 +-- src/amd/vulkan/radv_private.h| 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index 6d89d60..81291d1 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -275,12 +275,13 @@ radv_descriptor_set_create(struct radv_device *device, uint32_t layout_size = align_u32(layout->size, 32); set->size = layout->size; if (!cmd_buffer) { - if (pool->current_offset + layout_size <= pool->size) { + if (pool->current_offset + layout_size <= pool->size && + pool->allocated_sets < pool->max_sets) { set->bo = pool->bo; set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset); set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset; pool->current_offset += layout_size; - + ++pool->allocated_sets; } else { int entry = pool->free_list, prev_entry = -1; uint32_t offset; @@ -417,6 +418,7 @@ VkResult radv_CreateDescriptorPool( pool->full_list = 0; pool->free_nodes[max_sets - 1].next = -1; pool->max_sets = max_sets; + pool->allocated_sets = 0; for (int i = 0; i + 1 < max_sets; ++i) pool->free_nodes[i].next = i + 1; @@ -494,6 +496,7 @@ VkResult radv_ResetDescriptorPool( radv_descriptor_set_destroy(device, pool, set, 
false); } + pool->allocated_sets = 0; pool->current_offset = 0; pool->free_list = -1; pool->full_list = 0; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7b1d8fb..9c326dc 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -564,6 +564,7 @@ struct radv_descriptor_pool { int free_list; int full_list; uint32_t max_sets; + uint32_t allocated_sets; struct radv_descriptor_pool_free_node free_nodes[]; }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): ac/debug: Decrease num_dw for type 2 NOP's.
Module: Mesa Branch: master Commit: 3b4bf8aa636768f4ad5fb636b8406e58d0d78f62 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b4bf8aa636768f4ad5fb636b8406e58d0d78f62 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Jan 15 23:01:03 2017 +0100 ac/debug: Decrease num_dw for type 2 NOP's. Otherwise we read past the end of the buffer. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com> --- src/amd/common/ac_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c index f91e448..989dfda 100644 --- a/src/amd/common/ac_debug.c +++ b/src/amd/common/ac_debug.c @@ -357,6 +357,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id, if (ib[0] == 0x8000) { fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n"); ib++; + num_dw--; break; } /* fall through */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Support multiple devices.
Module: Mesa Branch: master Commit: 5ae4de18d93eb47fefe09b516189a2794f2e4a51 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ae4de18d93eb47fefe09b516189a2794f2e4a51 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Jan 16 21:25:10 2017 +0100 radv: Support multiple devices. Pretty straightforward. Also deleted the big comment block as it is a pretty standard pattern for filling in arrays. Also removed the error message on non-existent devices, as getting 7 errors printed to the console each time you enumerate the devices is pretty confusing. v2: Add constant for number of DRM devices. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_device.c | 60 --- src/amd/vulkan/radv_private.h | 3 ++- 2 files changed, 19 insertions(+), 44 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 10d4d2b..9371536 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -190,8 +190,7 @@ radv_physical_device_init(struct radv_physical_device *device, fd = open(path, O_RDWR | O_CLOEXEC); if (fd < 0) - return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, -"failed to open %s: %m", path); + return VK_ERROR_INCOMPATIBLE_DRIVER; version = drmGetVersion(fd); if (!version) { @@ -365,10 +364,8 @@ void radv_DestroyInstance( { RADV_FROM_HANDLE(radv_instance, instance, _instance); - if (instance->physicalDeviceCount > 0) { - /* We support at most one physical device. 
*/ - assert(instance->physicalDeviceCount == 1); - radv_physical_device_finish(>physicalDevice); + for (int i = 0; i < instance->physicalDeviceCount; ++i) { + radv_physical_device_finish(instance->physicalDevices + i); } VG(VALGRIND_DESTROY_MEMPOOL(instance)); @@ -388,52 +385,29 @@ VkResult radv_EnumeratePhysicalDevices( if (instance->physicalDeviceCount < 0) { char path[20]; - for (unsigned i = 0; i < 8; i++) { + instance->physicalDeviceCount = 0; + for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) { snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i); - result = radv_physical_device_init(>physicalDevice, - instance, path); - if (result != VK_ERROR_INCOMPATIBLE_DRIVER) - break; - } - - if (result == VK_ERROR_INCOMPATIBLE_DRIVER) { - instance->physicalDeviceCount = 0; - } else if (result == VK_SUCCESS) { - instance->physicalDeviceCount = 1; - } else { - return result; + result = radv_physical_device_init(instance->physicalDevices + + instance->physicalDeviceCount, + instance, path); + if (result == VK_SUCCESS) + ++instance->physicalDeviceCount; + else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) + return result; } } - /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL; -* otherwise it's an inout parameter. -* -* The Vulkan spec (git aaed022) says: -* -*pPhysicalDeviceCount is a pointer to an unsigned integer variable -*that is initialized with the number of devices the application is -*prepared to receive handles to. pname:pPhysicalDevices is pointer to -*an array of at least this many VkPhysicalDevice handles [...]. -* -*Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices -*overwrites the contents of the variable pointed to by -*pPhysicalDeviceCount with the number of physical devices in in the -*instance; otherwise, vkEnumeratePhysicalDevices overwrites -*pPhysicalDeviceCount with the number of physical handles written to -*pPhysicalDevices. 
-*/ if (!pPhysicalDevices) { *pPhysicalDeviceCount = instance->physicalDeviceCount; - } else if (*pPhysicalDeviceCount >= 1) { - pPhysicalDevices[0] = radv_physical_device_to_handle(>physicalDevice); - *pPhysicalDeviceCount = 1; - } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) { - return VK_INCOMP
Mesa (master): radv: Get physical device from radv_device instead of the instance.
Module: Mesa Branch: master Commit: 8406f79d6a3511dc6bf4d9c2ab3a80828c363d3a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8406f79d6a3511dc6bf4d9c2ab3a80828c363d3a Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Mon Jan 16 21:23:48 2017 +0100 radv: Get physical device from radv_device instead of the instance. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_cmd_buffer.c | 18 +- src/amd/vulkan/radv_device.c | 15 --- src/amd/vulkan/radv_image.c | 18 +- src/amd/vulkan/radv_pipeline.c | 10 +- src/amd/vulkan/radv_pipeline_cache.c | 10 -- src/amd/vulkan/radv_private.h| 2 ++ src/amd/vulkan/radv_query.c | 6 +++--- src/amd/vulkan/radv_wsi.c| 4 ++-- src/amd/vulkan/si_cmd_buffer.c | 16 9 files changed, 50 insertions(+), 49 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 651b1dd..01e77f8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -117,7 +117,7 @@ radv_dynamic_state_copy(struct radv_dynamic_state *dest, bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE && - cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK; + cmd_buffer->device->physical_device->rad_info.chip_class >= CIK; } enum ring_type radv_queue_family_to_ring(int f) { @@ -645,7 +645,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb) { - bool is_vi = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI; + bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= VI; radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); radeon_emit(cmd_buffer->cs, cb->cb_color_base); radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); @@ -911,13 +911,13 @@ void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer) uint32_t 
db_count_control; if(!cmd_buffer->state.active_occlusion_queries) { - if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { db_count_control = 0; } else { db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); } } else { - if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(0) | /* TODO: set this to the number of samples of the current framebuffer */ S_028004_ZPASS_ENABLE(1) | @@ -1129,7 +1129,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) va += offset + buffer->offset; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class <= CIK && stride) + if (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride) desc[2] = (buffer->size - offset - cmd_buffer->state.pipeline->va_format_size[i]) / stride + 1; else desc[2] = buffer->size - offset; @@ -1161,7 +1161,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, 0); ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer); - if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim)
Mesa (master): amd/common: Fix build with new ac_add_function_attr()
Module: Mesa Branch: master Commit: 6d600cf632f83d7ffdd844e0b3a049605ae858cc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d600cf632f83d7ffdd844e0b3a049605ae858cc Author: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> Date: Wed Mar 1 22:16:27 2017 +0100 amd/common: Fix build with new ac_add_function_attr() Fix usage of ac_add_function_attr() and make it known! common/ac_nir_to_llvm.c: In function 'create_llvm_function': common/ac_nir_to_llvm.c:265:4: error: implicit declaration of function 'ac_add_function_attr' [-Werror=implicit-function-declaration] ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL); ^~~~ Signed-off-by: Tobias Klausmann <tobias.johannes.klausm...@mni.thm.de> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/common/ac_llvm_util.c | 2 +- src/amd/common/ac_llvm_util.h | 2 ++ src/amd/common/ac_nir_to_llvm.c | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index fb525dd..1936014 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -180,7 +180,7 @@ static const char *attr_to_str(enum ac_func_attr attr) #endif -static void +void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, enum ac_func_attr attr) { diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h index 4fe4ab4..805db77 100644 --- a/src/amd/common/ac_llvm_util.h +++ b/src/amd/common/ac_llvm_util.h @@ -53,6 +53,8 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool su void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes); bool ac_is_sgpr_param(LLVMValueRef param); +void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, + int attr_idx, enum ac_func_attr attr); void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask); void ac_dump_module(LLVMModuleRef module); diff --git 
a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index a800ff0..2228dd8 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -262,11 +262,11 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, for (unsigned i = 0; i < sgpr_params; ++i) { if (array_params_mask & (1 << i)) { LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); ac_add_attr_dereferenceable(P, UINT64_MAX); } else { - ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Only use PKT3_OCCLUSION_QUERY when it doesn't hang.
Module: Mesa Branch: master Commit: 8ea34a98c06af4e3ce00ea665ee07480bcbd199f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8ea34a98c06af4e3ce00ea665ee07480bcbd199f Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 26 22:26:30 2017 +0100 radv: Only use PKT3_OCCLUSION_QUERY when it doesn't hang. PKT3_OCCLUSION_QUERY hangs when used in a nested IB. This only calls it when in a primary command buffer and we change GetQueryPoolResults to not need it. CmdCopyQueryPoolResults still needs it so we break that behavior for secondary command buffers. However, that would hang already and using an uninitialized value is better than a hang. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Cc: 13.0 17.0 <mesa-sta...@lists.freedesktop.org> --- src/amd/vulkan/radv_query.c | 65 +++-- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index a29a05d..a2d0889 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -131,6 +131,7 @@ VkResult radv_GetQueryPoolResults( VkDeviceSizestride, VkQueryResultFlags flags) { + RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); char *data = pData; VkResult result = VK_SUCCESS; @@ -141,23 +142,20 @@ VkResult radv_GetQueryPoolResults( char *src = pool->ptr + query * pool->stride; uint32_t available; - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) - ; - } + switch (pool->type) { + case VK_QUERY_TYPE_TIMESTAMP: { + if (flags & VK_QUERY_RESULT_WAIT_BIT) { + while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query)) + ; + } - if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) && - !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { - if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) - *(uint32_t*)dest = 0; - result = VK_NOT_READY; - 
continue; + available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); + if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { + result = VK_NOT_READY; + break; - } + } - available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query); - switch (pool->type) { - case VK_QUERY_TYPE_TIMESTAMP: if (flags & VK_QUERY_RESULT_64_BIT) { *(uint64_t*)dest = *(uint64_t*)src; dest += 8; @@ -166,8 +164,32 @@ VkResult radv_GetQueryPoolResults( dest += 4; } break; + } case VK_QUERY_TYPE_OCCLUSION: { - uint64_t result = *(uint64_t*)(src + pool->stride - 16); + volatile uint64_t const *src64 = (volatile uint64_t const *)src; + uint64_t result = 0; + int db_count = get_max_db(device); + available = 1; + + for (int i = 0; i < db_count; ++i) { + uint64_t start, end; + do { + start = src64[2 * i]; + end = src64[2 * i + 1]; + } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT)); + + if (!(start & (1ull << 63)) || !(end & (1ull << 63))) + available = 0; + else { + result += end - start; + } + } + + if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) { + result = VK_NOT_READY; + break; + + } if (flags & VK_QUERY_RESULT_64_BIT) { *(uint64_t*)dest = r
Mesa (master): radv: Use correct size for availability flag.
Module: Mesa Branch: master Commit: 43d833ae97daa73a2422b5696b587a7b46750e07 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=43d833ae97daa73a2422b5696b587a7b46750e07 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 26 22:36:10 2017 +0100 radv: Use correct size for availability flag. Per spec, VK_QUERY_RESULT_64_BIT specifies the integer size and the availability flag is an integer. We apparently handled this correctly already for the copy to buffer case. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Cc: 13.0 17.0 <mesa-sta...@lists.freedesktop.org> --- src/amd/vulkan/radv_query.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index a2d0889..288bd43 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -205,8 +205,11 @@ VkResult radv_GetQueryPoolResults( } if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { - *(uint32_t*)dest = available; - dest += 4; + if (flags & VK_QUERY_RESULT_64_BIT) { + *(uint64_t*)dest = available; + } else { + *(uint32_t*)dest = available; + } } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Reset emitted compute pipeline when calling secondary cmd buffer.
Module: Mesa Branch: master Commit: bb878db7eb94e48476a920d66174a45da2585e19 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bb878db7eb94e48476a920d66174a45da2585e19 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 26 20:01:26 2017 +0100 radv: Reset emitted compute pipeline when calling secondary cmd buffer. Otherwise if the new compute pipeline is the same as the last used pipeline before the call, we don't emit it again. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Cc: 13.0 17.0 <mesa-sta...@lists.freedesktop.org> --- src/amd/vulkan/radv_cmd_buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1e38cbe..709ae8b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2114,6 +2114,7 @@ void radv_CmdExecuteCommands( /* if we execute secondary we need to re-emit out pipelines */ if (commandBufferCount) { primary->state.emitted_pipeline = NULL; + primary->state.emitted_compute_pipeline = NULL; primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE; primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/ac: Add integer->integer casts.
Module: Mesa Branch: master Commit: 336b05c49a6c6ce915bbd6629da17321ecb72bee URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=336b05c49a6c6ce915bbd6629da17321ecb72bee Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Sun Feb 26 02:37:27 2017 +0100 radv/ac: Add integer->integer casts. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Acked-by: Edward O'Callaghan <funfunc...@folklore1984.net> --- src/amd/common/ac_nir_to_llvm.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 9778581..4f3d689 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1476,6 +1476,24 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) case nir_op_d2f: result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), ""); break; + case nir_op_u2u32: + case nir_op_u2u64: + case nir_op_u2i32: + case nir_op_u2i64: + if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) + result = LLVMBuildZExt(ctx->builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + break; + case nir_op_i2u32: + case nir_op_i2u64: + case nir_op_i2i32: + case nir_op_i2i64: + if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) + result = LLVMBuildSExt(ctx->builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + break; case nir_op_bcsel: result = emit_bcsel(ctx, src[0], src[1], src[2]); break; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600g/sb: fix struct/class declaration conflicts
Module: Mesa Branch: master Commit: 15804c4b9093179f6dd6be2b3a45e18b1318d5a3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15804c4b9093179f6dd6be2b3a45e18b1318d5a3 Author: Martina Kollarova <martina.kollar...@intel.com> Date: Fri Sep 16 18:54:53 2016 +0300 r600g/sb: fix struct/class declaration conflicts A couple of forward-declarations were causing warnings in clang: 'value' defined as a class here but previously declared as a struct [-Wmismatched-tags] Signed-off-by: Martina Kollarova <martina.kollar...@intel.com> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/gallium/drivers/r600/sb/sb_ir.h | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/sb/sb_ir.h b/src/gallium/drivers/r600/sb/sb_ir.h index c612e6c..4fc4da2 100644 --- a/src/gallium/drivers/r600/sb/sb_ir.h +++ b/src/gallium/drivers/r600/sb/sb_ir.h @@ -263,8 +263,6 @@ public: } }; -class value; - enum value_kind { VLK_REG, VLK_REL_REG, @@ -433,8 +431,6 @@ inline value_flags& operator &=(value_flags , value_flags r) { return l; } -struct value; - sb_ostream& operator << (sb_ostream , value ); typedef uint32_t value_hash; @@ -467,7 +463,7 @@ enum constraint_kind { class shader; class sb_value_pool; -class ra_chunk; +struct ra_chunk; class ra_constraint; class value { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv/pipeline: Don't dereference NULL dynamic state pointers
Module: Mesa Branch: master Commit: 9b121512ac0f78d0996613664b456005d88370d2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b121512ac0f78d0996613664b456005d88370d2 Author: Darren Salt <devs...@moreofthesa.me.uk> Date: Sun Oct 16 20:32:19 2016 +0100 radv/pipeline: Don't dereference NULL dynamic state pointers This is a port of commit a4a59172482d50318a5ae7f99021bcf0125e0f53: Add guards to prevent dereferencing NULL dynamic pipeline state. Asserts of pCreateInfo members are moved to the earliest points at which they should not be NULL. This fixes a segfault, related to pColorBlendState, seen in Talos Principle which I've observed after startup is completed and when exiting the menus, depending on when Vulkan rendering is selected. v2: moved the NULL check in radv_pipeline_init_blend_state to after the declarations. Acked-by: Edward O'Callaghan <funfunc...@folklore1984.net> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> --- src/amd/vulkan/radv_pipeline.c | 67 +- 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index bb97531..404e840 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -722,6 +722,10 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline, bool blend_mrt0_is_dual_src = false; int i; bool single_cb_enable = false; + + if (!vkblend) + return; + if (extra && extra->custom_blend_mode) { single_cb_enable = true; mode = extra->custom_blend_mode; @@ -1076,18 +1080,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, struct radv_dynamic_state *dynamic = >dynamic_state; - dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; - if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { - typed_memcpy(dynamic->viewport.viewports, -pCreateInfo->pViewportState->pViewports, -pCreateInfo->pViewportState->viewportCount); - } + /* Section 9.2 of the Vulkan 1.0.15 spec says: +* +*pViewportState is [...] 
NULL if the pipeline +*has rasterization disabled. +*/ + if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) { + assert(pCreateInfo->pViewportState); + + dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, +pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } - dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; - if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { - typed_memcpy(dynamic->scissor.scissors, -pCreateInfo->pViewportState->pScissors, -pCreateInfo->pViewportState->scissorCount); + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, +pCreateInfo->pViewportState->pScissors, +pCreateInfo->pViewportState->scissorCount); + } } if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { @@ -1105,7 +1118,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline, pCreateInfo->pRasterizationState->depthBiasSlopeFactor; } - if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + /* Section 9.2 of the Vulkan 1.0.15 spec says: +* +*pColorBlendState is [...] NULL if the pipeline has rasterization +*disabled or if the subpass of the render pass the pipeline is +*created against does not use any color attachments. +*/ + bool uses_color_att = false; + for (unsigned i = 0; i < subpass->color_count; ++i) { + if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) { + uses_color_att = true; + break; + } + } + + if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { assert(pCreateInfo->pColorBlendState); typed_memcpy(dynamic->blend_constants, pCreateInfo->pColorBlendStat
Mesa (master): radv: Use different intrinsic for ubo loads.
Module: Mesa Branch: master Commit: 05533ce418851b12fd0a1e940a633f9280203aab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=05533ce418851b12fd0a1e940a633f9280203aab Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Nov 29 00:18:43 2016 +0100 radv: Use different intrinsic for ubo loads. Not sure about the deprecation path, but this intrinsic can be lowered to SMEM loads. This results in a significant Talos performance improvement. v2: Fix for LLVM attribute changes. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> --- src/amd/common/ac_nir_to_llvm.c | 30 +- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index f623cc0..c9b0106 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -2035,6 +2035,34 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, get_def_type(ctx, >dest.ssa), ""); } +static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = _intrinsic_infos[instr->intrinsic]; + const char *load_name; + LLVMTypeRef data_type = ctx->f32; + LLVMValueRef results[4], ret; + LLVMValueRef rsrc = get_src(ctx, instr->src[0]); + LLVMValueRef offset = get_src(ctx, instr->src[1]); + + rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), ""); + + for (unsigned i = 0; i < instr->num_components; ++i) { + LLVMValueRef params[] = { + rsrc, + LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), +offset, "") + }; + results[i] = emit_llvm_intrinsic(ctx, "llvm.SI.load.const", ctx->f32, +params, 2, AC_FUNC_ATTR_READNONE); + } + + + ret = build_gather_values(ctx, results, instr->num_components); + return LLVMBuildBitCast(ctx->builder, ret, + get_def_type(ctx, >dest.ssa), ""); +} + static void radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, bool vs_in, 
unsigned *const_out, LLVMValueRef *indir_out) @@ -2956,7 +2984,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, result = visit_atomic_ssbo(ctx, instr); break; case nir_intrinsic_load_ubo: - result = visit_load_buffer(ctx, instr); + result = visit_load_ubo_buffer(ctx, instr); break; case nir_intrinsic_get_buffer_size: result = visit_get_buffer_size(ctx, instr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): radv: Don't generate radv_timestamp.h
Module: Mesa Branch: master Commit: a794f0901719eb2689cbbabd5724fa89a2cfecd2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a794f0901719eb2689cbbabd5724fa89a2cfecd2 Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Date: Tue Nov 22 21:28:28 2016 +0100 radv: Don't generate radv_timestamp.h Not needed anymore. Signed-off-by: Bas Nieuwenhuizen <ba...@google.com> Reviewed-by: Dave Airlie <airl...@redhat.com> Reviewed-by: Emil Velikov <emil.veli...@collabora.com> --- src/amd/vulkan/Makefile.am | 6 -- src/amd/vulkan/Makefile.sources | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/amd/vulkan/Makefile.am b/src/amd/vulkan/Makefile.am index c559a95..6e184c0 100644 --- a/src/amd/vulkan/Makefile.am +++ b/src/amd/vulkan/Makefile.am @@ -119,12 +119,6 @@ radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS) $(AM_V_GEN) cat $(vulkan_include_HEADERS) |\ $(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@ -.PHONY: radv_timestamp.h - -radv_timestamp.h: - @echo "Updating radv_timestamp.h" - $(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@ - vk_format_table.c: vk_format_table.py \ vk_format_parse.py \ vk_format_layout.csv diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index d163b98..425a00f 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -72,6 +72,5 @@ VULKAN_WSI_X11_FILES := \ VULKAN_GENERATED_FILES := \ radv_entrypoints.c \ - radv_entrypoints.h \ - radv_timestamp.h + radv_entrypoints.h ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit