Module: Mesa Branch: main Commit: fbff2b6c6525844f243c229d9c028939c772597b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fbff2b6c6525844f243c229d9c028939c772597b
Author: Mike Blumenkrantz <[email protected]> Date: Wed Sep 15 15:12:09 2021 -0400 zink: implement GL_EXT_multisampled_render_to_texture this requires allocating and using a lazily-allocated msaa surface as a transient attachment for the base render operation, resolving it ...except vulkan has no "replicate" renderpass attachment mechanism, so for now we're just gonna allocate a transient surface and hang on to it. sorry tilers! Reviewed-by: Adam Jackson <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12934> --- src/gallium/drivers/zink/zink_context.c | 122 ++++++++++++++++++--- src/gallium/drivers/zink/zink_framebuffer.c | 68 +++++++++++- src/gallium/drivers/zink/zink_render_pass.c | 157 +++++++++++++++++++++++++++- src/gallium/drivers/zink/zink_render_pass.h | 14 ++- src/gallium/drivers/zink/zink_screen.c | 3 + src/gallium/drivers/zink/zink_surface.c | 43 ++++++-- src/gallium/drivers/zink/zink_surface.h | 10 ++ 7 files changed, 388 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 06a642c38ec..fcd0ea87820 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -1574,12 +1574,18 @@ get_render_pass(struct zink_context *ctx) for (int i = 0; i < fb->nr_cbufs; i++) { struct pipe_surface *surf = fb->cbufs[i]; if (surf) { + struct zink_surface *transient = zink_transient_surface(surf); state.rts[i].format = zink_get_format(screen, surf->format); - state.rts[i].samples = surf->texture->nr_samples > 0 ? surf->texture->nr_samples : - VK_SAMPLE_COUNT_1_BIT; + state.rts[i].samples = MAX3(transient ? transient->base.nr_samples : 0, surf->texture->nr_samples, 1); state.rts[i].clear_color = zink_fb_clear_enabled(ctx, i) && !zink_fb_clear_first_needs_explicit(&ctx->fb_clears[i]); clears |= !!state.rts[i].clear_color ? 
PIPE_CLEAR_COLOR0 << i : 0; state.rts[i].swapchain = surf->texture->bind & PIPE_BIND_SCANOUT; + if (transient) { + state.num_cresolves++; + state.rts[i].resolve = true; + if (!state.rts[i].clear_color) + state.msaa_expand_mask |= BITFIELD_BIT(i); + } } else { state.rts[i].format = VK_FORMAT_R8_UINT; state.rts[i].samples = fb->samples; @@ -1587,12 +1593,18 @@ get_render_pass(struct zink_context *ctx) state.num_rts++; } state.num_cbufs = fb->nr_cbufs; + assert(!state.num_cresolves || state.num_cbufs == state.num_cresolves); if (fb->zsbuf) { struct zink_resource *zsbuf = zink_resource(fb->zsbuf->texture); struct zink_framebuffer_clear *fb_clear = &ctx->fb_clears[PIPE_MAX_COLOR_BUFS]; + struct zink_surface *transient = zink_transient_surface(fb->zsbuf); state.rts[fb->nr_cbufs].format = zsbuf->format; - state.rts[fb->nr_cbufs].samples = zsbuf->base.b.nr_samples > 0 ? zsbuf->base.b.nr_samples : VK_SAMPLE_COUNT_1_BIT; + state.rts[fb->nr_cbufs].samples = MAX3(transient ? transient->base.nr_samples : 0, fb->zsbuf->texture->nr_samples, 1); + if (transient) { + state.num_zsresolves = 1; + state.rts[fb->nr_cbufs].resolve = true; + } state.rts[fb->nr_cbufs].clear_color = zink_fb_clear_enabled(ctx, PIPE_MAX_COLOR_BUFS) && !zink_fb_clear_first_needs_explicit(fb_clear) && (zink_fb_clear_element(fb_clear, 0)->zs.bits & PIPE_CLEAR_DEPTH); @@ -1607,7 +1619,7 @@ get_render_pass(struct zink_context *ctx) ctx->gfx_stages[PIPE_SHADER_FRAGMENT]->nir->info.outputs_written : 0; bool needs_write = (ctx->dsa_state && ctx->dsa_state->hw_state.depth_write) || outputs_written & (BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_STENCIL)); - state.rts[fb->nr_cbufs].needs_write = needs_write || state.rts[fb->nr_cbufs].clear_color || state.rts[fb->nr_cbufs].clear_stencil; + state.rts[fb->nr_cbufs].needs_write = needs_write || state.num_zsresolves || state.rts[fb->nr_cbufs].clear_color || state.rts[fb->nr_cbufs].clear_stencil; state.num_rts++; } state.have_zsbuf = fb->zsbuf != NULL; @@ 
-1725,11 +1737,24 @@ prep_fb_attachments(struct zink_context *ctx, VkImageView *att) unsigned num_resolves = 0; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); - att[i] = prep_fb_attachment(ctx, surf, i); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]); + if (transient) { + att[i] = prep_fb_attachment(ctx, transient, i); + att[i + cresolve_offset] = prep_fb_attachment(ctx, surf, i); + num_resolves++; + } else { + att[i] = prep_fb_attachment(ctx, surf, i); + } } if (ctx->fb_state.zsbuf) { struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); - att[ctx->fb_state.nr_cbufs] = prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf); + if (transient) { + att[ctx->fb_state.nr_cbufs] = prep_fb_attachment(ctx, transient, ctx->fb_state.nr_cbufs); + att[cresolve_offset + num_resolves] = prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + } else { + att[ctx->fb_state.nr_cbufs] = prep_fb_attachment(ctx, surf, ctx->fb_state.nr_cbufs); + } } } @@ -1792,7 +1817,7 @@ begin_render_pass(struct zink_context *ctx) assert(ctx->gfx_pipeline_state.render_pass && ctx->framebuffer); VkRenderPassAttachmentBeginInfo infos; - VkImageView att[PIPE_MAX_COLOR_BUFS + 1]; + VkImageView att[2 * (PIPE_MAX_COLOR_BUFS + 1)]; infos.sType = VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO; infos.pNext = NULL; infos.attachmentCount = ctx->framebuffer->state.num_attachments; @@ -1800,12 +1825,29 @@ begin_render_pass(struct zink_context *ctx) prep_fb_attachments(ctx, att); if (zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer) { #ifndef NDEBUG + const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!ctx->fb_state.zsbuf; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { - assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkusage == 
ctx->framebuffer->state.infos[i].usage); - assert(!ctx->fb_state.cbufs[i] || zink_resource(ctx->fb_state.cbufs[i]->texture)->obj->vkflags == ctx->framebuffer->state.infos[i].flags); + if (ctx->fb_state.cbufs[i]) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.cbufs[i]); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.cbufs[i]); + if (transient) { + assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage); + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset + i].usage); /* color resolve infos are stored at cresolve_offset + i */ + } else { + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[i].usage); + } + } + } + if (ctx->fb_state.zsbuf) { + struct zink_surface *surf = zink_csurface(ctx->fb_state.zsbuf); + struct zink_surface *transient = zink_transient_surface(ctx->fb_state.zsbuf); + if (transient) { + assert(zink_resource(transient->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[cresolve_offset + ctx->gfx_pipeline_state.render_pass->state.num_cresolves].usage); /* zs resolve info follows all color resolve infos */ + } else { + assert(zink_resource(surf->base.texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); + } } - assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkusage == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].usage); - assert(!ctx->fb_state.zsbuf || zink_resource(ctx->fb_state.zsbuf->texture)->obj->vkflags == ctx->framebuffer->state.infos[ctx->fb_state.nr_cbufs].flags); #endif rpbi.pNext = &infos; } @@ -1847,6 +1889,40 @@ void zink_begin_render_pass(struct zink_context *ctx) { setup_framebuffer(ctx); + /* TODO: need replicate EXT */ + if (ctx->framebuffer->rp->state.msaa_expand_mask) { + uint32_t rp_state = ctx->gfx_pipeline_state.rp_state; + struct zink_render_pass *rp = ctx->gfx_pipeline_state.render_pass; + + u_foreach_bit(i, 
ctx->framebuffer->rp->state.msaa_expand_mask) { + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i]; + if (csurf->transient_init) + continue; + struct pipe_surface *dst_view = (struct pipe_surface*)csurf->transient; + assert(dst_view); + struct pipe_sampler_view src_templ, *src_view; + struct pipe_resource *src = ctx->fb_state.cbufs[i]->texture; + struct pipe_box dstbox; + + u_box_3d(0, 0, 0, ctx->fb_state.width, ctx->fb_state.height, + 1 + dst_view->u.tex.last_layer - dst_view->u.tex.first_layer, &dstbox); + + util_blitter_default_src_texture(ctx->blitter, &src_templ, src, ctx->fb_state.cbufs[i]->u.tex.level); + src_view = ctx->base.create_sampler_view(&ctx->base, src, &src_templ); + + zink_blit_begin(ctx, ZINK_BLIT_SAVE_FB | ZINK_BLIT_SAVE_FS | ZINK_BLIT_SAVE_TEXTURES); + util_blitter_blit_generic(ctx->blitter, dst_view, &dstbox, + src_view, &dstbox, ctx->fb_state.width, ctx->fb_state.height, + PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL, + false, false); + + pipe_sampler_view_reference(&src_view, NULL); + csurf->transient_init = true; + } + ctx->fb_changed = ctx->rp_changed = false; + ctx->gfx_pipeline_state.rp_state = rp_state; + ctx->gfx_pipeline_state.render_pass = rp; + } assert(ctx->gfx_pipeline_state.render_pass); unsigned clear_buffers = begin_render_pass(ctx); @@ -1862,6 +1938,11 @@ zink_end_render_pass(struct zink_context *ctx) if (ctx->render_condition.query) zink_stop_conditional_render(ctx); VKCTX(CmdEndRenderPass)(ctx->batch.state->cmdbuf); + for (unsigned i = 0; i < ctx->fb_state.nr_cbufs; i++) { + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)ctx->fb_state.cbufs[i]; + if (csurf) + csurf->transient_init = true; + } } ctx->batch.in_rp = false; } @@ -2041,10 +2122,14 @@ unbind_fb_surface(struct zink_context *ctx, struct pipe_surface *surf, bool chan { if (!surf) return; + struct zink_surface *transient = zink_transient_surface(surf); if (changed) { zink_fb_clears_apply(ctx, surf->texture); - if 
(zink_batch_usage_exists(zink_csurface(surf)->batch_uses)) + if (zink_batch_usage_exists(zink_csurface(surf)->batch_uses)) { zink_batch_reference_surface(&ctx->batch, zink_csurface(surf)); + if (transient) + zink_batch_reference_surface(&ctx->batch, transient); + } ctx->rp_changed = true; } struct zink_resource *res = zink_resource(surf->texture); @@ -2058,9 +2143,12 @@ zink_set_framebuffer_state(struct pipe_context *pctx, const struct pipe_framebuffer_state *state) { struct zink_context *ctx = zink_context(pctx); + unsigned samples = state->nr_cbufs || state->zsbuf ? 0 : state->samples; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct pipe_surface *surf = ctx->fb_state.cbufs[i]; + if (i < state->nr_cbufs) + ctx->rp_changed |= !!zink_transient_surface(surf) != !!zink_transient_surface(state->cbufs[i]); unbind_fb_surface(ctx, surf, i >= state->nr_cbufs || surf != state->cbufs[i]); } if (ctx->fb_state.zsbuf) { @@ -2068,6 +2156,8 @@ zink_set_framebuffer_state(struct pipe_context *pctx, struct zink_resource *res = zink_resource(surf->texture); bool changed = surf != state->zsbuf; unbind_fb_surface(ctx, surf, changed); + if (!changed) + ctx->rp_changed |= !!zink_transient_surface(surf) != !!zink_transient_surface(state->zsbuf); if (changed && unlikely(res->obj->needs_zs_evaluate)) /* have to flush zs eval while the sample location data still exists, * so just throw some random barrier */ @@ -2088,6 +2178,9 @@ zink_set_framebuffer_state(struct pipe_context *pctx, for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct pipe_surface *surf = ctx->fb_state.cbufs[i]; if (surf) { + struct zink_surface *transient = zink_transient_surface(surf); + if (!samples) + samples = MAX3(transient ? transient->base.nr_samples : 1, surf->texture->nr_samples, 1); zink_resource(surf->texture)->fb_binds++; ctx->gfx_pipeline_state.void_alpha_attachments |= util_format_has_alpha1(surf->format) ? 
BITFIELD_BIT(i) : 0; } @@ -2096,12 +2189,15 @@ zink_set_framebuffer_state(struct pipe_context *pctx, ctx->gfx_pipeline_state.dirty = true; if (ctx->fb_state.zsbuf) { struct pipe_surface *surf = ctx->fb_state.zsbuf; + struct zink_surface *transient = zink_transient_surface(surf); + if (!samples) + samples = MAX3(transient ? transient->base.nr_samples : 1, surf->texture->nr_samples, 1); zink_resource(surf->texture)->fb_binds++; } if (ctx->fb_state.width != w || ctx->fb_state.height != h) ctx->scissor_changed = true; rebind_fb_state(ctx, NULL, true); - ctx->fb_state.samples = util_framebuffer_get_num_samples(state); + ctx->fb_state.samples = MAX2(samples, 1); /* get_framebuffer adds a ref if the fb is reused or created; * always do get_framebuffer first to avoid deleting the same fb * we're about to use diff --git a/src/gallium/drivers/zink/zink_framebuffer.c b/src/gallium/drivers/zink/zink_framebuffer.c index ef785c55319..192450dd37b 100644 --- a/src/gallium/drivers/zink/zink_framebuffer.c +++ b/src/gallium/drivers/zink/zink_framebuffer.c @@ -69,6 +69,8 @@ zink_init_framebuffer_imageless(struct zink_screen *screen, struct zink_framebuf goto out; } + assert(rp->state.num_cbufs + rp->state.have_zsbuf + rp->state.num_cresolves + rp->state.num_zsresolves == fb->state.num_attachments); + VkFramebufferCreateInfo fci; fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fci.flags = VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT; @@ -141,22 +143,42 @@ zink_get_framebuffer_imageless(struct zink_context *ctx) assert(zink_screen(ctx->base.screen)->info.have_KHR_imageless_framebuffer); struct zink_framebuffer_state state; + const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!ctx->fb_state.zsbuf; + unsigned num_resolves = 0; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; if (!psurf) psurf = ctx->dummy_surface[util_logbase2_ceil(ctx->gfx_pipeline_state.rast_samples+1)]; struct zink_surface *surface = zink_csurface(psurf); - 
memcpy(&state.infos[i], &surface->info, sizeof(surface->info)); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + memcpy(&state.infos[i], &transient->info, sizeof(transient->info)); + memcpy(&state.infos[cresolve_offset + i], &surface->info, sizeof(surface->info)); + num_resolves++; + } else { + memcpy(&state.infos[i], &surface->info, sizeof(surface->info)); + } } state.num_attachments = ctx->fb_state.nr_cbufs; + const unsigned zsresolve_offset = cresolve_offset + num_resolves; if (ctx->fb_state.zsbuf) { struct pipe_surface *psurf = ctx->fb_state.zsbuf; struct zink_surface *surface = zink_csurface(psurf); - memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info)); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + memcpy(&state.infos[state.num_attachments], &transient->info, sizeof(transient->info)); + memcpy(&state.infos[zsresolve_offset], &surface->info, sizeof(surface->info)); + num_resolves++; + } else { + memcpy(&state.infos[state.num_attachments], &surface->info, sizeof(surface->info)); + } state.num_attachments++; } + /* avoid bitfield explosion */ + assert(state.num_attachments + num_resolves < 16); + state.num_attachments += num_resolves; state.width = MAX2(ctx->fb_state.width, 1); state.height = MAX2(ctx->fb_state.height, 1); state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1; @@ -194,6 +216,8 @@ zink_init_framebuffer(struct zink_screen *screen, struct zink_framebuffer *fb, s goto out; } + assert(rp->state.num_cbufs + rp->state.have_zsbuf + rp->state.num_cresolves + rp->state.num_zsresolves == fb->state.num_attachments); + VkFramebufferCreateInfo fci = {0}; fci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fci.renderPass = rp->render_pass; @@ -270,22 +294,56 @@ zink_get_framebuffer(struct zink_context *ctx) assert(!screen->info.have_KHR_imageless_framebuffer); - struct pipe_surface *attachments[PIPE_MAX_COLOR_BUFS + 1] = {0}; + 
struct pipe_surface *attachments[2 * (PIPE_MAX_COLOR_BUFS + 1)] = {0}; + const unsigned cresolve_offset = ctx->fb_state.nr_cbufs + !!ctx->fb_state.zsbuf; + unsigned num_resolves = 0; struct zink_framebuffer_state state = {0}; for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) { struct pipe_surface *psurf = ctx->fb_state.cbufs[i]; - state.attachments[i] = psurf ? zink_csurface(psurf)->image_view : VK_NULL_HANDLE; + if (psurf) { + struct zink_surface *surf = zink_csurface(psurf); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + state.attachments[i] = transient->image_view; + state.attachments[cresolve_offset + i] = surf->image_view; + attachments[cresolve_offset + i] = psurf; + psurf = &transient->base; + num_resolves++; + } else { + state.attachments[i] = surf->image_view; + } + } else { + state.attachments[i] = VK_NULL_HANDLE; + } attachments[i] = psurf; } state.num_attachments = ctx->fb_state.nr_cbufs; + const unsigned zsresolve_offset = cresolve_offset + num_resolves; if (ctx->fb_state.zsbuf) { struct pipe_surface *psurf = ctx->fb_state.zsbuf; - state.attachments[state.num_attachments] = psurf ? 
zink_csurface(psurf)->image_view : VK_NULL_HANDLE; + if (psurf) { + struct zink_surface *surf = zink_csurface(psurf); + struct zink_surface *transient = zink_transient_surface(psurf); + if (transient) { + state.attachments[state.num_attachments] = transient->image_view; + state.attachments[zsresolve_offset] = surf->image_view; + attachments[zsresolve_offset] = psurf; + psurf = &transient->base; + num_resolves++; + } else { + state.attachments[state.num_attachments] = surf->image_view; + } + } else { + state.attachments[state.num_attachments] = VK_NULL_HANDLE; + } attachments[state.num_attachments++] = psurf; } + /* avoid bitfield explosion */ + assert(state.num_attachments + num_resolves < 16); + state.num_attachments += num_resolves; state.width = MAX2(ctx->fb_state.width, 1); state.height = MAX2(ctx->fb_state.height, 1); state.layers = MAX2(util_framebuffer_get_num_layers(&ctx->fb_state), 1) - 1; diff --git a/src/gallium/drivers/zink/zink_render_pass.c b/src/gallium/drivers/zink/zink_render_pass.c index e76cc54740b..265bf941e22 100644 --- a/src/gallium/drivers/zink/zink_render_pass.c +++ b/src/gallium/drivers/zink/zink_render_pass.c @@ -125,6 +125,160 @@ create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *st return render_pass; } +static VkRenderPass +create_render_pass2(struct zink_screen *screen, struct zink_render_pass_state *state, struct zink_render_pass_pipeline_state *pstate) +{ + + VkAttachmentReference2 color_refs[PIPE_MAX_COLOR_BUFS], color_resolves[PIPE_MAX_COLOR_BUFS], zs_ref, zs_resolve; + VkAttachmentReference2 input_attachments[PIPE_MAX_COLOR_BUFS]; + VkAttachmentDescription2 attachments[2 * (PIPE_MAX_COLOR_BUFS + 1)]; + VkPipelineStageFlags dep_pipeline = 0; + VkAccessFlags dep_access = 0; + unsigned input_count = 0; + const unsigned cresolve_offset = state->num_cbufs + state->have_zsbuf; + const unsigned zsresolve_offset = cresolve_offset + state->num_cresolves; + + pstate->num_attachments = state->num_cbufs; + 
pstate->num_cresolves = state->num_cresolves; + pstate->num_zsresolves = state->num_zsresolves; + for (int i = 0; i < state->num_cbufs; i++) { + struct zink_rt_attrib *rt = state->rts + i; + attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; + attachments[i].pNext = NULL; + attachments[i].flags = 0; + pstate->attachments[i].format = attachments[i].format = rt->format; + pstate->attachments[i].samples = attachments[i].samples = rt->samples; + attachments[i].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR : + /* TODO: need replicate EXT */ + //rt->resolve || (state->swapchain_init && rt->swapchain) ? + state->swapchain_init && rt->swapchain ? + VK_ATTACHMENT_LOAD_OP_DONT_CARE : + VK_ATTACHMENT_LOAD_OP_LOAD; + + /* TODO: need replicate EXT */ + //attachments[i].storeOp = rt->resolve ? VK_ATTACHMENT_STORE_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + /* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */ + VkImageLayout layout = rt->fbfetch ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + attachments[i].initialLayout = layout; + attachments[i].finalLayout = layout; + color_refs[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; + color_refs[i].pNext = NULL; + color_refs[i].attachment = i; + color_refs[i].layout = layout; + dep_pipeline |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + if (rt->fbfetch) + memcpy(&input_attachments[input_count++], &color_refs[i], sizeof(VkAttachmentReference2)); + dep_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + if (attachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + + if (rt->resolve) { + memcpy(&attachments[cresolve_offset + i], &attachments[i], sizeof(VkAttachmentDescription2)); + attachments[cresolve_offset + i].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[cresolve_offset + i].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[cresolve_offset + i].samples = 1; + memcpy(&color_resolves[i], &color_refs[i], sizeof(VkAttachmentReference2)); + color_resolves[i].attachment = cresolve_offset + i; + if (attachments[cresolve_offset + i].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT; + } + } + + int num_attachments = state->num_cbufs; + if (state->have_zsbuf) { + struct zink_rt_attrib *rt = state->rts + state->num_cbufs; + bool has_clear = rt->clear_color || rt->clear_stencil; + VkImageLayout layout = rt->needs_write || has_clear ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; + attachments[num_attachments].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; + attachments[num_attachments].pNext = NULL; + attachments[num_attachments].flags = 0; + pstate->attachments[num_attachments].format = attachments[num_attachments].format = rt->format; + pstate->attachments[num_attachments].samples = attachments[num_attachments].samples = rt->samples; + attachments[num_attachments].loadOp = rt->clear_color ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + attachments[num_attachments].stencilLoadOp = rt->clear_stencil ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD; + /* TODO: need replicate EXT */ + //attachments[num_attachments].storeOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; + //attachments[num_attachments].stencilStoreOp = rt->resolve ? VK_ATTACHMENT_LOAD_OP_DONT_CARE : VK_ATTACHMENT_STORE_OP_STORE; + attachments[num_attachments].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[num_attachments].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + /* if layout changes are ever handled here, need VkAttachmentSampleLocationsEXT */ + attachments[num_attachments].initialLayout = layout; + attachments[num_attachments].finalLayout = layout; + + dep_pipeline |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + if (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) + dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + if (attachments[num_attachments].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || + attachments[num_attachments].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + + zs_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2; + zs_ref.pNext = NULL; + zs_ref.attachment = num_attachments++; + zs_ref.layout = layout; + if (rt->resolve) { + memcpy(&attachments[zsresolve_offset], 
&attachments[zs_ref.attachment], sizeof(VkAttachmentDescription2)); /* num_attachments was already post-incremented; zs_ref.attachment is the zs slot filled above */ + attachments[zsresolve_offset].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[zsresolve_offset].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[zsresolve_offset].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[zsresolve_offset].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[zsresolve_offset].samples = 1; + memcpy(&zs_resolve, &zs_ref, sizeof(VkAttachmentReference2)); + zs_resolve.attachment = zsresolve_offset; /* retarget only the resolve ref; zs_ref keeps pointing at the multisampled attachment */ + if (attachments[zsresolve_offset].loadOp == VK_ATTACHMENT_LOAD_OP_LOAD || + attachments[zsresolve_offset].stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) + dep_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + } + pstate->num_attachments++; + } + + VkSubpassDependency2 deps[] = { + [0] = {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, VK_SUBPASS_EXTERNAL, 0, dep_pipeline, dep_pipeline, 0, dep_access, VK_DEPENDENCY_BY_REGION_BIT, 0}, + [1] = {VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, NULL, 0, VK_SUBPASS_EXTERNAL, dep_pipeline, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, dep_access, 0, VK_DEPENDENCY_BY_REGION_BIT, 0} + }; + + VkSubpassDescription2 subpass = {0}; + VkSubpassDescriptionDepthStencilResolve zsresolve; + subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = state->num_cbufs; + subpass.pColorAttachments = color_refs; + subpass.pDepthStencilAttachment = state->have_zsbuf ? 
&zs_ref : NULL; + subpass.inputAttachmentCount = input_count; + subpass.pInputAttachments = input_attachments; + if (state->num_cresolves) + subpass.pResolveAttachments = color_resolves; + if (state->num_zsresolves) { + subpass.pNext = &zsresolve; + zsresolve.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE; + zsresolve.pNext = NULL; + zsresolve.depthResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + zsresolve.stencilResolveMode = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT; + zsresolve.pDepthStencilResolveAttachment = &zs_resolve; + } else + subpass.pNext = NULL; + + VkRenderPassCreateInfo2 rpci = {0}; + rpci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2; + rpci.attachmentCount = num_attachments + state->num_cresolves + state->num_zsresolves; + rpci.pAttachments = attachments; + rpci.subpassCount = 1; + rpci.pSubpasses = &subpass; + rpci.dependencyCount = 2; + rpci.pDependencies = deps; + + VkRenderPass render_pass; + if (VKSCR(CreateRenderPass2)(screen->dev, &rpci, NULL, &render_pass) != VK_SUCCESS) { + debug_printf("vkCreateRenderPass2 failed\n"); + return VK_NULL_HANDLE; + } + + return render_pass; +} + struct zink_render_pass * zink_create_render_pass(struct zink_screen *screen, struct zink_render_pass_state *state, @@ -134,7 +288,8 @@ zink_create_render_pass(struct zink_screen *screen, if (!rp) goto fail; - rp->render_pass = create_render_pass(screen, state, pstate); + rp->render_pass = screen->vk_version >= VK_MAKE_VERSION(1,2,0) ? 
+ create_render_pass2(screen, state, pstate) : create_render_pass(screen, state, pstate); if (!rp->render_pass) goto fail; memcpy(&rp->state, state, sizeof(struct zink_render_pass_state)); diff --git a/src/gallium/drivers/zink/zink_render_pass.h b/src/gallium/drivers/zink/zink_render_pass.h index 103b15a249c..2e02f1566d9 100644 --- a/src/gallium/drivers/zink/zink_render_pass.h +++ b/src/gallium/drivers/zink/zink_render_pass.h @@ -35,12 +35,15 @@ struct zink_rt_attrib { VkFormat format; VkSampleCountFlagBits samples; bool clear_color; - bool clear_stencil; - bool fbfetch; + union { + bool clear_stencil; + bool fbfetch; + }; union { bool swapchain; bool needs_write; }; + bool resolve; }; struct zink_render_pass_state { @@ -48,9 +51,12 @@ struct zink_render_pass_state { uint8_t have_zsbuf : 1; uint8_t samples:1; //for fs samplemask uint8_t swapchain_init:1; + uint32_t num_zsresolves : 1; + uint32_t num_cresolves : 23; /* PIPE_MAX_COLOR_BUFS, but this is a struct hole */ struct zink_rt_attrib rts[PIPE_MAX_COLOR_BUFS + 1]; unsigned num_rts; uint32_t clears; //for extra verification and update flagging + uint32_t msaa_expand_mask; }; struct zink_pipeline_rt { @@ -59,7 +65,9 @@ struct zink_pipeline_rt { }; struct zink_render_pass_pipeline_state { - uint32_t num_attachments:31; + uint32_t num_attachments:26; + uint32_t num_cresolves:4; + uint32_t num_zsresolves:1; bool samples:1; //for fs samplemask struct zink_pipeline_rt attachments[PIPE_MAX_COLOR_BUFS + 1]; unsigned id; diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index 8ce3ccb2897..77eaf568b98 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -346,6 +346,9 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_PACK_HALF_FLOAT: return 1; + case PIPE_CAP_SURFACE_SAMPLE_COUNT: + return screen->vk_version >= VK_MAKE_VERSION(1,2,0); + case PIPE_CAP_DRAW_PARAMETERS: return 
screen->info.feats11.shaderDrawParameters || screen->info.have_KHR_shader_draw_parameters; diff --git a/src/gallium/drivers/zink/zink_surface.c b/src/gallium/drivers/zink/zink_surface.c index f3cfacb1d14..1a22e19fa3d 100644 --- a/src/gallium/drivers/zink/zink_surface.c +++ b/src/gallium/drivers/zink/zink_surface.c @@ -170,6 +170,7 @@ zink_get_surface(struct zink_context *ctx, if (!entry) { /* create a new surface */ surface = create_surface(&ctx->base, pres, templ, ivci); + surface->base.nr_samples = 0; surface->hash = hash; surface->ivci = *ivci; entry = _mesa_hash_table_insert_pre_hashed(&res->surface_cache, hash, &surface->ivci, surface); @@ -184,6 +185,7 @@ zink_get_surface(struct zink_context *ctx, p_atomic_inc(&surface->base.reference.count); } simple_mtx_unlock(&res->surface_mtx); + return &surface->base; } @@ -210,7 +212,31 @@ zink_create_surface(struct pipe_context *pctx, if (pres->target == PIPE_TEXTURE_3D) ivci.viewType = VK_IMAGE_VIEW_TYPE_2D; - return wrap_surface(pctx, zink_get_surface(zink_context(pctx), pres, templ, &ivci)); + struct pipe_surface *psurf = zink_get_surface(zink_context(pctx), pres, templ, &ivci); + if (!psurf) + return NULL; + + struct zink_ctx_surface *csurf = (struct zink_ctx_surface*)wrap_surface(pctx, psurf); + + if (templ->nr_samples) { + /* transient fb attachment: not cached */ + struct pipe_resource rtempl = *pres; + rtempl.nr_samples = templ->nr_samples; + rtempl.bind |= ZINK_BIND_TRANSIENT; + struct zink_resource *transient = zink_resource(pctx->screen->resource_create(pctx->screen, &rtempl)); + if (!transient) + return NULL; + ivci.image = transient->obj->image; + csurf->transient = (struct zink_ctx_surface*)wrap_surface(pctx, (struct pipe_surface*)create_surface(pctx, &transient->base.b, templ, &ivci)); + if (!csurf->transient) { + pipe_resource_reference((struct pipe_resource**)&transient, NULL); + pipe_surface_release(pctx, &psurf); + return NULL; + } + pipe_resource_reference((struct pipe_resource**)&transient, NULL); 
+ } + + return &csurf->base; } /* framebuffers are owned by their surfaces, so each time a surface that's part of a cached fb @@ -245,12 +271,14 @@ zink_destroy_surface(struct zink_screen *screen, struct pipe_surface *psurface) { struct zink_surface *surface = zink_surface(psurface); struct zink_resource *res = zink_resource(psurface->texture); - simple_mtx_lock(&res->surface_mtx); - struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci); - assert(he); - assert(he->data == surface); - _mesa_hash_table_remove(&res->surface_cache, he); - simple_mtx_unlock(&res->surface_mtx); + if (!psurface->nr_samples) { + simple_mtx_lock(&res->surface_mtx); + struct hash_entry *he = _mesa_hash_table_search_pre_hashed(&res->surface_cache, surface->hash, &surface->ivci); + assert(he); + assert(he->data == surface); + _mesa_hash_table_remove(&res->surface_cache, he); + simple_mtx_unlock(&res->surface_mtx); + } if (!screen->info.have_KHR_imageless_framebuffer) surface_clear_fb_refs(screen, psurface); zink_descriptor_set_refs_clear(&surface->desc_set_refs, surface); @@ -268,6 +296,7 @@ zink_surface_destroy(struct pipe_context *pctx, { struct zink_ctx_surface *csurf = (struct zink_ctx_surface *)psurface; zink_surface_reference(zink_screen(pctx->screen), &csurf->surf, NULL); + pipe_surface_release(pctx, (struct pipe_surface**)&csurf->transient); FREE(csurf); } diff --git a/src/gallium/drivers/zink/zink_surface.h b/src/gallium/drivers/zink/zink_surface.h index 617084d39e8..b6de77cd596 100644 --- a/src/gallium/drivers/zink/zink_surface.h +++ b/src/gallium/drivers/zink/zink_surface.h @@ -59,6 +59,9 @@ struct zink_surface { struct zink_ctx_surface { struct pipe_surface base; struct zink_surface *surf; + struct zink_ctx_surface *transient; //zink_ctx_surface + /* TODO: need replicate EXT */ + bool transient_init; }; /* use this cast for framebuffer surfaces */ @@ -68,6 +71,13 @@ zink_csurface(struct pipe_surface *psurface) return psurface 
? ((struct zink_ctx_surface *)psurface)->surf : NULL; } +/* use this cast for checking transient framebuffer surfaces */ +static inline struct zink_surface * +zink_transient_surface(struct pipe_surface *psurface) +{ + return psurface ? ((struct zink_ctx_surface *)psurface)->transient ? ((struct zink_ctx_surface *)psurface)->transient->surf : NULL : NULL; +} + /* use this cast for internal surfaces */ static inline struct zink_surface * zink_surface(struct pipe_surface *psurface)
