From: Marek Olšák <marek.ol...@amd.com> For faster initialization of non-indirect draws. --- src/gallium/auxiliary/util/u_draw.c | 4 +- src/gallium/auxiliary/util/u_dump_state.c | 15 ++++--- src/gallium/auxiliary/util/u_vbuf.c | 8 ++-- src/gallium/docs/source/screen.rst | 2 +- src/gallium/drivers/ddebug/dd_draw.c | 42 ++++++++++------- src/gallium/drivers/ddebug/dd_pipe.h | 7 ++- src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 16 +++---- src/gallium/drivers/r600/r600_state_common.c | 12 ++--- src/gallium/drivers/radeonsi/si_state_draw.c | 59 +++++++++++++----------- src/gallium/drivers/trace/tr_dump_state.c | 12 ++++- src/gallium/include/pipe/p_state.h | 67 +++++++++++++++------------- src/gallium/state_trackers/nine/device9.c | 1 - src/gallium/state_trackers/nine/nine_state.c | 1 - src/mesa/state_tracker/st_draw.c | 19 ++++---- 14 files changed, 152 insertions(+), 113 deletions(-)
diff --git a/src/gallium/auxiliary/util/u_draw.c b/src/gallium/auxiliary/util/u_draw.c index ca78648..e7abbfc 100644 --- a/src/gallium/auxiliary/util/u_draw.c +++ b/src/gallium/auxiliary/util/u_draw.c @@ -138,22 +138,22 @@ util_draw_indirect(struct pipe_context *pipe, uint32_t *params; const unsigned num_params = info_in->indexed ? 5 : 4; assert(info_in->indirect); assert(!info_in->count_from_stream_output); memcpy(&info, info_in, sizeof(info)); params = (uint32_t *) pipe_buffer_map_range(pipe, - info_in->indirect, - info_in->indirect_offset, + info_in->indirect->buffer, + info_in->indirect->offset, num_params * sizeof(uint32_t), PIPE_TRANSFER_READ, &transfer); if (!transfer) { debug_printf("%s: failed to map indirect buffer\n", __FUNCTION__); return; } info.count = params[0]; info.instance_count = params[1]; diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index 0af81f7..9c32557 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -932,25 +932,30 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state) util_dump_member(stream, int, state, index_bias); util_dump_member(stream, uint, state, min_index); util_dump_member(stream, uint, state, max_index); util_dump_member(stream, bool, state, primitive_restart); util_dump_member(stream, uint, state, restart_index); util_dump_member(stream, ptr, state, count_from_stream_output); - util_dump_member(stream, ptr, state, indirect); - util_dump_member(stream, uint, state, indirect_offset); - util_dump_member(stream, uint, state, indirect_stride); - util_dump_member(stream, uint, state, indirect_count); - util_dump_member(stream, uint, state, indirect_params_offset); + if (!state->indirect) { + util_dump_member(stream, ptr, state, indirect); + } else { + util_dump_member(stream, uint, state, indirect->offset); + util_dump_member(stream, uint, state, indirect->stride); + util_dump_member(stream, uint, state, 
indirect->draw_count); + util_dump_member(stream, uint, state, indirect->indirect_draw_count_offset); + util_dump_member(stream, ptr, state, indirect->buffer); + util_dump_member(stream, ptr, state, indirect->indirect_draw_count); + } util_dump_struct_end(stream); } void util_dump_box(FILE *stream, const struct pipe_box *box) { if (!box) { util_dump_null(stream); return; } diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index 62b88ac..9d6d529 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -1161,29 +1161,29 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info) } new_info = *info; /* Fallback. We need to know all the parameters. */ if (new_info.indirect) { struct pipe_transfer *transfer = NULL; int *data; if (new_info.indexed) { - data = pipe_buffer_map_range(pipe, new_info.indirect, - new_info.indirect_offset, 20, + data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, + new_info.indirect->offset, 20, PIPE_TRANSFER_READ, &transfer); new_info.index_bias = data[3]; new_info.start_instance = data[4]; } else { - data = pipe_buffer_map_range(pipe, new_info.indirect, - new_info.indirect_offset, 16, + data = pipe_buffer_map_range(pipe, new_info.indirect->buffer, + new_info.indirect->offset, 16, PIPE_TRANSFER_READ, &transfer); new_info.start_instance = data[3]; } new_info.count = data[0]; new_info.instance_count = data[1]; new_info.start = data[2]; pipe_buffer_unmap(pipe, transfer); new_info.indirect = NULL; } diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index de9de05..03a37f0 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -210,21 +210,21 @@ The integer capabilities: * ``PIPE_CAP_MAX_VERTEX_STREAMS``: The maximum number of vertex streams supported by the geometry shader. If stream-out is supported, this should be at least 1. If stream-out is not supported, this should be 0. 
* ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments { count, instance_count, start, index_bias } from a PIPE_BUFFER resource. See pipe_draw_info. * ``PIPE_CAP_MULTI_DRAW_INDIRECT``: Whether the driver supports - pipe_draw_info::indirect_stride and ::indirect_count + pipe_draw_indirect_info::stride and ::draw_count * ``PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS``: Whether the driver supports taking the number of indirect draws from a separate parameter - buffer, see pipe_draw_info::indirect_params. + buffer, see pipe_draw_indirect_info::indirect_draw_count. * ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports the FINE versions of DDX/DDY. * ``PIPE_CAP_VENDOR_ID``: The vendor ID of the underlying hardware. If it's not available one should return 0xFFFFFFFF. * ``PIPE_CAP_DEVICE_ID``: The device ID (PCI ID) of the underlying hardware. 0xFFFFFFFF if not available. * ``PIPE_CAP_ACCELERATED``: Whether the renderer is hardware accelerated. * ``PIPE_CAP_VIDEO_MEMORY``: The amount of video memory in megabytes. * ``PIPE_CAP_UMA``: If the device has a unified memory architecture or on-card memory and GART. 
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c index a70187d..7ffbb44 100644 --- a/src/gallium/drivers/ddebug/dd_draw.c +++ b/src/gallium/drivers/ddebug/dd_draw.c @@ -211,24 +211,26 @@ dd_dump_draw_vbo(struct dd_draw_state *dstate, struct pipe_draw_info *info, FILE DUMP(draw_info, info); if (info->indexed) { DUMP(index_buffer, &dstate->index_buffer); if (dstate->index_buffer.buffer) DUMP_M(resource, &dstate->index_buffer, buffer); } if (info->count_from_stream_output) DUMP_M(stream_output_target, info, count_from_stream_output); - if (info->indirect) - DUMP_M(resource, info, indirect); - if (info->indirect_params) - DUMP_M(resource, info, indirect_params); + if (info->indirect) { + DUMP_M(resource, info, indirect->buffer); + if (info->indirect->indirect_draw_count) + DUMP_M(resource, info, indirect->indirect_draw_count); + } + fprintf(f, "\n"); /* TODO: dump active queries */ dd_dump_render_condition(dstate, f); for (i = 0; i < PIPE_MAX_ATTRIBS; i++) if (dstate->vertex_buffers[i].buffer.resource) { DUMP_I(vertex_buffer, &dstate->vertex_buffers[i], i); if (!dstate->vertex_buffers[i].is_user_buffer) @@ -481,21 +483,21 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags) fprintf(f, "Driver-specific state:\n\n"); dctx->pipe->dump_debug_state(dctx->pipe, f, flags); } } static void dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call) { switch (call->type) { case CALL_DRAW_VBO: - dd_dump_draw_vbo(state, &call->info.draw_vbo, f); + dd_dump_draw_vbo(state, &call->info.draw_vbo.draw, f); break; case CALL_LAUNCH_GRID: dd_dump_launch_grid(state, &call->info.launch_grid, f); break; case CALL_RESOURCE_COPY_REGION: dd_dump_resource_copy_region(state, &call->info.resource_copy_region, f); break; case CALL_BLIT: dd_dump_blit(state, &call->info.blit, f); @@ -600,23 +602,23 @@ dd_flush_and_handle_hang(struct dd_context *dctx, /* Terminate the process to prevent future hangs. 
*/ dd_kill_process(); } } static void dd_unreference_copy_of_call(struct dd_call *dst) { switch (dst->type) { case CALL_DRAW_VBO: - pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, NULL); - pipe_resource_reference(&dst->info.draw_vbo.indirect, NULL); - pipe_resource_reference(&dst->info.draw_vbo.indirect_params, NULL); + pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output, NULL); + pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer, NULL); + pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count, NULL); break; case CALL_LAUNCH_GRID: pipe_resource_reference(&dst->info.launch_grid.indirect, NULL); break; case CALL_RESOURCE_COPY_REGION: pipe_resource_reference(&dst->info.resource_copy_region.dst, NULL); pipe_resource_reference(&dst->info.resource_copy_region.src, NULL); break; case CALL_BLIT: pipe_resource_reference(&dst->info.blit.dst.resource, NULL); @@ -642,27 +644,31 @@ dd_unreference_copy_of_call(struct dd_call *dst) } } static void dd_copy_call(struct dd_call *dst, struct dd_call *src) { dst->type = src->type; switch (src->type) { case CALL_DRAW_VBO: - pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, - src->info.draw_vbo.count_from_stream_output); - pipe_resource_reference(&dst->info.draw_vbo.indirect, - src->info.draw_vbo.indirect); - pipe_resource_reference(&dst->info.draw_vbo.indirect_params, - src->info.draw_vbo.indirect_params); + pipe_so_target_reference(&dst->info.draw_vbo.draw.count_from_stream_output, + src->info.draw_vbo.draw.count_from_stream_output); + pipe_resource_reference(&dst->info.draw_vbo.indirect.buffer, + src->info.draw_vbo.indirect.buffer); + pipe_resource_reference(&dst->info.draw_vbo.indirect.indirect_draw_count, + src->info.draw_vbo.indirect.indirect_draw_count); dst->info.draw_vbo = src->info.draw_vbo; + if (!src->info.draw_vbo.draw.indirect) + dst->info.draw_vbo.draw.indirect = NULL; + else + dst->info.draw_vbo.draw.indirect = 
&dst->info.draw_vbo.indirect; break; case CALL_LAUNCH_GRID: pipe_resource_reference(&dst->info.launch_grid.indirect, src->info.launch_grid.indirect); dst->info.launch_grid = src->info.launch_grid; break; case CALL_RESOURCE_COPY_REGION: pipe_resource_reference(&dst->info.resource_copy_region.dst, src->info.resource_copy_region.dst); pipe_resource_reference(&dst->info.resource_copy_region.src, @@ -1154,21 +1160,27 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call) static void dd_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { struct dd_context *dctx = dd_context(_pipe); struct pipe_context *pipe = dctx->pipe; struct dd_call call; call.type = CALL_DRAW_VBO; - call.info.draw_vbo = *info; + call.info.draw_vbo.draw = *info; + if (info->indirect) { + call.info.draw_vbo.indirect = *info->indirect; + call.info.draw_vbo.draw.indirect = &call.info.draw_vbo.indirect; + } else { + memset(&call.info.draw_vbo.indirect, 0, sizeof(*info->indirect)); + } dd_before_draw(dctx); pipe->draw_vbo(pipe, info); dd_after_draw(dctx, &call); } static void dd_context_launch_grid(struct pipe_context *_pipe, const struct pipe_grid_info *info) { diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h index deae1f5..ea33193 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -97,26 +97,31 @@ struct call_clear_buffer struct call_generate_mipmap { struct pipe_resource *res; enum pipe_format format; unsigned base_level; unsigned last_level; unsigned first_layer; unsigned last_layer; }; +struct call_draw_info { + struct pipe_draw_info draw; + struct pipe_draw_indirect_info indirect; +}; + struct dd_call { enum call_type type; union { - struct pipe_draw_info draw_vbo; + struct call_draw_info draw_vbo; struct pipe_grid_info launch_grid; struct call_resource_copy_region resource_copy_region; struct pipe_blit_info blit; struct pipe_resource *flush_resource; struct call_clear clear; 
struct call_clear_buffer clear_buffer; struct call_generate_mipmap generate_mipmap; } info; }; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index b42b468..7cea5fb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -811,24 +811,24 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0, IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0); mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; } } static void nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - struct nv04_resource *buf = nv04_resource(info->indirect); - struct nv04_resource *buf_count = nv04_resource(info->indirect_params); - unsigned size, macro, count = info->indirect_count, drawid = info->drawid; - uint32_t offset = buf->offset + info->indirect_offset; + struct nv04_resource *buf = nv04_resource(info->indirect->buffer); + struct nv04_resource *buf_count = nv04_resource(info->indirect->indirect_draw_count); + unsigned size, macro, count = info->indirect->draw_count, drawid = info->drawid; + uint32_t offset = buf->offset + info->indirect->offset; struct nvc0_screen *screen = nvc0->screen; PUSH_SPACE(push, 7); /* must make FIFO wait for engines idle before continuing to process */ if ((buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr)) || (buf_count && buf_count->fence_wr && !nouveau_fence_signalled(buf_count->fence_wr))) { IMMED_NVC0(push, SUBC_3D(NV10_SUBCHAN_REF_CNT), 0); } @@ -863,54 +863,54 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) macro = NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT; } /* If the stride is not the natural stride, we have to stick a separate * push data reference for each draw. Otherwise it can all go in as one. * Of course there is a maximum packet size, so we have to break things up * along those borders as well. 
*/ while (count) { unsigned draws = count, pushes, i; - if (info->indirect_stride == size * 4) { + if (info->indirect->stride == size * 4) { draws = MIN2(draws, (NV04_PFIFO_MAX_PACKET_LEN - 4) / size); pushes = 1; } else { draws = MIN2(draws, 32); pushes = draws; } nouveau_pushbuf_space(push, 16, 0, pushes + !!buf_count); PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain); if (buf_count) PUSH_REFN(push, buf_count->bo, NOUVEAU_BO_RD | buf_count->domain); PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(0, macro, 3 + !!buf_count + draws * size)); PUSH_DATA(push, nvc0_prim_gl(info->mode)); PUSH_DATA(push, drawid); PUSH_DATA(push, draws); if (buf_count) { nouveau_pushbuf_data(push, buf_count->bo, - buf_count->offset + info->indirect_params_offset, + buf_count->offset + info->indirect->indirect_draw_count_offset, NVC0_IB_ENTRY_1_NO_PREFETCH | 4); } if (pushes == 1) { nouveau_pushbuf_data(push, buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4 * draws)); - offset += draws * info->indirect_stride; + offset += draws * info->indirect->stride; } else { for (i = 0; i < pushes; i++) { nouveau_pushbuf_data(push, buf->bo, offset, NVC0_IB_ENTRY_1_NO_PREFETCH | (size * 4)); - offset += info->indirect_stride; + offset += info->indirect->stride; } } count -= draws; drawid += draws; } } static inline void nvc0_update_prim_restart(struct nvc0_context *nvc0, bool en, uint32_t index) { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index f3011c8..ee6fd26 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1763,25 +1763,25 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info unsigned out_offset; void *ptr; unsigned start, count; if (likely(!info->indirect)) { start = 0; count = info->count; } else { /* Have to get start/count from indirect buffer, slow path ahead... 
*/ - struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect; + struct r600_resource *indirect_resource = (struct r600_resource *)info->indirect->buffer; unsigned *data = r600_buffer_map_sync_with_rings(&rctx->b, indirect_resource, PIPE_TRANSFER_READ); if (data) { - data += info->indirect_offset / sizeof(unsigned); + data += info->indirect->offset / sizeof(unsigned); start = data[2] * ib.index_size; count = data[0]; } else { start = 0; count = 0; } } u_upload_alloc(ctx->stream_uploader, start, count * 2, @@ -1911,35 +1911,35 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info r600_conv_pipe_prim(info->mode)); rctx->last_primitive_type = info->mode; } /* Draw packets. */ if (likely(!info->indirect)) { radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cs, info->instance_count); } else { - uint64_t va = r600_resource(info->indirect)->gpu_address; + uint64_t va = r600_resource(info->indirect->buffer)->gpu_address; assert(rctx->b.chip_class >= EVERGREEN); // Invalidate so non-indirect draw calls reset this state rctx->vgt_state.last_draw_was_indirect = true; rctx->last_start_instance = -1; radeon_emit(cs, PKT3(EG_PKT3_SET_BASE, 2, 0)); radeon_emit(cs, EG_DRAW_INDEX_INDIRECT_PATCH_TABLE_BASE); radeon_emit(cs, va); radeon_emit(cs, (va >> 32UL) & 0xFF); radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, - (struct r600_resource*)info->indirect, + (struct r600_resource*)info->indirect->buffer, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT)); } if (info->indexed) { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, ib.index_size == 4 ? (VGT_INDEX_32 | (R600_BIG_ENDIAN ? VGT_DMA_SWAP_32_BIT : 0)) : (VGT_INDEX_16 | (R600_BIG_ENDIAN ? 
VGT_DMA_SWAP_16_BIT : 0))); @@ -1975,21 +1975,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER)); radeon_emit(cs, PKT3(EG_PKT3_INDEX_BUFFER_SIZE, 0, 0)); radeon_emit(cs, max_size); radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, info->indirect->offset); radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); } } } else { if (unlikely(info->count_from_stream_output)) { struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output; uint64_t va = t->buf_filled_size->gpu_address + t->buf_filled_size_offset; radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); @@ -2005,21 +2005,21 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info t->buf_filled_size, RADEON_USAGE_READ, RADEON_PRIO_SO_FILLED_SIZE)); } if (likely(!info->indirect)) { radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); radeon_emit(cs, info->count); } else { radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit)); - radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, info->indirect->offset); } radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | (info->count_from_stream_output ? 
S_0287F0_USE_OPAQUE(1) : 0)); } /* SMX returns CONTEXT_DONE too early workaround */ if (rctx->b.family == CHIP_R600 || rctx->b.family == CHIP_RV610 || rctx->b.family == CHIP_RV630 || rctx->b.family == CHIP_RV635) { diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e6a9ee0..70b6ed8 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -603,20 +603,21 @@ static void si_emit_draw_registers(struct si_context *sctx, radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info->restart_index); sctx->last_restart_index = info->restart_index; } } static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info, const struct pipe_index_buffer *ib) { + struct pipe_draw_indirect_info *indirect = info->indirect; struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX]; bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off; uint32_t index_max_size = 0; uint64_t index_va = 0; if (info->count_from_stream_output) { struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output; uint64_t va = t->buf_filled_size->gpu_address + @@ -683,85 +684,85 @@ static void si_emit_draw_packets(struct si_context *sctx, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); } else { /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed draw. 
*/ if (sctx->b.chip_class >= CIK) sctx->last_index_size = -1; } - if (info->indirect) { - uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; + if (indirect) { + uint64_t indirect_va = r600_resource(indirect->buffer)->gpu_address; assert(indirect_va % 8 == 0); si_invalidate_draw_sh_constants(sctx); radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); radeon_emit(cs, 1); radeon_emit(cs, indirect_va); radeon_emit(cs, indirect_va >> 32); radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - (struct r600_resource *)info->indirect, + (struct r600_resource *)indirect->buffer, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; - assert(info->indirect_offset % 4 == 0); + assert(indirect->offset % 4 == 0); if (info->indexed) { radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); radeon_emit(cs, index_va); radeon_emit(cs, index_va >> 32); radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); radeon_emit(cs, index_max_size); } if (!sctx->screen->has_draw_indirect_multi) { radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, render_cond_bit)); - radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, indirect->offset); radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, di_src_sel); } else { uint64_t count_va = 0; - if (info->indirect_params) { + if (indirect->indirect_draw_count) { struct r600_resource *params_buf = - (struct r600_resource *)info->indirect_params; + (struct r600_resource *)indirect->indirect_draw_count; radeon_add_to_buffer_list( &sctx->b, &sctx->b.gfx, params_buf, RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); - count_va = params_buf->gpu_address + info->indirect_params_offset; + count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset; } radeon_emit(cs, PKT3(info->indexed ? 
PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, render_cond_bit)); - radeon_emit(cs, info->indirect_offset); + radeon_emit(cs, indirect->offset); radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) | S_2C3_DRAW_INDEX_ENABLE(1) | - S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params)); - radeon_emit(cs, info->indirect_count); + S_2C3_COUNT_INDIRECT_ENABLE(!!indirect->indirect_draw_count)); + radeon_emit(cs, indirect->draw_count); radeon_emit(cs, count_va); radeon_emit(cs, count_va >> 32); - radeon_emit(cs, info->indirect_stride); + radeon_emit(cs, indirect->stride); radeon_emit(cs, di_src_sel); } } else { int base_vertex; radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); radeon_emit(cs, info->instance_count); /* Base vertex and start instance. */ base_vertex = info->indexed ? info->index_bias : info->start; @@ -1048,64 +1049,66 @@ void si_emit_cache_flush(struct si_context *sctx) EVENT_INDEX(0)); } rctx->flags = 0; } static void si_get_draw_start_count(struct si_context *sctx, const struct pipe_draw_info *info, unsigned *start, unsigned *count) { - if (info->indirect) { + struct pipe_draw_indirect_info *indirect = info->indirect; + + if (indirect) { unsigned indirect_count; struct pipe_transfer *transfer; unsigned begin, end; unsigned map_size; unsigned *data; - if (info->indirect_params) { + if (indirect->indirect_draw_count) { data = pipe_buffer_map_range(&sctx->b.b, - info->indirect_params, - info->indirect_params_offset, + indirect->indirect_draw_count, + indirect->indirect_draw_count_offset, sizeof(unsigned), PIPE_TRANSFER_READ, &transfer); indirect_count = *data; pipe_buffer_unmap(&sctx->b.b, transfer); } else { - indirect_count = info->indirect_count; + indirect_count = indirect->draw_count; } if (!indirect_count) { *start = *count = 0; return; } - map_size 
= (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned); - data = pipe_buffer_map_range(&sctx->b.b, info->indirect, - info->indirect_offset, map_size, + map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned); + data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer, + indirect->offset, map_size, PIPE_TRANSFER_READ, &transfer); begin = UINT_MAX; end = 0; for (unsigned i = 0; i < indirect_count; ++i) { unsigned count = data[0]; unsigned start = data[2]; if (count > 0) { begin = MIN2(begin, start); end = MAX2(end, start + count); } - data += info->indirect_stride / sizeof(unsigned); + data += indirect->stride / sizeof(unsigned); } pipe_buffer_unmap(&sctx->b.b, transfer); if (begin < end) { *start = begin; *count = end - begin; } else { *start = *count = 0; } @@ -1277,32 +1280,34 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } else if (sctx->b.chip_class <= CIK && r600_resource(ib->buffer)->TC_L2_dirty) { /* VI reads index buffers through TC L2, so it doesn't * need this. */ sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; r600_resource(ib->buffer)->TC_L2_dirty = false; } } if (info->indirect) { + struct pipe_draw_indirect_info *indirect = info->indirect; + /* Add the buffer size for memory checking in need_cs_space. 
*/ - r600_context_add_resource_size(ctx, info->indirect); + r600_context_add_resource_size(ctx, indirect->buffer); - if (r600_resource(info->indirect)->TC_L2_dirty) { + if (r600_resource(indirect->buffer)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(info->indirect)->TC_L2_dirty = false; + r600_resource(indirect->buffer)->TC_L2_dirty = false; } - if (info->indirect_params && - r600_resource(info->indirect_params)->TC_L2_dirty) { + if (indirect->indirect_draw_count && + r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(info->indirect_params)->TC_L2_dirty = false; + r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; } } si_need_cs_space(sctx); /* Since we've called r600_context_add_resource_size for vertex buffers, * this must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. */ if (!si_upload_vertex_buffer_descriptors(sctx)) diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index 13c0a9d..e4a5e3b 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -805,22 +805,30 @@ void trace_dump_draw_info(const struct pipe_draw_info *state) trace_dump_member(int, state, index_bias); trace_dump_member(uint, state, min_index); trace_dump_member(uint, state, max_index); trace_dump_member(bool, state, primitive_restart); trace_dump_member(uint, state, restart_index); trace_dump_member(ptr, state, count_from_stream_output); - trace_dump_member(ptr, state, indirect); - trace_dump_member(uint, state, indirect_offset); + if (!state->indirect) { + trace_dump_member(ptr, state, indirect); + } else { + trace_dump_member(uint, state, indirect->offset); + trace_dump_member(uint, state, indirect->stride); + trace_dump_member(uint, state, indirect->draw_count); + trace_dump_member(uint, state, 
indirect->indirect_draw_count_offset); + trace_dump_member(ptr, state, indirect->buffer); + trace_dump_member(ptr, state, indirect->indirect_draw_count); + } trace_dump_struct_end(); } void trace_dump_blit_info(const struct pipe_blit_info *info) { char mask[7]; if (!trace_dumping_enabled_locked()) return; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 0c9b4b4..3cfdd34 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -634,20 +634,54 @@ struct pipe_vertex_element */ struct pipe_index_buffer { unsigned index_size; /**< size of an index, in bytes */ unsigned offset; /**< offset to start of data in buffer, in bytes */ struct pipe_resource *buffer; /**< the actual buffer */ const void *user_buffer; /**< pointer to a user buffer if buffer == NULL */ }; +struct pipe_draw_indirect_info +{ + unsigned offset; /**< must be 4 byte aligned */ + unsigned stride; /**< must be 4 byte aligned */ + unsigned draw_count; /**< number of indirect draws */ + unsigned indirect_draw_count_offset; /**< must be 4 byte aligned */ + + /* Indirect draw parameters resource is laid out as follows: + * + * if indexed is TRUE: + * struct { + * uint32_t count; + * uint32_t instance_count; + * uint32_t start; + * int32_t index_bias; + * uint32_t start_instance; + * }; + * otherwise: + * struct { + * uint32_t count; + * uint32_t instance_count; + * uint32_t start; + * uint32_t start_instance; + * }; + */ + struct pipe_resource *buffer; + + /* Indirect draw count resource: If not NULL, contains a 32-bit value which + * is to be used as the real draw_count. + */ + struct pipe_resource *indirect_draw_count; +}; + + /** * Information to describe a draw_vbo call. 
*/ struct pipe_draw_info { boolean indexed; /**< use index buffer */ enum pipe_prim_type mode:8; /**< the mode of the primitive */ boolean primitive_restart; ubyte vertices_per_patch; /**< the number of vertices per patch */ @@ -664,54 +698,23 @@ struct pipe_draw_info */ int index_bias; /**< a bias to be added to each index */ unsigned min_index; /**< the min index */ unsigned max_index; /**< the max index */ /** * Primitive restart enable/index (only applies to indexed drawing) */ unsigned restart_index; - unsigned indirect_offset; /**< must be 4 byte aligned */ - unsigned indirect_stride; /**< must be 4 byte aligned */ - unsigned indirect_count; /**< number of indirect draws */ - - unsigned indirect_params_offset; /**< must be 4 byte aligned */ - /* Pointers must be at the end for an optimal structure layout on 64-bit. */ - /* Indirect draw parameters resource: If not NULL, most values are taken - * from this buffer instead, which is laid out as follows: - * - * if indexed is TRUE: - * struct { - * uint32_t count; - * uint32_t instance_count; - * uint32_t start; - * int32_t index_bias; - * uint32_t start_instance; - * }; - * otherwise: - * struct { - * uint32_t count; - * uint32_t instance_count; - * uint32_t start; - * uint32_t start_instance; - * }; - */ - struct pipe_resource *indirect; - - /* Indirect draw count resource: If not NULL, contains a 32-bit value which - * is to be used as the real indirect_count. In that case indirect_count - * becomes the maximum possible value. - */ - struct pipe_resource *indirect_params; + struct pipe_draw_indirect_info *indirect; /**< Indirect draw. */ /** * Stream output target. If not NULL, it's used to provide the 'count' * parameter based on the number vertices captured by the stream output * stage. (or generally, based on the number of bytes captured) * * Only 'mode', 'start_instance', and 'instance_count' are taken into * account, all the other variables from pipe_draw_info are ignored. 
* * 'start' is implicitly 0 and 'count' is set as discussed above. diff --git a/src/gallium/state_trackers/nine/device9.c b/src/gallium/state_trackers/nine/device9.c index 6f97ddd..6390735 100644 --- a/src/gallium/state_trackers/nine/device9.c +++ b/src/gallium/state_trackers/nine/device9.c @@ -3024,21 +3024,20 @@ NineDevice9_ProcessVertices( struct NineDevice9 *This, return D3DERR_DRIVERINTERNALERROR; } draw.mode = PIPE_PRIM_POINTS; draw.count = VertexCount; draw.start_instance = 0; draw.primitive_restart = FALSE; draw.restart_index = 0; draw.count_from_stream_output = NULL; draw.indirect = NULL; - draw.indirect_params = NULL; draw.instance_count = 1; draw.indexed = FALSE; draw.start = 0; draw.index_bias = 0; draw.min_index = 0; draw.max_index = VertexCount - 1; pipe_sw->set_stream_output_targets(pipe_sw, 1, &target, offsets); diff --git a/src/gallium/state_trackers/nine/nine_state.c b/src/gallium/state_trackers/nine/nine_state.c index 2046d9d..3b1cd7c 100644 --- a/src/gallium/state_trackers/nine/nine_state.c +++ b/src/gallium/state_trackers/nine/nine_state.c @@ -2552,21 +2552,20 @@ init_draw_info(struct pipe_draw_info *info, info->mode = d3dprimitivetype_to_pipe_prim(type); info->count = prim_count_to_vertex_count(type, count); info->start_instance = 0; info->instance_count = 1; if (dev->context.stream_instancedata_mask & dev->context.stream_usage_mask) info->instance_count = MAX2(dev->context.stream_freq[0] & 0x7FFFFF, 1); info->primitive_restart = FALSE; info->restart_index = 0; info->count_from_stream_output = NULL; info->indirect = NULL; - info->indirect_params = NULL; } CSMT_ITEM_NO_WAIT(nine_context_draw_primitive, ARG_VAL(D3DPRIMITIVETYPE, PrimitiveType), ARG_VAL(UINT, StartVertex), ARG_VAL(UINT, PrimitiveCount)) { struct nine_context *context = &device->context; struct pipe_draw_info info; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5c9f7ea..29381b6 100644 --- a/src/mesa/state_tracker/st_draw.c +++ 
b/src/mesa/state_tracker/st_draw.c @@ -257,76 +257,79 @@ st_indirect_draw_vbo(struct gl_context *ctx, struct gl_buffer_object *indirect_data, GLsizeiptr indirect_offset, unsigned draw_count, unsigned stride, struct gl_buffer_object *indirect_params, GLsizeiptr indirect_params_offset, const struct _mesa_index_buffer *ib) { struct st_context *st = st_context(ctx); struct pipe_draw_info info; + struct pipe_draw_indirect_info indirect; /* Mesa core state should have been validated already */ assert(ctx->NewState == 0x0); assert(stride); st_invalidate_readpix_cache(st); /* Validate state. */ if ((st->dirty | ctx->NewDriverState) & ST_PIPELINE_RENDER_STATE_MASK || st->gfx_shaders_may_be_dirty) { st_validate_state(st, ST_PIPELINE_RENDER); } if (st->vertex_array_out_of_memory) { return; } + memset(&indirect, 0, sizeof(indirect)); util_draw_init_info(&info); if (ib) { setup_index_buffer(st, ib); info.indexed = TRUE; /* Primitive restart is not handled by the VBO module in this case. */ setup_primitive_restart(ctx, &info, ib->index_size); } info.mode = translate_prim(ctx, mode); info.vertices_per_patch = ctx->TessCtrlProgram.patch_vertices; - info.indirect = st_buffer_object(indirect_data)->buffer; - info.indirect_offset = indirect_offset; + info.indirect = &indirect; + indirect.buffer = st_buffer_object(indirect_data)->buffer; + indirect.offset = indirect_offset; if (ST_DEBUG & DEBUG_DRAW) { debug_printf("st/draw indirect: mode %s drawcount %d indexed %d\n", u_prim_name(info.mode), draw_count, info.indexed); } if (!st->has_multi_draw_indirect) { int i; assert(!indirect_params); - info.indirect_count = 1; + indirect.draw_count = 1; for (i = 0; i < draw_count; i++) { info.drawid = i; cso_draw_vbo(st->cso_context, &info); - info.indirect_offset += stride; + indirect.offset += stride; } } else { - info.indirect_count = draw_count; - info.indirect_stride = stride; + indirect.draw_count = draw_count; + indirect.stride = stride; if (indirect_params) { - info.indirect_params = 
st_buffer_object(indirect_params)->buffer; - info.indirect_params_offset = indirect_params_offset; + indirect.indirect_draw_count = st_buffer_object(indirect_params)->buffer; + indirect.indirect_draw_count_offset = indirect_params_offset; } cso_draw_vbo(st->cso_context, &info); } } void st_init_draw(struct st_context *st) { struct gl_context *ctx = st->ctx; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev