The original brw_emit_vertices code is left intact for now, because it is still used by gen4-5. The code is being consolidated into genX_state_upload.c, while the gen4-5 state-emitting code stays in a separate file.
Signed-off-by: Rafael Antognolli <[email protected]> --- src/mesa/drivers/dri/i965/brw_state.h | 1 +- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 330 +----------- src/mesa/drivers/dri/i965/genX_state_upload.c | 547 ++++++++++++++++++- 3 files changed, 544 insertions(+), 334 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index b4c269f..0ed4dc1 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -125,7 +125,6 @@ extern const struct brw_tracked_state haswell_cut_index; extern const struct brw_tracked_state gen8_index_buffer; extern const struct brw_tracked_state gen8_multisample_state; extern const struct brw_tracked_state gen8_pma_fix; -extern const struct brw_tracked_state gen8_vertices; extern const struct brw_tracked_state gen8_vf_topology; extern const struct brw_tracked_state brw_cs_work_groups_surface; diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index 32e1447..31d424d 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -34,336 +34,6 @@ #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" -#ifndef NDEBUG -static bool -is_passthru_format(uint32_t format) -{ - switch (format) { - case ISL_FORMAT_R64_PASSTHRU: - case ISL_FORMAT_R64G64_PASSTHRU: - case ISL_FORMAT_R64G64B64_PASSTHRU: - case ISL_FORMAT_R64G64B64A64_PASSTHRU: - return true; - default: - return false; - } -} -#endif - -static void -gen8_emit_vertices(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - bool uses_edge_flag; - - brw_prepare_vertices(brw); - brw_prepare_shader_draw_parameters(brw); - - uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); - - const struct brw_vs_prog_data *vs_prog_data = - brw_vs_prog_data(brw->vs.base.prog_data); - - if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { - 
unsigned vue = brw->vb.nr_enabled; - - /* The element for the edge flags must always be last, so we have to - * insert the SGVS before it in that case. - */ - if (uses_edge_flag) { - assert(vue > 0); - vue--; - } - - WARN_ONCE(vue >= 33, - "Trying to insert VID/IID past 33rd vertex element, " - "need to reorder the vertex attrbutes."); - - unsigned dw1 = 0; - if (vs_prog_data->uses_vertexid) { - dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID | - (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */ - (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT); - } - - if (vs_prog_data->uses_instanceid) { - dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID | - (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */ - (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT); - } - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(dw1); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* Normally we don't need an element for the SGVS attribute because the - * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an - * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if - * we're using draw parameters then we need an element for the those - * values. Additionally if there is an edge flag element then the SGVS - * can't be inserted past that so we need a dummy element to ensure that - * the edge flag is the last one. 
- */ - const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance || - ((vs_prog_data->uses_instanceid || - vs_prog_data->uses_vertexid) && - uses_edge_flag)); - const unsigned nr_elements = - brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; - - /* If the VS doesn't read any inputs (calculating vertex position from - * a state variable for some reason, for example), emit a single pad - * VERTEX_ELEMENT struct and bail. - * - * The stale VB state stays in place, but they don't do anything unless - * a VE loads from them. - */ - if (nr_elements == 0) { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); - OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (ISL_FORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); - return; - } - - /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ - const bool uses_draw_params = - vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance; - const unsigned nr_buffers = brw->vb.nr_buffers + - uses_draw_params + vs_prog_data->uses_drawid; - - if (nr_buffers) { - assert(nr_buffers <= 33); - - BEGIN_BATCH(1 + 4 * nr_buffers); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); - for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, - buffer->offset, - buffer->offset + buffer->size, - buffer->stride, 0 /* unused */); - } - - if (uses_draw_params) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, - brw->draw.draw_params_bo, - brw->draw.draw_params_offset, - brw->draw.draw_params_bo->size, - 0 /* stride */, - 0 /* unused */); - } - - if (vs_prog_data->uses_drawid) { - EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers + 1, - brw->draw.draw_id_bo, - brw->draw.draw_id_offset, - brw->draw.draw_id_bo->size, - 0 /* stride */, - 0 /* unused */); - } - ADVANCE_BATCH(); - } - - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, - * presumably for VertexID/InstanceID. - */ - assert(nr_elements <= 34); - - struct brw_vertex_element *gen6_edgeflag_input = NULL; - - BEGIN_BATCH(1 + nr_elements * 2); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1)); - for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); - uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; - uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - - /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): - * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an - * element which has edge flag enabled." 
- */ - assert(!(is_passthru_format(format) && uses_edge_flag)); - - /* The gen4 driver expects edgeflag to come in as a float, and passes - * that float on to the tests in the clipper. Mesa's current vertex - * attribute value for EdgeFlag is stored as a float, which works out. - * glEdgeFlagPointer, on the other hand, gives us an unnormalized - * integer ubyte. Just rewrite that to convert to a float. - */ - if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { - /* Gen6+ passes edgeflag as sideband along with the vertex, instead - * of in the VUE. We have to upload it sideband as the last vertex - * element according to the B-Spec. - */ - gen6_edgeflag_input = input; - continue; - } - - switch (input->glarray->Size) { - case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; - case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; - case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; - case 3: - if (input->glarray->Doubles) { - comp3 = BRW_VE1_COMPONENT_STORE_0; - } else if (input->glarray->Integer) { - comp3 = BRW_VE1_COMPONENT_STORE_1_INT; - } else { - comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; - } - - break; - } - - /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): - * - * "When SourceElementFormat is set to one of the *64*_PASSTHRU - * formats, 64-bit components are stored in the URB without any - * conversion. In this case, vertex elements must be written as 128 - * or 256 bits, with VFCOMP_STORE_0 being used to pad the output - * as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red - * component into the URB, Component 1 must be specified as - * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) - * in order to output a 128-bit vertex element, or Components 1-3 must - * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 - * to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex - * element." 
- */ - if (input->glarray->Doubles && !input->is_dual_slot) { - /* Store vertex elements which correspond to double and dvec2 vertex - * shader inputs as 128-bit vertex elements, instead of 256-bits. - */ - comp2 = BRW_VE1_COMPONENT_NOSTORE; - comp3 = BRW_VE1_COMPONENT_NOSTORE; - } - - OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - (format << BRW_VE0_FORMAT_SHIFT) | - (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (needs_sgvs_element) { - if (vs_prog_data->uses_basevertex || - vs_prog_data->uses_baseinstance) { - OUT_BATCH(GEN6_VE0_VALID | - brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT | - ISL_FORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } else { - OUT_BATCH(GEN6_VE0_VALID); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - } - - if (vs_prog_data->uses_drawid) { - OUT_BATCH(GEN6_VE0_VALID | - ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) | - (ISL_FORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - - if (gen6_edgeflag_input) { - uint32_t format = - brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); - - 
OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) | - GEN6_VE0_VALID | - GEN6_VE0_EDGE_FLAG_ENABLE | - (format << BRW_VE0_FORMAT_SHIFT) | - (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | - (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT)); - } - ADVANCE_BATCH(); - - for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { - const struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; - unsigned element_index; - - /* The edge flag element is reordered to be the last one in the code - * above so we need to compensate for that in the element indices used - * below. - */ - if (input == gen6_edgeflag_input) - element_index = nr_elements - 1; - else - element_index = j++; - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element_index | - (buffer->step_rate ? 
GEN8_VF_INSTANCING_ENABLE : 0)); - OUT_BATCH(buffer->step_rate); - ADVANCE_BATCH(); - } - - if (vs_prog_data->uses_drawid) { - const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(element); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -const struct brw_tracked_state gen8_vertices = { - .dirty = { - .mesa = _NEW_POLYGON, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_VERTICES | - BRW_NEW_VS_PROG_DATA, - }, - .emit = gen8_emit_vertices, -}; - static void gen8_emit_index_buffer(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 9f3afd1..d314fdc 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -25,11 +25,18 @@ #include "common/gen_device_info.h" #include "genxml/gen_macros.h" +/* #include "vbo/vbo.h" */ + +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/enums.h" +#include "main/macros.h" #include "brw_context.h" #if GEN_GEN < 7 #include "brw_defines.h" #endif +#include "brw_draw.h" #include "brw_state.h" #include "brw_wm.h" #include "brw_util.h" @@ -2944,6 +2951,540 @@ static const struct brw_tracked_state genX(wm_push_constants) = { /* ---------------------------------------------------------------------- */ +static uint32_t * +genX(emit_vertex_buffer_state)(struct brw_context *brw, + uint32_t *dw, + unsigned buffer_nr, + struct brw_bo *bo, + unsigned start_offset, + unsigned end_offset, + unsigned stride, + unsigned step_rate) +{ + struct GENX(VERTEX_BUFFER_STATE) buf_state = { + .VertexBufferIndex = buffer_nr, + .NullVertexBuffer = false, + .BufferPitch = stride, + .BufferStartingAddress.bo = bo, + .BufferStartingAddress.offset = start_offset, + .BufferStartingAddress.read_domains = I915_GEM_DOMAIN_VERTEX, + .BufferStartingAddress.write_domain = 0, +#if GEN_GEN >= 8 + .BufferSize = end_offset 
- start_offset, +#endif + +#if GEN_GEN >= 7 + .AddressModifyEnable = true, +#endif + +#if GEN_GEN < 8 + .BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA, + .InstanceDataStepRate = step_rate, + .EndAddress.bo = bo, + .EndAddress.offset = end_offset - 1, + .EndAddress.read_domains = I915_GEM_DOMAIN_VERTEX, + .EndAddress.write_domain = 0, +#endif + +#if GEN_GEN == 9 + .VertexBufferMOCS = (2 << 1), /* SKL_MOCS_WB */ +#elif GEN_GEN == 8 + .VertexBufferMOCS = 0x78, /* BDW_MOCS_WB */ +#elif GEN_GEN == 7 || GEN_GEN == 75 + .VertexBufferMOCS = GEN7_MOCS_L3, +#endif + }; + + GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &buf_state); + return dw + GENX(VERTEX_BUFFER_STATE_length); +} + +#ifndef NDEBUG +static bool +is_passthru_format(uint32_t format) +{ + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return true; + default: + return false; + } +} +#endif + +#if GEN_GEN < 8 +static int +genX(uploads_needed)(uint32_t format) +{ + if (!is_passthru_format(format)) + return 1; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + case ISL_FORMAT_R64G64_PASSTHRU: + return 1; + case ISL_FORMAT_R64G64B64_PASSTHRU: + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return 2; + default: + unreachable("not reached"); + } +} + +/* + * Returns the format that we are finally going to use when upload a vertex + * element. It will only change if we are using *64*PASSTHRU formats, as for + * gen < 8 they need to be splitted on two *32*FLOAT formats. + * + * @upload points in which upload we are. 
Valid values are [0,1] + */ +static uint32_t +downsize_format_if_needed(uint32_t format, + int upload) +{ + assert(upload == 0 || upload == 1); + + if (!is_passthru_format(format)) + return format; + + switch (format) { + case ISL_FORMAT_R64_PASSTHRU: + return ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + case ISL_FORMAT_R64G64B64_PASSTHRU: + return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT + : ISL_FORMAT_R32G32_FLOAT; + case ISL_FORMAT_R64G64B64A64_PASSTHRU: + return ISL_FORMAT_R32G32B32A32_FLOAT; + default: + unreachable("not reached"); + } +} + +/* + * Returns the number of componentes associated with a format that is used on + * a 64 to 32 format split. See downsize_format() + */ +static int +upload_format_size(uint32_t upload_format) +{ + switch (upload_format) { + case ISL_FORMAT_R32G32_FLOAT: + return 2; + case ISL_FORMAT_R32G32B32A32_FLOAT: + return 4; + default: + unreachable("not reached"); + } +} +#endif + +static void +genX(emit_vertices)(struct brw_context *brw) +{ + uint32_t *dw; + + brw_prepare_vertices(brw); + brw_prepare_shader_draw_parameters(brw); + +#if GEN_GEN < 8 + brw_emit_query_begin(brw); +#endif + + const struct brw_vs_prog_data *vs_prog_data = + brw_vs_prog_data(brw->vs.base.prog_data); + +#if GEN_GEN >= 8 + struct gl_context *ctx = &brw->ctx; + bool uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) { + unsigned vue = brw->vb.nr_enabled; + + /* The element for the edge flags must always be last, so we have to + * insert the SGVS before it in that case. 
+ */ + if (uses_edge_flag) { + assert(vue > 0); + vue--; + } + + WARN_ONCE(vue >= 33, + "Trying to insert VID/IID past 33rd vertex element, " + "need to reorder the vertex attrbutes."); + + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs) { + if (vs_prog_data->uses_vertexid) { + vfs.VertexIDEnable = true; + vfs.VertexIDComponentNumber = 2; + vfs.VertexIDElementOffset = vue; + } + + if (vs_prog_data->uses_instanceid) { + vfs.InstanceIDEnable = true; + vfs.InstanceIDComponentNumber = 3; + vfs.InstanceIDElementOffset = vue; + } + } + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.InstancingEnable = true; + vfi.VertexElementIndex = vue; + } + } else { + brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs); + } +#endif + +#if GEN_GEN >= 8 + /* Normally we don't need an element for the SGVS attribute because the + * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an + * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if + * we're using draw parameters then we need an element for the those + * values. Additionally if there is an edge flag element then the SGVS + * can't be inserted past that so we need a dummy element to ensure that + * the edge flag is the last one. 
+ */ + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance || + ((vs_prog_data->uses_instanceid || + vs_prog_data->uses_vertexid) + && uses_edge_flag)); +#else + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance || + vs_prog_data->uses_instanceid || + vs_prog_data->uses_vertexid); +#endif + unsigned nr_elements = + brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; + +#if GEN_GEN < 8 + /* If any of the formats of vb.enabled needs more that one upload, we need + * to add it to nr_elements */ + for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); + + if (genX(uploads_needed(format)) > 1) + nr_elements++; + } +#endif + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (nr_elements == 0) { + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), 3 + GENX(VERTEX_ELEMENT_STATE_length)); + struct GENX(VERTEX_ELEMENT_STATE) elem = { + .Valid = true, + .SourceElementFormat = SF_R32G32B32A32_FLOAT, + .Component0Control = VFCOMP_STORE_0, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_1_FP, + }; + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem); + return; + } + + /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. 
*/ + const bool uses_draw_params = + vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance; + const unsigned nr_buffers = brw->vb.nr_buffers + + uses_draw_params + vs_prog_data->uses_drawid; + + if (nr_buffers) { + assert(nr_buffers <= 33); + + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), + 1 + GENX(VERTEX_BUFFER_STATE_length) * nr_buffers); + + for (unsigned i = 0; i < brw->vb.nr_buffers; i++) { + const struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; + /* Prior to Haswell and Bay Trail we have to use 4-component formats + * to fake 3-component ones. In particular, we do this for + * half-float and 8 and 16-bit integer formats. This means that the + * vertex element may poke over the end of the buffer by 2 bytes. + */ + unsigned padding = + (brw->gen <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2; + dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo, + buffer->offset, + buffer->offset + buffer->size + padding, + buffer->stride, + buffer->step_rate); + } + + if (uses_draw_params) { + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers, + brw->draw.draw_params_bo, + brw->draw.draw_params_offset, + brw->draw.draw_params_bo->size, + 0 /* stride */, + 0 /* step rate */); + } + + if (vs_prog_data->uses_drawid) { + dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1, + brw->draw.draw_id_bo, + brw->draw.draw_id_offset, + brw->draw.draw_id_bo->size, + 0 /* stride */, + 0 /* step rate */); + } + } + + /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, + * presumably for VertexID/InstanceID. 
+ */ + assert(nr_elements <= 34); + + struct brw_vertex_element *gen6_edgeflag_input = NULL; + dw = brw_batch_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), + 1 + GENX(VERTEX_ELEMENT_STATE_length) * nr_elements); + for (unsigned i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t format = brw_get_vertex_surface_type(brw, input->glarray); + uint32_t comp0 = VFCOMP_STORE_SRC; + uint32_t comp1 = VFCOMP_STORE_SRC; + uint32_t comp2 = VFCOMP_STORE_SRC; + uint32_t comp3 = VFCOMP_STORE_SRC; + unsigned num_uploads = 1; + +#if GEN_GEN >= 8 + /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE): + * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an + * element which has edge flag enabled." + */ + assert(!(is_passthru_format(format) && uses_edge_flag)); +#endif + + /* The gen4 driver expects edgeflag to come in as a float, and passes + * that float on to the tests in the clipper. Mesa's current vertex + * attribute value for EdgeFlag is stored as a float, which works out. + * glEdgeFlagPointer, on the other hand, gives us an unnormalized + * integer ubyte. Just rewrite that to convert to a float. + */ + if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) { + /* Gen6+ passes edgeflag as sideband along with the vertex, instead + * of in the VUE. We have to upload it sideband as the last vertex + * element according to the B-Spec. + */ + gen6_edgeflag_input = input; + continue; + } + +#if GEN_GEN < 8 + num_uploads = genX(uploads_needed(format)); +#endif + + for (unsigned c = 0; c < num_uploads; c++) { +#if GEN_GEN < 8 + uint32_t upload_format = downsize_format_if_needed(format, c); +#endif + /* If we need more that one upload, the offset stride would be 128 + * bits (16 bytes), as for previous uploads we are using the full + * entry. 
*/ + unsigned int offset = input->offset + c * 16; + int size = input->glarray->Size; + +#if GEN_GEN < 8 + if (is_passthru_format(format)) + size = upload_format_size(upload_format); +#endif + + switch (size) { + case 0: comp0 = VFCOMP_STORE_0; + case 1: comp1 = VFCOMP_STORE_0; + case 2: comp2 = VFCOMP_STORE_0; + case 3: +#if GEN_GEN >= 8 + if (input->glarray->Doubles) { + comp3 = VFCOMP_STORE_0; + } else +#endif + if (input->glarray->Integer) { + comp3 = VFCOMP_STORE_1_INT; + } else { + comp3 = VFCOMP_STORE_1_FP; + } + + break; + } + +#if GEN_GEN >= 8 + /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE): + * + * "When SourceElementFormat is set to one of the *64*_PASSTHRU + * formats, 64-bit components are stored in the URB without any + * conversion. In this case, vertex elements must be written as 128 + * or 256 bits, with VFCOMP_STORE_0 being used to pad the output as + * required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red + * component into the URB, Component 1 must be specified as + * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE) in + * order to output a 128-bit vertex element, or Components 1-3 must + * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex + * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3 + * to be specified as VFCOMP_STORE_0 in order to output a 256-bit + * vertex element." + */ + if (input->glarray->Doubles && !input->is_dual_slot) { + /* Store vertex elements which correspond to double and dvec2 vertex + * shader inputs as 128-bit vertex elements, instead of 256-bits. 
+ */ + comp2 = VFCOMP_NOSTORE; + comp3 = VFCOMP_NOSTORE; + } +#endif + + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .VertexBufferIndex = input->buffer, + .Valid = true, +#if GEN_GEN < 8 + .SourceElementFormat = upload_format, +#else + .SourceElementFormat = format, +#endif + .SourceElementOffset = offset, + .Component0Control = comp0, + .Component1Control = comp1, + .Component2Control = comp2, + .Component3Control = comp3, + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + } + + if (needs_sgvs_element) { + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { 0 }; + + elem_state.Valid = true; + elem_state.Component0Control = VFCOMP_STORE_0; + elem_state.Component1Control = VFCOMP_STORE_0; + elem_state.Component2Control = VFCOMP_STORE_0; + elem_state.Component3Control = VFCOMP_STORE_0; + +#if GEN_GEN >= 8 + if (vs_prog_data->uses_basevertex || + vs_prog_data->uses_baseinstance) { + elem_state.VertexBufferIndex = brw->vb.nr_buffers; + elem_state.SourceElementFormat = SF_R32G32_UINT; + elem_state.Component0Control = VFCOMP_STORE_SRC; + elem_state.Component1Control = VFCOMP_STORE_SRC; + } +#else + elem_state.VertexBufferIndex = brw->vb.nr_buffers; + elem_state.SourceElementFormat = SF_R32G32_UINT; + if (vs_prog_data->uses_basevertex) + elem_state.Component0Control = VFCOMP_STORE_SRC; + + if (vs_prog_data->uses_baseinstance) + elem_state.Component1Control = VFCOMP_STORE_SRC; + + if (vs_prog_data->uses_vertexid) + elem_state.Component2Control = VFCOMP_STORE_VID; + + if (vs_prog_data->uses_instanceid) + elem_state.Component3Control = VFCOMP_STORE_IID; +#endif + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + + if (vs_prog_data->uses_drawid) { + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .Valid = true, + .VertexBufferIndex = brw->vb.nr_buffers + 1, + .SourceElementFormat = SF_R32_UINT, + .Component0Control = VFCOMP_STORE_SRC, + 
.Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + + if (gen6_edgeflag_input) { + uint32_t format = + brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray); + + struct GENX(VERTEX_ELEMENT_STATE) elem_state = { + .Valid = true, + .VertexBufferIndex = gen6_edgeflag_input->buffer, + .EdgeFlagEnable = true, + .SourceElementFormat = format, + .SourceElementOffset = gen6_edgeflag_input->offset, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_0, + .Component2Control = VFCOMP_STORE_0, + .Component3Control = VFCOMP_STORE_0, + }; + + GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &elem_state); + dw += GENX(VERTEX_ELEMENT_STATE_length); + } + +#if GEN_GEN >= 8 + for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) { + const struct brw_vertex_element *input = brw->vb.enabled[i]; + const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer]; + unsigned element_index; + + /* The edge flag element is reordered to be the last one in the code + * above so we need to compensate for that in the element indices used + * below. + */ + if (input == gen6_edgeflag_input) + element_index = nr_elements - 1; + else + element_index = j++; + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.VertexElementIndex = element_index; + vfi.InstancingEnable = buffer->step_rate ? 
true : false; + vfi.InstanceDataStepRate = buffer->step_rate; + } + } + + if (vs_prog_data->uses_drawid) { + const unsigned element = brw->vb.nr_enabled + needs_sgvs_element; + + brw_batch_emit(brw, GENX(3DSTATE_VF_INSTANCING), vfi) { + vfi.VertexElementIndex = element; + } + } +#endif +} + +static const struct brw_tracked_state genX(vertices) = { + .dirty = { + .mesa = _NEW_POLYGON, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_VERTICES | + BRW_NEW_VS_PROG_DATA, + }, + .emit = genX(emit_vertices), +}; + +/* ---------------------------------------------------------------------- */ + void genX(init_atoms)(struct brw_context *brw) { @@ -3010,7 +3551,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, - &brw_vertices, + &genX(vertices), }; #elif GEN_GEN == 7 static const struct brw_tracked_state *render_atoms[] = @@ -3098,7 +3639,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, /* must come before brw_vertices */ &brw_index_buffer, - &brw_vertices, + &genX(vertices), &haswell_cut_index, }; @@ -3191,7 +3732,7 @@ genX(init_atoms)(struct brw_context *brw) &brw_indices, &gen8_index_buffer, - &gen8_vertices, + &genX(vertices), &haswell_cut_index, &gen8_pma_fix, -- git-series 0.9.1 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
