From: Mathias Fröhlich <mathias.froehl...@web.de> The change basically reimplements array setup by walking the gl_context::Array._DrawVAO on a per binding sequence. In this way we can make direct use of the application provided minimum set of buffer objects and emit fewer relocs.
Signed-off-by: Mathias Fröhlich <mathias.froehl...@web.de> --- src/mesa/drivers/dri/i965/brw_context.h | 1 - src/mesa/drivers/dri/i965/brw_draw.c | 13 +- src/mesa/drivers/dri/i965/brw_draw_upload.c | 386 ++++++++++++++-------------- 3 files changed, 195 insertions(+), 205 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7dcbd040f0..c9d03cfb5a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -440,7 +440,6 @@ struct brw_vertex_buffer { }; struct brw_vertex_element { const struct gl_array_attributes *glattrib; - const struct gl_vertex_buffer_binding *glbinding; int buffer; bool is_dual_slot; diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 2a7562a684..0758019de1 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -302,13 +302,6 @@ brw_merge_inputs(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; const struct gl_context *ctx = &brw->ctx; - GLuint i; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - struct brw_vertex_element *input = &brw->vb.inputs[i]; - _mesa_draw_attrib_and_binding(ctx, i, - &input->glattrib, &input->glbinding); - } if (devinfo->gen < 8 && !devinfo->is_haswell) { uint64_t mask = ctx->VertexProgram._Current->info.inputs_read; @@ -316,12 +309,10 @@ brw_merge_inputs(struct brw_context *brw) * 2_10_10_10_REV vertex formats. Set appropriate workaround flags. 
*/ while (mask) { - const struct gl_array_attributes *glattrib; + gl_vert_attrib i = u_bit_scan64(&mask); + const struct gl_array_attributes *glattrib = _mesa_draw_attrib(ctx, i); uint8_t wa_flags = 0; - i = u_bit_scan64(&mask); - glattrib = brw->vb.inputs[i].glattrib; - switch (glattrib->Type) { case GL_FIXED: diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index bc9b2566de..3caa44dd8b 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -399,29 +399,11 @@ brw_get_vertex_surface_type(struct brw_context *brw, static void copy_array_to_vbo_array(struct brw_context *brw, - struct brw_vertex_element *element, - int min, int max, - struct brw_vertex_buffer *buffer, - GLuint dst_stride) + const GLubyte *const ptr, const int src_stride, + int min, int max, + struct brw_vertex_buffer *buffer, GLuint dst_stride) { - const struct gl_vertex_buffer_binding *glbinding = element->glbinding; - const struct gl_array_attributes *glattrib = element->glattrib; - const int src_stride = glbinding->Stride; - - /* If the source stride is zero, we just want to upload the current - * attribute once and set the buffer's stride to 0. There's no need - * to replicate it out. 
- */ - if (src_stride == 0) { - brw_upload_data(&brw->upload, glattrib->Ptr, glattrib->_ElementSize, - glattrib->_ElementSize, &buffer->bo, &buffer->offset); - - buffer->stride = 0; - buffer->size = glattrib->_ElementSize; - return; - } - - const unsigned char *src = glattrib->Ptr + min * src_stride; + const unsigned char *src = ptr + min * src_stride; int count = max - min + 1; GLuint size = count * dst_stride; uint8_t *dst = brw_upload_space(&brw->upload, size, dst_stride, @@ -434,7 +416,7 @@ copy_array_to_vbo_array(struct brw_context *brw, * * In this case, let's the dst with undefined values */ - if (src != NULL) { + if (ptr != NULL) { if (dst_stride == src_stride) { memcpy(dst, src, size); } else { @@ -458,45 +440,49 @@ brw_prepare_vertices(struct brw_context *brw) const struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(brw->vs.base.prog_data); GLbitfield64 vs_inputs = vs_prog_data->inputs_read; - const unsigned char *ptr = NULL; - GLuint interleaved = 0; unsigned int min_index = brw->vb.min_index + brw->basevertex; unsigned int max_index = brw->vb.max_index + brw->basevertex; unsigned i; int delta, j; - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; - GLuint nr_uploads = 0; - - /* _NEW_POLYGON - * - * On gen6+, edge flags don't end up in the VUE (either in or out of the - * VS). Instead, they're uploaded as the last vertex element, and the data - * is passed sideband through the fixed function units. So, we need to - * prepare the vertex buffer for it, but it's not present in inputs_read. - */ - if (devinfo->gen >= 6 && (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL)) { - vs_inputs |= VERT_BIT_EDGEFLAG; - } - if (0) fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index); /* Accumulate the list of enabled arrays. 
*/ brw->vb.nr_enabled = 0; + GLbitfield inputs_read = 0; while (vs_inputs) { GLuint first = ffsll(vs_inputs) - 1; assert (first < 64); GLuint index = first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read & BITFIELD64_MASK(first)), 2); + assert (index < VERT_ATTRIB_MAX); struct brw_vertex_element *input = &brw->vb.inputs[index]; input->is_dual_slot = (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0; vs_inputs &= ~BITFIELD64_BIT(first); if (input->is_dual_slot) vs_inputs &= ~BITFIELD64_BIT(first + 1); brw->vb.enabled[brw->vb.nr_enabled++] = input; + inputs_read |= VERT_BIT(index); + } + + /* _NEW_POLYGON */ + if (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL) { + gl_vert_attrib attr = VERT_ATTRIB_EDGEFLAG; + struct brw_vertex_element *input = &brw->vb.inputs[attr]; + const struct gl_array_attributes *glattrib = _mesa_draw_attrib(ctx, attr); + input->glattrib = glattrib; + /* On gen6+, edge flags don't end up in the VUE (either in or out of the + * VS). Instead, they're uploaded as the last vertex element, and the + * data is passed sideband through the fixed function units. + * So, we need to prepare the vertex buffer for it, but it's not + * present in inputs_read. 
+ */ + if (devinfo->gen >= 6) { + brw->vb.enabled[brw->vb.nr_enabled++] = input; + inputs_read |= VERT_BIT_EDGEFLAG; + } } if (brw->vb.nr_enabled == 0) @@ -505,133 +491,88 @@ brw_prepare_vertices(struct brw_context *brw) if (brw->vb.nr_buffers) return; - /* The range of data in a given buffer represented as [min, max) */ - struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; - uint32_t buffer_range_start[VERT_ATTRIB_MAX]; - uint32_t buffer_range_end[VERT_ATTRIB_MAX]; + i = 0; + j = 0; + struct gl_vertex_array_object *vao = ctx->Array._DrawVAO; + + GLbitfield vbomask = inputs_read & _mesa_draw_vbo_array_bits(ctx); + while (vbomask) { + const struct gl_vertex_buffer_binding *const glbinding = + _mesa_draw_buffer_binding(vao, ffs(vbomask) - 1); + const GLsizei stride = glbinding->Stride; + + assert(_mesa_is_bufferobj(glbinding->BufferObj)); + + /* Accumulate the range of a single vertex, start with inverted range */ + uint32_t vertex_range_start = ~(uint32_t)0; + uint32_t vertex_range_end = 0; + + const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(glbinding); + GLbitfield attrmask = vbomask & boundmask; + /* Mark the those attributes as processed */ + vbomask ^= attrmask; + /* We can assume that we have array for the binding */ + assert(attrmask); + /* Walk attributes belonging to the binding */ + while (attrmask) { + const gl_vert_attrib attr = u_bit_scan(&attrmask); + const struct gl_array_attributes *const glattrib = + _mesa_draw_array_attrib(vao, attr); + const uint32_t rel_offset = + _mesa_draw_attributes_relative_offset(glattrib); + const uint32_t rel_end = rel_offset + glattrib->_ElementSize; - for (i = j = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - const struct gl_vertex_buffer_binding *glbinding = input->glbinding; - const struct gl_array_attributes *glattrib = input->glattrib; + vertex_range_start = MIN2(vertex_range_start, rel_offset); + vertex_range_end = MAX2(vertex_range_end, rel_end); - if 
(_mesa_is_bufferobj(glbinding->BufferObj)) { - struct intel_buffer_object *intel_buffer = - intel_buffer_object(glbinding->BufferObj); + struct brw_vertex_element *input = &brw->vb.inputs[attr]; + input->glattrib = glattrib; + input->buffer = j; + /* input->is_dual_slot is already set above */ + input->offset = rel_offset; - const uint32_t offset = _mesa_draw_binding_offset(glbinding) + - _mesa_draw_attributes_relative_offset(glattrib); + /* Next input */ + i++; + } + assert(vertex_range_start <= vertex_range_end); - /* Start with the worst case */ - uint32_t start = 0; - uint32_t range = intel_buffer->Base.Size; - if (glbinding->InstanceDivisor) { - if (brw->num_instances) { - start = offset + glbinding->Stride * brw->baseinstance; - range = (glbinding->Stride * ((brw->num_instances - 1) / - glbinding->InstanceDivisor) + - glattrib->_ElementSize); - } - } else { - if (brw->vb.index_bounds_valid) { - start = offset + min_index * glbinding->Stride; - range = (glbinding->Stride * (max_index - min_index) + - glattrib->_ElementSize); - } - } + struct intel_buffer_object *intel_buffer = + intel_buffer_object(glbinding->BufferObj); + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - /* If we have a VB set to be uploaded for this buffer object - * already, reuse that VB state so that we emit fewer - * relocations. 
- */ - unsigned k; - for (k = 0; k < i; k++) { - struct brw_vertex_element *other = brw->vb.enabled[k]; - const struct gl_vertex_buffer_binding *obind = other->glbinding; - const struct gl_array_attributes *oattrib = other->glattrib; - const uint32_t ooffset = _mesa_draw_binding_offset(obind) + - _mesa_draw_attributes_relative_offset(oattrib); - if (glbinding->BufferObj == obind->BufferObj && - glbinding->Stride == obind->Stride && - glbinding->InstanceDivisor == obind->InstanceDivisor && - (offset - ooffset) < glbinding->Stride) - { - input->buffer = brw->vb.enabled[k]->buffer; - input->offset = offset - ooffset; - - buffer_range_start[input->buffer] = - MIN2(buffer_range_start[input->buffer], start); - buffer_range_end[input->buffer] = - MAX2(buffer_range_end[input->buffer], start + range); - break; - } - } - if (k == i) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - - /* Named buffer object: Just reference its contents directly. */ - buffer->offset = offset; - buffer->stride = glbinding->Stride; - buffer->step_rate = glbinding->InstanceDivisor; - buffer->size = glbinding->BufferObj->Size - offset; - - enabled_buffer[j] = intel_buffer; - buffer_range_start[j] = start; - buffer_range_end[j] = start + range; - - input->buffer = j++; - input->offset = 0; - } + const uint32_t offset = _mesa_draw_binding_offset(glbinding); + + /* If nothing else is known take the buffer size and offset as a bound */ + uint32_t start = vertex_range_start; + uint32_t range = intel_buffer->Base.Size - offset - vertex_range_start; + /* Check if we can get a more narrow range */ + if (glbinding->InstanceDivisor) { + if (brw->num_instances) { + const uint32_t vertex_size = vertex_range_end - vertex_range_start; + start = vertex_range_start + stride * brw->baseinstance; + range = (stride * ((brw->num_instances - 1) / + glbinding->InstanceDivisor) + + vertex_size); + } } else { - /* Queue the buffer object up to be uploaded in the next pass, - * when we've decided if we're 
doing interleaved or not. - */ - if (nr_uploads == 0) { - interleaved = glbinding->Stride; - ptr = glattrib->Ptr; - } - else if (interleaved != glbinding->Stride || - glbinding->InstanceDivisor != 0 || - glattrib->Ptr < ptr || - (uintptr_t)(glattrib->Ptr - ptr) + glattrib->_ElementSize > interleaved) - { - /* If our stride is different from the first attribute's stride, - * or if we are using an instance divisor or if the first - * attribute's stride didn't cover our element, disable the - * interleaved upload optimization. The second case can most - * commonly occur in cases where there is a single vertex and, for - * example, the data is stored on the application's stack. - * - * NOTE: This will also disable the optimization in cases where - * the data is in a different order than the array indices. - * Something like: - * - * float data[...]; - * glVertexAttribPointer(0, 4, GL_FLOAT, 32, &data[4]); - * glVertexAttribPointer(1, 4, GL_FLOAT, 32, &data[0]); - */ - interleaved = 0; - } - - upload[nr_uploads++] = input; + if (brw->vb.index_bounds_valid) { + const uint32_t vertex_size = vertex_range_end - vertex_range_start; + start = vertex_range_start + stride * min_index; + range = (stride * (max_index - min_index) + + vertex_size); + } } - } - - /* Now that we've set up all of the buffers, we walk through and reference - * each of them. We do this late so that we get the right size in each - * buffer and don't reference too little data. 
- */ - for (i = 0; i < j; i++) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - if (buffer->bo) - continue; - const uint32_t start = buffer_range_start[i]; - const uint32_t range = buffer_range_end[i] - buffer_range_start[i]; + buffer->offset = offset; + buffer->size = start + range; + buffer->stride = stride; + buffer->step_rate = glbinding->InstanceDivisor; - buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, + buffer->bo = intel_bufferobj_buffer(brw, intel_buffer, offset + start, range, false); brw_bo_reference(buffer->bo); + + j++; } /* If we need to upload all the arrays, then we can trim those arrays to @@ -640,43 +581,66 @@ brw_prepare_vertices(struct brw_context *brw) */ brw->vb.start_vertex_bias = 0; delta = min_index; - if (nr_uploads == brw->vb.nr_enabled) { + if ((inputs_read & _mesa_draw_vbo_array_bits(ctx)) == 0) { brw->vb.start_vertex_bias = -delta; delta = 0; } - /* Handle any arrays to be uploaded. */ - if (nr_uploads > 1) { - if (interleaved) { - struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], min_index, max_index, - buffer, interleaved); - buffer->offset -= delta * interleaved; - buffer->size += delta * interleaved; - buffer->step_rate = 0; - - for (i = 0; i < nr_uploads; i++) { - const struct gl_array_attributes *glattrib = upload[i]->glattrib; - /* Then, just point upload[i] at upload[0]'s buffer. 
*/ - upload[i]->offset = ((const unsigned char *)glattrib->Ptr - ptr); - upload[i]->buffer = j; - } - j++; - - nr_uploads = 0; + GLbitfield usermask = inputs_read & _mesa_draw_user_array_bits(ctx); + while (usermask) { + const struct gl_vertex_buffer_binding *const glbinding = + _mesa_draw_buffer_binding(vao, ffs(usermask) - 1); + const GLsizei stride = glbinding->Stride; + + assert(!_mesa_is_bufferobj(glbinding->BufferObj)); + + /* Accumulate the range of a single vertex, start with inverted range */ + uint32_t vertex_range_start = ~(uint32_t)0; + uint32_t vertex_range_end = 0; + + const GLbitfield boundmask = _mesa_draw_bound_attrib_bits(glbinding); + GLbitfield attrmask = usermask & boundmask; + /* Mark the those attributes as processed */ + usermask ^= attrmask; + /* We can assume that we have array for the binding */ + assert(attrmask); + /* Walk attributes belonging to the binding */ + while (attrmask) { + const gl_vert_attrib attr = u_bit_scan(&attrmask); + const struct gl_array_attributes *const glattrib = + _mesa_draw_array_attrib(vao, attr); + const uint32_t rel_offset = + _mesa_draw_attributes_relative_offset(glattrib); + const uint32_t rel_end = rel_offset + glattrib->_ElementSize; + + vertex_range_start = MIN2(vertex_range_start, rel_offset); + vertex_range_end = MAX2(vertex_range_end, rel_end); + + struct brw_vertex_element *input = &brw->vb.inputs[attr]; + input->glattrib = glattrib; + input->buffer = j; + /* input->is_dual_slot is already set above */ + input->offset = rel_offset; + + /* Next input */ + i++; } - } - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { + assert(vertex_range_start <= vertex_range_end); + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; - const struct gl_vertex_buffer_binding *glbinding = upload[i]->glbinding; - const struct gl_array_attributes *glattrib = upload[i]->glattrib; - if (glbinding->InstanceDivisor == 0) { - copy_array_to_vbo_array(brw, upload[i], min_index, max_index, - buffer, 
glattrib->_ElementSize); + + const GLubyte *ptr = (const GLubyte*)_mesa_draw_binding_offset(glbinding); + ptr += vertex_range_start; + const uint32_t vertex_size = vertex_range_end - vertex_range_start; + if (glbinding->Stride == 0) { + /* If the source stride is zero, we just want to upload the current + * attribute once and set the buffer's stride to 0. There's no need + * to replicate it out. + */ + copy_array_to_vbo_array(brw, ptr, 0, 0, 0, buffer, vertex_size); + } else if (glbinding->InstanceDivisor == 0) { + copy_array_to_vbo_array(brw, ptr, stride, min_index, + max_index, buffer, vertex_size); } else { /* This is an instanced attribute, since its InstanceDivisor * is not zero. Therefore, its data will be stepped after the @@ -684,16 +648,52 @@ brw_prepare_vertices(struct brw_context *brw) */ uint32_t instanced_attr_max_index = (brw->num_instances - 1) / glbinding->InstanceDivisor; - copy_array_to_vbo_array(brw, upload[i], 0, instanced_attr_max_index, - buffer, glattrib->_ElementSize); + copy_array_to_vbo_array(brw, ptr, stride, 0, + instanced_attr_max_index, buffer, vertex_size); } - buffer->offset -= delta * buffer->stride; - buffer->size += delta * buffer->stride; + buffer->offset -= delta * buffer->stride + vertex_range_start; + buffer->size += delta * buffer->stride + vertex_range_start; buffer->step_rate = glbinding->InstanceDivisor; - upload[i]->buffer = j++; - upload[i]->offset = 0; + + j++; } + /* Upload the current values */ + GLbitfield curmask = inputs_read & _mesa_draw_current_bits(ctx); + if (curmask) { + /* For each attribute, upload the maximum possible size. 
*/ + GLubyte data[VERT_ATTRIB_MAX * sizeof(GLdouble) * 4]; + GLubyte *cursor = data; + + while (curmask) { + const gl_vert_attrib attr = u_bit_scan(&curmask); + const struct gl_array_attributes *const glattrib + = _mesa_draw_current_attrib(ctx, attr); + const unsigned size = glattrib->_ElementSize; + const unsigned alignment = util_next_power_of_two(size); + memcpy(cursor, glattrib->Ptr, size); + if (alignment != size) + memset(cursor + size, 0, alignment - size); + + struct brw_vertex_element *input = &brw->vb.inputs[attr]; + input->glattrib = glattrib; + input->buffer = j; + /* input->is_dual_slot is already set above */ + input->offset = cursor - data; + + cursor += alignment; + } + + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; + unsigned size = cursor - data; + brw_upload_data(&brw->upload, data, size, size, + &buffer->bo, &buffer->offset); + buffer->stride = 0; + buffer->size = size; + buffer->step_rate = 0; + + j++; + } brw->vb.nr_buffers = j; } -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev