Previously, we were using the size of the BO which may be substantially larger than the actual vertex buffer size. --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_draw_upload.c | 52 +++++++++++++++++++++++++++- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 2 +- 3 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 76ed1de..d1d31e0 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -569,6 +569,7 @@ struct brw_vertex_buffer { /** Buffer object containing the uploaded vertex data */ drm_intel_bo *bo; uint32_t offset; + uint32_t vf_upper_bound; /** Byte stride between elements in the uploaded array */ GLuint stride; GLuint step_rate; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b651fd2..2eac385 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -365,6 +365,17 @@ brw_get_vertex_surface_type(struct brw_context *brw, } } +static unsigned +attrib_vec4_size(GLenum type) +{ + const int type_size = _mesa_sizeof_type(type); + + /* _mesa_sizeof_type() returns > 0 for bare GL types and -1 for all of the + * packed formats. All of the packed formats have a size of 4. + */ + return type_size > 0 ? type_size * 4 : 4; +} + static void copy_array_to_vbo_array(struct brw_context *brw, struct brw_vertex_element *element, @@ -373,6 +384,7 @@ copy_array_to_vbo_array(struct brw_context *brw, GLuint dst_stride) { const int src_stride = element->glarray->StrideB; + const unsigned vec4_size = attrib_vec4_size(element->glarray->Type); /* If the source stride is zero, we just want to upload the current * attribute once and set the buffer's stride to 0. 
There's no need @@ -385,6 +397,7 @@ copy_array_to_vbo_array(struct brw_context *brw, &buffer->bo, &buffer->offset); buffer->stride = 0; + buffer->vf_upper_bound = vec4_size; return; } @@ -404,6 +417,7 @@ copy_array_to_vbo_array(struct brw_context *brw, } } buffer->stride = dst_stride; + buffer->vf_upper_bound = size + (vec4_size - dst_stride); } void @@ -457,6 +471,7 @@ brw_prepare_vertices(struct brw_context *brw) struct intel_buffer_object *enabled_buffer[VERT_ATTRIB_MAX]; uint32_t buffer_range_start[VERT_ATTRIB_MAX]; uint32_t buffer_range_end[VERT_ATTRIB_MAX]; + uint32_t buffer_range_vf_end[VERT_ATTRIB_MAX]; for (i = j = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; @@ -486,6 +501,23 @@ brw_prepare_vertices(struct brw_context *brw) } } + /* This is ugly. It's completely undocumented (as far as I can tell) + * but based on a little reverse-engineering, it appears that the VF + * stage first fetches an entire vec4 and then swizzles components + * into the VUE. Therefore, if any part of the vec4 lies outside of + * the buffer's bounds, the entire vec4 is discarded and you get + * entirely zeros. + * + * This means that we can't actually use tight bounds for vertex + * buffers. Instead, we have to pad them out so that, for the last + * element, the whole vec4 fits. Unfortunately, this means there are + * a few corner cases where we don't handle ARB_robust_buffer_access + * 100% correctly but they're very hard to hit and it's still safe in + * the sense that you shouldn't end up in someone else's buffer. + */ + const unsigned vec4_size = attrib_vec4_size(glarray->Type); + const unsigned vf_range = range + (vec4_size - glarray->_ElementSize); + /* If we have a VB set to be uploaded for this buffer object * already, reuse that VB state so that we emit fewer * relocations. 
@@ -503,6 +535,7 @@ brw_prepare_vertices(struct brw_context *brw) buffer_range_start[k] = MIN2(buffer_range_start[k], start); buffer_range_end[k] = MAX2(buffer_range_end[k], start + range); + buffer_range_vf_end[k] = MAX2(buffer_range_end[k], start + vf_range); break; } } @@ -517,6 +550,7 @@ brw_prepare_vertices(struct brw_context *brw) enabled_buffer[j] = intel_buffer; buffer_range_start[j] = start; buffer_range_end[j] = start + range; + buffer_range_vf_end[j] = start + vf_range; input->buffer = j++; input->offset = 0; @@ -580,6 +614,8 @@ brw_prepare_vertices(struct brw_context *brw) buffer->bo = intel_bufferobj_buffer(brw, enabled_buffer[i], start, range); drm_intel_bo_reference(buffer->bo); + + buffer->vf_upper_bound = buffer_range_vf_end[i]; } /* If we need to upload all the arrays, then we can trim those arrays to @@ -604,12 +640,24 @@ brw_prepare_vertices(struct brw_context *brw) buffer, interleaved); buffer->offset -= delta * interleaved; + /* Because we just pass upload[0] in to copy_array_to_vbo_array + * above, it cannot provide us with the correct vf_upper_bound. + * Instead, we have to calculate that ourselves. + */ + unsigned elem_vf_size = 0; + for (i = 0; i < nr_uploads; i++) { /* Then, just point upload[i] at upload[0]'s buffer. 
*/ upload[i]->offset = ((const unsigned char *)upload[i]->glarray->Ptr - ptr); upload[i]->buffer = j; + + unsigned vec4_size = attrib_vec4_size(upload[i]->glarray->Type); + elem_vf_size = MAX2(elem_vf_size, upload[i]->offset + vec4_size); } + buffer->vf_upper_bound = + (delta + max_index - min_index) * interleaved + elem_vf_size; + j++; nr_uploads = 0; @@ -632,6 +680,7 @@ brw_prepare_vertices(struct brw_context *brw) buffer, upload[i]->glarray->_ElementSize); } buffer->offset -= delta * buffer->stride; + buffer->vf_upper_bound += delta * buffer->stride; buffer->step_rate = upload[i]->glarray->InstanceDivisor; upload[i]->buffer = j++; upload[i]->offset = 0; @@ -773,7 +822,8 @@ brw_emit_vertices(struct brw_context *brw) OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1, + EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, + buffer->offset + buffer->vf_upper_bound - 1, buffer->offset, buffer->stride, buffer->step_rate); diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index dce11dd..722cde6 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -151,7 +151,7 @@ gen8_emit_vertices(struct brw_context *brw) OUT_BATCH(dw0); OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); - OUT_BATCH(buffer->bo->size); + OUT_BATCH(buffer->vf_upper_bound); } if (uses_draw_params) { -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev