Reviewed-by: Bruce Cherniak <bruce.chern...@intel.com> > On Feb 17, 2017, at 2:30 PM, George Kyriazis <george.kyria...@intel.com> > wrote: > > Fix issue with index buffers that do not contain a 0 index. 0 index > can be a non-valid index if the (copied) vertex buffers are a subset of the > user's (which happens because we only copy the range between min & max). > Core will use an index passed in from the driver to replace invalid indices. > > Only do this for calls that contain non-zero indices, to minimize performance > cost. > --- > src/gallium/drivers/swr/rasterizer/core/state.h | 1 + > .../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 60 +++++++++++++++++++--- > .../drivers/swr/rasterizer/jitter/fetch_jit.h | 2 + > src/gallium/drivers/swr/swr_draw.cpp | 1 + > src/gallium/drivers/swr/swr_state.cpp | 4 ++ > 5 files changed, 62 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/swr/rasterizer/core/state.h > b/src/gallium/drivers/swr/rasterizer/core/state.h > index 2f3b913..05347dc 100644 > --- a/src/gallium/drivers/swr/rasterizer/core/state.h > +++ b/src/gallium/drivers/swr/rasterizer/core/state.h > @@ -524,6 +524,7 @@ struct SWR_VERTEX_BUFFER_STATE > const uint8_t *pData; > uint32_t size; > uint32_t numaNode; > + uint32_t minVertex; // min vertex (for bounds checking) > uint32_t maxVertex; // size / pitch. precalculated value > used by fetch shader for OOB checks > uint32_t partialInboundsSize; // size % pitch. 
precalculated value > used by fetch shader for partially OOB vertices > }; > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > index 901bce6..ffa7605 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp > @@ -309,11 +309,29 @@ void FetchJit::JitLoadVertices(const > FETCH_COMPILE_STATE &fetchState, Value* str > > Value* startVertexOffset = MUL(Z_EXT(startOffset, mInt64Ty), stride); > > + Value *minVertex = NULL; > + Value *minVertexOffset = NULL; > + if (fetchState.bPartialVertexBuffer) { > + // fetch min index for low bounds checking > + minVertex = GEP(streams, {C(ied.StreamIndex), > C(SWR_VERTEX_BUFFER_STATE_minVertex)}); > + minVertex = LOAD(minVertex); > + if (!fetchState.bDisableIndexOOBCheck) { > + minVertexOffset = MUL(Z_EXT(minVertex, mInt64Ty), stride); > + } > + } > + > // Load from the stream. > for(uint32_t lane = 0; lane < mVWidth; ++lane) > { > // Get index > Value* index = VEXTRACT(vCurIndices, C(lane)); > + > + if (fetchState.bPartialVertexBuffer) { > + // clamp below minvertex > + Value *isBelowMin = ICMP_SLT(index, minVertex); > + index = SELECT(isBelowMin, minVertex, index); > + } > + > index = Z_EXT(index, mInt64Ty); > > Value* offset = MUL(index, stride); > @@ -321,10 +339,14 @@ void FetchJit::JitLoadVertices(const > FETCH_COMPILE_STATE &fetchState, Value* str > offset = ADD(offset, startVertexOffset); > > if (!fetchState.bDisableIndexOOBCheck) { > - // check for out of bound access, including partial OOB, and > mask them to 0 > + // check for out of bound access, including partial OOB, and > replace them with minVertex > Value *endOffset = ADD(offset, C((int64_t)info.Bpp)); > Value *oob = ICMP_ULE(endOffset, size); > - offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, 0)); > + if (fetchState.bPartialVertexBuffer) { > + offset = SELECT(oob, offset, minVertexOffset); > + } else { 
> + offset = SELECT(oob, offset, ConstantInt::get(mInt64Ty, > 0)); > + } > } > > Value* pointer = GEP(stream, offset); > @@ -732,6 +754,13 @@ void FetchJit::JitGatherVertices(const > FETCH_COMPILE_STATE &fetchState, > Value *maxVertex = GEP(streams, {C(ied.StreamIndex), > C(SWR_VERTEX_BUFFER_STATE_maxVertex)}); > maxVertex = LOAD(maxVertex); > > + Value *minVertex = NULL; > + if (fetchState.bPartialVertexBuffer) { > + // min vertex index for low bounds OOB checking > + minVertex = GEP(streams, {C(ied.StreamIndex), > C(SWR_VERTEX_BUFFER_STATE_minVertex)}); > + minVertex = LOAD(minVertex); > + } > + > Value *vCurIndices; > Value *startOffset; > if(ied.InstanceEnable) > @@ -769,9 +798,16 @@ void FetchJit::JitGatherVertices(const > FETCH_COMPILE_STATE &fetchState, > > // if we have a start offset, subtract from max vertex. Used for OOB > check > maxVertex = SUB(Z_EXT(maxVertex, mInt64Ty), Z_EXT(startOffset, > mInt64Ty)); > - Value* neg = ICMP_SLT(maxVertex, C((int64_t)0)); > + Value* maxNeg = ICMP_SLT(maxVertex, C((int64_t)0)); > // if we have a negative value, we're already OOB. clamp at 0. > - maxVertex = SELECT(neg, C(0), TRUNC(maxVertex, mInt32Ty)); > + maxVertex = SELECT(maxNeg, C(0), TRUNC(maxVertex, mInt32Ty)); > + > + if (fetchState.bPartialVertexBuffer) { > + // similarly for min vertex > + minVertex = SUB(Z_EXT(minVertex, mInt64Ty), Z_EXT(startOffset, > mInt64Ty)); > + Value *minNeg = ICMP_SLT(minVertex, C((int64_t)0)); > + minVertex = SELECT(minNeg, C(0), TRUNC(minVertex, mInt32Ty)); > + } > > // Load the in bounds size of a partially valid vertex > Value *partialInboundsSize = GEP(streams, {C(ied.StreamIndex), > C(SWR_VERTEX_BUFFER_STATE_partialInboundsSize)}); > @@ -791,8 +827,20 @@ void FetchJit::JitGatherVertices(const > FETCH_COMPILE_STATE &fetchState, > Value* vMaxVertex = VBROADCAST(maxVertex); > Value* vPartialOOBMask = ICMP_EQ(vCurIndices, vMaxVertex); > > - // are vertices are fully in bounds? 
> - Value* vGatherMask = ICMP_ULT(vCurIndices, vMaxVertex); > + // are vertices fully in bounds? > + Value* vMaxGatherMask = ICMP_ULT(vCurIndices, vMaxVertex); > + > + Value *vGatherMask; > + if (fetchState.bPartialVertexBuffer) { > + // are vertices at or above the minVertex limit? > + Value *vMinVertex = VBROADCAST(minVertex); > + Value *vMinGatherMask = ICMP_UGE(vCurIndices, vMinVertex); > + > + // only fetch lanes that pass both tests > + vGatherMask = AND(vMaxGatherMask, vMinGatherMask); > + } else { > + vGatherMask = vMaxGatherMask; > + } > > // blend in any partially OOB indices that have valid elements > vGatherMask = SELECT(vPartialOOBMask, vElementInBoundsMask, > vGatherMask); > diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h > b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h > index 622608a..68c6f60 100644 > --- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h > +++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.h > @@ -104,6 +104,7 @@ struct FETCH_COMPILE_STATE > bool bDisableIndexOOBCheck; // If enabled, FetchJit will > exclude index OOB check > bool bEnableCutIndex{ false }; // Compares indices with the cut > index and returns a cut mask > bool bVertexIDOffsetEnable{ false }; // Offset vertexID by StartVertex > for non-indexed draws or BaseVertex for indexed draws > + bool bPartialVertexBuffer{ false }; // for indexed draws, map > illegal indices to a known resident vertex > > FETCH_COMPILE_STATE(bool disableVGATHER = false, bool diableIndexOOBCheck > = false): > bDisableVGATHER(disableVGATHER), > bDisableIndexOOBCheck(diableIndexOOBCheck){ }; > @@ -117,6 +118,7 @@ struct FETCH_COMPILE_STATE > if (bEnableCutIndex != other.bEnableCutIndex) return false; > if (cutIndex != other.cutIndex) return false; > if (bVertexIDOffsetEnable != other.bVertexIDOffsetEnable) return > false; > + if (bPartialVertexBuffer != other.bPartialVertexBuffer) return false; > > for(uint32_t i = 0; i < numAttribs; ++i) > { > diff --git 
a/src/gallium/drivers/swr/swr_draw.cpp > b/src/gallium/drivers/swr/swr_draw.cpp > index 4bdd3bb..f764efe 100644 > --- a/src/gallium/drivers/swr/swr_draw.cpp > +++ b/src/gallium/drivers/swr/swr_draw.cpp > @@ -143,6 +143,7 @@ swr_draw_vbo(struct pipe_context *pipe, const struct > pipe_draw_info *info) > struct swr_vertex_element_state *velems = ctx->velems; > velems->fsState.cutIndex = info->restart_index; > velems->fsState.bEnableCutIndex = info->primitive_restart; > + velems->fsState.bPartialVertexBuffer = (info->min_index > 0); > > swr_jit_fetch_key key; > swr_generate_fetch_key(key, velems); > diff --git a/src/gallium/drivers/swr/swr_state.cpp > b/src/gallium/drivers/swr/swr_state.cpp > index 116f19f..5e3d58d 100644 > --- a/src/gallium/drivers/swr/swr_state.cpp > +++ b/src/gallium/drivers/swr/swr_state.cpp > @@ -1106,6 +1106,7 @@ swr_update_derived(struct pipe_context *pipe, > SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS]; > for (UINT i = 0; i < ctx->num_vertex_buffers; i++) { > uint32_t size, pitch, elems, partial_inbounds; > + uint32_t min_vertex_index; > const uint8_t *p_data; > struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i]; > > @@ -1117,6 +1118,7 @@ swr_update_derived(struct pipe_context *pipe, > size = vb->buffer->width0; > elems = size / pitch; > partial_inbounds = size % pitch; > + min_vertex_index = 0; > > p_data = swr_resource_data(vb->buffer) + vb->buffer_offset; > } else { > @@ -1128,6 +1130,7 @@ swr_update_derived(struct pipe_context *pipe, > uint32_t base; > swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, > &size); > partial_inbounds = 0; > + min_vertex_index = info.min_index; > > /* Copy only needed vertices to scratch space */ > size = AlignUp(size, 4); > @@ -1143,6 +1146,7 @@ swr_update_derived(struct pipe_context *pipe, > swrVertexBuffers[i].pitch = pitch; > swrVertexBuffers[i].pData = p_data; > swrVertexBuffers[i].size = size; > + swrVertexBuffers[i].minVertex = min_vertex_index; > 
swrVertexBuffers[i].maxVertex = elems; > swrVertexBuffers[i].partialInboundsSize = partial_inbounds; > } > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev