We must be careful to only compute the address once based on the per-context information (rather than accessing the unlocked global bo->offset64) so that the value in the batch does match the reloc.presumed_offset we declare to the kernel. Otherwise, highly unlikely, but we may see GPU hangs in multithreaded users.
The only real complication here is isl_surf_fill_state() which needs to adjust the reloc.delta to both general a tile offset and to encode state into the lower 12 bits. In a few of the gen4/5 emitters, I had to rearrange the code slightly in order to have the correct state ready prior to emitting the relocation, these should hopefully continue to disappear as they are converted to genxml routines. Cc: Kenneth Graunke <[email protected]> Cc: Matt Turner <[email protected]> Cc: Jason Ekstrand <[email protected]> Cc: Daniel Vetter <[email protected]> --- src/mesa/drivers/dri/i965/brw_clip_state.c | 13 ++--- src/mesa/drivers/dri/i965/brw_sf_state.c | 36 +++++++------- src/mesa/drivers/dri/i965/brw_wm_state.c | 55 +++++++++------------- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 60 ++++++++++-------------- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 8 ++-- src/mesa/drivers/dri/i965/genX_state_upload.c | 13 +++-- src/mesa/drivers/dri/i965/intel_batchbuffer.h | 6 +-- 7 files changed, 84 insertions(+), 107 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 8f22c0ff67..2ff2b4a51a 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -101,14 +101,11 @@ brw_upload_clip_unit(struct brw_context *brw) /* enable guardband clipping if we can */ clip->clip5.guard_band_enable = 1; clip->clip6.clipper_viewport_state_ptr = - (brw->batch.bo->offset64 + brw->clip.vp_offset) >> 5; - - /* emit clip viewport relocation */ - brw_emit_reloc(&brw->batch, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, clip6)), - brw->batch.bo, brw->clip.vp_offset, - I915_GEM_DOMAIN_INSTRUCTION, 0); + brw_emit_reloc(&brw->batch, + (brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, clip6)), + brw->batch.bo, brw->clip.vp_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; /* _NEW_TRANSFORM */ if (!ctx->Transform.DepthClamp) diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index c739201f7b..6a9653545f 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -86,24 +86,35 @@ static void upload_sf_unit( struct brw_context *brw ) brw->urb.nr_sf_entries) - 1; /* BRW_NEW_SF_VP */ - sf->sf5.sf_viewport_state_offset = (brw->batch.bo->offset64 + - brw->sf.vp_offset) >> 5; /* reloc */ - - sf->sf5.viewport_transform = 1; - + int viewport_transform = 1; sf->sf6.scissor = 1; /* _NEW_POLYGON */ + int front_winding; if (brw->polygon_front_bit) - sf->sf5.front_winding = BRW_FRONTWINDING_CW; + front_winding = BRW_FRONTWINDING_CW; else - sf->sf5.front_winding = BRW_FRONTWINDING_CCW; + front_winding = BRW_FRONTWINDING_CCW; /* _NEW_BUFFERS * The viewport is inverted for rendering to a FBO, and that inverts * polygon front/back orientation. */ - sf->sf5.front_winding ^= render_to_fbo; + front_winding ^= render_to_fbo; + + /* Emit SF viewport relocation */ + sf->sf5.sf_viewport_state_offset = + brw_emit_reloc(&brw->batch, + brw->sf.state_offset + + offsetof(struct brw_sf_unit_state, sf5), + brw->batch.bo, + brw->sf.vp_offset | + front_winding | + (viewport_transform << 1), + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; + + sf->sf5.viewport_transform = viewport_transform; + sf->sf5.front_winding = front_winding; /* _NEW_POLYGON */ switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) { @@ -168,15 +179,6 @@ static void upload_sf_unit( struct brw_context *brw ) * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. */ - /* Emit SF viewport relocation */ - brw_emit_reloc(&brw->batch, - brw->sf.state_offset + - offsetof(struct brw_sf_unit_state, sf5), - brw->batch.bo, - brw->sf.vp_offset | sf->sf5.front_winding | - (sf->sf5.viewport_transform << 1), - I915_GEM_DOMAIN_INSTRUCTION, 0); - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 69bbeb26d4..551d02a961 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -132,10 +132,15 @@ brw_upload_wm_unit(struct brw_context *brw) prog_data->base.binding_table.size_bytes / 4; if (prog_data->base.total_scratch != 0) { + int per_thread_scratch_space = ffs(brw->wm.base.per_thread_scratch) - 11; wm->thread2.scratch_space_base_pointer = - brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */ - wm->thread2.per_thread_scratch_space = - ffs(brw->wm.base.per_thread_scratch) - 11; + brw_emit_reloc(&brw->batch, + brw->wm.base.state_offset + + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.base.scratch_bo, + per_thread_scratch_space, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER) >> 10; + wm->thread2.per_thread_scratch_space = per_thread_scratch_space; } else { wm->thread2.scratch_space_base_pointer = 0; wm->thread2.per_thread_scratch_space = 0; @@ -151,20 +156,31 @@ brw_upload_wm_unit(struct brw_context *brw) /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; + int sampler_count; if (brw->gen == 5) - wm->wm4.sampler_count = 0; /* hardware requirement */ + sampler_count = 0; /* hardware requirement */ else { - wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4; + sampler_count = (brw->wm.base.sampler_count + 1) / 4; } if (brw->wm.base.sampler_count) { /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */ - wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 + - brw->wm.base.sampler_offset) >> 5; + wm->wm4.sampler_state_pointer = + brw_emit_reloc(&brw->batch, + brw->wm.base.state_offset + + offsetof(struct brw_wm_unit_state, wm4), + brw->batch.bo, + brw->wm.base.sampler_offset | + brw->stats_wm | /* BRW_NEW_STATS_WM */ + (sampler_count << 2), + I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; } else { wm->wm4.sampler_state_pointer = 0; } + wm->wm4.stats_enable = brw->stats_wm; + wm->wm4.sampler_count = sampler_count; + /* BRW_NEW_FRAGMENT_PROGRAM */ wm->wm5.program_uses_depth = prog_data->uses_src_depth; wm->wm5.program_computes_depth = (fp->info.outputs_written & @@ -216,31 +232,6 @@ brw_upload_wm_unit(struct brw_context *brw) /* _NEW_LINE */ wm->wm5.line_stipple = ctx->Line.StippleFlag; - /* BRW_NEW_STATS_WM */ - if (brw->stats_wm) - wm->wm4.stats_enable = 1; - - /* Emit scratch space relocation */ - if (prog_data->base.total_scratch != 0) { - brw_emit_reloc(&brw->batch, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.base.scratch_bo, - wm->thread2.per_thread_scratch_space, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } - - /* Emit sampler state relocation */ - if (brw->wm.base.sampler_count != 0) { - brw_emit_reloc(&brw->batch, - brw->wm.base.state_offset + - offsetof(struct brw_wm_unit_state, wm4), - brw->batch.bo, - brw->wm.base.sampler_offset | wm->wm4.stats_enable | - (wm->wm4.sampler_count << 2), - I915_GEM_DOMAIN_INSTRUCTION, 0); - } - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; /* _NEW_POLGYON */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 8d18179d89..1068469937 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -149,12 +149,12 @@ brw_emit_surface_state(struct brw_context *brw, assert(mt->mcs_buf->offset == 0); aux_bo = mt->mcs_buf->bo; - aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset; + aux_offset = mt->mcs_buf->offset; } else { aux_surf = &mt->hiz_buf->surf; aux_bo = mt->hiz_buf->bo; - aux_offset = mt->hiz_buf->bo->offset64; + aux_offset = 0; } /* We only really need a clear color if we also have an auxiliary @@ -169,28 +169,28 @@ brw_emit_surface_state(struct brw_context *brw, surf_offset); isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, - .address = mt->bo->offset64 + offset, + .address = brw_emit_reloc(&brw->batch, + *surf_offset + brw->isl_dev.ss.addr_offset, + mt->bo, offset, read_domains, write_domains), .aux_surf = aux_surf, .aux_usage = aux_usage, .aux_address = aux_offset, .mocs = mocs, .clear_color = clear_color, .x_offset_sa = tile_x, .y_offset_sa = tile_y); - - brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset, - mt->bo, offset, read_domains, write_domains); - if (aux_surf) { /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits * contain other control information. Since buffer addresses are always * on 4k boundaries (and thus have their lower 12 bits zero), we can use * an ordinary reloc to do the necessary address translation. + * + * FIXME: move to the point of assignment. */ assert((aux_offset & 0xfff) == 0); uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset; - brw_emit_reloc(&brw->batch, - *surf_offset + brw->isl_dev.ss.aux_addr_offset, - aux_bo, *aux_addr - aux_bo->offset64, - read_domains, write_domains); + *aux_addr = brw_emit_reloc(&brw->batch, + *surf_offset + brw->isl_dev.ss.aux_addr_offset, + aux_bo, *aux_addr, /* XXX read from WC */ + read_domains, write_domains); } } @@ -667,18 +667,15 @@ brw_emit_buffer_surface_state(struct brw_context *brw, out_offset); isl_buffer_fill_state(&brw->isl_dev, dw, - .address = (bo ? bo->offset64 : 0) + buffer_offset, + .address = brw_emit_reloc(&brw->batch, + *out_offset + brw->isl_dev.ss.addr_offset, + bo, buffer_offset, + I915_GEM_DOMAIN_SAMPLER, + (rw ? I915_GEM_DOMAIN_SAMPLER : 0)), .size = buffer_size, .format = surface_format, .stride = pitch, .mocs = tex_mocs[brw->gen]); - - if (bo) { - brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset, - bo, buffer_offset, - I915_GEM_DOMAIN_SAMPLER, - (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); - } } void @@ -841,17 +838,15 @@ brw_update_sol_surface(struct brw_context *brw, BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | surface_format << BRW_SURFACE_FORMAT_SHIFT | BRW_SURFACE_RC_READ_WRITE; - surf[1] = bo->offset64 + offset_bytes; /* reloc */ + surf[1] = brw_emit_reloc(&brw->batch, + *out_offset + 4, bo, offset_bytes, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | height << BRW_SURFACE_HEIGHT_SHIFT); surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); surf[4] = 0; surf[5] = 0; - - /* Emit relocation to surface contents. */ - brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } /* Creates a new WM constant buffer reflecting the current fragment program's @@ -959,7 +954,8 @@ brw_emit_null_surface_state(struct brw_context *brw, 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT | 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT); } - surf[1] = bo ? bo->offset64 : 0; + surf[1] = brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT | (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -972,11 +968,6 @@ brw_emit_null_surface_state(struct brw_context *brw, pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); surf[4] = multisampling_state; surf[5] = 0; - - if (bo) { - brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } } /** @@ -1033,8 +1024,10 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, /* reloc */ assert(mt->offset % mt->cpp == 0); - surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) + - mt->bo->offset64 + mt->offset); + surf[1] = brw_emit_reloc(&brw->batch, offset + 4, + mt->bo, + mt->offset + intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y), + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT | (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT); @@ -1076,9 +1069,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw, } } - brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - return offset; } diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 5718d7429c..2fc3a7d925 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -74,10 +74,10 @@ blorp_surface_reloc(struct blorp_batch *batch, struct brw_context *brw = batch->driver_batch; struct brw_bo *bo = address.buffer; - brw_emit_reloc(&brw->batch, ss_offset, bo, address.offset + delta, - address.read_domains, address.write_domain); - - uint64_t reloc_val = bo->offset64 + address.offset + delta; + uint64_t reloc_val = + brw_emit_reloc(&brw->batch, ss_offset, bo, + address.offset + delta, + address.read_domains, address.write_domain); #if GEN_GEN >= 8 *(uint64_t *)location = reloc_val; #else diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 54a547cb79..7c443602af 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -4840,14 +4840,13 @@ genX(update_sampler_state)(struct brw_context *brw, texObj->StencilSampling, &border_color_offset); } - - samp_st.BorderColorPointer = border_color_offset; - if (GEN_GEN < 6) { - samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */ - brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8, - brw->batch.bo, border_color_offset, - I915_GEM_DOMAIN_SAMPLER, 0); + samp_st.BorderColorPointer = + brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8, + brw->batch.bo, border_color_offset, + I915_GEM_DOMAIN_SAMPLER, 0); + } else { + samp_st.BorderColorPointer = border_color_offset; } #if GEN_GEN >= 8 diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 2783ba3c0f..7763f0564e 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -83,10 +83,8 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset, return prog_offset; } - brw_emit_reloc(&brw->batch, state_offset, brw->cache.bo, prog_offset, - I915_GEM_DOMAIN_INSTRUCTION, 0); - - return brw->cache.bo->offset64 + prog_offset; + return brw_emit_reloc(&brw->batch, state_offset, brw->cache.bo, prog_offset, + I915_GEM_DOMAIN_INSTRUCTION, 0); } #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) -- 2.13.2 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
