Batch emission logic for launching glsl-based programs without reading/writing the current gl-context.
Initially I also wrote support for launching the handwritten assembly programs, but I took it out. It is very unlikely that we will ever want to use them for gen >= 8. The whole idea of this series is to allow even gen < 8 to use the glsl-based programs instead. I still have the (untested) support in my local trees, though. I thought about exposing the i965 core support for disabling the HS, DS, etc. stages and re-using it here. I decided against it in the end, as the core is likely to enable some of them in the future. If we would like to do this instead, I can do it as a follow-up; the same applies to gen7. Finally, it should be noted that the patch does not introduce gen8_emit_surface_states(). Instead, gen8_blorp_exec() calls gen6_emit_surface_states(), which is already implemented for glsl-based programs using the gen-aware jump tables that the core i965 batch submission mechanism provides. Signed-off-by: Topi Pohjolainen <topi.pohjolai...@intel.com> --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_blorp.cpp | 6 +- src/mesa/drivers/dri/i965/brw_blorp.h | 12 + src/mesa/drivers/dri/i965/gen8_blorp.cpp | 494 +++++++++++++++++++++++++++++ 4 files changed, 512 insertions(+), 1 deletion(-) create mode 100644 src/mesa/drivers/dri/i965/gen8_blorp.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 6d4659f..62d6860 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -164,6 +164,7 @@ i965_FILES = \ gen7_wm_state.c \ gen7_wm_surface_state.c \ gen8_blend_state.c \ + gen8_blorp.cpp \ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index bd080b0..eb1a950 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -210,7 +210,7 @@ void brw_blorp_exec(struct brw_context *brw, brw_blorp_params *params) { 
struct gl_context *ctx = &brw->ctx; - uint32_t estimated_max_batch_usage = 1500; + uint32_t estimated_max_batch_usage = brw->gen >= 8 ? 2048 : 1500; bool check_aperture_failed_once = false; /* Flush the sampler and render caches. We definitely need to flush the @@ -236,6 +236,10 @@ retry: case 7: gen7_blorp_exec(brw, params); break; + case 8: + case 9: + gen8_blorp_exec(brw, params); + break; default: /* BLORP is not supported before Gen6. */ unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 750ec5f..638d6e7 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -222,14 +222,18 @@ public: virtual void gen6_emit_wm_config(struct brw_context *brw) const; virtual void gen7_emit_wm_config(struct brw_context *brw) const; + virtual void gen8_emit_wm_config(struct brw_context *brw) const; virtual void gen6_emit_sampler_state(struct brw_context *brw) const; virtual void gen6_emit_multisample_state(struct brw_context *brw) const; + virtual void gen8_emit_multisample_state(struct brw_context *brw) const; virtual void gen7_emit_ps_config(struct brw_context *brw) const; + virtual void gen8_emit_ps_config(struct brw_context *brw) const; virtual void gen6_emit_vertices(struct brw_context *brw) const; + virtual void gen8_emit_vertices_extra(struct brw_context *brw) const; uint32_t x0; uint32_t y0; @@ -251,6 +255,9 @@ public: void brw_blorp_exec(struct brw_context *brw, brw_blorp_params *params); +void +gen8_blorp_exec(struct brw_context *brw, brw_blorp_params *params); + /** * Parameters for a HiZ or depth resolve operation. 
@@ -409,10 +416,14 @@ public: virtual void gen6_emit_wm_config(struct brw_context *brw) const; virtual void gen7_emit_wm_config(struct brw_context *brw) const; + virtual void gen8_emit_wm_config(struct brw_context *brw) const; + + virtual void gen8_emit_multisample_state(struct brw_context *brw) const; virtual void gen6_emit_multisample_state(struct brw_context *brw) const; virtual void gen7_emit_ps_config(struct brw_context *brw) const; + virtual void gen8_emit_ps_config(struct brw_context *brw) const; protected: static void gen6_emit_vertex_elems(struct brw_context *brw, @@ -453,6 +464,7 @@ public: bool mirror_x, bool mirror_y); virtual void gen6_emit_vertices(struct brw_context *brw) const; + virtual void gen8_emit_vertices_extra(struct brw_context *brw) const; virtual void gen6_emit_surface_states(struct brw_context *brw); virtual void gen7_emit_surface_states(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.cpp b/src/mesa/drivers/dri/i965/gen8_blorp.cpp new file mode 100644 index 0000000..e242173 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen8_blorp.cpp @@ -0,0 +1,494 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "brw_blorp.h" + + +static uint32_t +gen8_blorp_emit_blend_state(struct brw_context *brw, unsigned num_draw_bufs) +{ + assume(num_draw_bufs); + + uint32_t blend_state_offset; + const unsigned size = 4 + 8 * num_draw_bufs; + uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, + &blend_state_offset); + memset(blend, 0, size); + + for (unsigned i = 0; i < num_draw_bufs; ++i) { + blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE | + GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE | + GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT; + } + + return blend_state_offset; +} + +static void +gen8_blorp_emit_disable_constant_state(struct brw_context *brw, + unsigned opcode) +{ + BEGIN_BATCH(11); + OUT_BATCH(opcode << 16 | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_VS + * + * Disable vertex shader. 
+ */ +static void +gen8_blorp_emit_vs_disable(struct brw_context *brw) +{ + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_VS); + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_HS + * + * Disable the hull shader. + */ +static void +gen8_blorp_emit_hs_disable(struct brw_context *brw) +{ + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_HS); + + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_HS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_DS + * + * Disable the domain shader. + */ +static void +gen8_blorp_emit_ds_disable(struct brw_context *brw) +{ + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_DS); + + int ds_pkt_len = brw->gen >= 9 ? 11 : 9; + BEGIN_BATCH(ds_pkt_len); + OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); + for (int i = 0; i < ds_pkt_len - 1; i++) + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_DS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_wm_chromakey_disable(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_GS + * + * Disable the geometry shader. 
+ */ +static void +gen8_blorp_emit_gs_disable(struct brw_context *brw) +{ + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_GS); + + BEGIN_BATCH(10); + OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ +static void +gen8_blorp_emit_streamout_disable(struct brw_context *brw) +{ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_raster_state(struct brw_context *brw) +{ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2)); + OUT_BATCH(GEN8_RASTER_CULL_NONE | GEN8_RASTER_FRONT_WINDING_CCW); + OUT_BATCH_F(0); + OUT_BATCH_F(0); + OUT_BATCH_F(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_sbe_state(struct brw_context *brw) +{ + /* 3DSTATE_SBE */ + { + const unsigned sbe_cmd_length = brw->gen == 8 ? 
4 : 6; + BEGIN_BATCH(sbe_cmd_length); + OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2)); + OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | + (2 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */ + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + BRW_SF_URB_ENTRY_READ_OFFSET << + GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | + GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | + GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET); + OUT_BATCH(0); + OUT_BATCH(0); + if (sbe_cmd_length >= 6) { + OUT_BATCH(GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); + } + + { + BEGIN_BATCH(11); + OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2)); + + /* Output DWords 1 through 8: */ + for (int i = 0; i < 8; i++) { + OUT_BATCH(0); + } + + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + } +} + +static void +gen8_blorp_emit_sf_config(struct brw_context *brw) +{ + /* See gen6_blorp_emit_sf_config() */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/** + * Emit 3DSTATE_WM: fixed line/point raster flags plus the given barycentric interpolation mode. + */ +static void +gen8_blorp_emit_wm_state(struct brw_context *brw, + unsigned barycentric_interp_mode) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); + OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0 | + GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 | + GEN7_WM_POINT_RASTRULE_UPPER_RIGHT | + barycentric_interp_mode << + GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_ps_blend(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2)); + OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_depth_disable(struct brw_context *brw) +{ + /* Skip repeated NULL depth/stencil emits (think 2D rendering). 
*/ + if (brw->no_depth_or_stencil) + return; + + intel_emit_depth_stall_flushes(brw); + + BEGIN_BATCH(8); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(5); + OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(5); + OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_topology(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2)); + OUT_BATCH(_3DPRIM_RECTLIST); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_instancing_state(struct brw_context *brw, + unsigned num_elems) +{ + for (unsigned i = 0; i < num_elems; ++i) { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); + OUT_BATCH(i); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +static void +gen8_blorp_emit_vf_state(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_depth_stencil_state(struct brw_context *brw, + const brw_blorp_params *params) +{ + uint32_t dw1 = 0; + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) + dw1 |= GEN8_WM_DS_STENCIL_TEST_ENABLE; + + const unsigned pkt_len = brw->gen >= 9 ? 
4 : 3; + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2)); + OUT_BATCH(dw1); + OUT_BATCH(0); + if (pkt_len > 3) { + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +void +brw_blorp_params::gen8_emit_wm_config(struct brw_context *brw) const +{ + unreachable("Handwritten assembly programs are not supported on gen >= 8"); +} + +void +brw_blorp_params::gen8_emit_multisample_state(struct brw_context *brw) const +{ + unreachable("Handwritten assembly programs are not supported on gen >= 8"); +} + +void +brw_blorp_params::gen8_emit_ps_config(struct brw_context *brw) const +{ + unreachable("Handwritten assembly programs are not supported on gen >= 8"); +} + +void +brw_blorp_params::gen8_emit_vertices_extra(struct brw_context *brw) const +{ + unreachable("Handwritten assembly programs are not supported on gen >= 8"); +} + +void +brw_meta_fs_params::gen8_emit_wm_config(struct brw_context *brw) const +{ + gen8_blorp_emit_wm_state(brw, wm_prog_data->barycentric_interp_modes); +} + +void +brw_meta_fs_params::gen8_emit_multisample_state(struct brw_context *brw) const +{ + const unsigned sample_mask = dst_num_samples > 1 ? 
+ (1 << dst_num_samples) - 1 : 1; + + gen8_emit_3dstate_multisample(brw, dst_num_samples); + gen6_emit_3dstate_sample_mask(brw, sample_mask); +} + +void +brw_meta_fs_params::gen8_emit_ps_config(struct brw_context *brw) const +{ + const bool multisampled_fbo = dst_num_samples > 1; + gen8_blorp_emit_ps_blend(brw); + gen8_upload_ps_extra(brw, fp, wm_prog_data, multisampled_fbo); + gen8_upload_ps_state(brw, fp, &wm_stage_state, wm_prog_data, + fast_clear_op); +} + +void +brw_meta_blit_params::gen8_emit_vertices_extra(struct brw_context *brw) const +{ + const unsigned num_elements = 3; + gen8_blorp_emit_vf_instancing_state(brw, num_elements); +} + +/** + * \copydoc gen6_blorp_exec() + */ +void +gen8_blorp_exec(struct brw_context *brw, brw_blorp_params *params) +{ + /* Handwritten assembly programs are not supported on gen >= 8 */ + assert(!params->depth.mt); + assert(params->use_wm_prog); + + params->gen8_emit_multisample_state(brw); + gen8_upload_state_base_address(brw); + gen8_blorp_emit_vf_topology(brw); + gen8_blorp_emit_vf_sys_gen_vals_state(brw); + params->gen6_emit_vertices(brw); + params->gen8_emit_vertices_extra(brw); + gen8_blorp_emit_vf_state(brw); + gen7_blorp_emit_urb_config(brw); + + const uint32_t cc_blend_state_offset = gen8_blorp_emit_blend_state( + brw, params->num_draw_buffers); + const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw); + gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); + gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); + + gen8_blorp_emit_depth_stencil_state(brw, params); + params->gen6_emit_wm_constants(brw); + params->gen6_emit_surface_states(brw); + params->gen6_emit_sampler_state(brw); + gen8_blorp_emit_vs_disable(brw); + gen8_blorp_emit_hs_disable(brw); + gen7_blorp_emit_te_disable(brw); + gen8_blorp_emit_ds_disable(brw); + gen8_blorp_emit_wm_chromakey_disable(brw); + gen8_blorp_emit_gs_disable(brw); + gen8_blorp_emit_streamout_disable(brw); + gen6_blorp_emit_clip_disable(brw); + 
gen8_blorp_emit_raster_state(brw); + gen8_blorp_emit_sbe_state(brw); + gen8_blorp_emit_sf_config(brw); + params->gen8_emit_wm_config(brw); + params->gen8_emit_ps_config(brw); + gen7_blorp_emit_cc_viewport(brw); + + gen8_blorp_emit_depth_disable(brw); + + gen7_blorp_emit_clear_params(brw, params); + gen6_blorp_emit_drawing_rectangle(brw, params); + gen7_blorp_emit_primitive(brw, params); +} -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev