Re: [Mesa-dev] [PATCH] i965/Gen7: Fix HiZ ops for MSAA depth
Ping. This has been broken for ages, and at least tesseract is carrying hacks to work around it (extra color buffer containing depth values, in their g-buffer). Getting rid of that wins us about 15%. -- Chris On Sat, Feb 8, 2014 at 3:54 PM, Chris Forbes chr...@ijw.co.nz wrote: Previously, we would program the sample count based on the blorp operation's dst sample count -- which is zero for a HiZ op; we'd also ignore the difference between physical and logical surface size for the depth surface. That kind of worked -- the two errors almost cancel out -- but produced strange blocky artifacts. Instead, program the sample count properly, and use the logical dimensions of the depth surface. Fixes broken rendering in `Tesseract` with msaa=4 glineardepth=0 NOTE: I've hacked the BLORP depth alignment code a bit (at `Not quite sure ...`). This is almost certainly not the right thing. Signed-off-by: Chris Forbes chr...@ijw.co.nz --- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 41 +++- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 4bf9396..58dc497 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -447,7 +447,8 @@ gen7_blorp_emit_streamout_disable(struct brw_context *brw, static void gen7_blorp_emit_sf_config(struct brw_context *brw, - const brw_blorp_params *params) + const brw_blorp_params *params, + int num_samples) { /* 3DSTATE_SF * @@ -472,7 +473,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); OUT_BATCH(params->depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0); + OUT_BATCH(num_samples > 1 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -500,7 +501,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, static void gen7_blorp_emit_wm_config(struct brw_context *brw, const brw_blorp_params *params, - brw_blorp_prog_data *prog_data) + brw_blorp_prog_data *prog_data, + int num_samples) { uint32_t dw1 = 0, dw2 = 0; @@ -528,7 +530,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ } - if (params->dst.num_samples > 1) { + if (num_samples > 1) { dw1 |= GEN7_WM_MSRAST_ON_PATTERN; if (prog_data && prog_data->persample_msaa_dispatch) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; @@ -562,7 +564,8 @@ static void gen7_blorp_emit_ps_config(struct brw_context *brw, const brw_blorp_params *params, uint32_t prog_offset, - brw_blorp_prog_data *prog_data) + brw_blorp_prog_data *prog_data, + int num_samples) { uint32_t dw2, dw4, dw5; const int max_threads_shift = brw->is_haswell ? @@ -579,8 +582,10 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, */ dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - if (brw->is_haswell) - dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ + if (brw->is_haswell) { + int sample_mask = (num_samples > 1) ? (1 << num_samples) - 1 : 1; + dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); + } if (params->use_wm_prog) { dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; @@ -714,7 +719,9 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, lod = params->depth.level - params->depth.mt->first_level; - if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { + /* not quite sure about the MSAA interaction here! */ + if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0 + && params->depth.mt->num_samples <= 1) { /* HIZ ops for lod 0 may set the width & height a little * larger to allow the fast depth clear to fit the hardware * alignment requirements. 
(8x4) @@ -722,8 +729,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, surfwidth = params->depth.width; surfheight = params->depth.height; } else { - surfwidth = params->depth.mt->logical_width0; - surfheight = params->depth.mt->logical_height0; + surfwidth = ALIGN(params->depth.mt->logical_width0, 8); + surfheight = ALIGN(params->depth.mt->logical_height0, 4); } /* 3DSTATE_DEPTH_BUFFER */ @@ -863,10 +870,12 @@ gen7_blorp_exec(struct brw_context *brw, uint32_t sampler_offset = 0; uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); -
[Mesa-dev] [PATCH] i965/Gen7: Fix HiZ ops for MSAA depth
Previously, we would program the sample count based on the blorp operation's dst sample count -- which is zero for a HiZ op; we'd also ignore the difference between physical and logical surface size for the depth surface. That kind of worked -- the two errors almost cancel out -- but produced strange blocky artifacts. Instead, program the sample count properly, and use the logical dimensions of the depth surface. Fixes broken rendering in `Tesseract` with msaa=4 glineardepth=0 NOTE: I've hacked the BLORP depth alignment code a bit (at `Not quite sure ...`). This is almost certainly not the right thing. Signed-off-by: Chris Forbes chr...@ijw.co.nz --- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 41 +++- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 4bf9396..58dc497 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -447,7 +447,8 @@ gen7_blorp_emit_streamout_disable(struct brw_context *brw, static void gen7_blorp_emit_sf_config(struct brw_context *brw, - const brw_blorp_params *params) + const brw_blorp_params *params, + int num_samples) { /* 3DSTATE_SF * @@ -472,7 +473,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); OUT_BATCH(params->depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0); + OUT_BATCH(num_samples > 1 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -500,7 +501,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw, static void gen7_blorp_emit_wm_config(struct brw_context *brw, const brw_blorp_params *params, - brw_blorp_prog_data *prog_data) + brw_blorp_prog_data *prog_data, + int num_samples) { uint32_t dw1 = 0, dw2 = 0; @@ -528,7 +530,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ } - if (params->dst.num_samples > 1) { + if (num_samples > 1) { dw1 |= GEN7_WM_MSRAST_ON_PATTERN; if (prog_data && prog_data->persample_msaa_dispatch) dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; @@ -562,7 +564,8 @@ static void gen7_blorp_emit_ps_config(struct brw_context *brw, const brw_blorp_params *params, uint32_t prog_offset, - brw_blorp_prog_data *prog_data) + brw_blorp_prog_data *prog_data, + int num_samples) { uint32_t dw2, dw4, dw5; const int max_threads_shift = brw->is_haswell ? @@ -579,8 +582,10 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, */ dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - if (brw->is_haswell) - dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ + if (brw->is_haswell) { + int sample_mask = (num_samples > 1) ? (1 << num_samples) - 1 : 1; + dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK); + } if (params->use_wm_prog) { dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; @@ -714,7 +719,9 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, lod = params->depth.level - params->depth.mt->first_level; - if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { + /* not quite sure about the MSAA interaction here! */ + if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0 + && params->depth.mt->num_samples <= 1) { /* HIZ ops for lod 0 may set the width & height a little * larger to allow the fast depth clear to fit the hardware * alignment requirements. 
(8x4) @@ -722,8 +729,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, surfwidth = params->depth.width; surfheight = params->depth.height; } else { - surfwidth = params->depth.mt->logical_width0; - surfheight = params->depth.mt->logical_height0; + surfwidth = ALIGN(params->depth.mt->logical_width0, 8); + surfheight = ALIGN(params->depth.mt->logical_height0, 4); } /* 3DSTATE_DEPTH_BUFFER */ @@ -863,10 +870,12 @@ gen7_blorp_exec(struct brw_context *brw, uint32_t sampler_offset = 0; uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); - gen6_emit_3dstate_multisample(brw, params->dst.num_samples); + int num_samples = (params->hiz_op == GEN6_HIZ_OP_NONE) ? + params->dst.num_samples : params->depth.mt->num_samples; + gen6_emit_3dstate_multisample(brw, num_samples); gen6_emit_3dstate_sample_mask(brw, - params->dst.num_samples > 1 ? - (1