Re: [Mesa-dev] [PATCH] i965/Gen7: Fix HiZ ops for MSAA depth

2014-06-15 Thread Chris Forbes
Ping.

This has been broken for ages, and at least tesseract is carrying
hacks to work around it (extra color buffer containing depth values,
in their g-buffer). Getting rid of that wins us about 15%.

-- Chris

On Sat, Feb 8, 2014 at 3:54 PM, Chris Forbes chr...@ijw.co.nz wrote:
 Previously, we would program the sample count based on the blorp
 operation's dst sample count -- which is zero for a HiZ op; we'd also
 ignore the difference between physical and logical surface size for
 the depth surface.

 That kindof worked -- the two errors almost cancel out -- but produced
 strange blocky artifacts.

 Instead, program the sample count properly, and use the logical
 dimensions of the depth surface.

 Fixes broken rendering in `Tesseract` with msaa=4 glineardepth=0

 NOTE: I've hacked the BLORP depth alignment code a bit (at `Not quite
 sure ...`). This is almost certainly not the right thing.

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/gen7_blorp.cpp | 41 
 +++-
  1 file changed, 25 insertions(+), 16 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
 b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 index 4bf9396..58dc497 100644
 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 @@ -447,7 +447,8 @@ gen7_blorp_emit_streamout_disable(struct brw_context *brw,

  static void
  gen7_blorp_emit_sf_config(struct brw_context *brw,
 -  const brw_blorp_params *params)
 +  const brw_blorp_params *params,
 +  int num_samples)
  {
 /* 3DSTATE_SF
  *
 @@ -472,7 +473,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
OUT_BATCH(_3DSTATE_SF  16 | (7 - 2));
OUT_BATCH(params-depth_format 
  GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
 -  OUT_BATCH(params-dst.num_samples  1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
 +  OUT_BATCH(num_samples  1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 @@ -500,7 +501,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
  static void
  gen7_blorp_emit_wm_config(struct brw_context *brw,
const brw_blorp_params *params,
 -  brw_blorp_prog_data *prog_data)
 +  brw_blorp_prog_data *prog_data,
 +  int num_samples)
  {
 uint32_t dw1 = 0, dw2 = 0;

 @@ -528,7 +530,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
 }

 -  if (params-dst.num_samples  1) {
 +  if (num_samples  1) {
   dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
   if (prog_data  prog_data-persample_msaa_dispatch)
  dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
 @@ -562,7 +564,8 @@ static void
  gen7_blorp_emit_ps_config(struct brw_context *brw,
const brw_blorp_params *params,
uint32_t prog_offset,
 -  brw_blorp_prog_data *prog_data)
 +  brw_blorp_prog_data *prog_data,
 +  int num_samples)
  {
 uint32_t dw2, dw4, dw5;
 const int max_threads_shift = brw-is_haswell ?
 @@ -579,8 +582,10 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
  */
 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;

 -   if (brw-is_haswell)
 -  dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
 +   if (brw-is_haswell) {
 +  int sample_mask = (num_samples  1) ? (1  num_samples) - 1 : 1;
 +  dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK);
 +   }
 if (params-use_wm_prog) {
dw2 |= 1  GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
 @@ -714,7 +719,9 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context 
 *brw,

 lod = params-depth.level - params-depth.mt-first_level;

 -   if (params-hiz_op != GEN6_HIZ_OP_NONE  lod == 0) {
 +   /* not quite sure about the MSAA interaction here! */
 +   if (params-hiz_op != GEN6_HIZ_OP_NONE  lod == 0 
 + params-depth.mt-num_samples = 1) {
/* HIZ ops for lod 0 may set the width  height a little
 * larger to allow the fast depth clear to fit the hardware
 * alignment requirements. (8x4)
 @@ -722,8 +729,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context 
 *brw,
surfwidth = params-depth.width;
surfheight = params-depth.height;
 } else {
 -  surfwidth = params-depth.mt-logical_width0;
 -  surfheight = params-depth.mt-logical_height0;
 +  surfwidth = ALIGN(params-depth.mt-logical_width0, 8);
 +  surfheight = ALIGN(params-depth.mt-logical_height0, 4);
 }

 /* 3DSTATE_DEPTH_BUFFER */
 @@ -863,10 +870,12 @@ gen7_blorp_exec(struct brw_context *brw,
 uint32_t sampler_offset = 0;

 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
 -   

[Mesa-dev] [PATCH] i965/Gen7: Fix HiZ ops for MSAA depth

2014-02-07 Thread Chris Forbes
Previously, we would program the sample count based on the blorp
operation's dst sample count -- which is zero for a HiZ op; we'd also
ignore the difference between physical and logical surface size for
the depth surface.

That kindof worked -- the two errors almost cancel out -- but produced
strange blocky artifacts.

Instead, program the sample count properly, and use the logical
dimensions of the depth surface.

Fixes broken rendering in `Tesseract` with msaa=4 glineardepth=0

NOTE: I've hacked the BLORP depth alignment code a bit (at `Not quite
sure ...`). This is almost certainly not the right thing.

Signed-off-by: Chris Forbes chr...@ijw.co.nz
---
 src/mesa/drivers/dri/i965/gen7_blorp.cpp | 41 +++-
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 4bf9396..58dc497 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -447,7 +447,8 @@ gen7_blorp_emit_streamout_disable(struct brw_context *brw,
 
 static void
 gen7_blorp_emit_sf_config(struct brw_context *brw,
-  const brw_blorp_params *params)
+  const brw_blorp_params *params,
+  int num_samples)
 {
/* 3DSTATE_SF
 *
@@ -472,7 +473,7 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
   OUT_BATCH(_3DSTATE_SF  16 | (7 - 2));
   OUT_BATCH(params-depth_format 
 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
-  OUT_BATCH(params-dst.num_samples  1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
+  OUT_BATCH(num_samples  1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
   OUT_BATCH(0);
   OUT_BATCH(0);
   OUT_BATCH(0);
@@ -500,7 +501,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
 static void
 gen7_blorp_emit_wm_config(struct brw_context *brw,
   const brw_blorp_params *params,
-  brw_blorp_prog_data *prog_data)
+  brw_blorp_prog_data *prog_data,
+  int num_samples)
 {
uint32_t dw1 = 0, dw2 = 0;
 
@@ -528,7 +530,7 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
   dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */
}
 
-  if (params-dst.num_samples  1) {
+  if (num_samples  1) {
  dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
  if (prog_data  prog_data-persample_msaa_dispatch)
 dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
@@ -562,7 +564,8 @@ static void
 gen7_blorp_emit_ps_config(struct brw_context *brw,
   const brw_blorp_params *params,
   uint32_t prog_offset,
-  brw_blorp_prog_data *prog_data)
+  brw_blorp_prog_data *prog_data,
+  int num_samples)
 {
uint32_t dw2, dw4, dw5;
const int max_threads_shift = brw-is_haswell ?
@@ -579,8 +582,10 @@ gen7_blorp_emit_ps_config(struct brw_context *brw,
 */
dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
 
-   if (brw-is_haswell)
-  dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
+   if (brw-is_haswell) {
+  int sample_mask = (num_samples  1) ? (1  num_samples) - 1 : 1;
+  dw4 |= SET_FIELD(sample_mask, HSW_PS_SAMPLE_MASK);
+   }
if (params-use_wm_prog) {
   dw2 |= 1  GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
   dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
@@ -714,7 +719,9 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
 
lod = params-depth.level - params-depth.mt-first_level;
 
-   if (params-hiz_op != GEN6_HIZ_OP_NONE  lod == 0) {
+   /* not quite sure about the MSAA interaction here! */
+   if (params-hiz_op != GEN6_HIZ_OP_NONE  lod == 0 
+ params-depth.mt-num_samples = 1) {
   /* HIZ ops for lod 0 may set the width  height a little
* larger to allow the fast depth clear to fit the hardware
* alignment requirements. (8x4)
@@ -722,8 +729,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
   surfwidth = params-depth.width;
   surfheight = params-depth.height;
} else {
-  surfwidth = params-depth.mt-logical_width0;
-  surfheight = params-depth.mt-logical_height0;
+  surfwidth = ALIGN(params-depth.mt-logical_width0, 8);
+  surfheight = ALIGN(params-depth.mt-logical_height0, 4);
}
 
/* 3DSTATE_DEPTH_BUFFER */
@@ -863,10 +870,12 @@ gen7_blorp_exec(struct brw_context *brw,
uint32_t sampler_offset = 0;
 
uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
-   gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
+   int num_samples = (params-hiz_op == GEN6_HIZ_OP_NONE) ?
+  params-dst.num_samples : params-depth.mt-num_samples;
+   gen6_emit_3dstate_multisample(brw, num_samples);
gen6_emit_3dstate_sample_mask(brw,
- params-dst.num_samples  1 ?
- (1