Re: [Mesa-dev] [PATCH 3/7] i965/blorp: Fix integer downsampling on Gen7.

2012-07-19 Thread Anuj Phogat
On Thu, Jul 12, 2012 at 10:43 AM, Paul Berry stereotype...@gmail.com wrote:
 When downsampling an integer-format buffer on Gen7, we need to use the
 avg instruction rather than the add instruction, to ensure that we
 don't overflow the range of 32-bit integers.  Also, we need to use the
 proper register type (BRW_REGISTER_TYPE_D or BRW_REGISTER_TYPE_UD) for
 intermediate color data and for writing to the render target.

 Note: this patch causes blorp to use the proper register type for all
 operations (downsampling, upsampling, and ordinary blits).  Strictly
 speaking, this is only necessary for downsampling, because the other
 operations exclusively use MOV instructions on the color data.  But
 it's simpler to use the proper register type in all cases.
 ---
  src/mesa/drivers/dri/i965/brw_blorp.h|5 ++
  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp |   61 
 +-
  2 files changed, 55 insertions(+), 11 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
 b/src/mesa/drivers/dri/i965/brw_blorp.h
 index 053eef7..9af492d 100644
 --- a/src/mesa/drivers/dri/i965/brw_blorp.h
 +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
 @@ -223,6 +223,11 @@ struct brw_blorp_blit_prog_key
 /* Actual MSAA layout used by the destination image. */
 intel_msaa_layout dst_layout;

 +   /* Type of the data to be read from the texture (one of
 +* BRW_REGISTER_TYPE_{UD,D,F}).
 +*/
 +   unsigned texture_data_type;
 +
 /* True if the source image is W tiled.  If true, the surface state for 
 the
  * source image must be configured as Y tiled, and tex_samples must be 0.
  */
 diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
 b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
 index 74ae52d..32fd48e 100644
 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
 @@ -696,7 +696,9 @@ brw_blorp_blit_program::alloc_regs()
 alloc_push_const_regs(reg);
 reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
 for (unsigned i = 0; i  ARRAY_SIZE(texture_data); ++i) {
 -  this-texture_data[i] = vec16(brw_vec8_grf(reg, 0)); reg += 8;
 +  this-texture_data[i] =
 + retype(vec16(brw_vec8_grf(reg, 0)), key-texture_data_type);
 +  reg += 8;
 }
 this-mcs_data =
retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
 @@ -1117,7 +1119,16 @@ brw_blorp_blit_program::manual_blend()
  * operations we do is equal to the number of trailing 1 bits in i.  This
  * works provided the total number of samples is a power of two, which it
  * always is for i965.
 +*
 +* For integer formats, we replace the add operations with average
 +* operations and skip the final division.
  */
 +   typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
 +  struct brw_reg,
 +  struct brw_reg,
 +  struct brw_reg);
 +   brw_op2_ptr combine_op =
 +  key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
 unsigned stack_depth = 0;
 for (int i = 0; i  num_samples; ++i) {
assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
 @@ -1139,9 +1150,9 @@ brw_blorp_blit_program::manual_blend()

   /* TODO: should use a smaller loop bound for non_RGBA formats */
   for (int k = 0; k  4; ++k) {
 -brw_ADD(func, offset(texture_data[stack_depth - 1], 2*k),
 -offset(vec8(texture_data[stack_depth - 1]), 2*k),
 -offset(vec8(texture_data[stack_depth]), 2*k));
 +combine_op(func, offset(texture_data[stack_depth - 1], 2*k),
 +   offset(vec8(texture_data[stack_depth - 1]), 2*k),
 +   offset(vec8(texture_data[stack_depth]), 2*k));
   }
}
 }
 @@ -1149,12 +1160,14 @@ brw_blorp_blit_program::manual_blend()
 /* We should have just 1 sample on the stack now. */
 assert(stack_depth == 1);

 -   /* Scale the result down by a factor of num_samples */
 -   /* TODO: should use a smaller loop bound for non-RGBA formats */
 -   for (int j = 0; j  4; ++j) {
 -  brw_MUL(func, offset(texture_data[0], 2*j),
 -  offset(vec8(texture_data[0]), 2*j),
 -  brw_imm_f(1.0/num_samples));
 +   if (key-texture_data_type == BRW_REGISTER_TYPE_F) {
 +  /* Scale the result down by a factor of num_samples */
 +  /* TODO: should use a smaller loop bound for non-RGBA formats */
 +  for (int j = 0; j  4; ++j) {
 + brw_MUL(func, offset(texture_data[0], 2*j),
 + offset(vec8(texture_data[0]), 2*j),
 + brw_imm_f(1.0/num_samples));
 +  }
 }
  }

 @@ -1319,7 +1332,8 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg 
 dst,
  void
  brw_blorp_blit_program::render_target_write()
  {
 -   struct brw_reg mrf_rt_write = 

[Mesa-dev] [PATCH 3/7] i965/blorp: Fix integer downsampling on Gen7.

2012-07-12 Thread Paul Berry
When downsampling an integer-format buffer on Gen7, we need to use the
avg instruction rather than the add instruction, to ensure that we
don't overflow the range of 32-bit integers.  Also, we need to use the
proper register type (BRW_REGISTER_TYPE_D or BRW_REGISTER_TYPE_UD) for
intermediate color data and for writing to the render target.

Note: this patch causes blorp to use the proper register type for all
operations (downsampling, upsampling, and ordinary blits).  Strictly
speaking, this is only necessary for downsampling, because the other
operations exclusively use MOV instructions on the color data.  But
it's simpler to use the proper register type in all cases.
---
 src/mesa/drivers/dri/i965/brw_blorp.h|5 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp |   61 +-
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index 053eef7..9af492d 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -223,6 +223,11 @@ struct brw_blorp_blit_prog_key
/* Actual MSAA layout used by the destination image. */
intel_msaa_layout dst_layout;
 
+   /* Type of the data to be read from the texture (one of
+* BRW_REGISTER_TYPE_{UD,D,F}).
+*/
+   unsigned texture_data_type;
+
/* True if the source image is W tiled.  If true, the surface state for the
 * source image must be configured as Y tiled, and tex_samples must be 0.
 */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 74ae52d..32fd48e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -696,7 +696,9 @@ brw_blorp_blit_program::alloc_regs()
alloc_push_const_regs(reg);
reg += BRW_BLORP_NUM_PUSH_CONST_REGS;
for (unsigned i = 0; i  ARRAY_SIZE(texture_data); ++i) {
-  this-texture_data[i] = vec16(brw_vec8_grf(reg, 0)); reg += 8;
+  this-texture_data[i] =
+ retype(vec16(brw_vec8_grf(reg, 0)), key-texture_data_type);
+  reg += 8;
}
this-mcs_data =
   retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;
@@ -1117,7 +1119,16 @@ brw_blorp_blit_program::manual_blend()
 * operations we do is equal to the number of trailing 1 bits in i.  This
 * works provided the total number of samples is a power of two, which it
 * always is for i965.
+*
+* For integer formats, we replace the add operations with average
+* operations and skip the final division.
 */
+   typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
+  struct brw_reg,
+  struct brw_reg,
+  struct brw_reg);
+   brw_op2_ptr combine_op =
+  key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
unsigned stack_depth = 0;
for (int i = 0; i  num_samples; ++i) {
   assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
@@ -1139,9 +1150,9 @@ brw_blorp_blit_program::manual_blend()
 
  /* TODO: should use a smaller loop bound for non_RGBA formats */
  for (int k = 0; k  4; ++k) {
-brw_ADD(func, offset(texture_data[stack_depth - 1], 2*k),
-offset(vec8(texture_data[stack_depth - 1]), 2*k),
-offset(vec8(texture_data[stack_depth]), 2*k));
+combine_op(func, offset(texture_data[stack_depth - 1], 2*k),
+   offset(vec8(texture_data[stack_depth - 1]), 2*k),
+   offset(vec8(texture_data[stack_depth]), 2*k));
  }
   }
}
@@ -1149,12 +1160,14 @@ brw_blorp_blit_program::manual_blend()
/* We should have just 1 sample on the stack now. */
assert(stack_depth == 1);
 
-   /* Scale the result down by a factor of num_samples */
-   /* TODO: should use a smaller loop bound for non-RGBA formats */
-   for (int j = 0; j  4; ++j) {
-  brw_MUL(func, offset(texture_data[0], 2*j),
-  offset(vec8(texture_data[0]), 2*j),
-  brw_imm_f(1.0/num_samples));
+   if (key-texture_data_type == BRW_REGISTER_TYPE_F) {
+  /* Scale the result down by a factor of num_samples */
+  /* TODO: should use a smaller loop bound for non-RGBA formats */
+  for (int j = 0; j  4; ++j) {
+ brw_MUL(func, offset(texture_data[0], 2*j),
+ offset(vec8(texture_data[0]), 2*j),
+ brw_imm_f(1.0/num_samples));
+  }
}
 }
 
@@ -1319,7 +1332,8 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
 void
 brw_blorp_blit_program::render_target_write()
 {
-   struct brw_reg mrf_rt_write = vec16(brw_message_reg(base_mrf));
+   struct brw_reg mrf_rt_write =
+  retype(vec16(brw_message_reg(base_mrf)), key-texture_data_type);
int mrf_offset = 0;
 
/* If we may have