Re: [Mesa-dev] [PATCH 3/7] i965/blorp: Fix integer downsampling on Gen7.
On Thu, Jul 12, 2012 at 10:43 AM, Paul Berry stereotype...@gmail.com wrote: When downsampling an integer-format buffer on Gen7, we need to use the avg instruction rather than the add instruction, to ensure that we don't overflow the range of 32-bit integers. Also, we need to use the proper register type (BRW_REGISTER_TYPE_D or BRW_REGISTER_TYPE_UD) for intermediate color data and for writing to the render target. Note: this patch causes blorp to use the proper register type for all operations (downsampling, upsampling, and ordinary blits). Strictly speaking, this is only necessary for downsampling, because the other operations exclusively use MOV instructions on the color data. But it's simpler to use the proper register type in all cases. --- src/mesa/drivers/dri/i965/brw_blorp.h|5 ++ src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 61 +- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 053eef7..9af492d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -223,6 +223,11 @@ struct brw_blorp_blit_prog_key /* Actual MSAA layout used by the destination image. */ intel_msaa_layout dst_layout; + /* Type of the data to be read from the texture (one of +* BRW_REGISTER_TYPE_{UD,D,F}). +*/ + unsigned texture_data_type; + /* True if the source image is W tiled. If true, the surface state for the * source image must be configured as Y tiled, and tex_samples must be 0. */ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 74ae52d..32fd48e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -696,7 +696,9 @@ brw_blorp_blit_program::alloc_regs() alloc_push_const_regs(reg); reg += BRW_BLORP_NUM_PUSH_CONST_REGS; for (unsigned i = 0; i ARRAY_SIZE(texture_data); ++i) { - this-texture_data[i] = vec16(brw_vec8_grf(reg, 0)); reg += 8; + this-texture_data[i] = + retype(vec16(brw_vec8_grf(reg, 0)), key-texture_data_type); + reg += 8; } this-mcs_data = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8; @@ -1117,7 +1119,16 @@ brw_blorp_blit_program::manual_blend() * operations we do is equal to the number of trailing 1 bits in i. This * works provided the total number of samples is a power of two, which it * always is for i965. +* +* For integer formats, we replace the add operations with average +* operations and skip the final division. */ + typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg); + brw_op2_ptr combine_op = + key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG; unsigned stack_depth = 0; for (int i = 0; i num_samples; ++i) { assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ @@ -1139,9 +1150,9 @@ brw_blorp_blit_program::manual_blend() /* TODO: should use a smaller loop bound for non_RGBA formats */ for (int k = 0; k 4; ++k) { -brw_ADD(func, offset(texture_data[stack_depth - 1], 2*k), -offset(vec8(texture_data[stack_depth - 1]), 2*k), -offset(vec8(texture_data[stack_depth]), 2*k)); +combine_op(func, offset(texture_data[stack_depth - 1], 2*k), + offset(vec8(texture_data[stack_depth - 1]), 2*k), + offset(vec8(texture_data[stack_depth]), 2*k)); } } } @@ -1149,12 +1160,14 @@ brw_blorp_blit_program::manual_blend() /* We should have just 1 sample on the stack now. */ assert(stack_depth == 1); - /* Scale the result down by a factor of num_samples */ - /* TODO: should use a smaller loop bound for non-RGBA formats */ - for (int j = 0; j 4; ++j) { - brw_MUL(func, offset(texture_data[0], 2*j), - offset(vec8(texture_data[0]), 2*j), - brw_imm_f(1.0/num_samples)); + if (key-texture_data_type == BRW_REGISTER_TYPE_F) { + /* Scale the result down by a factor of num_samples */ + /* TODO: should use a smaller loop bound for non-RGBA formats */ + for (int j = 0; j 4; ++j) { + brw_MUL(func, offset(texture_data[0], 2*j), + offset(vec8(texture_data[0]), 2*j), + brw_imm_f(1.0/num_samples)); + } } } @@ -1319,7 +1332,8 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst, void brw_blorp_blit_program::render_target_write() { - struct brw_reg mrf_rt_write =
[Mesa-dev] [PATCH 3/7] i965/blorp: Fix integer downsampling on Gen7.
When downsampling an integer-format buffer on Gen7, we need to use the avg instruction rather than the add instruction, to ensure that we don't overflow the range of 32-bit integers. Also, we need to use the proper register type (BRW_REGISTER_TYPE_D or BRW_REGISTER_TYPE_UD) for intermediate color data and for writing to the render target. Note: this patch causes blorp to use the proper register type for all operations (downsampling, upsampling, and ordinary blits). Strictly speaking, this is only necessary for downsampling, because the other operations exclusively use MOV instructions on the color data. But it's simpler to use the proper register type in all cases. --- src/mesa/drivers/dri/i965/brw_blorp.h|5 ++ src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 61 +- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index 053eef7..9af492d 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -223,6 +223,11 @@ struct brw_blorp_blit_prog_key /* Actual MSAA layout used by the destination image. */ intel_msaa_layout dst_layout; + /* Type of the data to be read from the texture (one of +* BRW_REGISTER_TYPE_{UD,D,F}). +*/ + unsigned texture_data_type; + /* True if the source image is W tiled. If true, the surface state for the * source image must be configured as Y tiled, and tex_samples must be 0. */ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 74ae52d..32fd48e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -696,7 +696,9 @@ brw_blorp_blit_program::alloc_regs() alloc_push_const_regs(reg); reg += BRW_BLORP_NUM_PUSH_CONST_REGS; for (unsigned i = 0; i ARRAY_SIZE(texture_data); ++i) { - this-texture_data[i] = vec16(brw_vec8_grf(reg, 0)); reg += 8; + this-texture_data[i] = + retype(vec16(brw_vec8_grf(reg, 0)), key-texture_data_type); + reg += 8; } this-mcs_data = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8; @@ -1117,7 +1119,16 @@ brw_blorp_blit_program::manual_blend() * operations we do is equal to the number of trailing 1 bits in i. This * works provided the total number of samples is a power of two, which it * always is for i965. +* +* For integer formats, we replace the add operations with average +* operations and skip the final division. */ + typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg); + brw_op2_ptr combine_op = + key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG; unsigned stack_depth = 0; for (int i = 0; i num_samples; ++i) { assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ @@ -1139,9 +1150,9 @@ brw_blorp_blit_program::manual_blend() /* TODO: should use a smaller loop bound for non_RGBA formats */ for (int k = 0; k 4; ++k) { -brw_ADD(func, offset(texture_data[stack_depth - 1], 2*k), -offset(vec8(texture_data[stack_depth - 1]), 2*k), -offset(vec8(texture_data[stack_depth]), 2*k)); +combine_op(func, offset(texture_data[stack_depth - 1], 2*k), + offset(vec8(texture_data[stack_depth - 1]), 2*k), + offset(vec8(texture_data[stack_depth]), 2*k)); } } } @@ -1149,12 +1160,14 @@ brw_blorp_blit_program::manual_blend() /* We should have just 1 sample on the stack now. */ assert(stack_depth == 1); - /* Scale the result down by a factor of num_samples */ - /* TODO: should use a smaller loop bound for non-RGBA formats */ - for (int j = 0; j 4; ++j) { - brw_MUL(func, offset(texture_data[0], 2*j), - offset(vec8(texture_data[0]), 2*j), - brw_imm_f(1.0/num_samples)); + if (key-texture_data_type == BRW_REGISTER_TYPE_F) { + /* Scale the result down by a factor of num_samples */ + /* TODO: should use a smaller loop bound for non-RGBA formats */ + for (int j = 0; j 4; ++j) { + brw_MUL(func, offset(texture_data[0], 2*j), + offset(vec8(texture_data[0]), 2*j), + brw_imm_f(1.0/num_samples)); + } } } @@ -1319,7 +1332,8 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst, void brw_blorp_blit_program::render_target_write() { - struct brw_reg mrf_rt_write = vec16(brw_message_reg(base_mrf)); + struct brw_reg mrf_rt_write = + retype(vec16(brw_message_reg(base_mrf)), key-texture_data_type); int mrf_offset = 0; /* If we may have