Re: [Mesa-dev] [PATCH v2 21.5/22 (was 18/22)] intel: Use TXS for image_size when we have a typed surface

2018-08-29 Thread Kenneth Graunke
On Wednesday, August 29, 2018 10:12:59 AM PDT Jason Ekstrand wrote:
> ---
>  src/intel/compiler/brw_eu_defines.h   |  2 ++
>  src/intel/compiler/brw_fs_generator.cpp   | 23 +---
>  src/intel/compiler/brw_fs_nir.cpp | 35 +++
>  .../compiler/brw_nir_lower_image_load_store.c | 15 
>  src/intel/compiler/brw_shader.cpp |  3 ++
>  5 files changed, 74 insertions(+), 4 deletions(-)
> 
> diff --git a/src/intel/compiler/brw_eu_defines.h 
> b/src/intel/compiler/brw_eu_defines.h
> index 883616d6bab..52957882b10 100644
> --- a/src/intel/compiler/brw_eu_defines.h
> +++ b/src/intel/compiler/brw_eu_defines.h
> @@ -354,6 +354,8 @@ enum opcode {
> SHADER_OPCODE_SAMPLEINFO,
> SHADER_OPCODE_SAMPLEINFO_LOGICAL,
>  
> +   SHADER_OPCODE_IMAGE_SIZE,
> +
> /**
>  * Combines multiple sources of size 1 into a larger virtual GRF.
>  * For example, parameters for a send-from-GRF message.  Or, updating
> diff --git a/src/intel/compiler/brw_fs_generator.cpp 
> b/src/intel/compiler/brw_fs_generator.cpp
> index d40ce2ce0d7..cb402cd4e75 100644
> --- a/src/intel/compiler/brw_fs_generator.cpp
> +++ b/src/intel/compiler/brw_fs_generator.cpp
> @@ -958,6 +958,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
> dst, struct brw_reg src
>   }
>   break;
>case SHADER_OPCODE_TXS:
> +  case SHADER_OPCODE_IMAGE_SIZE:
>msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
>break;
>case SHADER_OPCODE_TXD:
> @@ -1126,10 +1127,19 @@ fs_generator::generate_tex(fs_inst *inst, struct 
> brw_reg dst, struct brw_reg src
>}
> }
>  
> -   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
> - inst->opcode == SHADER_OPCODE_TG4_OFFSET)
> - ? prog_data->binding_table.gather_texture_start
> - : prog_data->binding_table.texture_start;
> +   uint32_t base_binding_table_index;
> +   switch (inst->opcode) {
> +   case SHADER_OPCODE_TG4:
> +   case SHADER_OPCODE_TG4_OFFSET:
> +  base_binding_table_index = 
> prog_data->binding_table.gather_texture_start;
> +  break;
> +   case SHADER_OPCODE_IMAGE_SIZE:
> +  base_binding_table_index = prog_data->binding_table.image_start;
> +  break;
> +   default:
> +  base_binding_table_index = prog_data->binding_table.texture_start;
> +  break;
> +   }
>  
> if (surface_index.file == BRW_IMMEDIATE_VALUE &&
> sampler_index.file == BRW_IMMEDIATE_VALUE) {
> @@ -2114,6 +2124,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>case SHADER_OPCODE_SAMPLEINFO:
>generate_tex(inst, dst, src[0], src[1], src[2]);
>break;
> +
> +  case SHADER_OPCODE_IMAGE_SIZE:
> + generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
> + break;
> +
>case FS_OPCODE_DDX_COARSE:
>case FS_OPCODE_DDX_FINE:
>   generate_ddx(inst, dst, src[0]);
> diff --git a/src/intel/compiler/brw_fs_nir.cpp 
> b/src/intel/compiler/brw_fs_nir.cpp
> index aaba0e2a693..2fef050f81a 100644
> --- a/src/intel/compiler/brw_fs_nir.cpp
> +++ b/src/intel/compiler/brw_fs_nir.cpp
> @@ -3918,6 +3918,41 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
> nir_intrinsic_instr *instr
>break;
> }
>  
> +   case nir_intrinsic_image_size: {
> +  /* Unlike the [un]typed load and store opcodes, the TXS that this turns
> +   * into will handle the binding table index for us in the generator.
> +   */
> +  fs_reg image = retype(get_nir_src_imm(instr->src[0]),
> +BRW_REGISTER_TYPE_UD);
> +  image = bld.emit_uniformize(image);
> +
> +  /* Since the image size is always uniform, we can just emit a SIMD8
> +   * query instruction and splat the result out.
> +   */
> +  const fs_builder ubld = bld.exec_all().group(8, 0);

Ah good, you remembered the exec_all().  Matt just reminded me about it.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 21.5/22 (was 18/22)] intel: Use TXS for image_size when we have a typed surface

2018-08-29 Thread Jason Ekstrand
---
 src/intel/compiler/brw_eu_defines.h   |  2 ++
 src/intel/compiler/brw_fs_generator.cpp   | 23 +---
 src/intel/compiler/brw_fs_nir.cpp | 35 +++
 .../compiler/brw_nir_lower_image_load_store.c | 15 
 src/intel/compiler/brw_shader.cpp |  3 ++
 5 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_eu_defines.h 
b/src/intel/compiler/brw_eu_defines.h
index 883616d6bab..52957882b10 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -354,6 +354,8 @@ enum opcode {
SHADER_OPCODE_SAMPLEINFO,
SHADER_OPCODE_SAMPLEINFO_LOGICAL,
 
+   SHADER_OPCODE_IMAGE_SIZE,
+
/**
 * Combines multiple sources of size 1 into a larger virtual GRF.
 * For example, parameters for a send-from-GRF message.  Or, updating
diff --git a/src/intel/compiler/brw_fs_generator.cpp 
b/src/intel/compiler/brw_fs_generator.cpp
index d40ce2ce0d7..cb402cd4e75 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -958,6 +958,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
  }
  break;
   case SHADER_OPCODE_TXS:
+  case SHADER_OPCODE_IMAGE_SIZE:
 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
 break;
   case SHADER_OPCODE_TXD:
@@ -1126,10 +1127,19 @@ fs_generator::generate_tex(fs_inst *inst, struct 
brw_reg dst, struct brw_reg src
   }
}
 
-   uint32_t base_binding_table_index = (inst->opcode == SHADER_OPCODE_TG4 ||
- inst->opcode == SHADER_OPCODE_TG4_OFFSET)
- ? prog_data->binding_table.gather_texture_start
- : prog_data->binding_table.texture_start;
+   uint32_t base_binding_table_index;
+   switch (inst->opcode) {
+   case SHADER_OPCODE_TG4:
+   case SHADER_OPCODE_TG4_OFFSET:
+  base_binding_table_index = prog_data->binding_table.gather_texture_start;
+  break;
+   case SHADER_OPCODE_IMAGE_SIZE:
+  base_binding_table_index = prog_data->binding_table.image_start;
+  break;
+   default:
+  base_binding_table_index = prog_data->binding_table.texture_start;
+  break;
+   }
 
if (surface_index.file == BRW_IMMEDIATE_VALUE &&
sampler_index.file == BRW_IMMEDIATE_VALUE) {
@@ -2114,6 +2124,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
   case SHADER_OPCODE_SAMPLEINFO:
 generate_tex(inst, dst, src[0], src[1], src[2]);
 break;
+
+  case SHADER_OPCODE_IMAGE_SIZE:
+ generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
+ break;
+
   case FS_OPCODE_DDX_COARSE:
   case FS_OPCODE_DDX_FINE:
  generate_ddx(inst, dst, src[0]);
diff --git a/src/intel/compiler/brw_fs_nir.cpp 
b/src/intel/compiler/brw_fs_nir.cpp
index aaba0e2a693..2fef050f81a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3918,6 +3918,41 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   break;
}
 
+   case nir_intrinsic_image_size: {
+  /* Unlike the [un]typed load and store opcodes, the TXS that this turns
+   * into will handle the binding table index for us in the generator.
+   */
+  fs_reg image = retype(get_nir_src_imm(instr->src[0]),
+BRW_REGISTER_TYPE_UD);
+  image = bld.emit_uniformize(image);
+
+  /* Since the image size is always uniform, we can just emit a SIMD8
+   * query instruction and splat the result out.
+   */
+  const fs_builder ubld = bld.exec_all().group(8, 0);
+
+  /* The LOD also serves as the message payload */
+  fs_reg lod = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+  ubld.MOV(lod, brw_imm_ud(0));
+
+  fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+  fs_inst *inst = ubld.emit(SHADER_OPCODE_IMAGE_SIZE, tmp, lod, image);
+  inst->mlen = 1;
+  inst->size_written = 4 * REG_SIZE;
+
+  for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
+ if (c == 2 && nir_intrinsic_image_dim(instr) == 
GLSL_SAMPLER_DIM_CUBE) {
+bld.emit(SHADER_OPCODE_INT_QUOTIENT,
+ offset(retype(dest, tmp.type), bld, c),
+ component(offset(tmp, ubld, c), 0), brw_imm_ud(6));
+ } else {
+bld.MOV(offset(retype(dest, tmp.type), bld, c),
+component(offset(tmp, ubld, c), 0));
+ }
+  }
+  break;
+   }
+
case nir_intrinsic_image_load_raw_intel: {
   const fs_reg image = get_nir_image_intrinsic_image(bld, instr);
   const fs_reg addr = retype(get_nir_src(instr->src[1]),
diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c 
b/src/intel/compiler/brw_nir_lower_image_load_store.c
index 5eba9ddabd3..e8e00e1aa19 100644
--- a/src/intel/compiler/brw_nir_lower_image_load_store.c
+++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
@@ -725,6 +725,21 @@