This looks good to me, albeit I know nothing about the hw.
So VI could do (just with some restrictions) even full-speed fp32 denorms
whereas SI/CI can't? Interesting, I suppose that would be intended for
compute. Intel x86 can't even do that (actually, I think Skylake can),
though certainly other CPUs have been able to do that for ages.

(Albeit there's still nothing in the GLSL spec which says this is
required for fp16 pack...)

Roland

Am 06.02.2016 um 13:15 schrieb Marek Olšák:
> From: Marek Olšák <marek.ol...@amd.com>
> 
> This fixes FP16 conversion instructions for VI, which has 16-bit floats,
> but not SI & CI, which can't disable denorms for those instructions.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 14 ++++++++++++++
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++++++++++++------
>  src/gallium/drivers/radeonsi/sid.h              |  3 +++
>  3 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index a4680ce..3f1db70 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4155,6 +4155,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>  
>       si_shader_binary_read_config(binary, conf, 0);
>  
> +     /* Enable 64-bit and 16-bit denormals, because there is no performance
> +      * cost.
> +      *
> +      * If denormals are enabled, all floating-point output modifiers are
> +      * ignored.
> +      *
> +      * Don't enable denormals for 32-bit floats, because:
> +      * - Floating-point output modifiers would be ignored by the hw.
> +      * - Some opcodes don't support denormals, such as v_mad_f32. We would
> +      *   have to stop using those.
> +      * - SI & CI would be very slow.
> +      */
> +     conf->float_mode |= V_00B028_FP_64_DENORMS;
> +
>       FREE(binary->config);
>       FREE(binary->global_symbol_offsets);
>       binary->config = NULL;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index ce795c0..77a4e47 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
>       shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 
> 4) |
>                          S_00B528_SGPRS((num_sgprs - 1) / 8) |
>                          S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -                        S_00B528_DX10_CLAMP(1);
> +                        S_00B528_DX10_CLAMP(1) |
> +                        S_00B528_FLOAT_MODE(shader->config.float_mode);
>       shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
>                          
> S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
>  }
> @@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
>       si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
>                      S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
>                      S_00B428_SGPRS((num_sgprs - 1) / 8) |
> -                    S_00B428_DX10_CLAMP(1));
> +                    S_00B428_DX10_CLAMP(1) |
> +                    S_00B428_FLOAT_MODE(shader->config.float_mode));
>       si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
>                      S_00B42C_USER_SGPR(num_user_sgprs) |
>                      
> S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
>                      S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
>                      S_00B328_SGPRS((num_sgprs - 1) / 8) |
>                      S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -                    S_00B328_DX10_CLAMP(1));
> +                    S_00B328_DX10_CLAMP(1) |
> +                    S_00B328_FLOAT_MODE(shader->config.float_mode));
>       si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
>                      S_00B32C_USER_SGPR(num_user_sgprs) |
>                      
> S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
>       si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
>                      S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
>                      S_00B228_SGPRS((num_sgprs - 1) / 8) |
> -                    S_00B228_DX10_CLAMP(1));
> +                    S_00B228_DX10_CLAMP(1) |
> +                    S_00B228_FLOAT_MODE(shader->config.float_mode));
>       si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
>                      S_00B22C_USER_SGPR(num_user_sgprs) |
>                      
> S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct 
> si_shader *gs)
>                      S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
>                      S_00B128_SGPRS((num_sgprs - 1) / 8) |
>                      S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -                    S_00B128_DX10_CLAMP(1));
> +                    S_00B128_DX10_CLAMP(1) |
> +                    S_00B128_FLOAT_MODE(shader->config.float_mode));
>       si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
>                      S_00B12C_USER_SGPR(num_user_sgprs) |
>                      S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
> @@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
>       si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
>                      S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
>                      S_00B028_SGPRS((num_sgprs - 1) / 8) |
> -                    S_00B028_DX10_CLAMP(1));
> +                    S_00B028_DX10_CLAMP(1) |
> +                    S_00B028_FLOAT_MODE(shader->config.float_mode));
>       si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
>                      S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
>                      S_00B02C_USER_SGPR(num_user_sgprs) |
> diff --git a/src/gallium/drivers/radeonsi/sid.h 
> b/src/gallium/drivers/radeonsi/sid.h
> index 9e1e158..8920847 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -2845,6 +2845,9 @@
>  #define   S_00B028_FLOAT_MODE(x)                                      (((x) 
> & 0xFF) << 12)
>  #define   G_00B028_FLOAT_MODE(x)                                      (((x) 
> >> 12) & 0xFF)
>  #define   C_00B028_FLOAT_MODE                                         
> 0xFFF00FFF
> +#define     V_00B028_FP_32_DENORMS                                   0x30
> +#define     V_00B028_FP_64_DENORMS                                   0xc0
> +#define     V_00B028_FP_ALL_DENORMS                                  0xf0
>  #define   S_00B028_PRIV(x)                                            (((x) 
> & 0x1) << 20)
>  #define   G_00B028_PRIV(x)                                            (((x) 
> >> 20) & 0x1)
>  #define   C_00B028_PRIV                                               
> 0xFFEFFFFF
> 

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to