Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 4:45 PM, Jason Ekstrand wrote:
On Thu, Mar 28, 2019 at 10:27 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


Calling it after the first call of radv_optimize_nir() is worse.


Ugh... Do we know why?  I mean, it does emit the offset calculations 
in a slightly different order but I wouldn't expect it to hurt this 
bad. :-/

I don't know exactly what the problem is, I will investigate soon.


27747 shaders in 14347 tests
Totals:
SGPRS: 1248039 -> 1248031 (-0.00 %)
VGPRS: 868360 -> 868772 (0.05 %)
Spilled SGPRs: 24108 -> 24134 (0.11 %)
Spilled VGPRs: 122 -> 122 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 128 -> 128 (0.00 %) dwords per thread
Code Size: 46282020 -> 46336692 (0.12 %) bytes
LDS: 770 -> 770 (0.00 %) blocks
Max Waves: 199898 -> 199871 (-0.01 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 52848 -> 52840 (-0.02 %)
VGPRS: 47472 -> 47884 (0.87 %)
Spilled SGPRs: 5079 -> 5105 (0.51 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4760924 -> 4815596 (1.15 %) bytes
LDS: 26 -> 26 (0.00 %) blocks
Max Waves: 3084 -> 3057 (-0.88 %)
Wait states: 0 -> 0 (0.00 %)

On 3/28/19 3:08 PM, Jason Ekstrand wrote:

On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset
mailto:samuel.pitoi...@gmail.com>> wrote:

This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx,
instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac,
values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct
radv_device *device,
                        }
                }
                const struct spirv_to_nir_options
spirv_options = {
-  .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct
radv_device *device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir,
nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to
call this *after* your first call to your main optimization loop.
Otherwise, the change is basically just a no-op which just
shuffles around the way address calculations are done a bit.

--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
   

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Thu, Mar 28, 2019 at 10:27 AM Samuel Pitoiset 
wrote:

> Calling it after the first call of radv_optimize_nir() is worse.
>

Ugh... Do we know why?  I mean, it does emit the offset calculations in a
slightly different order but I wouldn't expect it to hurt this bad. :-/


> 27747 shaders in 14347 tests
> Totals:
> SGPRS: 1248039 -> 1248031 (-0.00 %)
> VGPRS: 868360 -> 868772 (0.05 %)
> Spilled SGPRs: 24108 -> 24134 (0.11 %)
> Spilled VGPRs: 122 -> 122 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 128 -> 128 (0.00 %) dwords per thread
> Code Size: 46282020 -> 46336692 (0.12 %) bytes
> LDS: 770 -> 770 (0.00 %) blocks
> Max Waves: 199898 -> 199871 (-0.01 %)
> Wait states: 0 -> 0 (0.00 %)
>
> Totals from affected shaders:
> SGPRS: 52848 -> 52840 (-0.02 %)
> VGPRS: 47472 -> 47884 (0.87 %)
> Spilled SGPRs: 5079 -> 5105 (0.51 %)
> Spilled VGPRs: 0 -> 0 (0.00 %)
> Private memory VGPRs: 0 -> 0 (0.00 %)
> Scratch size: 0 -> 0 (0.00 %) dwords per thread
> Code Size: 4760924 -> 4815596 (1.15 %) bytes
> LDS: 26 -> 26 (0.00 %) blocks
> Max Waves: 3084 -> 3057 (-0.88 %)
> Wait states: 0 -> 0 (0.00 %)
> On 3/28/19 3:08 PM, Jason Ekstrand wrote:
>
> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
> wrote:
>
>> This helps few compute shaders, mostly for F12017.
>>
>> 27670 shaders in 14347 tests
>> Totals:
>> SGPRS: 1231173 -> 1231173 (0.00 %)
>> VGPRS: 866056 -> 865928 (-0.01 %)
>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>> Max Waves: 232287 -> 232302 (0.01 %)
>>
>> Totals from affected shaders:
>> SGPRS: 24624 -> 24624 (0.00 %)
>> VGPRS: 25960 -> 25832 (-0.49 %)
>> Spilled SGPRs: 0 -> 0 (0.00 %)
>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>> Max Waves: 1216 -> 1231 (1.23 %)
>>
>> Suggested-by: 
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>  src/amd/vulkan/radv_shader.c| 5 -
>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>> b/src/amd/common/ac_nir_to_llvm.c
>> index b25cc6a0a84..c46d98e6dd9 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>> *ctx,
>> case nir_intrinsic_vulkan_resource_reindex:
>> result = visit_vulkan_resource_reindex(ctx, instr);
>> break;
>> +   case nir_intrinsic_load_vulkan_descriptor: {
>> +   LLVMValueRef values[2] = {
>> +   get_src(ctx, instr->src[0]),
>> +   ctx->ac.i32_0,
>> +   };
>> +   result = ac_build_gather_values(&ctx->ac, values, 2);
>> +   break;
>> +   }
>> case nir_intrinsic_store_ssbo:
>> visit_store_ssbo(ctx, instr);
>> break;
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 19a807df199..2751302e8b9 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> }
>> }
>> const struct spirv_to_nir_options spirv_options = {
>> -   .lower_ubo_ssbo_access_to_offsets = true,
>> .caps = {
>> .descriptor_array_dynamic_indexing = true,
>> .device_group = true,
>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>> *device,
>> NIR_PASS_V(nir, nir_lower_system_values);
>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>> NIR_PASS_V(nir, nir_lower_frexp);
>> +
>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>> + nir_address_format_32bit_index_offset);
>>
>
> If you actually want to get SSBO access optimization, you need to call
> this *after* your first call to your main optimization loop.  Otherwise,
> the change is basically just a no-op which just shuffles around the way
> address calculations are done a bit.
>
> --Jason
>
>
>> }
>>
>> /* Vulkan uses the separate-shader linking model */
>> --
>> 2.21.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset

Calling it after the first call of radv_optimize_nir() is worse.

27747 shaders in 14347 tests
Totals:
SGPRS: 1248039 -> 1248031 (-0.00 %)
VGPRS: 868360 -> 868772 (0.05 %)
Spilled SGPRs: 24108 -> 24134 (0.11 %)
Spilled VGPRs: 122 -> 122 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 128 -> 128 (0.00 %) dwords per thread
Code Size: 46282020 -> 46336692 (0.12 %) bytes
LDS: 770 -> 770 (0.00 %) blocks
Max Waves: 199898 -> 199871 (-0.01 %)
Wait states: 0 -> 0 (0.00 %)

Totals from affected shaders:
SGPRS: 52848 -> 52840 (-0.02 %)
VGPRS: 47472 -> 47884 (0.87 %)
Spilled SGPRs: 5079 -> 5105 (0.51 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 4760924 -> 4815596 (1.15 %) bytes
LDS: 26 -> 26 (0.00 %) blocks
Max Waves: 3084 -> 3057 (-0.88 %)
Wait states: 0 -> 0 (0.00 %)

On 3/28/19 3:08 PM, Jason Ekstrand wrote:
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to call 
this *after* your first call to your main optimization loop.  
Otherwise, the change is basically just a no-op which just shuffles 
around the way address calculations are done a bit.


--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 3:08 PM, Jason Ekstrand wrote:
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);


If you actually want to get SSBO access optimization, you need to call 
this *after* your first call to your main optimization loop.  
Otherwise, the change is basically just a no-op which just shuffles 
around the way address calculations are done a bit.

Oh okay, let me try that.


--Jason

        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Thu, Mar 28, 2019 at 9:29 AM Jason Ekstrand  wrote:

> I did some benchmarking of Assassin's Creed Odyssey today on ANV.
> Disabling the SSBO optimization possibilities in ANV that moving to derefs
> unlocks (I just moved the lowering super-early; moving back to index/offset
> would be insane) drops the perf of ACO by 20%.  You want SSBO derefs. :D
>

Or not... Apparently the benchmark numbers aren't as reproducible as one
would like.  In any case, if you put the lowering after optimization, you
should see a more noticeable (and positive) vkpipeline-db result.

--Jason


> On Thu, Mar 28, 2019 at 9:08 AM Jason Ekstrand 
> wrote:
>
>> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset <
>> samuel.pitoi...@gmail.com> wrote:
>>
>>> This helps few compute shaders, mostly for F12017.
>>>
>>> 27670 shaders in 14347 tests
>>> Totals:
>>> SGPRS: 1231173 -> 1231173 (0.00 %)
>>> VGPRS: 866056 -> 865928 (-0.01 %)
>>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>>> Max Waves: 232287 -> 232302 (0.01 %)
>>>
>>> Totals from affected shaders:
>>> SGPRS: 24624 -> 24624 (0.00 %)
>>> VGPRS: 25960 -> 25832 (-0.49 %)
>>> Spilled SGPRs: 0 -> 0 (0.00 %)
>>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>>> Max Waves: 1216 -> 1231 (1.23 %)
>>>
>>> Suggested-by: 
>>> Signed-off-by: Samuel Pitoiset 
>>> ---
>>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>>  src/amd/vulkan/radv_shader.c| 5 -
>>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>>> b/src/amd/common/ac_nir_to_llvm.c
>>> index b25cc6a0a84..c46d98e6dd9 100644
>>> --- a/src/amd/common/ac_nir_to_llvm.c
>>> +++ b/src/amd/common/ac_nir_to_llvm.c
>>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>>> *ctx,
>>> case nir_intrinsic_vulkan_resource_reindex:
>>> result = visit_vulkan_resource_reindex(ctx, instr);
>>> break;
>>> +   case nir_intrinsic_load_vulkan_descriptor: {
>>> +   LLVMValueRef values[2] = {
>>> +   get_src(ctx, instr->src[0]),
>>> +   ctx->ac.i32_0,
>>> +   };
>>> +   result = ac_build_gather_values(&ctx->ac, values, 2);
>>> +   break;
>>> +   }
>>> case nir_intrinsic_store_ssbo:
>>> visit_store_ssbo(ctx, instr);
>>> break;
>>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>>> index 19a807df199..2751302e8b9 100644
>>> --- a/src/amd/vulkan/radv_shader.c
>>> +++ b/src/amd/vulkan/radv_shader.c
>>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>> }
>>> }
>>> const struct spirv_to_nir_options spirv_options = {
>>> -   .lower_ubo_ssbo_access_to_offsets = true,
>>> .caps = {
>>> .descriptor_array_dynamic_indexing =
>>> true,
>>> .device_group = true,
>>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>>> *device,
>>> NIR_PASS_V(nir, nir_lower_system_values);
>>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>>> NIR_PASS_V(nir, nir_lower_frexp);
>>> +
>>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>>> + nir_address_format_32bit_index_offset);
>>>
>>
>> If you actually want to get SSBO access optimization, you need to call
>> this *after* your first call to your main optimization loop.  Otherwise,
>> the change is basically just a no-op which just shuffles around the way
>> address calculations are done a bit.
>>
>> --Jason
>>
>>
>>> }
>>>
>>> /* Vulkan uses the separate-shader linking model */
>>> --
>>> 2.21.0
>>>
>>> ___
>>> mesa-dev mailing list
>>> mesa-dev@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
I did some benchmarking of Assassin's Creed Odyssey today on ANV.
Disabling the SSBO optimization possibilities in ANV that moving to derefs
unlocks (I just moved the lowering super-early; moving back to index/offset
would be insane) drops the perf of ACO by 20%.  You want SSBO derefs. :D

On Thu, Mar 28, 2019 at 9:08 AM Jason Ekstrand  wrote:

> On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
> wrote:
>
>> This helps few compute shaders, mostly for F12017.
>>
>> 27670 shaders in 14347 tests
>> Totals:
>> SGPRS: 1231173 -> 1231173 (0.00 %)
>> VGPRS: 866056 -> 865928 (-0.01 %)
>> Spilled SGPRs: 24201 -> 24201 (0.00 %)
>> Code Size: 46137040 -> 46144868 (0.02 %) bytes
>> Max Waves: 232287 -> 232302 (0.01 %)
>>
>> Totals from affected shaders:
>> SGPRS: 24624 -> 24624 (0.00 %)
>> VGPRS: 25960 -> 25832 (-0.49 %)
>> Spilled SGPRs: 0 -> 0 (0.00 %)
>> Code Size: 2922632 -> 2930460 (0.27 %) bytes
>> Max Waves: 1216 -> 1231 (1.23 %)
>>
>> Suggested-by: 
>> Signed-off-by: Samuel Pitoiset 
>> ---
>>  src/amd/common/ac_nir_to_llvm.c | 8 
>>  src/amd/vulkan/radv_shader.c| 5 -
>>  2 files changed, 12 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/amd/common/ac_nir_to_llvm.c
>> b/src/amd/common/ac_nir_to_llvm.c
>> index b25cc6a0a84..c46d98e6dd9 100644
>> --- a/src/amd/common/ac_nir_to_llvm.c
>> +++ b/src/amd/common/ac_nir_to_llvm.c
>> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
>> *ctx,
>> case nir_intrinsic_vulkan_resource_reindex:
>> result = visit_vulkan_resource_reindex(ctx, instr);
>> break;
>> +   case nir_intrinsic_load_vulkan_descriptor: {
>> +   LLVMValueRef values[2] = {
>> +   get_src(ctx, instr->src[0]),
>> +   ctx->ac.i32_0,
>> +   };
>> +   result = ac_build_gather_values(&ctx->ac, values, 2);
>> +   break;
>> +   }
>> case nir_intrinsic_store_ssbo:
>> visit_store_ssbo(ctx, instr);
>> break;
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 19a807df199..2751302e8b9 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> }
>> }
>> const struct spirv_to_nir_options spirv_options = {
>> -   .lower_ubo_ssbo_access_to_offsets = true,
>> .caps = {
>> .descriptor_array_dynamic_indexing = true,
>> .device_group = true,
>> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
>> *device,
>> NIR_PASS_V(nir, nir_lower_system_values);
>> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
>> NIR_PASS_V(nir, nir_lower_frexp);
>> +
>> +   NIR_PASS_V(nir, nir_lower_explicit_io,
>> + nir_var_mem_ubo | nir_var_mem_ssbo,
>> + nir_address_format_32bit_index_offset);
>>
>
> If you actually want to get SSBO access optimization, you need to call
> this *after* your first call to your main optimization loop.  Otherwise,
> the change is basically just a no-op which just shuffles around the way
> address calculations are done a bit.
>
> --Jason
>
>
>> }
>>
>> /* Vulkan uses the separate-shader linking model */
>> --
>> 2.21.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Jason Ekstrand
On Wed, Mar 27, 2019 at 4:13 AM Samuel Pitoiset 
wrote:

> This helps few compute shaders, mostly for F12017.
>
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1231173 (0.00 %)
> VGPRS: 866056 -> 865928 (-0.01 %)
> Spilled SGPRs: 24201 -> 24201 (0.00 %)
> Code Size: 46137040 -> 46144868 (0.02 %) bytes
> Max Waves: 232287 -> 232302 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 24624 -> 24624 (0.00 %)
> VGPRS: 25960 -> 25832 (-0.49 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Code Size: 2922632 -> 2930460 (0.27 %) bytes
> Max Waves: 1216 -> 1231 (1.23 %)
>
> Suggested-by: 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 8 
>  src/amd/vulkan/radv_shader.c| 5 -
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index b25cc6a0a84..c46d98e6dd9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_vulkan_resource_reindex:
> result = visit_vulkan_resource_reindex(ctx, instr);
> break;
> +   case nir_intrinsic_load_vulkan_descriptor: {
> +   LLVMValueRef values[2] = {
> +   get_src(ctx, instr->src[0]),
> +   ctx->ac.i32_0,
> +   };
> +   result = ac_build_gather_values(&ctx->ac, values, 2);
> +   break;
> +   }
> case nir_intrinsic_store_ssbo:
> visit_store_ssbo(ctx, instr);
> break;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 19a807df199..2751302e8b9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
> }
> }
> const struct spirv_to_nir_options spirv_options = {
> -   .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
> NIR_PASS_V(nir, nir_lower_system_values);
> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
> NIR_PASS_V(nir, nir_lower_frexp);
> +
> +   NIR_PASS_V(nir, nir_lower_explicit_io,
> + nir_var_mem_ubo | nir_var_mem_ssbo,
> + nir_address_format_32bit_index_offset);
>

If you actually want to get SSBO access optimization, you need to call this
*after* your first call to your main optimization loop.  Otherwise, the
change is basically just a no-op which just shuffles around the way address
calculations are done a bit.

--Jason


> }
>
> /* Vulkan uses the separate-shader linking model */
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Samuel Pitoiset


On 3/28/19 10:18 AM, Bas Nieuwenhuizen wrote:

R-b

Though not sure it really helps given code size increase?
I haven't benchmarked F12017 yet. The code size increase seems to be 
LLVM-related, but that shouldn't matter much.


On Wed, Mar 27, 2019, 10:13 AM Samuel Pitoiset 
mailto:samuel.pitoi...@gmail.com>> wrote:


This helps few compute shaders, mostly for F12017.

27670 shaders in 14347 tests
Totals:
SGPRS: 1231173 -> 1231173 (0.00 %)
VGPRS: 866056 -> 865928 (-0.01 %)
Spilled SGPRs: 24201 -> 24201 (0.00 %)
Code Size: 46137040 -> 46144868 (0.02 %) bytes
Max Waves: 232287 -> 232302 (0.01 %)

Totals from affected shaders:
SGPRS: 24624 -> 24624 (0.00 %)
VGPRS: 25960 -> 25832 (-0.49 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Code Size: 2922632 -> 2930460 (0.27 %) bytes
Max Waves: 1216 -> 1231 (1.23 %)

Suggested-by: Jason Ekstrand <ja...@jlekstrand.net>
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 8 
 src/amd/vulkan/radv_shader.c    | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c
b/src/amd/common/ac_nir_to_llvm.c
index b25cc6a0a84..c46d98e6dd9 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct
ac_nir_context *ctx,
        case nir_intrinsic_vulkan_resource_reindex:
                result = visit_vulkan_resource_reindex(ctx, instr);
                break;
+       case nir_intrinsic_load_vulkan_descriptor: {
+               LLVMValueRef values[2] = {
+                       get_src(ctx, instr->src[0]),
+                       ctx->ac.i32_0,
+               };
+               result = ac_build_gather_values(&ctx->ac, values, 2);
+               break;
+       }
        case nir_intrinsic_store_ssbo:
                visit_store_ssbo(ctx, instr);
                break;
diff --git a/src/amd/vulkan/radv_shader.c
b/src/amd/vulkan/radv_shader.c
index 19a807df199..2751302e8b9 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                        }
                }
                const struct spirv_to_nir_options spirv_options = {
-                       .lower_ubo_ssbo_access_to_offsets = true,
                        .caps = {
.descriptor_array_dynamic_indexing = true,
                                .device_group = true,
@@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device
*device,
                NIR_PASS_V(nir, nir_lower_system_values);
                NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
                NIR_PASS_V(nir, nir_lower_frexp);
+
+               NIR_PASS_V(nir, nir_lower_explicit_io,
+                         nir_var_mem_ubo | nir_var_mem_ssbo,
+  nir_address_format_32bit_index_offset);
        }

        /* Vulkan uses the separate-shader linking model */
-- 
2.21.0


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv: do not lower UBO/SSBO access to offsets

2019-03-28 Thread Bas Nieuwenhuizen
R-b

Though not sure it really helps given code size increase?

On Wed, Mar 27, 2019, 10:13 AM Samuel Pitoiset 
wrote:

> This helps few compute shaders, mostly for F12017.
>
> 27670 shaders in 14347 tests
> Totals:
> SGPRS: 1231173 -> 1231173 (0.00 %)
> VGPRS: 866056 -> 865928 (-0.01 %)
> Spilled SGPRs: 24201 -> 24201 (0.00 %)
> Code Size: 46137040 -> 46144868 (0.02 %) bytes
> Max Waves: 232287 -> 232302 (0.01 %)
>
> Totals from affected shaders:
> SGPRS: 24624 -> 24624 (0.00 %)
> VGPRS: 25960 -> 25832 (-0.49 %)
> Spilled SGPRs: 0 -> 0 (0.00 %)
> Code Size: 2922632 -> 2930460 (0.27 %) bytes
> Max Waves: 1216 -> 1231 (1.23 %)
>
> Suggested-by: 
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/common/ac_nir_to_llvm.c | 8 
>  src/amd/vulkan/radv_shader.c| 5 -
>  2 files changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c
> b/src/amd/common/ac_nir_to_llvm.c
> index b25cc6a0a84..c46d98e6dd9 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -3275,6 +3275,14 @@ static void visit_intrinsic(struct ac_nir_context
> *ctx,
> case nir_intrinsic_vulkan_resource_reindex:
> result = visit_vulkan_resource_reindex(ctx, instr);
> break;
> +   case nir_intrinsic_load_vulkan_descriptor: {
> +   LLVMValueRef values[2] = {
> +   get_src(ctx, instr->src[0]),
> +   ctx->ac.i32_0,
> +   };
> +   result = ac_build_gather_values(&ctx->ac, values, 2);
> +   break;
> +   }
> case nir_intrinsic_store_ssbo:
> visit_store_ssbo(ctx, instr);
> break;
> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
> index 19a807df199..2751302e8b9 100644
> --- a/src/amd/vulkan/radv_shader.c
> +++ b/src/amd/vulkan/radv_shader.c
> @@ -220,7 +220,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
> }
> }
> const struct spirv_to_nir_options spirv_options = {
> -   .lower_ubo_ssbo_access_to_offsets = true,
> .caps = {
> .descriptor_array_dynamic_indexing = true,
> .device_group = true,
> @@ -306,6 +305,10 @@ radv_shader_compile_to_nir(struct radv_device *device,
> NIR_PASS_V(nir, nir_lower_system_values);
> NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
> NIR_PASS_V(nir, nir_lower_frexp);
> +
> +   NIR_PASS_V(nir, nir_lower_explicit_io,
> + nir_var_mem_ubo | nir_var_mem_ssbo,
> + nir_address_format_32bit_index_offset);
> }
>
> /* Vulkan uses the separate-shader linking model */
> --
> 2.21.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev