Module: Mesa
Branch: master
Commit: 29c1f67e9f166da4393493d213ee06498aecac51
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=29c1f67e9f166da4393493d213ee06498aecac51

Author: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>
Date:   Sat Jan 28 23:51:19 2017 +0100

radv/ac: Add compiler support for spilling.

Based on code written by Dave Airlie.

Signed-off-by: Bas Nieuwenhuizen <ba...@google.com>
Reviewed-by: Dave Airlie <airl...@redhat.com>

---

 src/amd/common/ac_binary.c      | 30 +++++++++++++++++++-----------
 src/amd/common/ac_binary.h      |  4 +++-
 src/amd/common/ac_llvm_util.c   |  4 ++--
 src/amd/common/ac_llvm_util.h   |  2 +-
 src/amd/common/ac_nir_to_llvm.c | 16 +++++++++++-----
 src/amd/common/ac_nir_to_llvm.h |  6 ++++--
 src/amd/vulkan/radv_pipeline.c  |  3 ++-
 7 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/src/amd/common/ac_binary.c b/src/amd/common/ac_binary.c
index 01cf000..9c66a82 100644
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -212,23 +212,28 @@ static const char *scratch_rsrc_dword1_symbol =
 
 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct ac_shader_config *conf,
-                                 unsigned symbol_offset)
+                                 unsigned symbol_offset,
+                                 bool supports_spill)
 {
        unsigned i;
        const unsigned char *config =
                ac_shader_binary_config_start(binary, symbol_offset);
        bool really_needs_scratch = false;
-
+       uint32_t wavesize = 0;
        /* LLVM adds SGPR spills to the scratch size.
         * Find out if we really need the scratch buffer.
         */
-       for (i = 0; i < binary->reloc_count; i++) {
-               const struct ac_shader_reloc *reloc = &binary->relocs[i];
+       if (supports_spill) {
+               really_needs_scratch = true;
+       } else {
+               for (i = 0; i < binary->reloc_count; i++) {
+                       const struct ac_shader_reloc *reloc = 
&binary->relocs[i];
 
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
-                   !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-                       really_needs_scratch = true;
-                       break;
+                       if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+                           !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+                               really_needs_scratch = true;
+                               break;
+                       }
                }
        }
 
@@ -259,9 +264,7 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                case R_0286E8_SPI_TMPRING_SIZE:
                case R_00B860_COMPUTE_TMPRING_SIZE:
                        /* WAVESIZE is in units of 256 dwords. */
-                       if (really_needs_scratch)
-                               conf->scratch_bytes_per_wave =
-                                       G_00B860_WAVESIZE(value) * 256 * 4;
+                       wavesize = value;
                        break;
                case SPILLED_SGPRS:
                        conf->spilled_sgprs = value;
@@ -285,4 +288,9 @@ void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                if (!conf->spi_ps_input_addr)
                        conf->spi_ps_input_addr = conf->spi_ps_input_ena;
        }
+
+       if (really_needs_scratch) {
+               /* sgprs spills aren't spilling */
+               conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 
256 * 4;
+       }
 }
diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h
index 282f33d..06fd855 100644
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@@ -27,6 +27,7 @@
 #pragma once
 
 #include <stdint.h>
+#include <stdbool.h>
 
 struct ac_shader_reloc {
        char name[32];
@@ -85,4 +86,5 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 
 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
                                  struct ac_shader_config *conf,
-                                 unsigned symbol_offset);
+                                 unsigned symbol_offset,
+                                 bool supports_spill);
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 7317db7..f3cab92 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -126,11 +126,11 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
        }
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool 
supports_spill)
 {
        assert(family >= CHIP_TAHITI);
 
-       const char *triple = "amdgcn--";
+       const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
        LLVMTargetRef target = ac_get_llvm_target(triple);
        LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
                                     target,
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 2d301c9..c07f67a 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -56,7 +56,7 @@ struct ac_llvm_context {
        LLVMValueRef fpmath_md_2p5_ulp;
 };
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool 
supports_spill);
 
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
 bool ac_is_sgpr_param(LLVMValueRef param);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e83c7a2..dedea65 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -458,10 +458,10 @@ static void create_function(struct nir_to_llvm_context *ctx)
            arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
        set_llvm_calling_convention(ctx->main_function, ctx->stage);
 
-
        ctx->shader_info->num_input_sgprs = 0;
        ctx->shader_info->num_input_vgprs = 0;
 
+       ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0;
        for (i = 0; i < user_sgpr_count; i++)
                ctx->shader_info->num_user_sgprs += 
llvm_get_type_size(arg_types[i]) / 4;
 
@@ -475,6 +475,12 @@ static void create_function(struct nir_to_llvm_context *ctx)
 
        arg_idx = 0;
        user_sgpr_idx = 0;
+
+       if (ctx->options->supports_spill) {
+               set_userdata_location_shader(ctx, AC_UD_SCRATCH, user_sgpr_idx, 
2);
+               user_sgpr_idx += 2;
+       }
+
        for (unsigned i = 0; i < num_sets; ++i) {
                if (ctx->options->layout->set[i].layout->shader_stages & (1 << 
ctx->stage)) {
                        
set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], 
user_sgpr_idx, 2);
@@ -4432,7 +4438,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 
        memset(shader_info, 0, sizeof(*shader_info));
 
-       LLVMSetTarget(ctx.module, "amdgcn--");
+       LLVMSetTarget(ctx.module, options->supports_spill ? 
"amdgcn-mesa-mesa3d" : "amdgcn--");
        setup_types(&ctx);
 
        ctx.builder = LLVMCreateBuilderInContext(ctx.context);
@@ -4566,7 +4572,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
                                   struct ac_shader_config *config,
                                   struct ac_shader_variant_info *shader_info,
                                   gl_shader_stage stage,
-                                  bool dump_shader)
+                                  bool dump_shader, bool supports_spill)
 {
        if (dump_shader)
                ac_dump_module(llvm_module);
@@ -4580,7 +4586,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
        if (dump_shader)
                fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
 
-       ac_shader_binary_read_config(binary, config, 0);
+       ac_shader_binary_read_config(binary, config, 0, supports_spill);
 
        LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
        LLVMDisposeModule(llvm_module);
@@ -4640,7 +4646,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
        LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, 
shader_info,
                                                             options);
 
-       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, 
nir->stage, dump_shader);
+       ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, 
nir->stage, dump_shader, options->supports_spill);
        switch (nir->stage) {
        case MESA_SHADER_COMPUTE:
                for (int i = 0; i < 3; ++i)
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index a57558e..9d66f94 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -52,6 +52,7 @@ struct ac_nir_compiler_options {
        struct radv_pipeline_layout *layout;
        union ac_shader_variant_key key;
        bool unsafe_math;
+       bool supports_spill;
        enum radeon_family family;
        enum chip_class chip_class;
 };
@@ -64,8 +65,9 @@ struct ac_userdata_info {
 };
 
 enum ac_ud_index {
-       AC_UD_PUSH_CONSTANTS = 0,
-       AC_UD_SHADER_START = 1,
+       AC_UD_SCRATCH = 0,
+       AC_UD_PUSH_CONSTANTS = 1,
+       AC_UD_SHADER_START = 2,
        AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
        AC_UD_VS_BASE_VERTEX_START_INSTANCE,
        AC_UD_VS_MAX_UD,
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 360b519..4d88ed7 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -424,7 +424,8 @@ static struct radv_shader_variant *radv_shader_variant_create(struct radv_device
        options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
        options.family = chip_family;
        options.chip_class = device->physical_device->rad_info.chip_class;
-       tm = ac_create_target_machine(chip_family);
+       options.supports_spill = false;
+       tm = ac_create_target_machine(chip_family, false);
        ac_compile_nir_shader(tm, &binary, &variant->config,
                              &variant->info, shader, &options, dump);
        LLVMDisposeTargetMachine(tm);

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to