[Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef

Marek Olšák Mon, 16 Apr 2018 17:52:58 -0700

From: Marek Olšák <marek.ol...@amd.com>

It will contain more variables.
---
 src/gallium/drivers/radeonsi/si_compute.c     |  8 +--
 src/gallium/drivers/radeonsi/si_pipe.c        | 52 ++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h        |  6 +-
 src/gallium/drivers/radeonsi/si_shader.c      | 66 +++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h      | 13 ++--
 .../drivers/radeonsi/si_shader_internal.h     |  6 +-
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 13 ++--
 .../drivers/radeonsi/si_state_shaders.c       | 28 ++++----
 8 files changed, 101 insertions(+), 91 deletions(-)


diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 69c3dce0124..e95e79c7b46 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -79,27 +79,27 @@ static void code_object_to_config(const amd_kernel_code_t 
*code_object,
        out_config->scratch_bytes_per_wave =
                align(code_object->workitem_private_segment_byte_size * 64, 
1024);
 }
 
 /* Asynchronous compute shader compilation. */
 static void si_create_compute_state_async(void *job, int thread_index)
 {
        struct si_compute *program = (struct si_compute *)job;
        struct si_shader *shader = &program->shader;
        struct si_shader_selector sel;
-       LLVMTargetMachineRef tm;
+       struct si_compiler *compiler;
        struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
 
        assert(!debug->debug_message || debug->async);
        assert(thread_index >= 0);
-       assert(thread_index < ARRAY_SIZE(program->screen->tm));
-       tm = program->screen->tm[thread_index];
+       assert(thread_index < ARRAY_SIZE(program->screen->compiler));
+       compiler = &program->screen->compiler[thread_index];
 
        memset(&sel, 0, sizeof(sel));
 
        sel.screen = program->screen;
 
        if (program->ir_type == PIPE_SHADER_IR_TGSI) {
                tgsi_scan_shader(program->ir.tgsi, &sel.info);
                sel.tokens = program->ir.tgsi;
        } else {
                assert(program->ir_type == PIPE_SHADER_IR_NIR);
@@ -116,21 +116,21 @@ static void si_create_compute_state_async(void *job, int 
thread_index)
                                 &program->active_const_and_shader_buffers,
                                 &program->active_samplers_and_images);
 
        program->shader.selector = &sel;
        program->shader.is_monolithic = true;
        program->uses_grid_size = sel.info.uses_grid_size;
        program->uses_block_size = sel.info.uses_block_size;
        program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
        program->uses_bindless_images = sel.info.uses_bindless_images;
 
-       if (si_shader_create(program->screen, tm, &program->shader, debug)) {
+       if (si_shader_create(program->screen, compiler, &program->shader, 
debug)) {
                program->shader.compilation_failed = true;
        } else {
                bool scratch_enabled = shader->config.scratch_bytes_per_wave > 
0;
                unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
                                      (sel.info.uses_grid_size ? 3 : 0) +
                                      (sel.info.uses_block_size ? 3 : 0);
 
                shader->config.rsrc1 =
                        S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
                        S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 3de843af11c..fcf3556bcc1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -95,20 +95,38 @@ static const struct debug_named_value debug_options[] = {
 
        /* Tests: */
        { "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
        { "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and 
exit." },
        { "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault 
test and exit." },
        { "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM 
fault test and exit." },
 
        DEBUG_NAMED_VALUE_END /* must be last */
 };
 
+static void si_init_compiler(struct si_screen *sscreen,
+                            struct si_compiler *compiler)
+{
+       enum ac_target_machine_options tm_options =
+               (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
+               (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
+               (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
+               (!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
+
+       compiler->tm = ac_create_target_machine(sscreen->info.family, 
tm_options);
+}
+
+static void si_destroy_compiler(struct si_compiler *compiler)
+{
+       if (compiler->tm)
+               LLVMDisposeTargetMachine(compiler->tm);
+}
+
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
        struct si_context *sctx = (struct si_context *)context;
        int i;
 
        /* Unreference the framebuffer normally to disable related logic
         * properly.
@@ -193,21 +211,21 @@ static void si_destroy_context(struct pipe_context 
*context)
        slab_destroy_child(&sctx->pool_transfers);
        slab_destroy_child(&sctx->pool_transfers_unsync);
 
        if (sctx->allocator_zeroed_memory)
                u_suballocator_destroy(sctx->allocator_zeroed_memory);
 
        sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
        sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL);
        r600_resource_reference(&sctx->eop_bug_scratch, NULL);
 
-       LLVMDisposeTargetMachine(sctx->tm);
+       si_destroy_compiler(&sctx->compiler);
 
        si_saved_cs_reference(&sctx->current_saved_cs, NULL);
 
        _mesa_hash_table_destroy(sctx->tex_handles, NULL);
        _mesa_hash_table_destroy(sctx->img_handles, NULL);
 
        util_dynarray_fini(&sctx->resident_tex_handles);
        util_dynarray_fini(&sctx->resident_img_handles);
        util_dynarray_fini(&sctx->resident_tex_needs_color_decompress);
        util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
@@ -278,32 +296,20 @@ static void si_emit_string_marker(struct pipe_context 
*ctx,
                                  const char *string, int len)
 {
        struct si_context *sctx = (struct si_context *)ctx;
 
        dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
 
        if (sctx->log)
                u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
 }
 
-static LLVMTargetMachineRef
-si_create_llvm_target_machine(struct si_screen *sscreen)
-{
-       enum ac_target_machine_options tm_options =
-               (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
-               (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 
0) |
-               (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 
0) |
-               (!sscreen->llvm_has_working_vgpr_indexing ? 
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
-
-       return ac_create_target_machine(sscreen->info.family, tm_options);
-}
-
 static void si_set_debug_callback(struct pipe_context *ctx,
                                  const struct pipe_debug_callback *cb)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_screen *screen = sctx->screen;
 
        util_queue_finish(&screen->shader_compiler_queue);
        util_queue_finish(&screen->shader_compiler_queue_low_priority);
 
        if (cb)
@@ -543,21 +549,21 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
         * 1 threadgroup, so that the hw doesn't hang from being unable
         * to start any.
         *
         * The recommended value is 4 per CU at most. Higher numbers don't
         * bring much benefit, but they still occupy chip resources (think
         * async compute). I've seen ~2% performance difference between 4 and 
32.
         */
        sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units,
                                   max_threads_per_block / 64);
 
-       sctx->tm = si_create_llvm_target_machine(sscreen);
+       si_init_compiler(sscreen, &sctx->compiler);
 
        /* Bindless handles. */
        sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
        sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                    _mesa_key_pointer_equal);
 
        util_dynarray_init(&sctx->resident_tex_handles, NULL);
        util_dynarray_init(&sctx->resident_img_handles, NULL);
        util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
@@ -616,27 +622,25 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
                sscreen->ps_epilogs
        };
        unsigned i;
 
        if (!sscreen->ws->unref(sscreen->ws))
                return;
 
        util_queue_destroy(&sscreen->shader_compiler_queue);
        util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
 
-       for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
-               if (sscreen->tm[i])
-                       LLVMDisposeTargetMachine(sscreen->tm[i]);
+       for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
+               si_destroy_compiler(&sscreen->compiler[i]);
 
-       for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++)
-               if (sscreen->tm_low_priority[i])
-                       LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]);
+       for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
+               si_destroy_compiler(&sscreen->compiler_lowp[i]);
 
        /* Free shader parts. */
        for (i = 0; i < ARRAY_SIZE(parts); i++) {
                while (parts[i]) {
                        struct si_shader_part *part = parts[i];
 
                        parts[i] = part->next;
                        ac_shader_binary_clean(&part->binary);
                        FREE(part);
                }
@@ -857,23 +861,23 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
                return NULL;
        }
 
        si_disk_cache_create(sscreen);
 
        /* Only enable as many threads as we have target machines, but at most
         * the number of CPUs - 1 if there is more than one.
         */
        num_threads = sysconf(_SC_NPROCESSORS_ONLN);
        num_threads = MAX2(1, num_threads - 1);
-       num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
+       num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
        num_compiler_threads_lowprio =
-               MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
+               MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
 
        if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
                             32, num_compiler_threads,
                             UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
                si_destroy_shader_cache(sscreen);
                FREE(sscreen);
                return NULL;
        }
 
        if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
@@ -1023,23 +1027,23 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
                                            SI_CONTEXT_INV_VMEM_L1;
        if (sscreen->info.chip_class <= VI) {
                sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
                sscreen->barrier_flags.L2_to_cp |= 
SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
        if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
                sscreen->debug_flags |= DBG_ALL_SHADERS;
 
        for (i = 0; i < num_compiler_threads; i++)
-               sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+               si_init_compiler(sscreen, &sscreen->compiler[i]);
        for (i = 0; i < num_compiler_threads_lowprio; i++)
-               sscreen->tm_low_priority[i] = 
si_create_llvm_target_machine(sscreen);
+               si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
 
        /* Create the auxiliary context. This must be done last. */
        sscreen->aux_context = si_create_context(&sscreen->b, 0);
 
        if (sscreen->debug_flags & DBG(TEST_DMA))
                si_test_dma(sscreen);
 
        if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
                                      DBG(TEST_VMFAULT_SDMA) |
                                      DBG(TEST_VMFAULT_SHADER)))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 823509524d4..54c9b725fcb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -523,26 +523,26 @@ struct si_screen {
         *   those as well.
         */
        mtx_t                   shader_cache_mutex;
        struct hash_table               *shader_cache;
 
        /* Shader compiler queue for multithreaded compilation. */
        struct util_queue               shader_compiler_queue;
        /* Use at most 3 normal compiler threads on quadcore and better.
         * Hyperthreaded CPUs report the number of threads, but we want
         * the number of cores. */
-       LLVMTargetMachineRef            tm[3]; /* used by the queue only */
+       struct si_compiler              compiler[3]; /* used by the queue only 
*/
 
        struct util_queue               shader_compiler_queue_low_priority;
        /* Use at most 2 low priority threads on quadcore and better.
         * We want to minimize the impact on multithreaded Mesa. */
-       LLVMTargetMachineRef            tm_low_priority[2]; /* at most 2 
threads */
+       struct si_compiler              compiler_lowp[2]; /* at most 2 threads 
*/
 };
 
 struct si_blend_color {
        struct pipe_blend_color         state;
        bool                            any_nonzeros;
 };
 
 struct si_sampler_view {
        struct pipe_sampler_view        base;
         /* [0..7] = image descriptor
@@ -769,21 +769,21 @@ struct si_context {
        void                            *custom_blend_fmask_decompress;
        void                            *custom_blend_eliminate_fastclear;
        void                            *custom_blend_dcc_decompress;
        void                            *vs_blit_pos;
        void                            *vs_blit_pos_layered;
        void                            *vs_blit_color;
        void                            *vs_blit_color_layered;
        void                            *vs_blit_texcoord;
        struct si_screen                *screen;
        struct pipe_debug_callback      debug;
-       LLVMTargetMachineRef            tm; /* only non-threaded compilation */
+       struct si_compiler              compiler; /* only non-threaded 
compilation */
        struct si_shader_ctx_state      fixed_func_tcs_shader;
        struct r600_resource            *wait_mem_scratch;
        unsigned                        wait_mem_number;
        uint16_t                        prefetch_L2_mask;
 
        bool                            gfx_flush_in_progress:1;
        bool                            gfx_last_ib_is_busy:1;
        bool                            compute_is_busy:1;
 
        unsigned                        num_gfx_cs_flushes;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 765daa52bcb..ce654bb7610 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,21 +70,21 @@ struct si_function_info {
        unsigned num_params;
 };
 
 enum si_arg_regfile {
        ARG_SGPR,
        ARG_VGPR
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
-                              LLVMTargetMachineRef tm);
+                              struct si_compiler *compiler);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
                                 struct lp_build_tgsi_context *bld_base,
                                 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned processor, const struct si_shader 
*shader,
                               FILE *f);
 
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key);
@@ -5641,21 +5641,21 @@ void si_shader_dump(struct si_screen *sscreen, const 
struct si_shader *shader,
                fprintf(file, "\n");
        }
 
        si_shader_dump_stats(sscreen, shader, processor, file,
                             check_debug_option);
 }
 
 static int si_compile_llvm(struct si_screen *sscreen,
                           struct ac_shader_binary *binary,
                           struct si_shader_config *conf,
-                          LLVMTargetMachineRef tm,
+                          struct si_compiler *compiler,
                           LLVMModuleRef mod,
                           struct pipe_debug_callback *debug,
                           unsigned processor,
                           const char *name)
 {
        int r = 0;
        unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
 
        if (si_can_dump_shader(sscreen, processor)) {
                fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
@@ -5667,21 +5667,21 @@ static int si_compile_llvm(struct si_screen *sscreen,
                }
        }
 
        if (sscreen->record_llvm_ir) {
                char *ir = LLVMPrintModuleToString(mod);
                binary->llvm_ir_string = strdup(ir);
                LLVMDisposeMessage(ir);
        }
 
        if (!si_replace_shader(count, binary)) {
-               r = si_llvm_compile(mod, binary, tm, debug);
+               r = si_llvm_compile(mod, binary, compiler, debug);
                if (r)
                        return r;
        }
 
        si_shader_binary_read_config(binary, conf, 0);
 
        /* Enable 64-bit and 16-bit denormals, because there is no performance
         * cost.
         *
         * If denormals are enabled, all floating-point output modifiers are
@@ -5719,21 +5719,21 @@ static void si_llvm_build_ret(struct si_shader_context 
*ctx, LLVMValueRef ret)
 {
        if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
                LLVMBuildRetVoid(ctx->ac.builder);
        else
                LLVMBuildRet(ctx->ac.builder, ret);
 }
 
 /* Generate code for the hardware VS shader stage to go with a geometry shader 
*/
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct si_compiler *compiler,
                           struct si_shader_selector *gs_selector,
                           struct pipe_debug_callback *debug)
 {
        struct si_shader_context ctx;
        struct si_shader *shader;
        LLVMBuilderRef builder;
        struct lp_build_tgsi_context *bld_base = &ctx.bld_base;
        struct lp_build_context *uint = &bld_base->uint_bld;
        struct si_shader_output_values *outputs;
        struct tgsi_shader_info *gsinfo = &gs_selector->info;
@@ -5750,21 +5750,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                return NULL;
        }
 
        /* We can leave the fence as permanently signaled because the GS copy
         * shader only becomes visible globally after it has been compiled. */
        util_queue_fence_init(&shader->ready);
 
        shader->selector = gs_selector;
        shader->is_gs_copy_shader = true;
 
-       si_init_shader_ctx(&ctx, sscreen, tm);
+       si_init_shader_ctx(&ctx, sscreen, compiler);
        ctx.shader = shader;
        ctx.type = PIPE_SHADER_VERTEX;
 
        builder = ctx.ac.builder;
 
        create_function(&ctx);
        preload_ring_buffers(&ctx);
 
        LLVMValueRef voffset =
                lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
@@ -5845,21 +5845,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        }
 
        LLVMPositionBuilderAtEnd(builder, end_bb);
 
        LLVMBuildRetVoid(ctx.ac.builder);
 
        ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
        si_llvm_optimize_module(&ctx);
 
        r = si_compile_llvm(sscreen, &ctx.shader->binary,
-                           &ctx.shader->config, ctx.tm,
+                           &ctx.shader->config, ctx.compiler,
                            ctx.gallivm.module,
                            debug, PIPE_SHADER_GEOMETRY,
                            "GS Copy Shader");
        if (!r) {
                if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
                        fprintf(stderr, "GS Copy Shader:\n");
                si_shader_dump(sscreen, ctx.shader, debug,
                               PIPE_SHADER_GEOMETRY, stderr, true);
                r = si_shader_binary_upload(sscreen, ctx.shader);
        }
@@ -5967,25 +5967,25 @@ static void si_dump_shader_key(unsigned processor, 
const struct si_shader *shade
             processor == PIPE_SHADER_TESS_EVAL ||
             processor == PIPE_SHADER_VERTEX) &&
            !key->as_es && !key->as_ls) {
                fprintf(f, "  opt.kill_outputs = 0x%"PRIx64"\n", 
key->opt.kill_outputs);
                fprintf(f, "  opt.clip_disable = %u\n", key->opt.clip_disable);
        }
 }
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
                               struct si_screen *sscreen,
-                              LLVMTargetMachineRef tm)
+                              struct si_compiler *compiler)
 {
        struct lp_build_tgsi_context *bld_base;
 
-       si_llvm_context_init(ctx, sscreen, tm);
+       si_llvm_context_init(ctx, sscreen, compiler);
 
        bld_base = &ctx->bld_base;
        bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
        bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
        bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
        bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
 
        bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
 
@@ -6749,41 +6749,41 @@ static void si_build_wrapper_function(struct 
si_shader_context *ctx,
                                        num_out_sgpr = num_out;
                                }
                        }
                }
        }
 
        LLVMBuildRetVoid(builder);
 }
 
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct si_compiler *compiler,
                           struct si_shader *shader,
                           bool is_monolithic,
                           struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader_context ctx;
        int r = -1;
 
        /* Dump TGSI code before doing TGSI->LLVM conversion in case the
         * conversion fails. */
        if (si_can_dump_shader(sscreen, sel->info.processor) &&
            !(sscreen->debug_flags & DBG(NO_TGSI))) {
                if (sel->tokens)
                        tgsi_dump(sel->tokens, 0);
                else
                        nir_print_shader(sel->nir, stderr);
                si_dump_streamout(&sel->so);
        }
 
-       si_init_shader_ctx(&ctx, sscreen, tm);
+       si_init_shader_ctx(&ctx, sscreen, compiler);
        si_llvm_context_set_tgsi(&ctx, shader);
        ctx.separate_prolog = !is_monolithic;
 
        memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
               sizeof(shader->info.vs_output_param_offset));
 
        shader->info.uses_instanceid = sel->info.uses_instanceid;
 
        if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
                si_llvm_dispose(&ctx);
@@ -6979,21 +6979,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
            si_can_dump_shader(sscreen, ctx.type)) {
                ctx.shader->config.private_mem_vgprs =
                        ac_count_scratch_private_memory(ctx.main_fn);
        }
 
        /* Make sure the input is a pointer and not integer followed by 
inttoptr. */
        assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
               LLVMPointerTypeKind);
 
        /* Compile to bytecode. */
-       r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
+       r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
                            ctx.gallivm.module, debug, ctx.type, "TGSI shader");
        si_llvm_dispose(&ctx);
        if (r) {
                fprintf(stderr, "LLVM failed to compile shader\n");
                return r;
        }
 
        /* Validate SGPR and VGPR usage for compute to detect compiler bugs.
         * LLVM 3.9svn has this bug.
         */
@@ -7090,21 +7090,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
  * \param debug                debug callback
  * \param build                the callback responsible for building the main 
function
  * \return             non-NULL on success
  */
 static struct si_shader_part *
 si_get_shader_part(struct si_screen *sscreen,
                   struct si_shader_part **list,
                   enum pipe_shader_type type,
                   bool prolog,
                   union si_shader_part_key *key,
-                  LLVMTargetMachineRef tm,
+                  struct si_compiler *compiler,
                   struct pipe_debug_callback *debug,
                   void (*build)(struct si_shader_context *,
                                 union si_shader_part_key *),
                   const char *name)
 {
        struct si_shader_part *result;
 
        mtx_lock(&sscreen->shader_parts_mutex);
 
        /* Find existing. */
@@ -7115,21 +7115,21 @@ si_get_shader_part(struct si_screen *sscreen,
                }
        }
 
        /* Compile a new one. */
        result = CALLOC_STRUCT(si_shader_part);
        result->key = *key;
 
        struct si_shader shader = {};
        struct si_shader_context ctx;
 
-       si_init_shader_ctx(&ctx, sscreen, tm);
+       si_init_shader_ctx(&ctx, sscreen, compiler);
        ctx.shader = &shader;
        ctx.type = type;
 
        switch (type) {
        case PIPE_SHADER_VERTEX:
                shader.key.as_ls = key->vs_prolog.as_ls;
                shader.key.as_es = key->vs_prolog.as_es;
                break;
        case PIPE_SHADER_TESS_CTRL:
                assert(!prolog);
@@ -7146,21 +7146,21 @@ si_get_shader_part(struct si_screen *sscreen,
                break;
        default:
                unreachable("bad shader part");
        }
 
        build(&ctx, key);
 
        /* Compile. */
        si_llvm_optimize_module(&ctx);
 
-       if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
+       if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
                            ctx.ac.module, debug, ctx.type, name)) {
                FREE(result);
                result = NULL;
                goto out;
        }
 
        result->next = *list;
        *list = result;
 
 out:
@@ -7336,53 +7336,53 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
 
                index = ac_to_float(&ctx->ac, index);
                ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index,
                                           fninfo.num_params + i, "");
        }
 
        si_llvm_build_ret(ctx, ret);
 }
 
 static bool si_get_vs_prolog(struct si_screen *sscreen,
-                            LLVMTargetMachineRef tm,
+                            struct si_compiler *compiler,
                             struct si_shader *shader,
                             struct pipe_debug_callback *debug,
                             struct si_shader *main_part,
                             const struct si_vs_prolog_bits *key)
 {
        struct si_shader_selector *vs = main_part->selector;
 
        if (!si_vs_needs_prolog(vs, key))
                return true;
 
        /* Get the prolog. */
        union si_shader_part_key prolog_key;
        si_get_vs_prolog_key(&vs->info, main_part->info.num_input_sgprs,
                             key, shader, &prolog_key);
 
        shader->prolog =
                si_get_shader_part(sscreen, &sscreen->vs_prologs,
-                                  PIPE_SHADER_VERTEX, true, &prolog_key, tm,
+                                  PIPE_SHADER_VERTEX, true, &prolog_key, 
compiler,
                                   debug, si_build_vs_prolog_function,
                                   "Vertex Shader Prolog");
        return shader->prolog != NULL;
 }
 
 /**
  * Select and compile (or reuse) vertex shader parts (prolog & epilog).
  */
 static bool si_shader_select_vs_parts(struct si_screen *sscreen,
-                                     LLVMTargetMachineRef tm,
+                                     struct si_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
-       return si_get_vs_prolog(sscreen, tm, shader, debug, shader,
+       return si_get_vs_prolog(sscreen, compiler, shader, debug, shader,
                                &shader->key.part.vs.prolog);
 }
 
 /**
  * Compile the TCS epilog function. This writes tesselation factors to memory
  * based on the output primitive type of the tesselator (determined by TES).
  */
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
                                         union si_shader_part_key *key)
 {
@@ -7453,78 +7453,78 @@ static void si_build_tcs_epilog_function(struct 
si_shader_context *ctx,
                              LLVMGetParam(func, tess_factors_idx + 2),
                              invoc0_tess_factors, invoc0_tess_factors + 4);
 
        LLVMBuildRetVoid(ctx->ac.builder);
 }
 
 /**
  * Select and compile (or reuse) TCS parts (epilog).
  */
 static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
-                                      LLVMTargetMachineRef tm,
+                                      struct si_compiler *compiler,
                                       struct si_shader *shader,
                                       struct pipe_debug_callback *debug)
 {
        if (sscreen->info.chip_class >= GFX9) {
                struct si_shader *ls_main_part =
                        shader->key.part.tcs.ls->main_shader_part_ls;
 
-               if (!si_get_vs_prolog(sscreen, tm, shader, debug, ls_main_part,
+               if (!si_get_vs_prolog(sscreen, compiler, shader, debug, 
ls_main_part,
                                      &shader->key.part.tcs.ls_prolog))
                        return false;
 
                shader->previous_stage = ls_main_part;
        }
 
        /* Get the epilog. */
        union si_shader_part_key epilog_key;
        memset(&epilog_key, 0, sizeof(epilog_key));
        epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
 
        shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
                                            PIPE_SHADER_TESS_CTRL, false,
-                                           &epilog_key, tm, debug,
+                                           &epilog_key, compiler, debug,
                                            si_build_tcs_epilog_function,
                                            "Tessellation Control Shader 
Epilog");
        return shader->epilog != NULL;
 }
 
 /**
  * Select and compile (or reuse) GS parts (prolog).
  */
 static bool si_shader_select_gs_parts(struct si_screen *sscreen,
-                                     LLVMTargetMachineRef tm,
+                                     struct si_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
        if (sscreen->info.chip_class >= GFX9) {
                struct si_shader *es_main_part =
                        shader->key.part.gs.es->main_shader_part_es;
 
                if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX &&
-                   !si_get_vs_prolog(sscreen, tm, shader, debug, es_main_part,
+                   !si_get_vs_prolog(sscreen, compiler, shader, debug, 
es_main_part,
                                      &shader->key.part.gs.vs_prolog))
                        return false;
 
                shader->previous_stage = es_main_part;
        }
 
        if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
                return true;
 
        union si_shader_part_key prolog_key;
        memset(&prolog_key, 0, sizeof(prolog_key));
        prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
 
        shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs,
                                            PIPE_SHADER_GEOMETRY, true,
-                                           &prolog_key, tm, debug,
+                                           &prolog_key, compiler, debug,
                                            si_build_gs_prolog_function,
                                            "Geometry Shader Prolog");
        return shader->prolog2 != NULL;
 }
 
 /**
  * Build the pixel shader prolog function. This handles:
  * - two-side color selection and interpolation
  * - overriding interpolation parameters for the API PS
  * - polygon stippling
@@ -7898,49 +7898,49 @@ static void si_build_ps_epilog_function(struct 
si_shader_context *ctx,
                si_emit_ps_exports(ctx, &exp);
 
        /* Compile. */
        LLVMBuildRetVoid(ctx->ac.builder);
 }
 
 /**
  * Select and compile (or reuse) pixel shader parts (prolog & epilog).
  */
 static bool si_shader_select_ps_parts(struct si_screen *sscreen,
-                                     LLVMTargetMachineRef tm,
+                                     struct si_compiler *compiler,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
        union si_shader_part_key prolog_key;
        union si_shader_part_key epilog_key;
 
        /* Get the prolog. */
        si_get_ps_prolog_key(shader, &prolog_key, true);
 
        /* The prolog is a no-op if these aren't set. */
        if (si_need_ps_prolog(&prolog_key)) {
                shader->prolog =
                        si_get_shader_part(sscreen, &sscreen->ps_prologs,
                                           PIPE_SHADER_FRAGMENT, true,
-                                          &prolog_key, tm, debug,
+                                          &prolog_key, compiler, debug,
                                           si_build_ps_prolog_function,
                                           "Fragment Shader Prolog");
                if (!shader->prolog)
                        return false;
        }
 
        /* Get the epilog. */
        si_get_ps_epilog_key(shader, &epilog_key);
 
        shader->epilog =
                si_get_shader_part(sscreen, &sscreen->ps_epilogs,
                                   PIPE_SHADER_FRAGMENT, false,
-                                  &epilog_key, tm, debug,
+                                  &epilog_key, compiler, debug,
                                   si_build_ps_epilog_function,
                                   "Fragment Shader Epilog");
        if (!shader->epilog)
                return false;
 
        /* Enable POS_FIXED_PT if polygon stippling is enabled. */
        if (shader->key.part.ps.prolog.poly_stipple) {
                shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
                
assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
        }
@@ -8029,39 +8029,39 @@ static void si_fix_resource_usage(struct si_screen 
*sscreen,
 
        shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
 
        if (shader->selector->type == PIPE_SHADER_COMPUTE &&
            si_get_max_workgroup_size(shader) > 64) {
                si_multiwave_lds_size_workaround(sscreen,
                                                 &shader->config.lds_size);
        }
 }
 
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug)
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
        int r;
 
        /* LS, ES, VS are compiled on demand if the main part hasn't been
         * compiled for that stage.
         *
         * Vertex shaders are compiled on demand when a vertex fetch
         * workaround must be applied.
         */
        if (shader->is_monolithic) {
                /* Monolithic shader (compiled as a whole, has many variants,
                 * may take a long time to compile).
                 */
-               r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+               r = si_compile_tgsi_shader(sscreen, compiler, shader, true, 
debug);
                if (r)
                        return r;
        } else {
                /* The shader consists of several parts:
                 *
                 * - the middle part is the user shader, it has 1 variant only
                 *   and it was compiled during the creation of the shader
                 *   selector
                 * - the prolog part is inserted at the beginning
                 * - the epilog part is inserted at the end
@@ -8087,35 +8087,35 @@ int si_shader_create(struct si_screen *sscreen, 
LLVMTargetMachineRef tm,
                memcpy(shader->info.vs_output_param_offset,
                       mainp->info.vs_output_param_offset,
                       sizeof(mainp->info.vs_output_param_offset));
                shader->info.uses_instanceid = mainp->info.uses_instanceid;
                shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
                shader->info.nr_param_exports = mainp->info.nr_param_exports;
 
                /* Select prologs and/or epilogs. */
                switch (sel->type) {
                case PIPE_SHADER_VERTEX:
-                       if (!si_shader_select_vs_parts(sscreen, tm, shader, 
debug))
+                       if (!si_shader_select_vs_parts(sscreen, compiler, 
shader, debug))
                                return -1;
                        break;
                case PIPE_SHADER_TESS_CTRL:
-                       if (!si_shader_select_tcs_parts(sscreen, tm, shader, 
debug))
+                       if (!si_shader_select_tcs_parts(sscreen, compiler, 
shader, debug))
                                return -1;
                        break;
                case PIPE_SHADER_TESS_EVAL:
                        break;
                case PIPE_SHADER_GEOMETRY:
-                       if (!si_shader_select_gs_parts(sscreen, tm, shader, 
debug))
+                       if (!si_shader_select_gs_parts(sscreen, compiler, 
shader, debug))
                                return -1;
                        break;
                case PIPE_SHADER_FRAGMENT:
-                       if (!si_shader_select_ps_parts(sscreen, tm, shader, 
debug))
+                       if (!si_shader_select_ps_parts(sscreen, compiler, 
shader, debug))
                                return -1;
 
                        /* Make sure we have at least as many VGPRs as there
                         * are allocated inputs.
                         */
                        shader->config.num_vgprs = 
MAX2(shader->config.num_vgprs,
                                                        
shader->info.num_input_vgprs);
                        break;
                }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index c26ccafdd69..8c479d638a9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -304,25 +304,30 @@ enum {
        SI_FIX_FETCH_RGB_64_FLOAT,
        SI_FIX_FETCH_RGBA_64_FLOAT,
        SI_FIX_FETCH_RGB_8,     /* A = 1.0 */
        SI_FIX_FETCH_RGB_8_INT, /* A = 1 */
        SI_FIX_FETCH_RGB_16,
        SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
+/* Per-thread persistent LLVM objects. */
+struct si_compiler {
+       LLVMTargetMachineRef            tm;
+};
+
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
        /* Should only be used by si_init_shader_selector_async and
         * si_build_shader_variant if thread_index == -1 (non-threaded). */
-       LLVMTargetMachineRef            tm;
+       struct si_compiler              *compiler;
 
        /* Used if thread_index == -1 or if debug.async is true. */
        struct pipe_debug_callback      debug;
 
        /* Used for creating the log string for gallium/ddebug. */
        bool                            is_debug_context;
 };
 
 /* A shader selector is a gallium CSO and contains shader variants and
  * binaries for one TGSI program. This can be shared by multiple contexts.
@@ -639,29 +644,29 @@ struct si_shader {
 struct si_shader_part {
        struct si_shader_part *next;
        union si_shader_part_key key;
        struct ac_shader_binary binary;
        struct si_shader_config config;
 };
 
 /* si_shader.c */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct si_compiler *compiler,
                           struct si_shader_selector *gs_selector,
                           struct pipe_debug_callback *debug);
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct si_compiler *compiler,
                           struct si_shader *shader,
                           bool is_monolithic,
                           struct pipe_debug_callback *debug);
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned 
index);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
                    FILE *f, bool check_debug_option);
 void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index afcc14e38cc..0a347172d62 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -172,21 +172,21 @@ struct si_shader_context {
        /* API GS */
        int param_gs2vs_offset;
        int param_gs_wave_id; /* GFX6 */
        LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
        int param_gs_vtx01_offset; /* in dwords (GFX9) */
        int param_gs_vtx23_offset; /* in dwords (GFX9) */
        int param_gs_vtx45_offset; /* in dwords (GFX9) */
        /* CS */
        int param_block_size;
 
-       LLVMTargetMachineRef tm;
+       struct si_compiler *compiler;
 
        /* Preloaded descriptors. */
        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring[4];
        LLVMValueRef tess_offchip_ring;
 
        LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
        LLVMValueRef gs_next_vertex[4];
        LLVMValueRef postponed_kill;
        LLVMValueRef return_value;
@@ -214,36 +214,36 @@ si_shader_context(struct lp_build_tgsi_context *bld_base)
 }
 
 static inline struct si_shader_context *
 si_shader_context_from_abi(struct ac_shader_abi *abi)
 {
        struct si_shader_context *ctx = NULL;
        return container_of(abi, ctx, abi);
 }
 
 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
-                        LLVMTargetMachineRef tm,
+                        struct si_compiler *compiler,
                         struct pipe_debug_callback *debug);
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                          enum tgsi_opcode_type type);
 
 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
                     enum tgsi_opcode_type type, LLVMValueRef value);
 
 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
                                 LLVMValueRef index,
                                 unsigned num);
 
 void si_llvm_context_init(struct si_shader_context *ctx,
                          struct si_screen *sscreen,
-                         LLVMTargetMachineRef tm);
+                         struct si_compiler *compiler);
 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
                              struct si_shader *shader);
 
 void si_llvm_create_func(struct si_shader_context *ctx,
                         const char *name,
                         LLVMTypeRef *return_types, unsigned num_return_elems,
                         LLVMTypeRef *ParamTypes, unsigned ParamCount);
 
 void si_llvm_dispose(struct si_shader_context *ctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index acd7e0b2b2f..d0332a6078c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -92,42 +92,43 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, 
void *context)
 
        LLVMDisposeMessage(description);
 }
 
 /**
  * Compile an LLVM module to machine code.
  *
  * @returns 0 for success, 1 for failure
  */
 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
-                        LLVMTargetMachineRef tm,
+                        struct si_compiler *compiler,
                         struct pipe_debug_callback *debug)
 {
        struct si_llvm_diagnostics diag;
        char *err;
        LLVMContextRef llvm_ctx;
        LLVMMemoryBufferRef out_buffer;
        unsigned buffer_size;
        const char *buffer_data;
        LLVMBool mem_err;
 
        diag.debug = debug;
        diag.retval = 0;
 
        /* Setup Diagnostic Handler*/
        llvm_ctx = LLVMGetModuleContext(M);
 
        LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 
        /* Compile IR*/
-       mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, 
&err,
-                                                                &out_buffer);
+       mem_err = LLVMTargetMachineEmitToMemoryBuffer(compiler->tm, M,
+                                                     LLVMObjectFile, &err,
+                                                     &out_buffer);
 
        /* Process Errors/Warnings */
        if (mem_err) {
                fprintf(stderr, "%s: %s", __FUNCTION__, err);
                pipe_debug_message(debug, SHADER_INFO,
                                   "LLVM emit error: %s", err);
                FREE(err);
                diag.retval = 1;
                goto out;
        }
@@ -985,39 +986,39 @@ static void emit_immediate(struct lp_build_tgsi_context 
*bld_base,
        for (i = 0; i < 4; ++i) {
                ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
                                LLVMConstInt(ctx->i32, imm->u[i].Uint, false   
);
        }
 
        ctx->imms_num++;
 }
 
 void si_llvm_context_init(struct si_shader_context *ctx,
                          struct si_screen *sscreen,
-                         LLVMTargetMachineRef tm)
+                         struct si_compiler *compiler)
 {
        struct lp_type type;
 
        /* Initialize the gallivm object:
         * We are only using the module, context, and builder fields of this 
struct.
         * This should be enough for us to be able to pass our gallivm struct 
to the
         * helper functions in the gallivm module.
         */
        memset(ctx, 0, sizeof(*ctx));
        ctx->screen = sscreen;
-       ctx->tm = tm;
+       ctx->compiler = compiler;
 
        ctx->gallivm.context = LLVMContextCreate();
        ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
                                                ctx->gallivm.context);
        LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
 
-       LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+       LLVMTargetDataRef data_layout = 
LLVMCreateTargetDataLayout(compiler->tm);
        char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
        LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
        LLVMDisposeTargetData(data_layout);
        LLVMDisposeMessage(data_layout_str);
 
        bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
        enum ac_float_mode float_mode =
                unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
                                AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f23ce098208..2e215b91ce8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1481,40 +1481,40 @@ static inline void si_shader_selector_key(struct 
pipe_context *ctx,
        if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT)))
                memset(&key->opt, 0, sizeof(key->opt));
 }
 
 static void si_build_shader_variant(struct si_shader *shader,
                                    int thread_index,
                                    bool low_priority)
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_screen *sscreen = sel->screen;
-       LLVMTargetMachineRef tm;
+       struct si_compiler *compiler;
        struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
        int r;
 
        if (thread_index >= 0) {
                if (low_priority) {
-                       assert(thread_index < 
ARRAY_SIZE(sscreen->tm_low_priority));
-                       tm = sscreen->tm_low_priority[thread_index];
+                       assert(thread_index < 
ARRAY_SIZE(sscreen->compiler_lowp));
+                       compiler = &sscreen->compiler_lowp[thread_index];
                } else {
-                       assert(thread_index < ARRAY_SIZE(sscreen->tm));
-                       tm = sscreen->tm[thread_index];
+                       assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+                       compiler = &sscreen->compiler[thread_index];
                }
                if (!debug->async)
                        debug = NULL;
        } else {
                assert(!low_priority);
-               tm = shader->compiler_ctx_state.tm;
+               compiler = shader->compiler_ctx_state.compiler;
        }
 
-       r = si_shader_create(sscreen, tm, shader, debug);
+       r = si_shader_create(sscreen, compiler, shader, debug);
        if (unlikely(r)) {
                PRINT_ERR("Failed to build shader variant (type=%u) %d\n",
                         sel->type, r);
                shader->compilation_failed = true;
                return;
        }
 
        if (shader->compiler_ctx_state.is_debug_context) {
                FILE *f = open_memstream(&shader->shader_log,
                                         &shader->shader_log_size);
@@ -1553,21 +1553,21 @@ static bool si_check_missing_main_part(struct si_screen 
*sscreen,
 
                /* We can leave the fence as permanently signaled because the
                 * main part becomes visible globally only after it has been
                 * compiled. */
                util_queue_fence_init(&main_part->ready);
 
                main_part->selector = sel;
                main_part->key.as_es = key->as_es;
                main_part->key.as_ls = key->as_ls;
 
-               if (si_compile_tgsi_shader(sscreen, compiler_state->tm,
+               if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
                                           main_part, false,
                                           &compiler_state->debug) != 0) {
                        FREE(main_part);
                        return false;
                }
                *mainp = main_part;
        }
        return true;
 }
 
@@ -1828,27 +1828,27 @@ static void si_parse_next_shader_property(const struct 
tgsi_shader_info *info,
 
 /**
  * Compile the main shader part or the monolithic shader as part of
  * si_shader_selector initialization. Since it can be done asynchronously,
  * there is no way to report compile failures to applications.
  */
 static void si_init_shader_selector_async(void *job, int thread_index)
 {
        struct si_shader_selector *sel = (struct si_shader_selector *)job;
        struct si_screen *sscreen = sel->screen;
-       LLVMTargetMachineRef tm;
+       struct si_compiler *compiler;
        struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
 
        assert(!debug->debug_message || debug->async);
        assert(thread_index >= 0);
-       assert(thread_index < ARRAY_SIZE(sscreen->tm));
-       tm = sscreen->tm[thread_index];
+       assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+       compiler = &sscreen->compiler[thread_index];
 
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
         * on demand.
         */
        if (!sscreen->use_monolithic_shaders) {
                struct si_shader *shader = CALLOC_STRUCT(si_shader);
                void *ir_binary = NULL;
 
                if (!shader) {
@@ -1872,21 +1872,21 @@ static void si_init_shader_selector_async(void *job, 
int thread_index)
                mtx_lock(&sscreen->shader_cache_mutex);
 
                if (ir_binary &&
                    si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
                        mtx_unlock(&sscreen->shader_cache_mutex);
                        si_shader_dump_stats_for_shader_db(shader, debug);
                } else {
                        mtx_unlock(&sscreen->shader_cache_mutex);
 
                        /* Compile the shader if it hasn't been loaded from the 
cache. */
-                       if (si_compile_tgsi_shader(sscreen, tm, shader, false,
+                       if (si_compile_tgsi_shader(sscreen, compiler, shader, 
false,
                                                   debug) != 0) {
                                FREE(shader);
                                FREE(ir_binary);
                                fprintf(stderr, "radeonsi: can't compile a main 
shader part\n");
                                return;
                        }
 
                        if (ir_binary) {
                                mtx_lock(&sscreen->shader_cache_mutex);
                                if (!si_shader_cache_insert_shader(sscreen, 
ir_binary, shader, true))
@@ -1935,21 +1935,21 @@ static void si_init_shader_selector_async(void *job, 
int thread_index)
                                case TGSI_SEMANTIC_CLIPVERTEX:
                                case TGSI_SEMANTIC_EDGEFLAG:
                                        break;
                                }
                        }
                }
        }
 
        /* The GS copy shader is always pre-compiled. */
        if (sel->type == PIPE_SHADER_GEOMETRY) {
-               sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, 
sel, debug);
+               sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, 
compiler, sel, debug);
                if (!sel->gs_copy_shader) {
                        fprintf(stderr, "radeonsi: can't create GS copy 
shader\n");
                        return;
                }
 
                si_shader_vs(sscreen, sel->gs_copy_shader, sel);
        }
 }
 
 /* Return descriptor slot usage masks from the given shader info. */
@@ -3127,21 +3127,21 @@ bool si_update_shaders(struct si_context *sctx)
        struct pipe_context *ctx = (struct pipe_context*)sctx;
        struct si_compiler_ctx_state compiler_state;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        struct si_shader *old_vs = si_get_vs_state(sctx);
        bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
        struct si_shader *old_ps = sctx->ps_shader.current;
        unsigned old_spi_shader_col_format =
                old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
        int r;
 
-       compiler_state.tm = sctx->tm;
+       compiler_state.compiler = &sctx->compiler;
        compiler_state.debug = sctx->debug;
        compiler_state.is_debug_context = sctx->is_debug;
 
        /* Update stages before GS. */
        if (sctx->tes_shader.cso) {
                if (!sctx->tess_rings) {
                        si_init_tess_factor_ring(sctx);
                        if (!sctx->tess_rings)
                                return false;
                }
-- 
2.17.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef

Reply via email to