Module: Mesa Branch: master Commit: 4dbcf24f6826055529a7b2212f113cb8ad089fed URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4dbcf24f6826055529a7b2212f113cb8ad089fed
Author: Jesse Natalie <[email protected]> Date: Wed Nov 18 12:22:27 2020 -0800 microsoft/clc: Hook up printf Replaces the original printf lowering pass with the one shared with Clover; the clc-specific pass now only handles the new load_printf_buffer_address intrinsic. Exports new metadata, containing the format strings and argument sizes, to the runtime. Reviewed-by: Erik Faye-Lund <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8254> --- src/microsoft/clc/clc_compiler.c | 39 +++++++++++++-- src/microsoft/clc/clc_compiler.h | 12 ++++- src/microsoft/clc/clc_nir.c | 99 +++++++++++++------------------------- src/microsoft/clc/clc_nir.h | 3 +- src/microsoft/clc/compute_test.cpp | 4 +- 5 files changed, 81 insertions(+), 76 deletions(-) diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c index dc841865132..642dca16872 100644 --- a/src/microsoft/clc/clc_compiler.c +++ b/src/microsoft/clc/clc_compiler.c @@ -1051,6 +1051,7 @@ clc_to_dxil(struct clc_context *ctx, .kernel = true, .kernel_image = true, .literal_sampler = true, + .printf = true, }, }; nir_shader_compiler_options nir_options = @@ -1235,9 +1236,26 @@ clc_to_dxil(struct clc_context *ctx, // Lower memcpy NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref); - bool has_printf = false; - //NIR_PASS(has_printf, nir, clc_nir_lower_printf, uav_id); - metadata->printf_uav_id = has_printf ? uav_id++ : -1; + // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't + // necessarily removed all temp variables (e.g. 
the printf struct itself) at this point, so we'll rerun this later + assert(nir->scratch_size == 0); + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align); + + nir_lower_printf_options printf_options = { + .treat_doubles_as_floats = true, + .max_buffer_size = 1024 * 1024 + }; + NIR_PASS_V(nir, nir_lower_printf, &printf_options); + + metadata->printf.info_count = nir->printf_info_count; + metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info)); + for (unsigned i = 0; i < nir->printf_info_count; i++) { + metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size); + memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size); + metadata->printf.infos[i].num_args = nir->printf_info[i].num_args; + metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned)); + memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned)); + } // copy propagate to prepare for lower_explicit_io NIR_PASS_V(nir, nir_split_var_copies); @@ -1258,8 +1276,8 @@ clc_to_dxil(struct clc_context *ctx, int_sampler_states, NULL, 14.0f); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL); - assert(nir->scratch_size == 0); - + + nir->scratch_size = 0; NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared | nir_var_function_temp | nir_var_uniform | nir_var_mem_global | nir_var_mem_constant, glsl_get_cl_type_size_align); @@ -1267,6 +1285,11 @@ clc_to_dxil(struct clc_context *ctx, NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp); NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id); NIR_PASS_V(nir, clc_lower_global_to_ssbo); + + bool has_printf = false; + NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id); + metadata->printf.uav_id = has_printf ? 
uav_id++ : -1; + NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo); NIR_PASS_V(nir, split_unaligned_loads_stores); @@ -1432,6 +1455,12 @@ void clc_free_dxil_object(struct clc_dxil_object *dxil) for (unsigned i = 0; i < dxil->metadata.num_consts; i++) free(dxil->metadata.consts[i].data); + for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) { + free(dxil->metadata.printf.infos[i].arg_sizes); + free(dxil->metadata.printf.infos[i].str); + } + free(dxil->metadata.printf.infos); + free(dxil->binary.data); free(dxil); } diff --git a/src/microsoft/clc/clc_compiler.h b/src/microsoft/clc/clc_compiler.h index 8b73d9edc48..6981bbaf415 100644 --- a/src/microsoft/clc/clc_compiler.h +++ b/src/microsoft/clc/clc_compiler.h @@ -118,6 +118,12 @@ struct clc_object { #define CLC_MAX_BINDINGS_PER_ARG 3 #define CLC_MAX_SAMPLERS 16 +struct clc_printf_info { + unsigned num_args; + unsigned *arg_sizes; + char *str; +}; + struct clc_dxil_metadata { struct { unsigned offset; @@ -165,7 +171,11 @@ struct clc_dxil_metadata { uint16_t local_size[3]; uint16_t local_size_hint[3]; - int printf_uav_id; + struct { + unsigned info_count; + struct clc_printf_info *infos; + int uav_id; + } printf; }; struct clc_dxil_object { diff --git a/src/microsoft/clc/clc_nir.c b/src/microsoft/clc/clc_nir.c index 2dfeb925bff..206365ab896 100644 --- a/src/microsoft/clc/clc_nir.c +++ b/src/microsoft/clc/clc_nir.c @@ -245,81 +245,48 @@ add_printf_var(struct nir_shader *nir, unsigned uav_id) nir_variable *var = nir_variable_create(nir, nir_var_mem_ssbo, glsl_array_type(glsl_uint_type(), printf_array_size, sizeof(unsigned)), - "kernel_work_properies"); + "printf"); var->data.binding = uav_id; return var; } -static void -lower_printf_impl(nir_builder *b, nir_intrinsic_instr *instr, nir_variable *var) +bool +clc_lower_printf_base(nir_shader *nir, unsigned uav_id) { - /* Atomic add a buffer size counter to determine where to write. - * If overflowed, return -1, otherwise, store the arguments and return 0. 
- */ - b->cursor = nir_before_instr(&instr->instr); - nir_deref_instr *ssbo_deref = nir_build_deref_var(b, var); - nir_deref_instr *counter_deref = nir_build_deref_array_imm(b, ssbo_deref, 0); - nir_deref_instr *struct_deref = nir_instr_as_deref(instr->src[1].ssa->parent_instr); - nir_variable *struct_var = nir_deref_instr_get_variable(struct_deref); - const struct glsl_type *struct_type = struct_var->type; - /* Align the struct size to 4 for natural SSBO alignment */ - int struct_size = align(glsl_get_cl_size(struct_type), 4); - - /* Hardcoding 64bit pointers to simplify some code below */ - assert(instr->src[0].ssa->num_components == 1 && instr->src[0].ssa->bit_size == 64); - - nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, nir_intrinsic_deref_atomic_add); - nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL); - atomic->src[0] = nir_src_for_ssa(&counter_deref->dest.ssa); - atomic->src[1] = nir_src_for_ssa(nir_imm_int(b, struct_size + sizeof(uint64_t))); - nir_builder_instr_insert(b, &atomic->instr); - - int max_valid_offset = - glsl_get_cl_size(var->type) - /* buffer size */ - struct_size - /* printf args size */ - sizeof(uint64_t) - /* format string */ - sizeof(int); /* the first int in the buffer is for the counter */ - nir_push_if(b, nir_ilt(b, &atomic->dest.ssa, nir_imm_int(b, max_valid_offset))); - nir_ssa_def *printf_succ_val = nir_imm_int(b, 0); - - nir_ssa_def *start_offset = nir_u2u64(b, nir_iadd(b, &atomic->dest.ssa, nir_imm_int(b, sizeof(int)))); - nir_deref_instr *as_byte_array = nir_build_deref_cast(b, &ssbo_deref->dest.ssa, nir_var_mem_ssbo, glsl_uint8_t_type(), 1); - nir_deref_instr *as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, start_offset); - nir_deref_instr *format_string_write_deref = - nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, glsl_uint64_t_type(), 8); - nir_store_deref(b, format_string_write_deref, instr->src[0].ssa, ~0); - - for (unsigned i = 0; i < 
glsl_get_length(struct_type); ++i) { - nir_ssa_def *field_offset_from_start = nir_imm_int64(b, glsl_get_struct_field_offset(struct_type, i) + sizeof(uint64_t)); - nir_ssa_def *field_offset = nir_iadd(b, start_offset, field_offset_from_start); - - const struct glsl_type *field_type = glsl_get_struct_field(struct_type, i); - nir_deref_instr *field_read_deref = nir_build_deref_struct(b, struct_deref, i); - nir_ssa_def *field_value = nir_load_deref(b, field_read_deref); - - /* Clang does promotion of arguments to their "native" size. That means that any floats - * have been converted to doubles for the call to printf. Since we don't support doubles, - * convert them back here; copy-prop and other optimizations should remove all hint of doubles. - */ - if (glsl_get_base_type(field_type) == GLSL_TYPE_DOUBLE) { - field_value = nir_f2f32(b, field_value); - field_type = glsl_float_type(); - } + nir_variable *printf_var = NULL; + nir_ssa_def *printf_deref = NULL; + nir_foreach_function(func, nir) { + nir_builder b; + nir_builder_init(&b, func->impl); + b.cursor = nir_before_instr(nir_block_first_instr(nir_start_block(func->impl))); + bool progress = false; - as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, field_offset); - nir_deref_instr *field_write_deref = - nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, nir_var_mem_ssbo, field_type, glsl_get_cl_size(field_type)); + nir_foreach_block(block, func->impl) { + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_intrinsic) + continue; + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address) + continue; - nir_store_deref(b, field_write_deref, field_value, ~0); - } + if (!printf_var) { + printf_var = add_printf_var(nir, uav_id); + nir_deref_instr *deref = nir_build_deref_var(&b, printf_var); + printf_deref = &deref->dest.ssa; + } + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 
nir_src_for_ssa(printf_deref)); + } + } - nir_push_else(b, NULL); - nir_ssa_def *printf_fail_val = nir_imm_int(b, -1); - nir_pop_if(b, NULL); + if (progress) + nir_metadata_preserve(func->impl, nir_metadata_loop_analysis | + nir_metadata_block_index | + nir_metadata_dominance); + else + nir_metadata_preserve(func->impl, nir_metadata_all); + } - nir_ssa_def *return_value = nir_if_phi(b, printf_succ_val, printf_fail_val); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(return_value)); - nir_instr_remove(&instr->instr); + return printf_var != NULL; } static nir_variable * diff --git a/src/microsoft/clc/clc_nir.h b/src/microsoft/clc/clc_nir.h index a452b7a7ff5..77be44ac1ec 100644 --- a/src/microsoft/clc/clc_nir.h +++ b/src/microsoft/clc/clc_nir.h @@ -31,8 +31,7 @@ bool clc_nir_lower_system_values(nir_shader *nir, nir_variable *var); bool dxil_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var); -bool -clc_nir_lower_printf(nir_shader *nir, unsigned uav_id); +bool clc_lower_printf_base(nir_shader *nir, unsigned uav_id); bool clc_nir_dedupe_const_samplers(nir_shader *nir); diff --git a/src/microsoft/clc/compute_test.cpp b/src/microsoft/clc/compute_test.cpp index 44e48871eb5..647523d3142 100644 --- a/src/microsoft/clc/compute_test.cpp +++ b/src/microsoft/clc/compute_test.cpp @@ -557,8 +557,8 @@ ComputeTest::run_shader_with_raw_args(Shader shader, } } - if (dxil->metadata.printf_uav_id > 0) - add_uav_resource(resources, 0, dxil->metadata.printf_uav_id, NULL, 1024 * 1024 / 4, 4); + if (dxil->metadata.printf.uav_id > 0) + add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4); for (unsigned i = 0; i < dxil->metadata.num_consts; ++i) add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id, _______________________________________________ mesa-commit mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-commit
