Module: Mesa
Branch: master
Commit: 4dbcf24f6826055529a7b2212f113cb8ad089fed
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4dbcf24f6826055529a7b2212f113cb8ad089fed

Author: Jesse Natalie <[email protected]>
Date:   Wed Nov 18 12:22:27 2020 -0800

microsoft/clc: Hook up printf

Rewrites the original lowering pass to use the one shared with Clover;
the local pass now only handles the new load_printf_buffer_address intrinsic.

Exports new metadata, containing the format strings and argument sizes, to the runtime.

Reviewed-by: Erik Faye-Lund <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8254>

---

 src/microsoft/clc/clc_compiler.c   | 39 +++++++++++++--
 src/microsoft/clc/clc_compiler.h   | 12 ++++-
 src/microsoft/clc/clc_nir.c        | 99 +++++++++++++-------------------------
 src/microsoft/clc/clc_nir.h        |  3 +-
 src/microsoft/clc/compute_test.cpp |  4 +-
 5 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/src/microsoft/clc/clc_compiler.c b/src/microsoft/clc/clc_compiler.c
index dc841865132..642dca16872 100644
--- a/src/microsoft/clc/clc_compiler.c
+++ b/src/microsoft/clc/clc_compiler.c
@@ -1051,6 +1051,7 @@ clc_to_dxil(struct clc_context *ctx,
          .kernel = true,
          .kernel_image = true,
          .literal_sampler = true,
+         .printf = true,
       },
    };
    nir_shader_compiler_options nir_options =
@@ -1235,9 +1236,26 @@ clc_to_dxil(struct clc_context *ctx,
    // Lower memcpy
    NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);
 
-   bool has_printf = false;
-   //NIR_PASS(has_printf, nir, clc_nir_lower_printf, uav_id);
-   metadata->printf_uav_id = has_printf ? uav_id++ : -1;
+   // Ensure the printf struct has explicit types, but we'll throw away the 
scratch size, because we haven't
+   // necessarily removed all temp variables (e.g. the printf struct itself) 
at this point, so we'll rerun this later
+   assert(nir->scratch_size == 0);
+   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, 
glsl_get_cl_type_size_align);
+
+   nir_lower_printf_options printf_options = {
+      .treat_doubles_as_floats = true,
+      .max_buffer_size = 1024 * 1024
+   };
+   NIR_PASS_V(nir, nir_lower_printf, &printf_options);
+
+   metadata->printf.info_count = nir->printf_info_count;
+   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct 
clc_printf_info));
+   for (unsigned i = 0; i < nir->printf_info_count; i++) {
+      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
+      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, 
nir->printf_info[i].string_size);
+      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
+      metadata->printf.infos[i].arg_sizes = 
malloc(nir->printf_info[i].num_args * sizeof(unsigned));
+      memcpy(metadata->printf.infos[i].arg_sizes, 
nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
+   }
 
    // copy propagate to prepare for lower_explicit_io
    NIR_PASS_V(nir, nir_split_var_copies);
@@ -1258,8 +1276,8 @@ clc_to_dxil(struct clc_context *ctx,
               int_sampler_states, NULL, 14.0f);
 
    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | 
nir_var_function_temp, NULL);
-   assert(nir->scratch_size == 0);
-   
+
+   nir->scratch_size = 0;
    NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
               nir_var_mem_shared | nir_var_function_temp | nir_var_uniform | 
nir_var_mem_global | nir_var_mem_constant,
               glsl_get_cl_type_size_align);
@@ -1267,6 +1285,11 @@ clc_to_dxil(struct clc_context *ctx,
    NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
    NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id);
    NIR_PASS_V(nir, clc_lower_global_to_ssbo);
+
+   bool has_printf = false;
+   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
+   metadata->printf.uav_id = has_printf ? uav_id++ : -1;
+
    NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);
 
    NIR_PASS_V(nir, split_unaligned_loads_stores);
@@ -1432,6 +1455,12 @@ void clc_free_dxil_object(struct clc_dxil_object *dxil)
    for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
       free(dxil->metadata.consts[i].data);
 
+   for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
+      free(dxil->metadata.printf.infos[i].arg_sizes);
+      free(dxil->metadata.printf.infos[i].str);
+   }
+   free(dxil->metadata.printf.infos);
+
    free(dxil->binary.data);
    free(dxil);
 }
diff --git a/src/microsoft/clc/clc_compiler.h b/src/microsoft/clc/clc_compiler.h
index 8b73d9edc48..6981bbaf415 100644
--- a/src/microsoft/clc/clc_compiler.h
+++ b/src/microsoft/clc/clc_compiler.h
@@ -118,6 +118,12 @@ struct clc_object {
 #define CLC_MAX_BINDINGS_PER_ARG 3
 #define CLC_MAX_SAMPLERS 16
 
+struct clc_printf_info {
+   unsigned num_args;
+   unsigned *arg_sizes;
+   char *str;
+};
+
 struct clc_dxil_metadata {
    struct {
       unsigned offset;
@@ -165,7 +171,11 @@ struct clc_dxil_metadata {
    uint16_t local_size[3];
    uint16_t local_size_hint[3];
 
-   int printf_uav_id;
+   struct {
+      unsigned info_count;
+      struct clc_printf_info *infos;
+      int uav_id;
+   } printf;
 };
 
 struct clc_dxil_object {
diff --git a/src/microsoft/clc/clc_nir.c b/src/microsoft/clc/clc_nir.c
index 2dfeb925bff..206365ab896 100644
--- a/src/microsoft/clc/clc_nir.c
+++ b/src/microsoft/clc/clc_nir.c
@@ -245,81 +245,48 @@ add_printf_var(struct nir_shader *nir, unsigned uav_id)
    nir_variable *var =
       nir_variable_create(nir, nir_var_mem_ssbo,
                           glsl_array_type(glsl_uint_type(), printf_array_size, 
sizeof(unsigned)),
-                          "kernel_work_properies");
+                          "printf");
    var->data.binding = uav_id;
    return var;
 }
 
-static void
-lower_printf_impl(nir_builder *b, nir_intrinsic_instr *instr, nir_variable 
*var)
+bool
+clc_lower_printf_base(nir_shader *nir, unsigned uav_id)
 {
-   /* Atomic add a buffer size counter to determine where to write.
-    * If overflowed, return -1, otherwise, store the arguments and return 0.
-    */
-   b->cursor = nir_before_instr(&instr->instr);
-   nir_deref_instr *ssbo_deref = nir_build_deref_var(b, var);
-   nir_deref_instr *counter_deref = nir_build_deref_array_imm(b, ssbo_deref, 
0);
-   nir_deref_instr *struct_deref = 
nir_instr_as_deref(instr->src[1].ssa->parent_instr);
-   nir_variable *struct_var = nir_deref_instr_get_variable(struct_deref);
-   const struct glsl_type *struct_type = struct_var->type;
-   /* Align the struct size to 4 for natural SSBO alignment */
-   int struct_size = align(glsl_get_cl_size(struct_type), 4);
-
-   /* Hardcoding 64bit pointers to simplify some code below */
-   assert(instr->src[0].ssa->num_components == 1 && 
instr->src[0].ssa->bit_size == 64);
-
-   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, 
nir_intrinsic_deref_atomic_add);
-   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
-   atomic->src[0] = nir_src_for_ssa(&counter_deref->dest.ssa);
-   atomic->src[1] = nir_src_for_ssa(nir_imm_int(b, struct_size + 
sizeof(uint64_t)));
-   nir_builder_instr_insert(b, &atomic->instr);
-
-   int max_valid_offset =
-      glsl_get_cl_size(var->type) - /* buffer size */
-      struct_size - /* printf args size */
-      sizeof(uint64_t) - /* format string */
-      sizeof(int); /* the first int in the buffer is for the counter */
-   nir_push_if(b, nir_ilt(b, &atomic->dest.ssa, nir_imm_int(b, 
max_valid_offset)));
-   nir_ssa_def *printf_succ_val = nir_imm_int(b, 0);
-
-   nir_ssa_def *start_offset = nir_u2u64(b, nir_iadd(b, &atomic->dest.ssa, 
nir_imm_int(b, sizeof(int))));
-   nir_deref_instr *as_byte_array = nir_build_deref_cast(b, 
&ssbo_deref->dest.ssa, nir_var_mem_ssbo, glsl_uint8_t_type(), 1);
-   nir_deref_instr *as_offset_byte_array = nir_build_deref_ptr_as_array(b, 
as_byte_array, start_offset);
-   nir_deref_instr *format_string_write_deref =
-      nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, 
nir_var_mem_ssbo, glsl_uint64_t_type(), 8);
-   nir_store_deref(b, format_string_write_deref, instr->src[0].ssa, ~0);
-
-   for (unsigned i = 0; i < glsl_get_length(struct_type); ++i) {
-      nir_ssa_def *field_offset_from_start = nir_imm_int64(b, 
glsl_get_struct_field_offset(struct_type, i) + sizeof(uint64_t));
-      nir_ssa_def *field_offset = nir_iadd(b, start_offset, 
field_offset_from_start);
-
-      const struct glsl_type *field_type = glsl_get_struct_field(struct_type, 
i);
-      nir_deref_instr *field_read_deref = nir_build_deref_struct(b, 
struct_deref, i);
-      nir_ssa_def *field_value = nir_load_deref(b, field_read_deref);
-
-      /* Clang does promotion of arguments to their "native" size. That means 
that any floats
-       * have been converted to doubles for the call to printf. Since we don't 
support doubles,
-       * convert them back here; copy-prop and other optimizations should 
remove all hint of doubles.
-       */
-      if (glsl_get_base_type(field_type) == GLSL_TYPE_DOUBLE) {
-         field_value = nir_f2f32(b, field_value);
-         field_type = glsl_float_type();
-      }
+   nir_variable *printf_var = NULL;
+   nir_ssa_def *printf_deref = NULL;
+   nir_foreach_function(func, nir) {
+      nir_builder b;
+      nir_builder_init(&b, func->impl);
+      b.cursor = 
nir_before_instr(nir_block_first_instr(nir_start_block(func->impl)));
+      bool progress = false;
 
-      as_offset_byte_array = nir_build_deref_ptr_as_array(b, as_byte_array, 
field_offset);
-      nir_deref_instr *field_write_deref =
-         nir_build_deref_cast(b, &as_offset_byte_array->dest.ssa, 
nir_var_mem_ssbo, field_type, glsl_get_cl_size(field_type));
+      nir_foreach_block(block, func->impl) {
+         nir_foreach_instr_safe(instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            if (intrin->intrinsic != nir_intrinsic_load_printf_buffer_address)
+               continue;
 
-      nir_store_deref(b, field_write_deref, field_value, ~0);
-   }
+            if (!printf_var) {
+               printf_var = add_printf_var(nir, uav_id);
+               nir_deref_instr *deref = nir_build_deref_var(&b, printf_var);
+               printf_deref = &deref->dest.ssa;
+            }
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 
nir_src_for_ssa(printf_deref));
+         }
+      }
 
-   nir_push_else(b, NULL);
-   nir_ssa_def *printf_fail_val = nir_imm_int(b, -1);
-   nir_pop_if(b, NULL);
+      if (progress)
+         nir_metadata_preserve(func->impl, nir_metadata_loop_analysis |
+                                           nir_metadata_block_index |
+                                           nir_metadata_dominance);
+      else
+         nir_metadata_preserve(func->impl, nir_metadata_all);
+   }
 
-   nir_ssa_def *return_value = nir_if_phi(b, printf_succ_val, printf_fail_val);
-   nir_ssa_def_rewrite_uses(&instr->dest.ssa, nir_src_for_ssa(return_value));
-   nir_instr_remove(&instr->instr);
+   return printf_var != NULL;
 }
 
 static nir_variable *
diff --git a/src/microsoft/clc/clc_nir.h b/src/microsoft/clc/clc_nir.h
index a452b7a7ff5..77be44ac1ec 100644
--- a/src/microsoft/clc/clc_nir.h
+++ b/src/microsoft/clc/clc_nir.h
@@ -31,8 +31,7 @@ bool
 clc_nir_lower_system_values(nir_shader *nir, nir_variable *var);
 bool dxil_nir_lower_kernel_input_loads(nir_shader *nir, nir_variable *var);
 
-bool
-clc_nir_lower_printf(nir_shader *nir, unsigned uav_id);
+bool clc_lower_printf_base(nir_shader *nir, unsigned uav_id);
 
 bool
 clc_nir_dedupe_const_samplers(nir_shader *nir);
diff --git a/src/microsoft/clc/compute_test.cpp 
b/src/microsoft/clc/compute_test.cpp
index 44e48871eb5..647523d3142 100644
--- a/src/microsoft/clc/compute_test.cpp
+++ b/src/microsoft/clc/compute_test.cpp
@@ -557,8 +557,8 @@ ComputeTest::run_shader_with_raw_args(Shader shader,
       }
    }
 
-   if (dxil->metadata.printf_uav_id > 0)
-      add_uav_resource(resources, 0, dxil->metadata.printf_uav_id, NULL, 1024 
* 1024 / 4, 4);
+   if (dxil->metadata.printf.uav_id > 0)
+      add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 
* 1024 / 4, 4);
 
    for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
       add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to