On Sat, Mar 14, 2015 at 9:54 PM, Jordan Justen <jordan.l.jus...@intel.com> wrote: > Signed-off-by: Jordan Justen <jordan.l.jus...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_context.h | 1 + > src/mesa/drivers/dri/i965/brw_cs.cpp | 216 > +++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/brw_state_upload.c | 3 + > 3 files changed, 220 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_context.h > b/src/mesa/drivers/dri/i965/brw_context.h > index fb24f0e..170c0c6 100644 > --- a/src/mesa/drivers/dri/i965/brw_context.h > +++ b/src/mesa/drivers/dri/i965/brw_context.h > @@ -148,6 +148,7 @@ struct brw_vs_prog_key; > struct brw_vue_prog_key; > struct brw_wm_prog_key; > struct brw_wm_prog_data; > +struct brw_cs_prog_key; > struct brw_cs_prog_data; > > enum brw_pipeline { > diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp > b/src/mesa/drivers/dri/i965/brw_cs.cpp > index 8021147..5be740c 100644 > --- a/src/mesa/drivers/dri/i965/brw_cs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp > @@ -22,8 +22,15 @@ > */ > > > +#include "util/ralloc.h" > #include "brw_context.h" > #include "brw_cs.h" > +#include "brw_fs.h" > +#include "brw_eu.h" > +#include "brw_wm.h" > +#include "intel_mipmap_tree.h" > +#include "brw_state.h" > +#include "intel_batchbuffer.h" > > extern "C" > bool > @@ -46,3 +53,212 @@ brw_cs_prog_data_compare(const void *in_a, const void > *in_b) > > return true; > } > + > + > +static const unsigned * > +brw_cs_emit(struct brw_context *brw, > + void *mem_ctx, > + const struct brw_cs_prog_key *key, > + struct brw_cs_prog_data *prog_data, > + struct gl_compute_program *cp, > + struct gl_shader_program *prog, > + unsigned *final_assembly_size) > +{ > + bool start_busy = false; > + double start_time = 0; > + > + if (unlikely(brw->perf_debug)) { > + start_busy = (brw->batch.last_bo && > + drm_intel_bo_busy(brw->batch.last_bo)); > + start_time = get_time(); > + } > + > + struct brw_shader *shader = NULL; > + if (prog) > + shader = (struct brw_shader *) > 
prog->_LinkedShaders[MESA_SHADER_COMPUTE]; > + > + if (unlikely(INTEL_DEBUG & DEBUG_CS)) > + brw_dump_ir("compute", prog, &shader->base, &cp->Base); > + > + /* Now the main event: Visit the shader IR and generate our CS IR for it. > + */ > + fs_visitor v(brw, mem_ctx, key, prog_data, prog, cp, 8); > + if (!v.run_cs()) { > + if (prog) { > + prog->LinkStatus = false; > + ralloc_strcat(&prog->InfoLog, v.fail_msg); > + } > + > + _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", > + v.fail_msg); > + > + return NULL; > + } > + > + cfg_t *simd16_cfg = NULL; > + fs_visitor v2(brw, mem_ctx, key, prog_data, prog, cp, 16); > + if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
If CS is Gen7+ we don't need the >= 5 check here. > + if (!v.simd16_unsupported) { > + /* Try a SIMD16 compile */ > + v2.import_uniforms(&v); > + if (!v2.run_cs()) { > + perf_debug("SIMD16 shader failed to compile, falling back to " > + "SIMD8 at a 10-20%% performance cost: %s", > v2.fail_msg); > + } else { > + simd16_cfg = v2.cfg; > + } > + } else { > + perf_debug("SIMD16 shader unsupported, falling back to " > + "SIMD8 at a 10-20%% performance cost: %s", v.no16_msg); > + } > + } > + > + prog_data->local_size[0] = cp->LocalSize[0]; > + prog_data->local_size[1] = cp->LocalSize[1]; > + prog_data->local_size[2] = cp->LocalSize[2]; > + > + cfg_t *simd8_cfg; > + int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8; > + if (no_simd8 && simd16_cfg) { > + simd8_cfg = NULL; > + prog_data->no_8 = true; > + } else { > + simd8_cfg = v.cfg; > + prog_data->no_8 = false; > + } > + > + fs_generator g(brw, mem_ctx, (void*) key, &prog_data->base, &cp->Base, > + v.runtime_check_aads_emit, "CS"); > + if (INTEL_DEBUG & DEBUG_CS) { > + char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d", > + prog->Label ? 
prog->Label : "unnamed", > + prog->Name); > + g.enable_debug(name); > + } > + if (simd16_cfg) { > + prog_data->simd_size = 16; > + g.generate_code(simd16_cfg, 16); > + } else if (simd8_cfg) { > + prog_data->simd_size = 8; > + g.generate_code(simd8_cfg, 8); > + } > + > + if (unlikely(brw->perf_debug) && shader) { > + if (shader->compiled_once) { > + _mesa_problem(&brw->ctx, "CS programs shouldn't need recompiles"); > + } > + shader->compiled_once = true; > + > + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { > + perf_debug("CS compile took %.03f ms and stalled the GPU\n", > + (get_time() - start_time) * 1000); > + } > + } > + > + return g.get_assembly(final_assembly_size); > +} > + > +static bool > +do_cs_prog(struct brw_context *brw, > + struct gl_shader_program *prog, > + struct brw_compute_program *cp, > + struct brw_cs_prog_key *key) > +{ > + struct gl_context *ctx = &brw->ctx; > + const GLuint *program; > + void *mem_ctx = ralloc_context(NULL); > + GLuint program_size; > + struct brw_cs_prog_data prog_data; > + struct gl_shader *cs = NULL; > + > + if (prog) > + cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; > + > + memset(&prog_data, 0, sizeof(prog_data)); > + > + /* Allocate the references to the uniforms that will end up in the > + * prog_data associated with the compiled program, and which will be freed > + * by the state cache. > + */ > + int param_count; > + if (cs) { > + param_count = cs->num_uniform_components; > + } else { > + param_count = cp->program.Base.Parameters->NumParameters * 4; > + } > + > + /* The backend also sometimes adds params for texture size. 
*/ > + param_count += 2 * > ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; > + prog_data.base.param = > + rzalloc_array(NULL, const gl_constant_value *, param_count); > + prog_data.base.pull_param = > + rzalloc_array(NULL, const gl_constant_value *, param_count); > + prog_data.base.nr_params = param_count; > + > + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, > + &cp->program, prog, &program_size); > + if (program == NULL) { > + ralloc_free(mem_ctx); > + return false; > + } > + > + // if (prog_data.total_scratch) { > + // brw_get_scratch_bo(brw, &brw->wm.base.scratch_bo, > + // prog_data.total_scratch * brw->max_wm_threads); > + // } This looks unfinished :) > + if (unlikely(INTEL_DEBUG & DEBUG_CS)) > + fprintf(stderr, "\n"); > + > + brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG, > + key, sizeof(*key), > + program, program_size, > + &prog_data, sizeof(prog_data), > + &brw->cs.base.prog_offset, &brw->cs.prog_data); > + ralloc_free(mem_ctx); > + > + return true; > +} > + > + > +static void > +brw_cs_populate_key(struct brw_context *brw, struct brw_cs_prog_key *key) > +{ > + /* BRW_NEW_COMPUTE_PROGRAM */ > + const struct brw_compute_program *cp = > + (struct brw_compute_program *) brw->compute_program; > + > + memset(key, 0, sizeof(*key)); We want brw_populate_sampler_prog_key_data() here, right? 
> + /* The unique compute program ID */ > + key->program_string_id = cp->id; > +} > + > + > +extern "C" > +void > +brw_upload_cs_prog(struct brw_context *brw) > +{ > + struct gl_context *ctx = &brw->ctx; > + struct brw_cs_prog_key key; > + struct brw_compute_program *cp = (struct brw_compute_program *) > + brw->compute_program; > + > + if (!cp) > + return; > + > + if (!brw_state_dirty(brw, 0, BRW_NEW_COMPUTE_PROGRAM)) > + return; > + > + brw_cs_populate_key(brw, &key); > + > + if (!brw_search_cache(&brw->cache, BRW_CACHE_CS_PROG, > + &key, sizeof(key), > + &brw->cs.base.prog_offset, &brw->cs.prog_data)) { > + bool success = do_cs_prog(brw, > ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE], cp, > + &key); > + (void) success; > + assert(success); > + } > + brw->cs.base.prog_data = &brw->cs.prog_data->base; > +} > diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c > b/src/mesa/drivers/dri/i965/brw_state_upload.c > index 25f2ff0..952d3c1 100644 > --- a/src/mesa/drivers/dri/i965/brw_state_upload.c > +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c > @@ -40,6 +40,7 @@ > #include "brw_ff_gs.h" > #include "brw_gs.h" > #include "brw_wm.h" > +#include "brw_cs.h" > > static const struct brw_tracked_state *gen4_atoms[] = > { > @@ -606,6 +607,8 @@ brw_upload_programs(struct brw_context *brw) > brw_upload_gs_prog(brw); > > brw_upload_wm_prog(brw); > + > + brw_upload_cs_prog(brw); > } > > static inline void > -- > 2.1.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev