This patch adds support for large shaders on GC3000. For example, the "terrain"
glmark benchmark, which uses a large fragment shader, will work after this.

If the GPU supports ICACHE, shaders larger than the available state area will
be uploaded to a bo of their own, and the GPU will be instructed to load them
from memory on demand. Small shaders will be uploaded in the usual way. This
mimics the behavior of the blob.

On GPUs that don't support ICACHE, this patch should make no difference.

Signed-off-by: Wladimir J. van der Laan <laa...@gmail.com>
---
 src/gallium/drivers/etnaviv/etnaviv_compiler.c |  3 +-
 src/gallium/drivers/etnaviv/etnaviv_compiler.h |  5 +++
 src/gallium/drivers/etnaviv/etnaviv_emit.c     | 52 ++++++++++++++++++--------
 src/gallium/drivers/etnaviv/etnaviv_internal.h |  4 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c   |  4 +-
 src/gallium/drivers/etnaviv/etnaviv_shader.c   | 45 +++++++++++++++++++++-
 6 files changed, 95 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.c 
b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
index fbe66d0..0664d52 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.c
@@ -2277,7 +2277,7 @@ etna_compile_check_limits(struct etna_compile *c)
    /* round up number of uniforms, including immediates, in units of four */
    int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;
 
-   if (c->inst_ptr > c->specs->max_instructions) {
+   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
       DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
           c->specs->max_instructions);
       return false;
@@ -2501,6 +2501,7 @@ etna_compile_shader(struct etna_shader_variant *v)
    v->vs_pointsize_out_reg = -1;
    v->ps_color_out_reg = -1;
    v->ps_depth_out_reg = -1;
+   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
    copy_uniform_state_to_shader(c, v);
 
    if (c->info.processor == PIPE_SHADER_VERTEX) {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler.h 
b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
index 88a093f..f5c1689 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler.h
@@ -94,12 +94,17 @@ struct etna_shader_variant {
    /* unknown input property (XX_INPUT_COUNT, field UNK8) */
    uint32_t input_count_unk8;
 
+   /* shader is larger than GPU instruction limit, thus needs icache */
+   bool needs_icache;
+
    /* shader variants form a linked list */
    struct etna_shader_variant *next;
 
    /* replicated here to avoid passing extra ptrs everywhere */
    struct etna_shader *shader;
    struct etna_shader_key key;
+
+   struct etna_bo *bo; /* cached code memory bo handle (for icache) */
 };
 
 struct etna_varying {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_emit.c 
b/src/gallium/drivers/etnaviv/etnaviv_emit.c
index 273b3d0..c2117d5 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
@@ -421,9 +421,6 @@ etna_emit_state(struct etna_context *ctx)
    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
       /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, 
ctx->shader_state.VS_LOAD_BALANCING);
       /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
-      if (ctx->specs.has_shader_range_registers) {
-         /*0085C*/ EMIT_STATE(VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 
4 - 1) << 16);
-      }
    }
    if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
       /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, 
ctx->viewport.PA_VIEWPORT_SCALE_X);
@@ -534,10 +531,6 @@ etna_emit_state(struct etna_context *ctx)
                               : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
       /*01010*/ EMIT_STATE(PS_CONTROL, ctx->shader_state.PS_CONTROL);
       /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
-      if (ctx->specs.has_shader_range_registers) {
-         /*0101C*/ EMIT_STATE(PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 
4 - 1 + 0x100) << 16) |
-                                        0x100);
-      }
    }
    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
       uint32_t val = etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG;
@@ -739,14 +732,43 @@ etna_emit_state(struct etna_context *ctx)
    if (dirty & (ETNA_DIRTY_SHADER)) {
       /* Special case: a new shader was loaded; simply re-load all uniforms and
        * shader code at once */
-      /*04000 or 0C000*/
-      etna_set_state_multi(stream, ctx->specs.vs_offset,
-                           ctx->shader_state.vs_inst_mem_size,
-                           ctx->shader_state.VS_INST_MEM);
-      /*06000 or 0D000*/
-      etna_set_state_multi(stream, ctx->specs.ps_offset,
-                           ctx->shader_state.ps_inst_mem_size,
-                           ctx->shader_state.PS_INST_MEM);
+      if (ctx->shader_state.VS_INST_ADDR.bo || 
ctx->shader_state.PS_INST_ADDR.bo) {
+         assert(ctx->specs.has_icache && 
ctx->specs.has_shader_range_registers);
+         /* Set icache (VS) */
+         etna_set_state(stream, VIVS_VS_RANGE, 
(ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
+         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+               VIVS_VS_ICACHE_CONTROL_ENABLE |
+               VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
+         assert(ctx->shader_state.VS_INST_ADDR.bo);
+         etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, 
&ctx->shader_state.VS_INST_ADDR);
+
+         /* Set icache (PS) */
+         etna_set_state(stream, VIVS_PS_RANGE, 
(ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
+         etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+               VIVS_VS_ICACHE_CONTROL_ENABLE |
+               VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
+         assert(ctx->shader_state.PS_INST_ADDR.bo);
+         etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, 
&ctx->shader_state.PS_INST_ADDR);
+      } else {
+         /* Upload shader directly, first flushing and disabling icache if
+          * supported on this hw */
+         if (ctx->specs.has_icache) {
+            etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
+                  VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
+                  VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
+         }
+         if (ctx->specs.has_shader_range_registers) {
+            etna_set_state(stream, VIVS_VS_RANGE, 
(ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
+            etna_set_state(stream, VIVS_PS_RANGE, 
((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
+                                        0x100);
+         }
+         etna_set_state_multi(stream, ctx->specs.vs_offset,
+                              ctx->shader_state.vs_inst_mem_size,
+                              ctx->shader_state.VS_INST_MEM);
+         etna_set_state_multi(stream, ctx->specs.ps_offset,
+                              ctx->shader_state.ps_inst_mem_size,
+                              ctx->shader_state.PS_INST_MEM);
+      }
 
       if (ctx->specs.has_unified_uniforms) {
          etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_internal.h 
b/src/gallium/drivers/etnaviv/etnaviv_internal.h
index 5c13f23..a6544f6 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_internal.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_internal.h
@@ -76,6 +76,8 @@ struct etna_specs {
    unsigned single_buffer : 1;
    /* has unified uniforms memory */
    unsigned has_unified_uniforms : 1;
+   /* can load shader instructions from memory */
+   unsigned has_icache : 1;
    /* can use any kind of wrapping mode on npot textures */
    unsigned npot_tex_any_wrap;
    /* number of bits per TS tile */
@@ -250,6 +252,8 @@ struct compiled_shader_state {
    uint32_t VS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
    uint32_t *PS_INST_MEM;
    uint32_t PS_UNIFORMS[ETNA_MAX_UNIFORMS * 4];
+   struct etna_reloc PS_INST_ADDR;
+   struct etna_reloc VS_INST_ADDR;
 };
 
 /* state of some 3d and common registers relevant to etna driver */
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c 
b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 81480e9..4fcbe87 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -665,7 +665,8 @@ etna_get_specs(struct etna_screen *screen)
        * same.
        */
       screen->specs.ps_offset = 0x8000 + 0x1000;
-      screen->specs.max_instructions = 256;
+      screen->specs.max_instructions = 256; /* maximum number instructions for 
non-icache use */
+      screen->specs.has_icache = true;
    } else {
       if (instruction_count > 256) { /* unified instruction memory? */
          screen->specs.vs_offset = 0xC000;
@@ -676,6 +677,7 @@ etna_get_specs(struct etna_screen *screen)
          screen->specs.ps_offset = 0x6000;
          screen->specs.max_instructions = instruction_count / 2;
       }
+      screen->specs.has_icache = false;
    }
 
    if (VIV_FEATURE(screen, chipMinorFeatures1, HALTI0)) {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_shader.c 
b/src/gallium/drivers/etnaviv/etnaviv_shader.c
index b5256e4..6012680 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_shader.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_shader.c
@@ -29,12 +29,30 @@
 #include "etnaviv_compiler.h"
 #include "etnaviv_context.h"
 #include "etnaviv_debug.h"
+#include "etnaviv_screen.h"
 #include "etnaviv_util.h"
 
 #include "tgsi/tgsi_parse.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
+/* Upload the code of variant v into a bo of its own, kept in v->bo; returns true if v->bo was already set or the copy was made, false if etna_bo_new() fails */
+static bool etna_icache_upload_shader(struct etna_context *ctx, struct 
etna_shader_variant *v)
+{
+   if (v->bo)
+      return true; /* v->bo already set: skip the upload */
+   v->bo = etna_bo_new(ctx->screen->dev, v->code_size*4, 
DRM_ETNA_GEM_CACHE_UNCACHED);
+   if (!v->bo)
+      return false;
+
+   void *buf = etna_bo_map(v->bo); /* NOTE(review): result is not checked before the memcpy below - confirm the map cannot fail here */
+   etna_bo_cpu_prep(v->bo, DRM_ETNA_PREP_WRITE);
+   memcpy(buf, v->code, v->code_size*4); /* byte count is code_size*4; the DBG below reports code_size as words */
+   etna_bo_cpu_fini(v->bo);
+   DBG("Uploaded %s of %u words to bo %p", v->processor == 
PIPE_SHADER_FRAGMENT ? "fs":"vs", v->code_size, v->bo);
+   return true;
+}
+
 /* Link vs and fs together: fill in shader_state from vs and fs
  * as this function is called every time a new fs or vs is bound, the goal is 
to
  * do little processing as possible here, and to precompute as much as 
possible in
@@ -45,7 +63,7 @@
  */
 static bool
 etna_link_shaders(struct etna_context *ctx, struct compiled_shader_state *cs,
-                  const struct etna_shader_variant *vs, const struct 
etna_shader_variant *fs)
+                  struct etna_shader_variant *vs, struct etna_shader_variant 
*fs)
 {
    struct etna_shader_link_info link = { };
 
@@ -164,9 +182,32 @@ etna_link_shaders(struct etna_context *ctx, struct 
compiled_shader_state *cs,
    /* reference instruction memory */
    cs->vs_inst_mem_size = vs->code_size;
    cs->VS_INST_MEM = vs->code;
+
    cs->ps_inst_mem_size = fs->code_size;
    cs->PS_INST_MEM = fs->code;
 
+   if (vs->needs_icache | fs->needs_icache) {
+      /* If either of the shaders needs ICACHE, we use it for both. It is
+       * either switched on or off for the entire shader processor.
+       */
+      if (!etna_icache_upload_shader(ctx, vs) ||
+          !etna_icache_upload_shader(ctx, fs)) {
+         assert(0);
+         return false;
+      }
+
+      cs->VS_INST_ADDR.bo = vs->bo;
+      cs->VS_INST_ADDR.offset = 0;
+      cs->VS_INST_ADDR.flags = ETNA_RELOC_READ;
+      cs->PS_INST_ADDR.bo = fs->bo;
+      cs->PS_INST_ADDR.offset = 0;
+      cs->PS_INST_ADDR.flags = ETNA_RELOC_READ;
+   } else {
+      /* clear relocs */
+      memset(&cs->VS_INST_ADDR, 0, sizeof(cs->VS_INST_ADDR));
+      memset(&cs->PS_INST_ADDR, 0, sizeof(cs->PS_INST_ADDR));
+   }
+
    return true;
 }
 
@@ -352,6 +393,8 @@ etna_delete_shader_state(struct pipe_context *pctx, void 
*ss)
    while (v) {
       t = v;
       v = v->next;
+      if (t->bo)
+         etna_bo_del(t->bo);
       etna_destroy_shader(t);
    }
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to