Bug #18074
---
 src/mesa/drivers/dri/i915/i915_context.c  |    3 +-
 src/mesa/drivers/dri/i915/i915_context.h  |    1 +
 src/mesa/drivers/dri/i915/i915_fragprog.c |   82 +++++++++++++++++++++++++----
 src/mesa/drivers/dri/i915/i915_reg.h      |    4 +-
 4 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index 9bff742..010cf0c 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -159,7 +159,8 @@ i915CreateContext(const __GLcontextModes * mesaVis,
     * instruction can translate to more than one HW instruction, so
     * we'll still have to check and fallback each time.
     */
-   ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
+   ctx->Const.FragmentProgram.MaxTemps = I915_MAX_TEMPS;
+   ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_NATIVE_TEMPS;
    ctx->Const.FragmentProgram.MaxNativeAttribs = 11;    /* 8 tex, 2 color, fog */
    ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
    ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
index 87bbf5f..a5df3fa 100644
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -177,6 +177,7 @@ struct i915_fragment_program
     * it's read. */
    GLuint usedRegs[I915_MAX_INSN];
 
+   GLuint temp_reg_mapping[I915_MAX_TEMPS];
    /* Helpers for i915_fragprog.c:
     */
    GLuint wpos_tex;
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index 46f1740..ef22beb 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -87,11 +87,7 @@ src_vector(struct i915_fragment_program *p,
       /* Registers:
        */
    case PROGRAM_TEMPORARY:
-      if (source->Index >= I915_MAX_TEMPORARY) {
-         i915_program_error(p, "Exceeded max temporary reg");
-         return 0;
-      }
-      src = UREG(REG_TYPE_R, source->Index);
+      src = UREG(REG_TYPE_R, p->temp_reg_mapping[source->Index]);
       break;
    case PROGRAM_INPUT:
       switch (source->Index) {
@@ -272,15 +268,17 @@ do {                                                                      \
 static void calc_live_regs( struct i915_fragment_program *p )
 {
     const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
-    GLuint regsUsed = 0xffff0000;
+    GLuint regsUsed = 0;
     GLint i;
 
     /* Work from the front marking regs as live when they're written to. */
     for (i = 0; i < program->Base.NumInstructions; i++) {
         struct prog_instruction *inst = &program->Base.Instructions[i];
 
-        if (inst->DstReg.File == PROGRAM_TEMPORARY)
+        if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+           assert(inst->DstReg.Index < I915_MAX_TEMPS);
             regsUsed |= 1 << inst->DstReg.Index;
+       }
         p->usedRegs[i] = regsUsed;
     }
 
@@ -292,20 +290,83 @@ static void calc_live_regs( struct i915_fragment_program *p )
         int a;
 
         for (a = 0; a < opArgs; a++) {
-            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY)
+           if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
+               assert(inst->SrcReg[a].Index < I915_MAX_TEMPS);
                 regsUsed |= 1 << inst->SrcReg[a].Index;
+           }
         }
         p->usedRegs[i] &= regsUsed;
     }
 }
 
+/* Returns the set of live hw registers at the given instruction, by
+ * translating the live sw temporaries through p->temp_reg_mapping.
+ */
 static GLuint get_live_regs( struct i915_fragment_program *p, 
                              const struct prog_instruction *inst )
 {
     const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
     GLuint nr = inst - program->Base.Instructions;
+    GLuint live_sw_regs = p->usedRegs[nr];
+    GLuint live_hw_regs = 0;
+
+    while (live_sw_regs != 0) {
+       int sw_reg = ffs(live_sw_regs) - 1;
+
+       live_sw_regs &= ~(1u << sw_reg);   /* 1u: sw_reg may be 31 */
+       live_hw_regs |= 1u << p->temp_reg_mapping[sw_reg];
+    }
 
-    return p->usedRegs[nr];
+    /* Return invalid hw regs as live, as the consumer looking for a
+     * temporary will use ffs on the inverse.
+     */
+    return 0xffff0000 | live_hw_regs;
+}
+
+/**
+ * Fills p->temp_reg_mapping, mapping ARB program temporary indices to native
+ * (hw) indices, so programs using more temporaries than the hw has can still
+ * run.  Flags a program error if too many are live at once.  Relies on
+ * calc_live_regs marking a temporary live from first write through last
+ * read, as otherwise we might assign it two different hw regs and end up
+ * stomping on live stuff.
+ */
+static void
+create_temp_mapping(struct i915_fragment_program *p)
+{
+   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+   GLuint avail_hw_regs = 0xffff; /* (1 << I915_MAX_NATIVE_TEMPS) - 1 */
+   GLuint assigned_sw_indices = 0;
+   int i;
+
+   for (i = 0; i < program->Base.NumInstructions; i++) {
+      int sw_to_assign;
+
+      /* Get a hardware index for any newly-live registers. */
+      while ((sw_to_assign = ffs(p->usedRegs[i] &
+                                ~assigned_sw_indices) - 1) != -1) {
+         int hw_reg = ffs(avail_hw_regs) - 1;
+         if (hw_reg == -1) {
+            i915_program_error(p, "Exceeded max native temporaries");
+            return;
+         }
+
+         avail_hw_regs &= ~(1u << hw_reg);
+         assigned_sw_indices |= 1u << sw_to_assign;  /* 1u: index may be 31 */
+         p->temp_reg_mapping[sw_to_assign] = hw_reg;
+      }
+
+      /* Restore availability for newly-dead registers */
+      if (i > 0) {
+         GLuint sw_new_dead = p->usedRegs[i - 1] & ~p->usedRegs[i];
+         int sw_to_restore;
+
+         while ((sw_to_restore = ffs(sw_new_dead) - 1) != -1) {
+            avail_hw_regs |= 1u << p->temp_reg_mapping[sw_to_restore];
+            sw_new_dead &= ~(1u << sw_to_restore);
+         }
+      }
+   }
 }
 
 /* Possible concerns:
@@ -347,9 +408,8 @@ upload_program(struct i915_fragment_program *p)
        return;
     }
 
-   /* Not always needed:
-    */
    calc_live_regs(p);
+   create_temp_mapping(p);
 
    while (1) {
       GLuint src0, src1, src2, flags;
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
index 8891e11..b4117eb 100644
--- a/src/mesa/drivers/dri/i915/i915_reg.h
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -374,8 +374,8 @@
 #define I915_MAX_TEX_INSN     32
 #define I915_MAX_ALU_INSN     64
 #define I915_MAX_DECL_INSN    27
-#define I915_MAX_TEMPORARY    16
-
+#define I915_MAX_NATIVE_TEMPS 16
+#define I915_MAX_TEMPS        32
 
 /* Each instruction is 3 dwords long, though most don't require all
  * this space.  Maximum of 123 instructions.  Smaller maxes per insn
-- 
1.5.6.5


------------------------------------------------------------------------------
Check out the new SourceForge.net Marketplace.
It is the best place to buy or sell services for
just about anything Open Source.
http://p.sf.net/sfu/Xq1LFB
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to