Hi,
this patch fixes a problem that occurs when a KIL instruction's input depends
on some other instruction (the KIL inst is in >1 node). The KIL instructions
weren't marked as dependencies of the output instructions, so an output
instruction could be emitted before them.
It fixes the piglit/fp-kil tests.
It should apply cleanly to master and radeon-rewrite.
Regards,
Maciej Cencora
From 9c35c2972e5581a5e47856de601856ddba25d52c Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Tue, 21 Apr 2009 02:38:08 +0200
Subject: [PATCH] r300: always emit output insts after all KIL insts
---
src/mesa/drivers/dri/r300/r300_state.c | 4 ++-
src/mesa/drivers/dri/r300/radeon_program_pair.c | 45 ++++++++++++++++++++++-
2 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 8095538..6b79aa4 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -472,7 +472,9 @@ static void r300SetEarlyZState(GLcontext * ctx)
if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
topZ = R300_ZTOP_DISABLE;
- if (current_fragment_program_writes_depth(ctx))
+ else if (current_fragment_program_writes_depth(ctx))
+ topZ = R300_ZTOP_DISABLE;
+ else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill)
topZ = R300_ZTOP_DISABLE;
if (topZ != r300->hw.zstencil_format.cmd[2]) {
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 4aa2319..2e21f7b 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -47,6 +47,7 @@
struct pair_state_instruction {
GLuint IsTex:1; /**< Is a texture instruction */
+ GLuint IsOutput:1; /**< Is output instruction */
GLuint NeedRGB:1; /**< Needs the RGB ALU */
GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
GLuint IsTranscendent:1; /**< Is a special transcendent instruction */
@@ -123,6 +124,7 @@ struct pair_state {
GLboolean Debug;
GLboolean Verbose;
void *UserData;
+ GLubyte NumKillInsts;
/**
* Translate Mesa registers to hardware registers
@@ -149,6 +151,11 @@ struct pair_state {
struct pair_state_instruction *ReadyTEX;
/**
+ * Linked list of deferred instructions
+ */
+ struct pair_state_instruction *DeferredInsts;
+
+ /**
* Pool of @ref reg_value structures for fast allocation.
*/
struct reg_value *ValuePool;
@@ -231,7 +238,9 @@ static void instruction_ready(struct pair_state *s, int ip)
if (s->Verbose)
_mesa_printf("instruction_ready(%i)\n", ip);
- if (pairinst->IsTex)
+ if (s->NumKillInsts > 0 && pairinst->IsOutput)
+ add_pairinst_to_list(&s->DeferredInsts, pairinst);
+ else if (pairinst->IsTex)
add_pairinst_to_list(&s->ReadyTEX, pairinst);
else if (!pairinst->NeedAlpha)
add_pairinst_to_list(&s->ReadyRGB, pairinst);
@@ -339,6 +348,8 @@ static void classify_instruction(struct pair_state *s,
error("Unknown opcode %d\n", inst->Opcode);
break;
}
+
+ pairinst->IsOutput = (inst->DstReg.File == PROGRAM_OUTPUT);
}
@@ -602,8 +613,11 @@ static void emit_all_tex(struct pair_state *s)
struct prog_instruction *inst = s->Program->Instructions + ip;
commit_instruction(s, ip);
- if (inst->Opcode != OPCODE_KIL)
+ if (inst->Opcode == OPCODE_KIL)
+ --s->NumKillInsts;
+ else
inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);
+
inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index);
if (s->Debug) {
@@ -861,6 +875,17 @@ static void emit_alu(struct pair_state *s)
s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);
}
+/**
+ * Count the KIL instructions in a program.
+ *
+ * @param prog program whose Instructions[0 .. NumInstructions-1] are scanned
+ * @return number of instructions whose Opcode is OPCODE_KIL
+ *
+ * NOTE(review): the result is narrowed to GLubyte to match the return type;
+ * presumably a program never holds more than 255 KILs - confirm.
+ */
+static GLubyte countKillInsts(struct gl_program *prog)
+{
+	GLuint i;
+	GLubyte count = 0;
+
+	/* The index must not be an 8-bit type: it would wrap after 255 and the
+	 * loop would never terminate once NumInstructions exceeds 255. */
+	for (i = 0; i < prog->NumInstructions; ++i) {
+		if (prog->Instructions[i].Opcode == OPCODE_KIL)
+			++count;
+	}
+
+	return count;
+}
GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
const struct radeon_pair_handler* handler, void *userdata)
@@ -874,6 +899,7 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
s.UserData = userdata;
s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE;
s.Verbose = GL_FALSE && s.Debug;
+ s.NumKillInsts = countKillInsts(program);
s.Instructions = (struct pair_state_instruction*)_mesa_calloc(
sizeof(struct pair_state_instruction)*s.Program->NumInstructions);
@@ -892,6 +918,21 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
if (s.ReadyTEX)
emit_all_tex(&s);
+ if (!s.NumKillInsts) {
+ struct pair_state_instruction *pairinst = s.DeferredInsts;
+ while (pairinst) {
+ if (!pairinst->NeedAlpha)
+ add_pairinst_to_list(&s.ReadyRGB, pairinst);
+ else if (!pairinst->NeedRGB)
+ add_pairinst_to_list(&s.ReadyAlpha, pairinst);
+ else
+ add_pairinst_to_list(&s.ReadyFullALU, pairinst);
+
+ pairinst = pairinst->NextReady;
+ }
+ s.DeferredInsts = NULL;
+ }
+
while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha)
emit_alu(&s);
}
--
1.5.6.3
------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables unlimited
royalty-free distribution of the report engine for externally facing
server and web deployment.
http://p.sf.net/sfu/businessobjects
_______________________________________________
Mesa3d-dev mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev