On piątek, 24 kwietnia 2009 17:00:07 Maciej Cencora wrote:
> On piątek, 24 kwietnia 2009 16:42:53 you wrote:
> > Hi,
> >
> > Among some general cleanup, this patchset contains a performance regression
> > fix for non-TCL cards and some debugging-related improvements.
> > Any comments?
> >
> > Regards,
> > Maciej Cencora
>
> Please replace the last patch with these two. With the patch that fixes the
> perf regression, we use fixed-function SWTCL where possible on non-TCL
> hardware, so we need to add a point attenuation stage.
>
> Maciej Cencora

Here's the updated patchset. It includes the additional patch from the previous
email and also reverts part of
0007-r300-always-route-4-texcoord-components-to-RS.patch (the alpha component of
fogcoord should be 1 according to the GL_ARB_fragment_program spec).

Maciej Cencora
From d15216da97c3d3c53d595b2ef654f635c4c1b299 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 19 Apr 2009 21:13:18 +0200
Subject: [PATCH] r300: remove unnecessary function calls

r300SetEarlyZState is called during r300UpdateShaderStates which is called for every rendering operation.
---
 src/mesa/drivers/dri/r300/r300_state.c |    4 ----
 1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 4cbbfd4..c86c16b 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -524,8 +524,6 @@ static void r300SetAlphaState(GLcontext * ctx)
 	R300_STATECHANGE(r300, at);
 	r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
 	r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
-
-	r300SetEarlyZState(ctx);
 }
 
 static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
@@ -573,8 +571,6 @@ static void r300SetDepthState(GLcontext * ctx)
 		r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
 		    translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
 	}
-
-	r300SetEarlyZState(ctx);
 }
 
 static void r300SetStencilState(GLcontext * ctx, GLboolean state)
-- 
1.5.6.3

From 56df0f96f8fd7a89ac54ac7531bd5369233a96cb Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 19 Apr 2009 21:25:01 +0200
Subject: [PATCH] r300: rename state

According to r300_reg.h from radeon drm module 0x4f30 is ZB_ZMASK_OFFSET.
Also clean up trailing whitespace.
---
 src/mesa/drivers/dri/r300/r300_cmdbuf.c  |   12 ++++++------
 src/mesa/drivers/dri/r300/r300_context.h |    2 +-
 src/mesa/drivers/dri/r300/r300_reg.h     |    6 ++++++
 src/mesa/drivers/dri/r300/r300_state.c   |    4 ++--
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index c575c9a..14c2562 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -83,7 +83,7 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
 	BATCH_LOCALS(&r300->radeon);
 	drm_r300_cmd_header_t cmd;
 	uint32_t addr, ndw, i;
-	
+
 	if (!r300->radeon.radeonScreen->kernel_mm) {
 		uint32_t dwords;
 		dwords = (*atom->check) (ctx, atom);
@@ -92,7 +92,7 @@ void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
 		END_BATCH();
 		return;
 	}
-	
+
 	cmd.u = atom->cmd[0];
 	addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
 	ndw = cmd.vpu.count * 4;
@@ -175,7 +175,7 @@ static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
 
 		for(i = 0; i < numtmus; ++i) {
 		    radeonTexObj *t = r300->hw.textures[i];
-		
+
 		    if (!t)
 			notexture = 1;
 		}
@@ -280,7 +280,7 @@ static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
 	if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
 		zbpitch |= R300_DEPTHMICROTILE_TILED;
 	}
-	
+
 	BEGIN_BATCH_NO_AUTOSTATE(6);
 	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
 	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -582,8 +582,8 @@ void r300InitCmdBuf(r300ContextPtr r300)
 	r300->hw.zb.emit = emit_zb_offset;
 	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
 	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
-	ALLOC_STATE(unk4F30, always, 3, 0);
-	r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
+	ALLOC_STATE(zb_zmask, always, 3, 0);
+	r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2);
 	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
 	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
 	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index 1c7bfc8..f8d914e 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -351,7 +351,7 @@ struct r300_hw_state {
 	struct radeon_state_atom zstencil_format;
 	struct radeon_state_atom zb;	/* z buffer (4F20) */
 	struct radeon_state_atom zb_depthclearvalue;	/* (4F28) */
-	struct radeon_state_atom unk4F30;	/* (4F30) */
+	struct radeon_state_atom zb_zmask;	/* (4F30) */
 	struct radeon_state_atom zb_hiz_offset;	/* (4F44) */
 	struct radeon_state_atom zb_hiz_pitch;	/* (4F54) */
 
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index ed552d0..79dd1e1 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -2432,6 +2432,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 /* Z Buffer Clear Value */
 #define R300_ZB_DEPTHCLEARVALUE                  0x4f28
 
+#define R300_ZB_ZMASK_OFFSET                     0x4f30
+#define R300_ZB_ZMASK_PITCH                      0x4f34
+#define R300_ZB_ZMASK_WRINDEX                    0x4f38
+#define R300_ZB_ZMASK_DWORD                      0x4f3c
+#define R300_ZB_ZMASK_RDINDEX                    0x4f40
+
 /* Hierarchical Z Memory Offset */
 #define R300_ZB_HIZ_OFFSET                       0x4f44
 
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index c86c16b..14749bf 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2205,8 +2205,8 @@ static void r300ResetHwState(r300ContextPtr r300)
 	r300->hw.zstencil_format.cmd[4] = 0x00000000;
 	r300SetEarlyZState(ctx);
 
-	r300->hw.unk4F30.cmd[1] = 0;
-	r300->hw.unk4F30.cmd[2] = 0;
+	r300->hw.zb_zmask.cmd[1] = 0;
+	r300->hw.zb_zmask.cmd[2] = 0;
 
 	r300->hw.zb_hiz_offset.cmd[1] = 0;
 
-- 
1.5.6.3

From 943c17fc86effdcd24a35efd09e20251bad8124a Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Thu, 23 Apr 2009 16:08:48 +0200
Subject: [PATCH] r300: add atom print function for kernel mm path

---
 src/mesa/drivers/dri/radeon/radeon_common.c |   64 ++++++++++++++++++++++-----
 1 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index dc281ee..312c2a7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -851,20 +851,57 @@ void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei he
 	radeon_window_moved(radeon);
 	radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer);
 	ctx->Driver.Viewport = old_viewport;
-
-
 }
-static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state )
-{
-	int i;
-	int dwords = (*state->check)(radeon->glCtx, state);
 
-	fprintf(stderr, "emit %s %d/%d\n", state->name, state->cmd_size, dwords);
-
-	if (RADEON_DEBUG & DEBUG_VERBOSE) 
-		for (i = 0 ; i < dwords; i++) 
-			fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]);
+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg;
+	int dwords = (*state->check) (radeon->glCtx, state);
+	drm_r300_cmd_header_t cmd;
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (RADEON_DEBUG & DEBUG_VERBOSE) {
+		for (i = 0; i < dwords;) {
+			cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
+			reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, cmd.packet0.count);
+			++i;
+			for (j = 0; j < cmd.packet0.count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
+}
 
+static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg, count;
+	int dwords = (*state->check) (radeon->glCtx, state);
+	uint32_t packet0;
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (RADEON_DEBUG & DEBUG_VERBOSE) {
+		for (i = 0; i < dwords;) {
+			packet0 = state->cmd[i];
+			reg = (packet0 & 0x1FFF) << 2;
+			count = ((packet0 & 0x3FFF0000) >> 16) + 1;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, count);
+			++i;
+			for (j = 0; j < count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
 }
 
 static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
@@ -882,7 +919,10 @@ static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty)
 			dwords = (*atom->check) (radeon->glCtx, atom);
 			if (dwords) {
 				if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) {
-					radeon_print_state_atom(radeon, atom);
+					if (radeon->radeonScreen->kernel_mm)
+						radeon_print_state_atom_kmm(radeon, atom);
+					else
+						radeon_print_state_atom(radeon, atom);
 				}
 				if (atom->emit) {
 					(*atom->emit)(radeon->glCtx, atom);
-- 
1.5.6.3

From f1a21fb835b80378e60635141a402302ab9c4e40 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Thu, 23 Apr 2009 15:35:29 +0200
Subject: [PATCH] r300: remove unnecessary function calls

ae_create_context is called by vbo_CreateContext
ae_invalidate_state is called by vbo_InvalidateState
---
 src/mesa/drivers/dri/r300/r300_context.c |    1 -
 src/mesa/drivers/dri/r300/r300_state.c   |    1 -
 2 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 10836bb..3bde9ca 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -373,7 +373,6 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 	_tnl_CreateContext(ctx);
 	_swsetup_CreateContext(ctx);
 	_swsetup_Wakeup(ctx);
-	_ae_create_context(ctx);
 
 	/* Install the customized pipeline:
 	 */
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 14749bf..a1f78c5 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -2477,7 +2477,6 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
 	_swsetup_InvalidateState(ctx, new_state);
 	_vbo_InvalidateState(ctx, new_state);
 	_tnl_InvalidateState(ctx, new_state);
-	_ae_invalidate_state(ctx, new_state);
 
 	if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
 		_mesa_update_framebuffer(ctx);
-- 
1.5.6.3

From 87c3e9aba422aac15f1fe5746e51dba0f0a00d7b Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Thu, 23 Apr 2009 15:41:08 +0200
Subject: [PATCH] r300: handle texcoords properly

Add the 1D texture case and set the default Q value to 1.0.
---
 src/mesa/drivers/dri/r300/r300_swtcl.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index 256a2bb..55187d4 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -211,15 +211,19 @@ static void r300SetVertexFormat( GLcontext *ctx )
 			if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
 				switch (VB->TexCoordPtr[i]->size) {
 					case 1:
+						format = EMIT_1F;
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+						mask = MASK_X;
+						break;
 					case 2:
 						format = EMIT_2F;
-						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
 						mask = MASK_X | MASK_Y;
 						size = 2;
 						break;
 					case 3:
 						format = EMIT_3F;
-						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
 						mask = MASK_X | MASK_Y | MASK_Z;
 						size = 3;
 						break;
-- 
1.5.6.3

From 16cf5502e174537dff0de105144b869346ea1325 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Fri, 24 Apr 2009 16:15:19 +0200
Subject: [PATCH] r300: always route 4 texcoord components to RS

Routing <4 components may lead to lock up.

Thanks to Alex Deucher for suggestion.
---
 src/mesa/drivers/dri/r300/r300_state.c |   64 ++++---------------------------
 src/mesa/drivers/dri/r300/r300_swtcl.c |   19 ++++-----
 2 files changed, 17 insertions(+), 66 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index a1f78c5..9c8b8ad 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1482,6 +1482,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		}
 	}
 
+	/* We always route 4 texcoord components */
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
 		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
 		    continue;
@@ -1491,26 +1492,10 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		    continue;
 		}
 
-		int swiz;
-
-		/* with TCL we always seem to route 4 components */
-		if (hw_tcl_on)
-			count = 4;
-		else
-			count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
-		switch(count) {
-		case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
-		case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
-		default:
-		case 1:
-		case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
-		};
-
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
 		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
 		InputsRead &= ~(FRAG_BIT_TEX0 << i);
-		rs_tex_count += count;
+		rs_tex_count += 4;
 		++tex_ip;
 		++fp_reg;
 	}
@@ -1633,7 +1618,7 @@ static void r500SetupRSUnit(GLcontext * ctx)
 		}
 	}
 
-
+	/* We always route 4 texcoord components */
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
 		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
 		    continue;
@@ -1643,45 +1628,14 @@ static void r500SetupRSUnit(GLcontext * ctx)
 		    continue;
 		}
 
-		int swiz = 0;
-
-		/* with TCL we always seem to route 4 components */
-		if (hw_tcl_on)
-		  count = 4;
-		else
-		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
-		if (count == 4) {
-			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
-			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
-			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
-			swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		} else if (count == 3) {
-			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
-			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
-			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
-			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		} else if (count == 2) {
-			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
-			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
-			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		} else if (count == 1) {
-			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
-			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		} else {
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT;
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
-			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
-			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		}
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+			((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+			((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+			((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
 
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz;
 		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
 		InputsRead &= ~(FRAG_BIT_TEX0 << i);
-		rs_tex_count += count;
+		rs_tex_count += 4;
 		++tex_ip;
 		++fp_reg;
 	}
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index 55187d4..fc84988 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -204,34 +204,31 @@ static void r300SetVertexFormat( GLcontext *ctx )
 		ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X);
 	}
 
+	/**
+	 *  Sending only one texcoord component may lead to lock up,
+	 *  so for all textures always output 4 texcoord components to RS.
+	 */
 	if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
-		int i, size;
-		GLuint swiz, mask, format;
+		int i;
+		GLuint swiz, format;
 		for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
 			if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
 				switch (VB->TexCoordPtr[i]->size) {
 					case 1:
 						format = EMIT_1F;
 						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
-						mask = MASK_X;
 						break;
 					case 2:
 						format = EMIT_2F;
 						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
-						mask = MASK_X | MASK_Y;
-						size = 2;
 						break;
 					case 3:
 						format = EMIT_3F;
 						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
-						mask = MASK_X | MASK_Y | MASK_Z;
-						size = 3;
 						break;
 					case 4:
 						format = EMIT_4F;
 						swiz = SWIZZLE_XYZW;
-						mask = MASK_XYZW;
-						size = 4;
 						break;
 					default:
 						continue;
@@ -239,8 +236,8 @@ static void r300SetVertexFormat( GLcontext *ctx )
 				InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
 				OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
 				EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
-				ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, mask);
-				vap_out_fmt_1 |= size << (i * 3);
+				ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, MASK_XYZW);
+				vap_out_fmt_1 |= 4 << (i * 3);
 				++first_free_tex;
 			}
 		}
-- 
1.5.6.3

From 87d8dc179aa34b9ff6f5f2a7e2fb48eb613f4c5f Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Thu, 23 Apr 2009 16:12:09 +0200
Subject: [PATCH] r300: flush stdout to get consistent debugging info

---
 src/mesa/drivers/dri/r300/r300_fragprog_common.c |    4 ++++
 src/mesa/drivers/dri/r300/radeon_program_pair.c  |    1 +
 2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 3d4bd5d..6eaad76 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -214,8 +214,10 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
 		compiler.program = _mesa_clone_program(ctx, &fp->Base);
 
 		if (RADEON_DEBUG & DEBUG_PIXEL) {
+			fflush(stdout);
 			_mesa_printf("Fragment Program: Initial program:\n");
 			_mesa_print_program(compiler.program);
+			fflush(stdout);
 		}
 
 		insert_WPOS_trailer(&compiler);
@@ -240,6 +242,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
 		if (RADEON_DEBUG & DEBUG_PIXEL) {
 			_mesa_printf("Fragment Program: After native rewrite:\n");
 			_mesa_print_program(compiler.program);
+			fflush(stdout);
 		}
 
 		if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
@@ -263,6 +266,7 @@ void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp)
 		if (RADEON_DEBUG & DEBUG_PIXEL) {
 			_mesa_printf("Compiler: after NqSSA-DCE:\n");
 			_mesa_print_program(compiler.program);
+			fflush(stdout);
 		}
 
 		if (!r300->vtbl.FragmentProgramEmit(&compiler))
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 5c6594b..906d36e 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -609,6 +609,7 @@ static void emit_all_tex(struct pair_state *s)
 		if (s->Debug) {
 			_mesa_printf("   ");
 			_mesa_print_instruction(inst);
+			fflush(stdout);
 		}
 		s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst);
 	}
-- 
1.5.6.3

From 6849cf07528ab35acaacda1bd3cde9b4e20c2919 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Fri, 24 Apr 2009 16:52:33 +0200
Subject: [PATCH] r300: add point attenuation stage for TCL fallbacks

---
 src/mesa/drivers/dri/r300/r300_context.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 3bde9ca..5119890 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -172,6 +172,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = {
 	&_tnl_fog_coordinate_stage,
 	&_tnl_texgen_stage,
 	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
 	&_tnl_vertex_program_stage,
 
 	/* Try again to go to tcl?
-- 
1.5.6.3

From 9c43ae5809b5a1e756ac6834b0a83eeb51473c6d Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Fri, 24 Apr 2009 16:28:47 +0200
Subject: [PATCH] r300: fix performance regression

This performance regression on non TCL hw was introduced by ed4c6cbe017b4e8bacb7e012d4baaf77a20a2c33.
This patch depends on "r300: always route 4 texcoord components to RS" and "r300: add point attenuation stage for TCL fallbacks".
---
 src/mesa/drivers/dri/r300/r300_context.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 5119890..c80dcd8 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -364,7 +364,10 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual,
 
 	ctx = r300->radeon.glCtx;
 	r300InitConstValues(ctx, screen);
-	ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+
+	if (hw_tcl_on)
+		ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+
 	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
 
 	/* Initialize the software rasterizer and helper modules.
-- 
1.5.6.3

------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables unlimited
royalty-free distribution of the report engine for externally facing
server and web deployment.
http://p.sf.net/sfu/businessobjects
_______________________________________________
Mesa3d-dev mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to