Here's a somewhat experimental patch to enable
GL_ARB_texture_env_crossbar on r200. It turned out uglier than I wanted...
It works with tests/crossbar and glean (texcombine); I couldn't find
anything else that uses it (well, ut2k4 seems to, but I couldn't see any
difference).
Unfortunately there is some overhead associated with it (figuring out
which register to use for the output of each stage). I hope it's not too
serious (it might roughly double the number of CPU cycles spent on the
tex env setup). Still, if you have all 6 texture units enabled and
reference textures back and forth like mad (since you can have 3
arguments per environment, both for rgb and alpha, in the worst case
you will need to reference all 6 registers in a single env stage),
you're somewhat likely to hit a raster fallback, I guess :-(. With only
one more register there would be no problem (as you can reference
arbitrary texture sampling results, but only the previous tex env result)...
I'm actually wondering how ATI solved that problem in their driver. I
couldn't see an easy way to avoid the fallback - I don't think even
using the 2 additional tex env stages or the second phase of the
fragment pipeline would fix the issue. Maybe someone else has a good idea?
Roland
Index: r200_context.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_context.c,v
retrieving revision 1.44
diff -u -r1.44 r200_context.c
--- r200_context.c 6 Jul 2005 12:49:46 -0000 1.44
+++ r200_context.c 19 Jul 2005 17:44:33 -0000
@@ -140,6 +140,7 @@
{ "GL_ARB_texture_env_add", NULL },
{ "GL_ARB_texture_env_combine", NULL },
{ "GL_ARB_texture_env_dot3", NULL },
+ { "GL_ARB_texture_env_crossbar", NULL },
{ "GL_ARB_texture_mirrored_repeat", NULL },
{ "GL_ARB_vertex_buffer_object",
GL_ARB_vertex_buffer_object_functions },
{ "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions },
Index: r200_context.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_context.h,v
retrieving revision 1.29
diff -u -r1.29 r200_context.h
--- r200_context.h 31 May 2005 04:26:13 -0000 1.29
+++ r200_context.h 19 Jul 2005 17:44:34 -0000
@@ -172,8 +172,7 @@
struct r200_texture_env_state {
r200TexObjPtr texobj;
- GLenum format;
- GLenum envMode;
+ GLuint outputreg; /* register index this unit's tex env stage writes to; r200UpdateAllTexEnv stores -1 (wraps, unsigned) when the stage gets none */
};
#define R200_MAX_TEXTURE_UNITS 6
Index: r200_reg.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_reg.h,v
retrieving revision 1.12
diff -u -r1.12 r200_reg.h
--- r200_reg.h 15 Mar 2005 22:23:29 -0000 1.12
+++ r200_reg.h 19 Jul 2005 17:44:35 -0000
@@ -1146,6 +1146,7 @@
#define R200_TXC_CLAMP_WRAP (0 << 12)
#define R200_TXC_CLAMP_0_1 (1 << 12)
#define R200_TXC_CLAMP_8_8 (2 << 12)
+#define R200_TXC_OUTPUT_REG_SHIFT 16
#define R200_TXC_OUTPUT_REG_MASK (7 << 16)
#define R200_TXC_OUTPUT_REG_NONE (0 << 16)
#define R200_TXC_OUTPUT_REG_R0 (1 << 16)
@@ -1288,6 +1289,7 @@
#define R200_TXA_CLAMP_WRAP (0 << 12)
#define R200_TXA_CLAMP_0_1 (1 << 12)
#define R200_TXA_CLAMP_8_8 (2 << 12)
+#define R200_TXA_OUTPUT_REG_SHIFT 16
#define R200_TXA_OUTPUT_REG_MASK (7 << 16)
#define R200_TXA_OUTPUT_REG_NONE (0 << 16)
#define R200_TXA_OUTPUT_REG_R0 (1 << 16)
Index: r200_texstate.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_texstate.c,v
retrieving revision 1.20
diff -u -r1.20 r200_texstate.c
--- r200_texstate.c 31 May 2005 23:32:47 -0000 1.20
+++ r200_texstate.c 19 Jul 2005 17:44:37 -0000
@@ -506,9 +506,9 @@
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
GLuint color_combine, alpha_combine;
GLuint color_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXCBLEND2] &
- ~(R200_TXC_SCALE_MASK);
+ ~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK);
GLuint alpha_scale = rmesa->hw.pix[unit].cmd[PIX_PP_TXABLEND2] &
- ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK);
+ ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK);
/* texUnit->_Current can be NULL if and only if the texture unit is
* not actually enabled.
@@ -526,10 +526,9 @@
* reduces the amount of special-casing we have to do, alpha-only
* textures being a notable exception.
*/
- /* Don't cache these results.
- */
- rmesa->state.texture.unit[unit].format = 0;
- rmesa->state.texture.unit[unit].envMode = 0;
+
+ color_scale |= (rmesa->state.texture.unit[unit].outputreg + 1) <<
R200_TXC_OUTPUT_REG_SHIFT;
+ alpha_scale |= (rmesa->state.texture.unit[unit].outputreg + 1) <<
R200_TXA_OUTPUT_REG_SHIFT;
if ( !texUnit->_ReallyEnabled ) {
if ( unit == 0 ) {
@@ -559,9 +558,10 @@
*/
for ( i = 0 ; i < numColorArgs ; i++ ) {
const GLint op = texUnit->_CurrentCombine->OperandRGB[i] -
GL_SRC_COLOR;
+ const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
assert(op >= 0);
assert(op <= 3);
- switch ( texUnit->_CurrentCombine->SourceRGB[i] ) {
+ switch ( srcRGBi ) {
case GL_TEXTURE:
color_arg[i] = r200_register_color[op][unit];
break;
@@ -575,7 +575,7 @@
if (unit == 0)
color_arg[i] = r200_primary_color[op];
else
- color_arg[i] = r200_register_color[op][0];
+ color_arg[i] =
r200_register_color[op][rmesa->state.texture.unit[unit - 1].outputreg];
break;
case GL_ZERO:
color_arg[i] = r200_zero_color[op];
@@ -583,6 +583,14 @@
case GL_ONE:
color_arg[i] = r200_zero_color[op+1];
break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ case GL_TEXTURE3:
+ case GL_TEXTURE4:
+ case GL_TEXTURE5:
+ color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0];
+ break;
default:
return GL_FALSE;
}
@@ -590,9 +598,10 @@
for ( i = 0 ; i < numAlphaArgs ; i++ ) {
const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+ const GLint srcAi = texUnit->_CurrentCombine->SourceA[i];
assert(op >= 0);
assert(op <= 1);
- switch ( texUnit->_CurrentCombine->SourceA[i] ) {
+ switch ( srcAi ) {
case GL_TEXTURE:
alpha_arg[i] = r200_register_alpha[op][unit];
break;
@@ -606,7 +615,7 @@
if (unit == 0)
alpha_arg[i] = r200_primary_alpha[op];
else
- alpha_arg[i] = r200_register_alpha[op][0];
+ alpha_arg[i] =
r200_register_alpha[op][rmesa->state.texture.unit[unit - 1].outputreg];
break;
case GL_ZERO:
alpha_arg[i] = r200_zero_alpha[op];
@@ -614,6 +623,14 @@
case GL_ONE:
alpha_arg[i] = r200_zero_alpha[op+1];
break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ case GL_TEXTURE3:
+ case GL_TEXTURE4:
+ case GL_TEXTURE5:
+ alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0];
+ break;
default:
return GL_FALSE;
}
@@ -815,6 +832,100 @@
return GL_TRUE;
}
+static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) /* pick an output register per texture unit, then update all 6 tex env stages; GL_FALSE if no register is free or any stage update fails */
+{
+ r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ GLint i, j;
+ GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE,
GL_TRUE}; /* texregfree[n] stays true until a scanned stage is found to read register n */
+ GLboolean stageref = GL_TRUE; /* did the most recently scanned stage source GL_PREVIOUS? starts true so the last unit always gets a register */
+ GLboolean ok;
+ /* Walk the units from last to first, so the reads of later stages are known before an earlier stage's output register is picked. */
+ for ( j = R200_MAX_TEXTURE_UNITS - 1; j >= 0; j-- ) {
+ const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j];
+ /* outputreg is a GLuint, so -1 wraps; r200UpdateTextureEnv adds 1 before shifting, which yields 0, the R200_TXC/TXA_OUTPUT_REG_NONE encoding. */
+ rmesa->state.texture.unit[j].outputreg = -1;
+ /* Everything below is skipped (unit keeps -1, sources not scanned) when the stage scanned before this one did not source GL_PREVIOUS. */
+ if (stageref) {
+ /* Take the lowest-numbered register that no previously scanned stage reads. */
+ for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) {
+ if (texregfree[i]) {
+ rmesa->state.texture.unit[j].outputreg = i;
+ break;
+ }
+ }
+ if (rmesa->state.texture.unit[j].outputreg == -1) {
+ /* no more free regs we can use. Need a fallback :-( */
+ return GL_FALSE;
+ }
+ /* A disabled unit keeps the register picked above; its combiner sources are not scanned and stageref stays set. */
+ if (!texUnit->_ReallyEnabled) {
+ continue;
+ }
+ /* From here on: record which registers this stage reads and whether it sources GL_PREVIOUS. */
+ const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+ const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+
+ stageref = GL_FALSE;
+ for ( i = 0 ; i < numColorArgs ; i++ ) {
+ const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+ switch ( srcRGBi ) {
+ case GL_PREVIOUS:
+ stageref = GL_TRUE;
+ break;
+ case GL_TEXTURE: /* the unit's own texture, i.e. register j */
+ texregfree[j] = GL_FALSE;
+ break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ case GL_TEXTURE3:
+ case GL_TEXTURE4:
+ case GL_TEXTURE5:
+ texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE;
+ break;
+ default: /* don't care about other sources here */
+ break;
+ }
+ }
+ /* NOTE(review): alpha sources are not scanned for the DOT3_RGBA modes, presumably because the alpha combine is unused there -- confirm. */
+ if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ||
+ (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT) )
+ continue;
+ /* Same bookkeeping for the alpha sources. */
+ for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+ const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
+ switch ( srcAi ) {
+ case GL_PREVIOUS:
+ stageref = GL_TRUE;
+ break;
+ case GL_TEXTURE: /* the unit's own texture, i.e. register j */
+ texregfree[j] = GL_FALSE;
+ break;
+ case GL_TEXTURE0:
+ case GL_TEXTURE1:
+ case GL_TEXTURE2:
+ case GL_TEXTURE3:
+ case GL_TEXTURE4:
+ case GL_TEXTURE5:
+ texregfree[srcAi - GL_TEXTURE0] = GL_FALSE;
+ break;
+ default: /* don't care about other sources here */
+ break;
+ }
+ }
+ }
+ }
+ /* Output registers are chosen; now update every stage. The && chain stops at the first unit whose update fails. */
+ ok = (r200UpdateTextureEnv( ctx, 0 ) &&
+ r200UpdateTextureEnv( ctx, 1 ) &&
+ r200UpdateTextureEnv( ctx, 2 ) &&
+ r200UpdateTextureEnv( ctx, 3 ) &&
+ r200UpdateTextureEnv( ctx, 4 ) &&
+ r200UpdateTextureEnv( ctx, 5 ));
+
+ return ok;
+}
+
#define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK | \
R200_MIN_FILTER_MASK | \
R200_MAG_FILTER_MASK | \
@@ -1077,10 +1190,6 @@
/* Actually want to keep all units less than max active texture
* enabled, right? Fix this for >2 texunits.
*/
- /* FIXME: What should happen here if r200UpdateTextureEnv fails? */
- if (unit == 0)
- r200UpdateTextureEnv( ctx, unit );
-
{
GLuint tmp = rmesa->TexGenEnabled;
@@ -1258,7 +1367,6 @@
struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
struct gl_texture_object *tObj = texUnit->_Current;
r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData;
- GLenum format;
/* Fallback if there's a texture border */
if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 )
@@ -1309,16 +1417,6 @@
rmesa->NewGLState |= _NEW_TEXTURE_MATRIX;
}
- format = tObj->Image[0][tObj->BaseLevel]->Format;
- if ( rmesa->state.texture.unit[unit].format != format ||
- rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) {
- rmesa->state.texture.unit[unit].format = format;
- rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode;
- if ( ! r200UpdateTextureEnv( ctx, unit ) ) {
- return GL_FALSE;
- }
- }
-
FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback );
return !t->border_fallback;
}
@@ -1370,6 +1468,10 @@
r200UpdateTextureUnit( ctx, 4 ) &&
r200UpdateTextureUnit( ctx, 5 ));
+ if (ok) {
+ ok = r200UpdateAllTexEnv( ctx );
+ }
+
FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
if (rmesa->TclFallback)