Hi,

I thought those GB_GR and BG_RG formats were a perfect match for packed
YUV data. Using them should make the code simpler and faster (though unless
you've got an HD3000-class IGP, that shouldn't matter at all).

There's only a slight problem: it doesn't actually work...
The colors are all wrong, and swapping u and v (y/z) doesn't help much; it
is just wrong in a different way. Maybe there's a sign issue somewhere, but
I don't see how, as it looks like all those values were just set up as
uint8 values before.
I've attached some test images, tested on Evergreen (EG) and created with
"gst-launch videotestsrc ! xvimagesink". Maybe someone has an idea
what's wrong?
I _thought_ those formats should just work like that (the DRI driver
also supports them), and the change is fairly trivial (not saying there
can't be other bugs; that shader code is difficult to understand...).


Roland

<<attachment: testimgnormal.png>>

<<attachment: testimgxyz.png>>

<<attachment: testimgxzy.png>>

diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 01b612a..18e9f50 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -1338,7 +1338,7 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             VALID_PIXEL_MODE(0),
                             CF_INST(SQ_CF_INST_TC),
                             BARRIER(1));
@@ -1365,8 +1365,8 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1382,36 +1382,6 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 34/35 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             INST_MOD(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             ALT_CONST(0),
-                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
-                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index bbdd7a7..a6faba0 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1306,7 +1306,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             VALID_PIXEL_MODE(0),
                             END_OF_PROGRAM(0),
                             CF_INST(SQ_CF_INST_TC),
@@ -1337,8 +1337,8 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1354,36 +1354,6 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 34/35 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             INST_MOD(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             ALT_CONST(0),
-                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
-                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 8ca8e62..93038c9 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -331,7 +331,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     default:
 	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
 
-	/* Y texture */
+	/* YUV texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = accel_state->src_obj[0].width;
 	tex_res.h                   = accel_state->src_obj[0].height;
@@ -345,13 +345,13 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.mip_bo              = accel_state->src_obj[0].bo;
 	tex_res.surface             = NULL;
 
-	tex_res.format              = FMT_8_8;
 	if (pPriv->id == FOURCC_UYVY)
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
+	    tex_res.format              = FMT_GB_GR;
 	else
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
-	tex_res.dst_sel_y           = SQ_SEL_1;
-	tex_res.dst_sel_z           = SQ_SEL_1;
+	    tex_res.format              = FMT_BG_RG;
+	tex_res.dst_sel_x           = SQ_SEL_X;
+	tex_res.dst_sel_y           = SQ_SEL_Z;
+	tex_res.dst_sel_z           = SQ_SEL_Y;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 
 	tex_res.base_level          = 0;
@@ -362,7 +362,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    tex_res.array_mode          = 1;
 	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
 
-	/* Y sampler */
+	/* YUV sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -375,33 +375,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	evergreen_set_tex_sampler(pScrn, &tex_samp);
 
-	/* UV texture */
-	tex_res.id                  = 1;
-	tex_res.format              = FMT_8_8_8_8;
-	tex_res.w                   = accel_state->src_obj[0].width >> 1;
-	tex_res.h                   = accel_state->src_obj[0].height;
-	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
-	if (pPriv->id == FOURCC_UYVY) {
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
-	} else {
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
-	}
-	tex_res.dst_sel_z           = SQ_SEL_1;
-	tex_res.dst_sel_w           = SQ_SEL_1;
-	tex_res.interlaced          = 0;
-
-	tex_res.base                = accel_state->src_obj[0].offset;
-	tex_res.mip_base            = accel_state->src_obj[0].offset;
-	tex_res.size                = accel_state->src_size[0];
-	if (accel_state->src_obj[0].tiling_flags == 0)
-	    tex_res.array_mode          = 1;
-	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
-
-	/* UV sampler */
-	tex_samp.id                 = 1;
-	evergreen_set_tex_sampler(pScrn, &tex_samp);
 	break;
     }
 
diff --git a/src/r600_shader.c b/src/r600_shader.c
index ab2f485..4cb2fc8 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1090,7 +1090,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             CALL_COUNT(0),
                             END_OF_PROGRAM(0),
                             VALID_PIXEL_MODE(0),
@@ -1120,8 +1120,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1137,34 +1137,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 28/29 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             BC_FRAC_MODE(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 62da992..7e41958 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -358,7 +358,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     default:
 	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
 
-	/* Y texture */
+	/* YUV texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = accel_state->src_obj[0].width;
 	tex_res.h                   = accel_state->src_obj[0].height;
@@ -371,13 +371,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.bo                  = accel_state->src_obj[0].bo;
 	tex_res.mip_bo              = accel_state->src_obj[0].bo;
 
-	tex_res.format              = FMT_8_8;
 	if (pPriv->id == FOURCC_UYVY)
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
+	    tex_res.format              = FMT_GB_GR;
 	else
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
-	tex_res.dst_sel_y           = SQ_SEL_1;
-	tex_res.dst_sel_z           = SQ_SEL_1;
+	    tex_res.format              = FMT_BG_RG;
+	tex_res.dst_sel_x           = SQ_SEL_X;
+	tex_res.dst_sel_y           = SQ_SEL_Y;
+	tex_res.dst_sel_z           = SQ_SEL_Z;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 
 	tex_res.request_size        = 1;
@@ -389,7 +389,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    tex_res.tile_mode           = 1;
 	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
-	/* Y sampler */
+	/* YUV sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -403,33 +403,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
-	/* UV texture */
-	tex_res.id                  = 1;
-	tex_res.format              = FMT_8_8_8_8;
-	tex_res.w                   = accel_state->src_obj[0].width >> 1;
-	tex_res.h                   = accel_state->src_obj[0].height;
-	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
-	if (pPriv->id == FOURCC_UYVY) {
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
-	} else {
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
-	}
-	tex_res.dst_sel_z           = SQ_SEL_1;
-	tex_res.dst_sel_w           = SQ_SEL_1;
-	tex_res.interlaced          = 0;
-
-	tex_res.base                = accel_state->src_obj[0].offset;
-	tex_res.mip_base            = accel_state->src_obj[0].offset;
-	tex_res.size                = accel_state->src_size[0];
-	if (accel_state->src_obj[0].tiling_flags == 0)
-	    tex_res.tile_mode           = 1;
-	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
-
-	/* UV sampler */
-	tex_samp.id                 = 1;
-	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 	break;
     }
 
_______________________________________________
xorg-driver-ati mailing list
[email protected]
http://lists.x.org/mailman/listinfo/xorg-driver-ati

Reply via email to