Those formats were invented for exactly that purpose so use them.
This saves some code and also some hw resources (only need one
sampler instead of two for packed yuv).
(Note the output is not quite pixel exact probably some rounding errors
on coords before caused some subtle chroma filter bugs, or the bilinear filter
is just slightly different for that format.)
Only tested on EG.
---
src/cayman_shader.c | 36 ++----------------------------
src/evergreen_shader.c | 36 ++----------------------------
src/evergreen_textured_videofuncs.c | 41 ++++++-----------------------------
src/r600_shader.c | 34 ++--------------------------
src/r600_textured_videofuncs.c | 41 ++++++-----------------------------
5 files changed, 23 insertions(+), 165 deletions(-)
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 01b612a..18e9f50 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -1338,7 +1338,7 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
@@ -1365,8 +1365,8 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1382,36 +1382,6 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 34/35 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index bbdd7a7..a6faba0 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1306,7 +1306,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_TC),
@@ -1337,8 +1337,8 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1354,36 +1354,6 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 34/35 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/evergreen_textured_videofuncs.c
b/src/evergreen_textured_videofuncs.c
index 8ca8e62..7c5d15d 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -331,7 +331,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
default:
accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
- /* Y texture */
+ /* YUV texture */
tex_res.id = 0;
tex_res.w = accel_state->src_obj[0].width;
tex_res.h = accel_state->src_obj[0].height;
@@ -345,13 +345,13 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_res.mip_bo = accel_state->src_obj[0].bo;
tex_res.surface = NULL;
- tex_res.format = FMT_8_8;
if (pPriv->id == FOURCC_UYVY)
- tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
+ tex_res.format = FMT_GB_GR;
else
- tex_res.dst_sel_x = SQ_SEL_X; /* Y */
- tex_res.dst_sel_y = SQ_SEL_1;
- tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.format = FMT_BG_RG;
+ tex_res.dst_sel_x = SQ_SEL_Y;
+ tex_res.dst_sel_y = SQ_SEL_X;
+ tex_res.dst_sel_z = SQ_SEL_Z;
tex_res.dst_sel_w = SQ_SEL_1;
tex_res.base_level = 0;
@@ -362,7 +362,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_res.array_mode = 1;
evergreen_set_tex_resource(pScrn, &tex_res,
accel_state->src_obj[0].domain);
- /* Y sampler */
+ /* YUV sampler */
tex_samp.id = 0;
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -375,33 +375,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_samp.mip_filter = 0; /* no mipmap */
evergreen_set_tex_sampler(pScrn, &tex_samp);
- /* UV texture */
- tex_res.id = 1;
- tex_res.format = FMT_8_8_8_8;
- tex_res.w = accel_state->src_obj[0].width >> 1;
- tex_res.h = accel_state->src_obj[0].height;
- tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
- if (pPriv->id == FOURCC_UYVY) {
- tex_res.dst_sel_x = SQ_SEL_X; /* V */
- tex_res.dst_sel_y = SQ_SEL_Z; /* U */
- } else {
- tex_res.dst_sel_x = SQ_SEL_Y; /* V */
- tex_res.dst_sel_y = SQ_SEL_W; /* U */
- }
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
- tex_res.interlaced = 0;
-
- tex_res.base = accel_state->src_obj[0].offset;
- tex_res.mip_base = accel_state->src_obj[0].offset;
- tex_res.size = accel_state->src_size[0];
- if (accel_state->src_obj[0].tiling_flags == 0)
- tex_res.array_mode = 1;
- evergreen_set_tex_resource(pScrn, &tex_res,
accel_state->src_obj[0].domain);
-
- /* UV sampler */
- tex_samp.id = 1;
- evergreen_set_tex_sampler(pScrn, &tex_samp);
break;
}
diff --git a/src/r600_shader.c b/src/r600_shader.c
index ab2f485..4cb2fc8 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1090,7 +1090,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
@@ -1120,8 +1120,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1137,34 +1137,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t*
shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 28/29 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 62da992..7610050 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -358,7 +358,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
default:
accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
- /* Y texture */
+ /* YUV texture */
tex_res.id = 0;
tex_res.w = accel_state->src_obj[0].width;
tex_res.h = accel_state->src_obj[0].height;
@@ -371,13 +371,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_res.bo = accel_state->src_obj[0].bo;
tex_res.mip_bo = accel_state->src_obj[0].bo;
- tex_res.format = FMT_8_8;
if (pPriv->id == FOURCC_UYVY)
- tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
+ tex_res.format = FMT_GB_GR;
else
- tex_res.dst_sel_x = SQ_SEL_X; /* Y */
- tex_res.dst_sel_y = SQ_SEL_1;
- tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.format = FMT_BG_RG;
+ tex_res.dst_sel_x = SQ_SEL_Y;
+ tex_res.dst_sel_y = SQ_SEL_X;
+ tex_res.dst_sel_z = SQ_SEL_Z;
tex_res.dst_sel_w = SQ_SEL_1;
tex_res.request_size = 1;
@@ -389,7 +389,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_res.tile_mode = 1;
r600_set_tex_resource(pScrn, accel_state->ib, &tex_res,
accel_state->src_obj[0].domain);
- /* Y sampler */
+ /* YUV sampler */
tex_samp.id = 0;
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -403,33 +403,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn,
RADEONPortPrivPtr pPriv)
tex_samp.mip_filter = 0; /* no mipmap */
r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
- /* UV texture */
- tex_res.id = 1;
- tex_res.format = FMT_8_8_8_8;
- tex_res.w = accel_state->src_obj[0].width >> 1;
- tex_res.h = accel_state->src_obj[0].height;
- tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
- if (pPriv->id == FOURCC_UYVY) {
- tex_res.dst_sel_x = SQ_SEL_X; /* V */
- tex_res.dst_sel_y = SQ_SEL_Z; /* U */
- } else {
- tex_res.dst_sel_x = SQ_SEL_Y; /* V */
- tex_res.dst_sel_y = SQ_SEL_W; /* U */
- }
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
- tex_res.interlaced = 0;
-
- tex_res.base = accel_state->src_obj[0].offset;
- tex_res.mip_base = accel_state->src_obj[0].offset;
- tex_res.size = accel_state->src_size[0];
- if (accel_state->src_obj[0].tiling_flags == 0)
- tex_res.tile_mode = 1;
- r600_set_tex_resource(pScrn, accel_state->ib, &tex_res,
accel_state->src_obj[0].domain);
-
- /* UV sampler */
- tex_samp.id = 1;
- r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
break;
}
--
1.7.7
_______________________________________________
xorg-driver-ati mailing list
[email protected]
http://lists.x.org/mailman/listinfo/xorg-driver-ati