--- radeon_video.c.orig	2003-11-08 12:14:14.000000000 -0500
+++ radeon_video.c	2003-11-23 18:06:57.000000000 -0500
@@ -11,6 +11,11 @@
 
 #include "Xv.h"
 #include "fourcc.h"
+#ifdef XF86DRI
+#define _XF86DRI_SERVER_
+#include "radeon_dri.h"
+#include "radeon_sarea.h"
+#endif
 
 #define OFF_DELAY       250  /* milliseconds */
 #define FREE_DELAY      15000
@@ -255,6 +260,11 @@
     CARD32	    dwOvGCb, dwOvGCr;
     CARD32	    dwOvBCb, dwOvBCr;
 
+#ifdef ACCEL_CP
+    /* RING_LOCALS declares the CP ring locals (see radeon.h); it cannot hang off an if. */
+    RING_LOCALS;
+#endif
+
     if (ref >= 2) 
 	return;
 
@@ -331,12 +341,27 @@
 	dwOvBCb = (((INT32)(OvBCb * 256.0))&0x7ff)<<4;
 	dwOvBCr = (((INT32)(OvBCr * 256.0))&0x7ff)<<20;
     }
-    OUTREG(RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvLuma);
-    OUTREG(RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr);
-    OUTREG(RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvLuma);
-    OUTREG(RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr);
-    OUTREG(RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvLuma);
-    OUTREG(RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr);
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	BEGIN_RING(12);	/* 6 OUT_RING_REG x 2 dwords each (packet header + value) */
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvLuma);
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr);
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvLuma);
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr);
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvLuma);
+    	OUT_RING_REG(RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr);
+	ADVANCE_RING();
+    } else {
+#endif
+    	OUTREG(RADEON_OV0_LIN_TRANS_A, dwOvRCb | dwOvLuma);
+    	OUTREG(RADEON_OV0_LIN_TRANS_B, dwOvROff | dwOvRCr);
+    	OUTREG(RADEON_OV0_LIN_TRANS_C, dwOvGCb | dwOvLuma);
+    	OUTREG(RADEON_OV0_LIN_TRANS_D, dwOvGOff | dwOvGCr);
+    	OUTREG(RADEON_OV0_LIN_TRANS_E, dwOvBCb | dwOvLuma);
+    	OUTREG(RADEON_OV0_LIN_TRANS_F, dwOvBOff | dwOvBCr);
+#ifdef ACCEL_CP
+    }
+#endif
 }
 
 static void RADEONSetColorKey(ScrnInfoPtr pScrn, CARD32 colorKey)
@@ -346,6 +371,11 @@
     CARD32 min, max;
     CARD8 r, g, b;
 
+#ifdef ACCEL_CP
+    /* RING_LOCALS declares the CP ring locals (see radeon.h); it cannot hang off an if. */
+    RING_LOCALS;
+#endif
+
     if (info->CurrentLayout.depth > 8)
     {
 	CARD32	rbits, gbits, bbits;
@@ -370,9 +400,20 @@
     min = (r << 16) | (g << 8) | (b);
     max = (0xff << 24) | (r << 16) | (g << 8) | (b);
 
-    RADEONWaitForFifo(pScrn, 2);
-    OUTREG(RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max);
-    OUTREG(RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min);
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	BEGIN_RING(4);	/* 2 OUT_RING_REG x 2 dwords each (packet header + value) */
+    	OUT_RING_REG(RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max);
+    	OUT_RING_REG(RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min);
+	ADVANCE_RING();
+    } else {
+#endif
+    	RADEONWaitForFifo(pScrn, 2);
+    	OUTREG(RADEON_OV0_GRAPHICS_KEY_CLR_HIGH, max);
+    	OUTREG(RADEON_OV0_GRAPHICS_KEY_CLR_LOW, min);
+#ifdef ACCEL_CP
+    }
+#endif
 }
 
 void
@@ -382,19 +423,43 @@
     unsigned char *RADEONMMIO = info->MMIO;
     RADEONPortPrivPtr pPriv = info->adaptor->pPortPrivates[0].ptr;
 
-    if (info->accelOn) info->accel->Sync(pScrn);
+#ifdef ACCEL_CP
+    /* RING_LOCALS declares the CP ring locals (see radeon.h); it cannot hang off an if. */
+    RING_LOCALS;
+#endif
 
-    RADEONWaitForIdleMMIO(pScrn);
-    OUTREG(RADEON_OV0_SCALE_CNTL, 0x80000000);
-    OUTREG(RADEON_OV0_AUTO_FLIP_CNTL, 0);   /* maybe */
-    OUTREG(RADEON_OV0_EXCLUSIVE_HORZ, 0);
-    OUTREG(RADEON_OV0_FILTER_CNTL, 0x0000000f);
-    OUTREG(RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	BEGIN_RING(16);	/* 8 OUT_RING_REG x 2 dwords each (packet header + value) */
+    	OUT_RING_REG(RADEON_OV0_SCALE_CNTL, 0x80000000);
+    	OUT_RING_REG(RADEON_OV0_AUTO_FLIP_CNTL, 0);   /* maybe */
+    	OUT_RING_REG(RADEON_OV0_EXCLUSIVE_HORZ, 0);
+    	OUT_RING_REG(RADEON_OV0_FILTER_CNTL, 0x0000000f);
+    	OUT_RING_REG(RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
 				RADEON_VIDEO_KEY_FN_FALSE |
 				RADEON_CMP_MIX_OR);
-    OUTREG(RADEON_OV0_TEST, 0);
-    OUTREG(RADEON_FCP_CNTL, RADEON_FCP0_SRC_GND);
-    OUTREG(RADEON_CAP0_TRIG_CNTL, 0);
+    	OUT_RING_REG(RADEON_OV0_TEST, 0);
+    	OUT_RING_REG(RADEON_FCP_CNTL, RADEON_FCP0_SRC_GND);
+    	OUT_RING_REG(RADEON_CAP0_TRIG_CNTL, 0);
+	ADVANCE_RING();
+    } else {
+#endif
+    	if (info->accelOn) info->accel->Sync(pScrn);
+
+    	RADEONWaitForIdleMMIO(pScrn);
+    	OUTREG(RADEON_OV0_SCALE_CNTL, 0x80000000);
+    	OUTREG(RADEON_OV0_AUTO_FLIP_CNTL, 0);   /* maybe */
+    	OUTREG(RADEON_OV0_EXCLUSIVE_HORZ, 0);
+    	OUTREG(RADEON_OV0_FILTER_CNTL, 0x0000000f);
+    	OUTREG(RADEON_OV0_KEY_CNTL, RADEON_GRAPHIC_KEY_FN_EQ |
+				RADEON_VIDEO_KEY_FN_FALSE |
+				RADEON_CMP_MIX_OR);
+    	OUTREG(RADEON_OV0_TEST, 0);
+    	OUTREG(RADEON_FCP_CNTL, RADEON_FCP0_SRC_GND);
+    	OUTREG(RADEON_CAP0_TRIG_CNTL, 0);
+#ifdef ACCEL_CP
+    }
+#endif
     RADEONSetColorKey(pScrn, pPriv->colorKey);
     
     if ((info->ChipFamily == CHIP_FAMILY_R300) ||
@@ -404,31 +469,73 @@
 	(info->ChipFamily == CHIP_FAMILY_RADEON)) {
 	int i;
 
-	OUTREG(RADEON_OV0_LIN_TRANS_A, 0x12a20000);
-	OUTREG(RADEON_OV0_LIN_TRANS_B, 0x198a190e);
-	OUTREG(RADEON_OV0_LIN_TRANS_C, 0x12a2f9da);
-	OUTREG(RADEON_OV0_LIN_TRANS_D, 0xf2fe0442);
-	OUTREG(RADEON_OV0_LIN_TRANS_E, 0x12a22046);
-	OUTREG(RADEON_OV0_LIN_TRANS_F, 0x175f);
-
-	/*
-	 * Set default Gamma ramp:
-	 *
-	 * Of 18 segments for gamma curve, all segments in R200 (and
-	 * newer) are programmable, while only lower 4 and upper 2
-	 * segments are programmable in the older Radeons.
-	 */
-	for (i = 0; i < 18; i++) {
-	    OUTREG(def_gamma[i].gammaReg,
+#ifdef ACCEL_CP
+    	if (info->directRenderingEnabled) {
+	    BEGIN_RING(48);	/* (6 transform + 18 gamma) OUT_RING_REG x 2 dwords each */
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_A, 0x12a20000);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_B, 0x198a190e);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_C, 0x12a2f9da);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_D, 0xf2fe0442);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_E, 0x12a22046);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_F, 0x175f);
+
+	    /*
+	     * Set default Gamma ramp:
+	     *
+	     * Of 18 segments for gamma curve, all segments in R200 (and
+	     * newer) are programmable, while only lower 4 and upper 2
+	     * segments are programmable in the older Radeons.
+	     */
+	    for (i = 0; i < 18; i++) {
+	        OUT_RING_REG(def_gamma[i].gammaReg,
 		   (def_gamma[i].gammaSlope<<16) | def_gamma[i].gammaOffset);
+	    }
+	    ADVANCE_RING();
+	} else {
+#endif
+	    OUTREG(RADEON_OV0_LIN_TRANS_A, 0x12a20000);
+	    OUTREG(RADEON_OV0_LIN_TRANS_B, 0x198a190e);
+	    OUTREG(RADEON_OV0_LIN_TRANS_C, 0x12a2f9da);
+	    OUTREG(RADEON_OV0_LIN_TRANS_D, 0xf2fe0442);
+	    OUTREG(RADEON_OV0_LIN_TRANS_E, 0x12a22046);
+	    OUTREG(RADEON_OV0_LIN_TRANS_F, 0x175f);
+
+	    /*
+	     * Set default Gamma ramp:
+	     *
+	     * Of 18 segments for gamma curve, all segments in R200 (and
+	     * newer) are programmable, while only lower 4 and upper 2
+	     * segments are programmable in the older Radeons.
+	     */
+	    for (i = 0; i < 18; i++) {
+	        OUTREG(def_gamma[i].gammaReg,
+		   (def_gamma[i].gammaSlope<<16) | def_gamma[i].gammaOffset);
+	    }
+#ifdef ACCEL_CP
 	}
+#endif
     } else {
-	OUTREG(RADEON_OV0_LIN_TRANS_A, 0x12a00000);
-	OUTREG(RADEON_OV0_LIN_TRANS_B, 0x1990190e);
-	OUTREG(RADEON_OV0_LIN_TRANS_C, 0x12a0f9c0);
-	OUTREG(RADEON_OV0_LIN_TRANS_D, 0xf3000442);
-	OUTREG(RADEON_OV0_LIN_TRANS_E, 0x12a02040);
-	OUTREG(RADEON_OV0_LIN_TRANS_F, 0x175f);
+#ifdef ACCEL_CP
+    	if (info->directRenderingEnabled) {
+	    BEGIN_RING(12);	/* 6 OUT_RING_REG x 2 dwords each (packet header + value) */
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_A, 0x12a00000);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_B, 0x1990190e);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_C, 0x12a0f9c0);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_D, 0xf3000442);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_E, 0x12a02040);
+	    OUT_RING_REG(RADEON_OV0_LIN_TRANS_F, 0x175f);
+	    ADVANCE_RING();
+	} else {
+#endif
+	    OUTREG(RADEON_OV0_LIN_TRANS_A, 0x12a00000);
+	    OUTREG(RADEON_OV0_LIN_TRANS_B, 0x1990190e);
+	    OUTREG(RADEON_OV0_LIN_TRANS_C, 0x12a0f9c0);
+	    OUTREG(RADEON_OV0_LIN_TRANS_D, 0xf3000442);
+	    OUTREG(RADEON_OV0_LIN_TRANS_E, 0x12a02040);
+	    OUTREG(RADEON_OV0_LIN_TRANS_F, 0x175f);
+#ifdef ACCEL_CP
+	}
+#endif
     }
 }
 
@@ -497,10 +604,6 @@
     OUTPLL(RADEON_VCLK_ECP_CNTL, (INPLL(pScrn, RADEON_VCLK_ECP_CNTL) & 
 				  0xfffffCff) | (pPriv->ecp_div << 8));
 
-    /* I suspect we may need a usleep after writing to the PLL.  if you play a video too soon
-       after switching crtcs in mergedfb clone mode you get a temporary one pixel line of colorkey 
-       on the right edge video output.  */
-
 
     if ((info->ChipFamily == CHIP_FAMILY_RS100) || 
 	(info->ChipFamily == CHIP_FAMILY_RS200) ||
@@ -895,6 +998,11 @@
     CARD32 dot_clock;
     DisplayModePtr overlay_mode;
 
+#ifdef ACCEL_CP
+    /* RING_LOCALS declares the CP ring locals (see radeon.h); it cannot hang off an if. */
+    RING_LOCALS;
+#endif
+
     /* Unlike older Mach64 chips, RADEON has only two ECP settings: 0 for PIXCLK < 175Mhz, and 1 (divide by 2)
        for higher clocks, sure makes life nicer 
        
@@ -914,10 +1022,6 @@
 
     OUTPLL(RADEON_VCLK_ECP_CNTL, (INPLL(pScrn, RADEON_VCLK_ECP_CNTL) & 0xfffffCff) | (ecp_div << 8));
 
-    /* I suspect we may need a usleep after writing to the PLL.  if you play a video too soon
-       after switching crtcs in mergedfb clone mode you get a temporary one pixel line of colorkey 
-       on the right edge video output.  */
-
     v_inc_shift = 20;
     y_mult = 1;
 
@@ -981,14 +1085,26 @@
 
     left = (left >> 16) & 7;
 
-    RADEONWaitForFifo(pScrn, 2);
-    OUTREG(RADEON_OV0_REG_LOAD_CNTL, 1);
-    if (info->accelOn) info->accel->Sync(pScrn);
-    while(!(INREG(RADEON_OV0_REG_LOAD_CNTL) & (1 << 3)));
-
-    RADEONWaitForFifo(pScrn, 14);
-    OUTREG(RADEON_OV0_H_INC, h_inc | ((h_inc >> 1) << 16));
-    OUTREG(RADEON_OV0_STEP_BY, step_by | (step_by << 8));
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	BEGIN_RING(6);	/* 3 OUT_RING_REG x 2 dwords each (packet header + value) */
+	OUT_RING_REG(RADEON_OV0_REG_LOAD_CNTL, 1);
+    	OUT_RING_REG(RADEON_OV0_H_INC, h_inc | ((h_inc >> 1) << 16));
+    	OUT_RING_REG(RADEON_OV0_STEP_BY, step_by | (step_by << 8));
+	ADVANCE_RING();
+    } else {
+#endif
+    	RADEONWaitForFifo(pScrn, 2);
+    	OUTREG(RADEON_OV0_REG_LOAD_CNTL, 1);
+    	if (info->accelOn) info->accel->Sync(pScrn);
+    	while(!(INREG(RADEON_OV0_REG_LOAD_CNTL) & (1 << 3)));
+
+    	RADEONWaitForFifo(pScrn, 14);
+    	OUTREG(RADEON_OV0_H_INC, h_inc | ((h_inc >> 1) << 16));
+    	OUTREG(RADEON_OV0_STEP_BY, step_by | (step_by << 8));
+#ifdef ACCEL_CP
+    }
+#endif
 
     x_off = 8;
     y_off = 0;
@@ -1021,40 +1137,102 @@
      * rendering for the second head.
      */
 
-    if ((info->MergedFB && info->OverlayOnCRTC2) || info->IsSecondary) {
-        x_off = 0;
-        OUTREG(RADEON_OV1_Y_X_START, ((dstBox->x1 + x_off) |
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	BEGIN_RING(40);	/* 20 OUT_RING_REG x 2 dwords each (packet header + value) */
+    	if ((info->MergedFB && info->OverlayOnCRTC2) || info->IsSecondary) {
+        	x_off = 0;
+        	OUT_RING_REG(RADEON_OV1_Y_X_START, ((dstBox->x1 + x_off) |
                                       ((dstBox->y1*y_mult) << 16)));
-        OUTREG(RADEON_OV1_Y_X_END,   ((dstBox->x2 + x_off) |
+        	OUT_RING_REG(RADEON_OV1_Y_X_END,   ((dstBox->x2 + x_off) |
                                       ((dstBox->y2*y_mult) << 16)));
-        scaler_src = (1 << 14);
+        	scaler_src = (1 << 14);
+    	} else {
+		OUT_RING_REG(RADEON_OV0_Y_X_START, ((dstBox->x1 + x_off) |
+				      (((dstBox->y1*y_mult) + y_off) << 16)));
+		OUT_RING_REG(RADEON_OV0_Y_X_END,   ((dstBox->x2 + x_off) |
+				      (((dstBox->y2*y_mult) + y_off) << 16)));
+		scaler_src = 0;
+    	}
+
+    	OUT_RING_REG(RADEON_OV0_V_INC, v_inc);
+    	OUT_RING_REG(RADEON_OV0_P1_BLANK_LINES_AT_TOP, 0x00000fff | ((src_h - 1) << 16));
+    	OUT_RING_REG(RADEON_OV0_VID_BUF_PITCH0_VALUE, pitch);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF_PITCH1_VALUE, pitch);
+    	OUT_RING_REG(RADEON_OV0_P1_X_START_END, (src_w + left - 1) | (left << 16));
+    	left >>= 1; src_w >>= 1;
+    	OUT_RING_REG(RADEON_OV0_P2_X_START_END, (src_w + left - 1) | (left << 16));
+    	OUT_RING_REG(RADEON_OV0_P3_X_START_END, (src_w + left - 1) | (left << 16));
+    	OUT_RING_REG(RADEON_OV0_VID_BUF0_BASE_ADRS, offset1 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF1_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF2_BASE_ADRS, offset1 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF3_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF4_BASE_ADRS, offset1 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_VID_BUF5_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUT_RING_REG(RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init);
+    	OUT_RING_REG(RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init);
+    	OUT_RING_REG(RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init);
+
+#if 0
+    if(id == FOURCC_UYVY)
+       OUT_RING_REG(RADEON_OV0_SCALE_CNTL, 0x41008C03);
+    else
+       OUT_RING_REG(RADEON_OV0_SCALE_CNTL, 0x41008B03);
+#endif
+
+    	if (id == FOURCC_UYVY)
+		OUT_RING_REG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_YVYU422
+				       | RADEON_SCALER_ADAPTIVE_DEINT
+				       | RADEON_SCALER_SMART_SWITCH
+				       | RADEON_SCALER_DOUBLE_BUFFER
+				       | RADEON_SCALER_ENABLE
+				       | scaler_src));
+    	else
+		OUT_RING_REG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_VYUY422
+				       | RADEON_SCALER_ADAPTIVE_DEINT
+				       | RADEON_SCALER_SMART_SWITCH
+				       | RADEON_SCALER_DOUBLE_BUFFER
+				       | RADEON_SCALER_ENABLE
+				       | scaler_src));
+
+    	OUT_RING_REG(RADEON_OV0_REG_LOAD_CNTL, 0);
+	ADVANCE_RING();
     } else {
-	OUTREG(RADEON_OV0_Y_X_START, ((dstBox->x1 + x_off) |
+#endif
+    	if ((info->MergedFB && info->OverlayOnCRTC2) || info->IsSecondary) {
+        	x_off = 0;
+        	OUTREG(RADEON_OV1_Y_X_START, ((dstBox->x1 + x_off) |
+                                      ((dstBox->y1*y_mult) << 16)));
+        	OUTREG(RADEON_OV1_Y_X_END,   ((dstBox->x2 + x_off) |
+                                      ((dstBox->y2*y_mult) << 16)));
+        	scaler_src = (1 << 14);
+    	} else {
+		OUTREG(RADEON_OV0_Y_X_START, ((dstBox->x1 + x_off) |
 				      (((dstBox->y1*y_mult) + y_off) << 16)));
-	OUTREG(RADEON_OV0_Y_X_END,   ((dstBox->x2 + x_off) |
+		OUTREG(RADEON_OV0_Y_X_END,   ((dstBox->x2 + x_off) |
 				      (((dstBox->y2*y_mult) + y_off) << 16)));
-	scaler_src = 0;
-    }
+		scaler_src = 0;
+    	}
 
-    OUTREG(RADEON_OV0_V_INC, v_inc);
-    OUTREG(RADEON_OV0_P1_BLANK_LINES_AT_TOP, 0x00000fff | ((src_h - 1) << 16));
-    OUTREG(RADEON_OV0_VID_BUF_PITCH0_VALUE, pitch);
-    OUTREG(RADEON_OV0_VID_BUF_PITCH1_VALUE, pitch);
-    OUTREG(RADEON_OV0_P1_X_START_END, (src_w + left - 1) | (left << 16));
-    left >>= 1; src_w >>= 1;
-    OUTREG(RADEON_OV0_P2_X_START_END, (src_w + left - 1) | (left << 16));
-    OUTREG(RADEON_OV0_P3_X_START_END, (src_w + left - 1) | (left << 16));
-    OUTREG(RADEON_OV0_VID_BUF0_BASE_ADRS, offset1 & 0xfffffff0);
-    OUTREG(RADEON_OV0_VID_BUF1_BASE_ADRS, offset2 & 0xfffffff0);
-    OUTREG(RADEON_OV0_VID_BUF2_BASE_ADRS, offset1 & 0xfffffff0);
-
-    RADEONWaitForFifo(pScrn, 9);
-    OUTREG(RADEON_OV0_VID_BUF3_BASE_ADRS, offset2 & 0xfffffff0);
-    OUTREG(RADEON_OV0_VID_BUF4_BASE_ADRS, offset1 & 0xfffffff0);
-    OUTREG(RADEON_OV0_VID_BUF5_BASE_ADRS, offset2 & 0xfffffff0);
-    OUTREG(RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init);
-    OUTREG(RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init);
-    OUTREG(RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init);
+    	OUTREG(RADEON_OV0_V_INC, v_inc);
+    	OUTREG(RADEON_OV0_P1_BLANK_LINES_AT_TOP, 0x00000fff | ((src_h - 1) << 16));
+    	OUTREG(RADEON_OV0_VID_BUF_PITCH0_VALUE, pitch);
+    	OUTREG(RADEON_OV0_VID_BUF_PITCH1_VALUE, pitch);
+    	OUTREG(RADEON_OV0_P1_X_START_END, (src_w + left - 1) | (left << 16));
+    	left >>= 1; src_w >>= 1;
+    	OUTREG(RADEON_OV0_P2_X_START_END, (src_w + left - 1) | (left << 16));
+    	OUTREG(RADEON_OV0_P3_X_START_END, (src_w + left - 1) | (left << 16));
+    	OUTREG(RADEON_OV0_VID_BUF0_BASE_ADRS, offset1 & 0xfffffff0);
+    	OUTREG(RADEON_OV0_VID_BUF1_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUTREG(RADEON_OV0_VID_BUF2_BASE_ADRS, offset1 & 0xfffffff0);
+
+    	RADEONWaitForFifo(pScrn, 9);
+    	OUTREG(RADEON_OV0_VID_BUF3_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUTREG(RADEON_OV0_VID_BUF4_BASE_ADRS, offset1 & 0xfffffff0);
+    	OUTREG(RADEON_OV0_VID_BUF5_BASE_ADRS, offset2 & 0xfffffff0);
+    	OUTREG(RADEON_OV0_P1_V_ACCUM_INIT, p1_v_accum_init);
+    	OUTREG(RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init);
+    	OUTREG(RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init);
 
 #if 0
     if(id == FOURCC_UYVY)
@@ -1063,22 +1241,25 @@
        OUTREG(RADEON_OV0_SCALE_CNTL, 0x41008B03);
 #endif
 
-    if (id == FOURCC_UYVY)
-	OUTREG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_YVYU422
+    	if (id == FOURCC_UYVY)
+		OUTREG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_YVYU422
 				       | RADEON_SCALER_ADAPTIVE_DEINT
 				       | RADEON_SCALER_SMART_SWITCH
 				       | RADEON_SCALER_DOUBLE_BUFFER
 				       | RADEON_SCALER_ENABLE
 				       | scaler_src));
-    else
-	OUTREG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_VYUY422
+    	else
+		OUTREG(RADEON_OV0_SCALE_CNTL, (RADEON_SCALER_SOURCE_VYUY422
 				       | RADEON_SCALER_ADAPTIVE_DEINT
 				       | RADEON_SCALER_SMART_SWITCH
 				       | RADEON_SCALER_DOUBLE_BUFFER
 				       | RADEON_SCALER_ENABLE
 				       | scaler_src));
 
-    OUTREG(RADEON_OV0_REG_LOAD_CNTL, 0);
+    	OUTREG(RADEON_OV0_REG_LOAD_CNTL, 0);
+#ifdef ACCEL_CP
+    }
+#endif
 }
 
 
@@ -1498,6 +1679,7 @@
 	dstBox.y2 -= pScrn->frameY0;
     }
 
+    /* why is this reset needed? */
     RADEONResetVideo(pScrn);
 
     RADEONDisplayVideo(pScrn, surface->id,
@@ -1542,8 +1724,8 @@
     offscreenImages[0].stop = RADEONStopSurface;
     offscreenImages[0].setAttribute = RADEONSetSurfaceAttribute;
     offscreenImages[0].getAttribute = RADEONGetSurfaceAttribute;
-    offscreenImages[0].max_width = 1024;
-    offscreenImages[0].max_height = 1024;
+    offscreenImages[0].max_width = 1024; /* 2048? */
+    offscreenImages[0].max_height = 1024; /* 2048? */
     offscreenImages[0].num_attributes = NUM_ATTRIBUTES;
     offscreenImages[0].attributes = Attributes;
 
