diff -ur r300.orig/r300_cmdbuf.c r300/r300_cmdbuf.c
--- r300.orig/r300_cmdbuf.c	Thu Jun  2 01:00:00 2005
+++ r300/r300_cmdbuf.c	Thu Jun  2 01:00:00 2005
@@ -66,9 +66,9 @@
 	drm_radeon_cmd_buffer_t cmd;
 	int start;
 
-	if (r300->radeon.lost_context)
-		start = 0;
-	else
+	if (r300->radeon.lost_context){
+		start = 0; r300->radeon.lost_context=GL_FALSE;
+	}else
 		start = r300->cmdbuf.count_reemit;
 
 	if (RADEON_DEBUG & DEBUG_IOCTL) {
@@ -96,8 +96,8 @@
 			DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
 
 	if (RADEON_DEBUG & DEBUG_SYNC) {
-		fprintf(stderr, "Syncing in %s (from %s)\n\n", __FUNCTION__, caller);
-		radeonWaitForIdleLocked(&r300->radeon);
+		//fprintf(stderr, "Syncing in %s (from %s)\n\n", __FUNCTION__, caller);
+		//radeonWaitForIdleLocked(&r300->radeon);
 	}
 
 	r300->dma.nr_released_bufs = 0;
diff -ur r300.orig/r300_emit.h r300/r300_emit.h
--- r300.orig/r300_emit.h	Thu Jun  2 01:00:00 2005
+++ r300/r300_emit.h	Thu Jun  2 01:00:00 2005
@@ -142,6 +142,18 @@
 	cmd[0].i=cmducs((reg), _n+1); \
 	}
 
+#define reg_start_raw(reg, num_extra) /* Start a cmducs() packet for reg; expands to a bare block and needs rmesa, cmd, cmd_reserved and cmd_written in the caller's scope. */ \
+	{ \
+	int _n; \
+	_n=(num_extra); /* copy the argument so it is evaluated only once */ \
+	cmd=(drm_radeon_cmd_header_t *) r300RawAllocCmdBuf(rmesa, /* NOTE(review): presumably differs from reg_start only in using the Raw allocator - confirm against r300_cmdbuf.h */ \
+					(_n+2), /* presumably one header dword plus _n+1 value dwords - TODO confirm */ \
+					__FUNCTION__); \
+	cmd_reserved=_n+2; /* same figure as the allocation request above */ \
+	cmd_written=1; /* slot 0 is filled with the header on the next line */ \
+	cmd[0].i=cmducs((reg), _n+1); /* header word built by cmducs() from reg and the count _n+1 */ \
+	}
+
 /* Prepare to write a register value to register at address reg.
    If num_extra > 0 then the following extra values are written
    into the same register. */
@@ -167,6 +179,18 @@
 	}
 
 #define	efloat(f) e32(r300PackFloat32(f))
+
+#define vsf_start_fragment_raw(dest, length) /* Start a cmdvpu() upload of length dwords to dest; expands to a bare block and needs rmesa, cmd, cmd_reserved and cmd_written in the caller's scope. */ \
+	{ \
+	int _n; \
+	_n=(length); /* copy the argument so it is evaluated only once */ \
+	cmd=(drm_radeon_cmd_header_t *) r300RawAllocCmdBuf(rmesa, /* NOTE(review): Raw allocator variant - confirm its semantics in r300_cmdbuf.h */ \
+					(_n+1), /* one header dword plus _n payload dwords */ \
+					__FUNCTION__); \
+	cmd_reserved=_n+1; /* was _n+2: keep the recorded capacity equal to the _n+1 dwords allocated, as reg_start_raw does */ \
+	cmd_written=1; /* slot 0 is filled with the header on the next line */ \
+	cmd[0].i=cmdvpu((dest), _n/4); /* count is passed as _n/4 - presumably 4-dword vectors, TODO confirm */ \
+	}
 
 #define vsf_start_fragment(dest, length)  \
 	{ \
diff -ur r300.orig/r300_ioctl.c r300/r300_ioctl.c
--- r300.orig/r300_ioctl.c	Thu Jun  2 01:00:00 2005
+++ r300/r300_ioctl.c	Thu Jun  2 01:00:00 2005
@@ -55,11 +55,12 @@
 
 #include "vblank.h"
 
-//#define CB_DPATH
+#define CB_DPATH
 
 #define CLEARBUFFER_COLOR	0x1
 #define CLEARBUFFER_DEPTH	0x2
 #define CLEARBUFFER_STENCIL	0x4
+static void r300EmitClearState(GLcontext * ctx);
 
 static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer)
 {
@@ -252,16 +253,19 @@
 	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size + 9+8, __FUNCTION__);
 
 	r300EmitState(r300);
+	cmd2 = (drm_r300_cmd_header_t*)r300AllocCmdBuf(r300, 9, __FUNCTION__);
 #else
+	//r300EmitClearState(ctx);
+	
 	R300_STATECHANGE(r300, cb);
-	reg_start(R300_RB3D_COLOROFFSET0, 0);
+	reg_start_raw(R300_RB3D_COLOROFFSET0, 0);
 	e32(cboffset);
 	
-	reg_start(R300_RB3D_COLORPITCH0, 0);
+	reg_start_raw(R300_RB3D_COLORPITCH0, 0);
 	e32(cbpitch | R300_COLOR_UNKNOWN_22_23);
 
 	R300_STATECHANGE(r300, cmk);
-	reg_start(R300_RB3D_COLORMASK, 0);
+	reg_start_raw(R300_RB3D_COLORMASK, 0);
 	
 	if (flags & CLEARBUFFER_COLOR) {
 		e32((ctx->Color.ColorMask[BCOMP] ? R300_COLORMASK0_B : 0) |
@@ -273,7 +277,7 @@
 	}
 	
 	R300_STATECHANGE(r300, zs);
-	reg_start(R300_RB3D_ZSTENCIL_CNTL_0, 2);
+	reg_start_raw(R300_RB3D_ZSTENCIL_CNTL_0, 2);
 	
 	{
 	uint32_t t1, t2;
@@ -325,9 +329,10 @@
 	e32(r300->state.stencil.clear);
 	}
 	
+	cmd2 = (drm_r300_cmd_header_t*)r300RawAllocCmdBuf(r300, 9, __FUNCTION__);
 #endif
 
-	cmd2 = (drm_r300_cmd_header_t*)r300AllocCmdBuf(r300, 9, __FUNCTION__);
+	//cmd2 = (drm_r300_cmd_header_t*)r300AllocCmdBuf(r300, 9, __FUNCTION__);
 	cmd2[0].packet3.cmd_type = R300_CMD_PACKET3;
 	cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR;
 	cmd2[1].u = r300PackFloat32(dPriv->w / 2.0);
@@ -349,31 +354,42 @@
 	int i;
 	LOCAL_VARS;
 	
+   reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
+	e32(0x0000000a);
+
+   reg_start(0x4f18,0);
+	e32(0x00000003);
+	/*r300EmitState(r300);
+	r300EnsureCmdBufSpace(r300, r300->hw.max_state_size + 9+8, __FUNCTION__);
+	r300FlushCmdBuf(r300, __FUNCTION__);*/
+	/*if(r300->radeon.lost_context)
+		fprintf(stderr, "LOST CONTEXT AT r300EmitClearState!\n");*/
+	//r300->radeon.lost_context=GL_FALSE;
 	R300_STATECHANGE(r300, vir[0]);
-	reg_start(R300_VAP_INPUT_ROUTE_0_0, 0);
+	reg_start_raw(R300_VAP_INPUT_ROUTE_0_0, 0);
 	e32(0x21030003);
 	
 	R300_STATECHANGE(r300, vir[1]);
-	reg_start(R300_VAP_INPUT_ROUTE_1_0, 0);
+	reg_start_raw(R300_VAP_INPUT_ROUTE_1_0, 0);
 	e32(0xF688F688);
 
 	R300_STATECHANGE(r300, vic);
-	reg_start(R300_VAP_INPUT_CNTL_0, 1);
+	reg_start_raw(R300_VAP_INPUT_CNTL_0, 1);
 	e32(0x00000001);
 	e32(0x00000405);
 	
 	R300_STATECHANGE(r300, vof);
-	reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1);
+	reg_start_raw(R300_VAP_OUTPUT_VTX_FMT_0, 1);
 	e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_PRESENT);
 	e32(0); /* no textures */
 		
 	
 	R300_STATECHANGE(r300, txe);
-	reg_start(R300_TX_ENABLE, 0);
+	reg_start_raw(R300_TX_ENABLE, 0);
 	e32(0);
 	
 	R300_STATECHANGE(r300, vpt);
-	reg_start(R300_SE_VPORT_XSCALE, 5);
+	reg_start_raw(R300_SE_VPORT_XSCALE, 5);
 	efloat(1.0);
 	efloat(dPriv->x);
 	efloat(1.0);
@@ -382,45 +398,45 @@
 	efloat(0.0);
 	
 	R300_STATECHANGE(r300, at);
-	reg_start(R300_PP_ALPHA_TEST, 0);
+	reg_start_raw(R300_PP_ALPHA_TEST, 0);
 	e32(0);
 	
 	R300_STATECHANGE(r300, bld);
-	reg_start(R300_RB3D_CBLEND, 1);
+	reg_start_raw(R300_RB3D_CBLEND, 1);
 	e32(0);
 	e32(0);
 	
 	R300_STATECHANGE(r300, unk221C);
-	reg_start(0x221C, 0);
+	reg_start_raw(0x221C, 0);
 	e32(R300_221C_CLEAR);
 	
 	R300_STATECHANGE(r300, ps);
-	reg_start(R300_RE_POINTSIZE, 0);
+	reg_start_raw(R300_RE_POINTSIZE, 0);
 	e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
 		((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
 	
 	R300_STATECHANGE(r300, ri);
-	reg_start(R300_RS_INTERP_0, 8);
+	reg_start_raw(R300_RS_INTERP_0, 8);
 	for(i = 0; i < 8; ++i){
 		e32(R300_RS_INTERP_USED);
 	}
 
 	R300_STATECHANGE(r300, rc);
 	/* The second constant is needed to get glxgears display anything .. */
-	reg_start(R300_RS_CNTL_0, 1);
+	reg_start_raw(R300_RS_CNTL_0, 1);
 	e32(R300_RS_CNTL_0_UNKNOWN_7 | R300_RS_CNTL_0_UNKNOWN_18);
 	e32(0);
 	
 	R300_STATECHANGE(r300, rr);
-	reg_start(R300_RS_ROUTE_0, 0);
+	reg_start_raw(R300_RS_ROUTE_0, 0);
 	e32(0x00004000);
 	
 	R300_STATECHANGE(r300, fp);
-	reg_start(R300_PFS_CNTL_0, 2);
+	reg_start_raw(R300_PFS_CNTL_0, 2);
 	e32(0);
 	e32(0);
 	e32(0);
-	reg_start(R300_PFS_NODE_0, 3);
+	reg_start_raw(R300_PFS_NODE_0, 3);
 	e32(0);
 	e32(0);
 	e32(0);
@@ -431,20 +447,20 @@
 	R300_STATECHANGE(r300, fpi[2]);
 	R300_STATECHANGE(r300, fpi[3]);
 	
-	reg_start(R300_PFS_INSTR0_0, 0);
+	reg_start_raw(R300_PFS_INSTR0_0, 0);
 	e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
 	
-	reg_start(R300_PFS_INSTR1_0, 0);
+	reg_start_raw(R300_PFS_INSTR1_0, 0);
 	e32(FP_SELC(0,NO,XYZ,FP_TMP(0),0,0));
 	
-	reg_start(R300_PFS_INSTR2_0, 0);
+	reg_start_raw(R300_PFS_INSTR2_0, 0);
 	e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
 	
-	reg_start(R300_PFS_INSTR3_0, 0);
+	reg_start_raw(R300_PFS_INSTR3_0, 0);
 	e32(FP_SELA(0,NO,W,FP_TMP(0),0,0));
 	
 	R300_STATECHANGE(r300, pvs);
-	reg_start(R300_VAP_PVS_CNTL_1, 2);
+	reg_start_raw(R300_VAP_PVS_CNTL_1, 2);
 	e32((0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
 		(0 << R300_PVS_CNTL_1_POS_END_SHIFT) |
 		(1 << R300_PVS_CNTL_1_PROGRAM_END_SHIFT));
@@ -452,7 +468,7 @@
 	e32(1 << R300_PVS_CNTL_3_PROGRAM_UNKNOWN_SHIFT);
 	
 	R300_STATECHANGE(r300, vpi);
-	vsf_start_fragment(0x0, 8);
+	vsf_start_fragment_raw(0x0, 8);
 	e32(VP_OUT(ADD,OUT,0,XYZW));
 	e32(VP_IN(IN,0));
 	e32(VP_ZERO());
@@ -477,6 +493,8 @@
 	int flags = 0;
 	int bits = 0;
 	int swapped;
+	r300ContextPtr rmesa=r300;
+	LOCAL_VARS;
 
 	if (RADEON_DEBUG & DEBUG_IOCTL)
 		fprintf(stderr, "%s:  all=%d cx=%d cy=%d cw=%d ch=%d\n",
@@ -519,9 +537,14 @@
 	swapped = r300->radeon.doPageFlip && (r300->radeon.sarea->pfCurrentPage == 1);
 
 #ifdef CB_DPATH
-	if(flags || bits)
+	if(flags || bits){
+		//fprintf(stderr, "used %d\n", r300->cmdbuf.count_used);
+		//r300EmitState(r300);
+		//r300->cmdbuf.count_reemit=0;
 		r300EmitClearState(ctx);
+	}
 #endif
+	//if (!r300->cmdbuf.count_used)fprintf(stderr, "reemit in clearbuffer!\n");
 
 	if (flags & BUFFER_BIT_FRONT_LEFT) {
 		r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped);
@@ -544,7 +567,17 @@
 	
 	/* r300ClearBuffer has trampled all over the hardware state.. */
 	r300->hw.all_dirty=GL_TRUE;
+#else
+	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
+	e32(0x0000000a);
+
+	reg_start(0x4f18,0);
+	e32(0x00000003);
+	//r300EmitState(r300);
+	//r300ResetHwState(r300);
+	//r300FlushCmdBuf(r300, __FUNCTION__);
 #endif
+	//fprintf(stderr, "lost context %d!\n", r300->radeon.lost_context);
 }
 
 void r300Flush(GLcontext * ctx)
@@ -648,12 +681,18 @@
 		if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
 			fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__,
 				region->buf->buf->idx);
+		/*fprintf(stderr, "release buffer:\n");
+		fprintf(stderr, "aos_offset %d\n", region->aos_offset);
+		fprintf(stderr, "aos_stride %d\n", region->aos_stride);
+		fprintf(stderr, "aos_size %d\n", region->aos_size);
+		fprintf(stderr, "aos_format %d\n", region->aos_format);
+		fprintf(stderr, "aos_reg %d\n", region->aos_reg);*/
 		cmd =
 		    (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa,
 								sizeof(*cmd) / 4,
 								__FUNCTION__);
 		cmd->dma.cmd_type = R300_CMD_DMA_DISCARD;
-		cmd->dma.buf_idx = region->buf->buf->idx;
+		cmd->dma.buf_idx = region->buf->buf->idx;//fprintf(stderr, "im doing it for real!\n");
 		
 		FREE(region->buf);
 		rmesa->dma.nr_released_bufs++;
@@ -697,7 +736,7 @@
 	rmesa->dma.current.start =
 	    rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;
 
-	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);
+	assert(rmesa->dma.current.ptr <= rmesa->dma.current.end);//fprintf(stderr, "start %d of %d\n", rmesa->dma.current.start, region->buf->buf->idx);
 }
 
 /* Called via glXGetMemoryOffsetMESA() */
diff -ur r300.orig/r300_render.c r300/r300_render.c
--- r300.orig/r300_render.c	Thu Jun  2 01:00:00 2005
+++ r300/r300_render.c	Thu Jun  2 01:00:00 2005
@@ -676,7 +676,7 @@
 	if(ctx->VertexProgram._Enabled == GL_FALSE){
 		_tnl_UpdateFixedFunctionProgram(ctx);
 	}
-	vp = CURRENT_VERTEX_SHADER(ctx);
+	vp = CURRENT_VERTEX_SHADER(ctx);//debug_vp(ctx, vp);
 	if(vp->translated == GL_FALSE)
 		translate_vertex_shader(vp);
 	if(vp->translated == GL_FALSE){
diff -ur r300.orig/r300_vertexprog.c r300/r300_vertexprog.c
--- r300.orig/r300_vertexprog.c	Thu Jun  2 01:00:00 2005
+++ r300/r300_vertexprog.c	Thu Jun  2 01:00:00 2005
@@ -537,6 +537,20 @@
 			vp->outputs[i] = cur_reg++;
 	
 	o_inst=vp->program.body.i;
+	/*for(i=0; i < vp->num_temporaries; i++, o_inst++){
+		o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, i,
+				VSF_FLAG_X|VSF_FLAG_Y|VSF_FLAG_Z|VSF_FLAG_W, VSF_OUT_CLASS_TMP);
+		o_inst->src1=MAKE_VSF_SOURCE(0,
+					SWIZZLE_ZERO, SWIZZLE_ZERO,
+					SWIZZLE_ZERO, SWIZZLE_ZERO,
+					VSF_IN_CLASS_TMP, VSF_FLAG_NONE);
+		o_inst->src2=MAKE_VSF_SOURCE(0,
+					SWIZZLE_ZERO, SWIZZLE_ZERO,
+					SWIZZLE_ZERO, SWIZZLE_ZERO,
+					VSF_IN_CLASS_ATTR, VSF_FLAG_NONE);
+		o_inst->src3=0;
+	}*/
+		
 	for(vpi=mesa_vp->Instructions; vpi->Opcode != VP_OPCODE_END; vpi++, o_inst++){
 		
 		operands=op_operands(vpi->Opcode);
diff -ur r300.orig/radeon_lock.c r300/radeon_lock.c
--- r300.orig/radeon_lock.c	Thu Jun  2 01:00:00 2005
+++ r300/radeon_lock.c	Thu Jun  2 01:00:00 2005
@@ -163,4 +163,5 @@
 #endif
 	
 	radeon->lost_context = GL_TRUE;
+	//fprintf(stderr, "lost context!\n");
 }
