diff -uNr r300.orig/CVS/Entries r300/CVS/Entries
--- r300.orig/CVS/Entries	Wed Apr 20 16:59:42 2005
+++ r300/CVS/Entries	Mon Apr  4 23:46:08 2005
@@ -1,5 +1,3 @@
-/Makefile/1.10/Tue Mar 15 17:09:35 2005//
-/pixel_shader.h/1.2/Thu Feb  3 04:16:59 2005//
 /r200_cmdbuf.c/1.1.1.1/Tue Sep 28 10:59:33 2004//
 /r200_context.c/1.2/Sun Oct 17 20:26:05 2004//
 /r200_context.h/1.3/Sun Jan 23 17:28:04 2005//
@@ -23,46 +21,48 @@
 /r200_tex.c/1.1.1.1/Tue Sep 28 11:00:09 2004//
 /r200_tex.h/1.1.1.1/Tue Sep 28 11:00:09 2004//
 /r200_texmem.c/1.1.1.1/Tue Sep 28 10:59:50 2004//
-/r200_texstate.c/1.2/Fri Jan 28 09:57:06 2005//
 /r200_vtxfmt.c/1.1.1.1/Tue Sep 28 10:59:25 2004//
 /r200_vtxfmt.h/1.1.1.1/Tue Sep 28 10:59:26 2004//
 /r200_vtxfmt_c.c/1.1.1.1/Tue Sep 28 10:59:27 2004//
 /r200_vtxfmt_sse.c/1.1.1.1/Tue Sep 28 10:59:59 2004//
 /r200_vtxfmt_x86.c/1.1.1.1/Tue Sep 28 10:59:59 2004//
 /r200_vtxtmp_x86.S/1.1.1.1/Tue Sep 28 10:59:39 2004//
-/r300_cmdbuf.c/1.37/Sat Mar 12 10:14:09 2005//
-/r300_cmdbuf.h/1.4/Fri Feb 11 05:59:13 2005//
-/r300_context.c/1.16/Thu Mar 17 21:50:01 2005//
-/r300_context.h/1.55/Fri Mar 18 14:44:27 2005//
-/r300_emit.h/1.10/Sat Mar 12 10:14:10 2005//
-/r300_fixed_pipelines.h/1.9/Thu Feb 17 22:07:32 2005//
-/r300_ioctl.c/1.21/Mon Mar 14 20:35:00 2005//
 /r300_ioctl.h/1.3/Tue Jan  4 18:59:48 2005//
-/r300_maos.c/1.14/Fri Mar 18 14:44:27 2005//
-/r300_maos.h/1.3/Tue Feb 22 05:16:42 2005//
 /r300_program.h/1.1.1.1/Tue Sep 28 10:59:27 2004//
-/r300_reg.h/1.32/Tue Mar 15 17:00:45 2005//
-/r300_render.c/1.79/Fri Mar 18 14:44:28 2005//
-/r300_state.c/1.93/Fri Mar 18 14:44:28 2005//
-/r300_state.h/1.4/Tue Feb  1 05:59:00 2005//
-/r300_tex.c/1.14/Thu Feb 24 13:59:56 2005//
-/r300_tex.h/1.3/Wed Feb 16 16:52:12 2005//
-/r300_texmem.c/1.8/Tue Mar  8 22:29:46 2005//
-/r300_texprog.c/1.1/Tue Mar 15 17:00:46 2005//
-/r300_texprog.h/1.1/Tue Mar 15 17:00:46 2005//
-/r300_texstate.c/1.24/Fri Mar 11 11:38:23 2005//
-/r300_vertexprog.c/1.18/Fri Mar 18 14:44:28 2005//
-/radeon_context.c/1.2/Sun Oct 17 20:26:06 2004//
 /radeon_context.h/1.3/Sun Jan 23 17:28:10 2005//
 /radeon_ioctl.c/1.2/Fri Oct 15 20:52:47 2004//
 /radeon_ioctl.h/1.1.1.1/Tue Sep 28 10:59:48 2004//
 /radeon_lock.c/1.3/Sun Oct 17 20:26:06 2004//
-/radeon_lock.h/1.1.1.1/Tue Sep 28 10:59:31 2004//
-/radeon_screen.c/1.9/Sun Feb 27 12:12:25 2005//
-/radeon_screen.h/1.2/Sat Feb 12 21:29:51 2005//
-/radeon_span.c/1.6/Thu Feb 17 22:07:33 2005//
 /radeon_span.h/1.1.1.1/Tue Sep 28 10:59:32 2004//
 /radeon_state.c/1.2/Sun Oct 17 20:26:06 2004//
 /radeon_state.h/1.2/Sun Oct 17 20:26:06 2004//
 /vertex_shader.h/1.1/Thu Dec 30 16:06:37 2004//
+/r200_texstate.c/1.2/Mon Feb  7 02:49:18 2005//
+/r300_state.h/1.4/Sat Feb 12 12:23:27 2005//
+/pixel_shader.h/1.2/Mon Feb 14 20:51:49 2005//
+/r300_cmdbuf.h/1.4/Thu Feb 24 00:50:37 2005//
+/radeon_screen.h/1.2/Sun Feb 27 06:04:14 2005//
+/r300_tex.h/1.3/Wed Mar  2 01:57:46 2005//
+/radeon_span.c/1.6/Fri Mar  4 09:33:25 2005//
+/r300_maos.h/1.3/Tue Mar  8 04:30:21 2005//
+/radeon_lock.h/1.1.1.1/Tue Mar  8 04:13:23 2005//
+/r300_texmem.c/1.8/Thu Mar 24 15:48:35 2005//
+/r300_fixed_pipelines.h/1.9/Wed Mar 23 16:20:28 2005//
+/r300_tex.c/1.14/Wed Mar 23 09:07:15 2005//
+/radeon_context.c/1.2/Wed Mar 23 15:41:34 2005//
+/radeon_screen.c/1.9/Wed Mar 23 15:41:16 2005//
+/r300_texstate.c/1.24/Mon Mar 28 15:47:53 2005//
+/r300_cmdbuf.c/1.37/Mon Mar 28 12:29:44 2005//
+/r300_emit.h/1.10/Mon Mar 28 12:32:41 2005//
+/r300_ioctl.c/1.21/Fri Apr  1 06:21:21 2005//
+/Makefile/1.10/Fri Apr  1 08:42:34 2005//
+/r300_reg.h/1.32/Fri Apr  1 08:42:36 2005//
+/r300_texprog.c/1.1/Tue Mar 15 17:00:46 2005//
+/r300_texprog.h/1.1/Tue Mar 15 17:00:46 2005//
+/r300_context.c/1.16/Sun Apr  3 19:16:43 2005//
+/r300_context.h/1.55/Mon Apr  4 19:55:52 2005//
+/r300_maos.c/1.14/Mon Apr  4 19:46:19 2005//
+/r300_render.c/1.79/Mon Apr  4 06:39:12 2005//
+/r300_state.c/1.93/Mon Apr  4 19:59:44 2005//
+/r300_vertexprog.c/1.18/Mon Apr  4 20:34:31 2005//
 D
diff -uNr r300.orig/depend r300/depend
--- r300.orig/depend	Thu Jan  1 02:00:00 1970
+++ r300/depend	Sun Apr  3 23:00:35 2005
@@ -0,0 +1 @@
+1
diff -uNr r300.orig/r300_context.c r300/r300_context.c
--- r300.orig/r300_context.c	Sun Apr  3 23:00:43 2005
+++ r300/r300_context.c	Sun Apr  3 23:00:35 2005
@@ -302,7 +302,9 @@
 		}
 		TCL_FALLBACK(r300->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
 	}
-
+#ifdef BENCH_HACK
+	ctx->Const.MaxArrayLockSize=RADEON_BUFFER_SIZE*6;//30000;
+#endif	
 	return GL_TRUE;
 }
 
diff -uNr r300.orig/r300_context.h r300/r300_context.h
--- r300.orig/r300_context.h	Sun Apr  3 23:00:43 2005
+++ r300/r300_context.h	Sun Apr  3 23:00:35 2005
@@ -104,9 +104,15 @@
 	drmBufPtr buf;
 };
 
+//#define BENCH_HACK
+/* With BENCH_HACK defined, GET_START is gart_texture_offset + start instead of the gart_buffer_offset-based form; both expand to code that needs a local `rmesa`. */
+#ifdef BENCH_HACK
+#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_texture_offset + (rvb)->start)
+#else
 #define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset +		\
 			(rvb)->address - rmesa->dma.buf0_address +	\
 			(rvb)->start)
+#endif
 
 /* A retained region, eg vertices for indexed vertices.
  */
@@ -567,7 +573,7 @@
 	};
 	
 #define VERTPROG_ACTIVE(ctx) ( ctx->VertexProgram._Enabled && (R300_CONTEXT(ctx)->current_vp != NULL) && \
-	(R300_CONTEXT(ctx)->current_vp->translated) )
+	(R300_CONTEXT(ctx)->current_vp->translated))
 	
 /* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
  * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
diff -uNr r300.orig/r300_maos.c r300/r300_maos.c
--- r300.orig/r300_maos.c	Sun Apr  3 23:00:43 2005
+++ r300/r300_maos.c	Sun Apr  3 23:00:35 2005
@@ -163,6 +163,125 @@
 		}
 }
 
+
+#ifdef BENCH_HACK
+static void emit_vector(GLcontext * ctx,
+			struct r300_dma_region *rvb,
+			char *data, int size, int stride, int count)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	drm_radeon_mem_alloc_t alloc;
+	int region_offset;
+	int real_size;
+	int ret;
+	/* BENCH_HACK variant: storage comes from a DRM_RADEON_ALLOC ioctl rather than r300AllocDmaRegion; nothing in this function frees it. */
+	if (RADEON_DEBUG & DEBUG_VERTS)
+		fprintf(stderr, "%s count %d size %d stride %d\n",
+			__FUNCTION__, count, size, stride);
+
+	assert(!rvb->buf);
+	/* NOTE(review): the two fprintf calls below are unconditional, unlike the DEBUG_VERTS trace above. */
+
+	fprintf(stderr, "data=%p\n", data);
+	fprintf(stderr, "stride=%d\n", stride);
+	if (stride == 0) {
+		real_size = size * 4;
+		/* stride 0: only a single element is emitted (count forced to 1); was r300AllocDmaRegion(rmesa, rvb, size * 4, 4). */
+		count = 1;
+		rvb->aos_offset	= GET_START(rvb);
+		rvb->aos_stride	= 0;
+		rvb->aos_size	= size;
+	} else {
+		real_size = size * count * 4;
+		/* size * 4 bytes per element, count elements; was r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4), "alignment?". */
+		rvb->aos_offset	= GET_START(rvb);
+		rvb->aos_stride	= size;
+		rvb->aos_size	= size;
+	}
+	/* NOTE(review): aos_offset assigned above reads rvb->start before this function sets it; it is recomputed after the allocation. */
+	alloc.region = RADEON_MEM_REGION_GART;
+	alloc.alignment = 4;
+	alloc.size = real_size;
+	alloc.region_offset = &region_offset;
+
+	ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+				DRM_RADEON_ALLOC,
+				&alloc, sizeof(alloc));
+	/* Any ioctl failure is fatal: report it and exit. */
+	if (ret) {
+		fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret);
+		exit(-1); //return NULL;
+	}
+	/* Point the region at the gartTextures mapping, offset by the region_offset handed to the ioctl. */
+	rvb->address=rmesa->radeon.radeonScreen->gartTextures.map;//rmesa->r200Screen->gartTextures.map;
+	rvb->start=region_offset;
+	rvb->aos_offset	= GET_START(rvb);	
+	/*fprintf(stderr, "address=%08x\n", rvb->address);*/
+	fprintf(stderr, "aos offset=%08x\n", rvb->aos_offset);
+	//rvb->aos_offset=region_offset;
+	
+	/* Emit the data: dispatch on size (1..4) to emit_vec4/8/12/16;
+	 * any other size asserts and exits. */
+	switch (size) {
+	case 1:
+		emit_vec4(ctx, rvb, data, stride, count);
+		break;
+	case 2:
+		emit_vec8(ctx, rvb, data, stride, count);
+		break;
+	case 3:
+		emit_vec12(ctx, rvb, data, stride, count);
+		break;
+	case 4:
+		emit_vec16(ctx, rvb, data, stride, count);
+		break;
+	default:
+		assert(0);
+		exit(1);
+		break;
+	}
+
+	
+}
+
+void r300EmitElts(GLcontext * ctx, GLuint *elts, unsigned long n_elts)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct r300_dma_region *rvb=&rmesa->state.elt_dma;
+	unsigned short int *out;
+	int i;
+	drm_radeon_mem_alloc_t alloc;
+	int region_offset;
+	int ret;
+	/* BENCH_HACK variant: copy the indices in elts into a fresh DRM_RADEON_ALLOC allocation as 16-bit values, recorded in state.elt_dma. */
+	alloc.region = RADEON_MEM_REGION_GART;
+	alloc.alignment = 0x1;
+	alloc.size = (n_elts+1)*2;
+	alloc.region_offset = &region_offset;
+
+	ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+				DRM_RADEON_ALLOC,
+				&alloc, sizeof(alloc));
+	/* Any ioctl failure is fatal: report it and exit. */
+	if (ret) {
+		fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret);
+		exit(-1); //return NULL;
+	}
+	/* (n_elts+1)*2 bytes were requested above, which leaves room for the padding entry written when n_elts is odd. */
+	rvb->address=rmesa->radeon.radeonScreen->gartTextures.map;
+	rvb->start=region_offset;
+	
+	out = (unsigned short int *)(rvb->address + rvb->start);
+	/* Each index is truncated to unsigned short by the cast below. */
+	for(i=0; i < n_elts; i++)
+		out[i]=(unsigned short int)elts[i];
+	/* Odd count: write one zero entry after the last index (i == n_elts here). */
+	if(n_elts & 1)
+		out[i]=0;
+}
+
+#else
+
 static void emit_vector(GLcontext * ctx,
 			struct r300_dma_region *rvb,
 			char *data, int size, int stride, int count)
@@ -218,7 +337,7 @@
 	unsigned short int *out;
 	int i;
 	
-	r300AllocDmaRegion(rmesa, rvb, (n_elts+1)*2 , 0x20);
+	r300AllocDmaRegion(rmesa, rvb, (n_elts+1)*2 , 1);
 	
 	out = (unsigned short int *)(rvb->address + rvb->start);
 	
@@ -228,6 +347,8 @@
 	if(n_elts & 1)
 		out[i]=0;
 }
+#endif
+
 
 /* Emit vertex data to GART memory (unless immediate mode)
  * Route inputs to the vertex processor
@@ -316,7 +437,135 @@
 				inputs &= ~ (_TNL_BIT_TEX0<<i);*/
 	}
 	rmesa->state.render_inputs = inputs;
+	
+#ifdef BENCH_HACK
+//#define INTERLACED
+#endif
+
+#ifdef INTERLACED
+	int vert_size=0;
+	int region_offset;
+	drm_radeon_mem_alloc_t alloc;
+	int ret;
+
+	
+	if (inputs & _TNL_BIT_POS) {
+		vert_size+=VB->ObjPtr->size;
+	}
+
+	if (inputs & _TNL_BIT_NORMAL) {
+		vert_size+=VB->NormalPtr->size;
+	}
+
+	if (inputs & _TNL_BIT_COLOR0) {
+		int emitsize=4;
+
+		if (!immd) {
+			if (VB->ColorPtr[0]->size == 4 &&
+			    (VB->ColorPtr[0]->stride != 0 ||
+			     VB->ColorPtr[0]->data[0][3] != 1.0)) {
+				emitsize = 4;
+			} else {
+				emitsize = 3;
+			}
+		}
+		vert_size+=emitsize;
+	}
+
+	alloc.region = RADEON_MEM_REGION_GART;
+	alloc.alignment = 4;
+	alloc.size = vert_size * count * 4;
+	alloc.region_offset = &region_offset;
+
+	ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, sizeof(alloc));
+   	if (ret) {
+		fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret);
+		exit(-1); //return NULL;
+	}
+	
+	fprintf(stderr, "dst=%p\n", rmesa->radeon.radeonScreen->gartTextures.map);
+	fprintf(stderr, "region_offset=%d\n", region_offset);
+	//memset(((char *)rmesa->radeon.radeonScreen->gartTextures.map)+region_offset, 0, 4 * VB->ObjPtr->stride/*vert_size*/ * count);
+	fprintf(stderr, "region_offset=%d\n", region_offset);
+	memcpy(((char *)rmesa->radeon.radeonScreen->gartTextures.map)+region_offset, VB->ObjPtr->data, 4 *
+	/*VB->ObjPtr->stride*/vert_size * count);
+	fprintf(stderr, "region_offset=%d\n", region_offset);
+	
+	//rvb->address=rmesa->radeon.radeonScreen->gartTextures.map;//rmesa->r200Screen->gartTextures.map;
+	//rvb->start=region_offset;
+	//rvb->aos_offset	= GET_START(rvb);	
+	
+	fprintf(stderr, "VB->ObjPtr->stride=%d\n", VB->ObjPtr->stride);
+#if 0	
+	fprintf(stderr, "VB->ObjPtr->data=%p\n", VB->ObjPtr->data);
+	fprintf(stderr, "VB->NormalPtr->data=%p\n", VB->NormalPtr->data);
+	fprintf(stderr, "VB->ColorPtr[0]->data=%p\n", VB->ColorPtr[0]->data);
+	
+	fprintf(stderr, "VB->NormalPtr->stride=%d\n", VB->NormalPtr->stride);
+	fprintf(stderr, "VB->ColorPtr[0]->stride=%d\n", VB->ColorPtr[0]->stride);
+	
+	fprintf(stderr, "VB->ObjPtr->size=%d\n", VB->ObjPtr->size);
+	fprintf(stderr, "VB->NormalPtr->size=%d\n", VB->NormalPtr->size);
+	fprintf(stderr, "VB->ColorPtr[0]->size=%d\n", VB->ColorPtr[0]->size);
+#endif	
+	vert_size=0;
+	
+	if (inputs & _TNL_BIT_POS) {
+		vic_1 |= R300_INPUT_CNTL_POS;
+		rmesa->state.aos[nr].address=rmesa->radeon.radeonScreen->gartTextures.map;
+		rmesa->state.aos[nr].start=region_offset;
+		rmesa->state.aos[nr].aos_offset	= GET_START(&rmesa->state.aos[nr]);
+		rmesa->state.aos[nr].aos_size=VB->ObjPtr->size;
+		rmesa->state.aos[nr].aos_stride=VB->ObjPtr->stride/4;
+		rmesa->state.aos[nr].aos_format = AOS_FORMAT_FLOAT;
+		vert_size+=VB->ObjPtr->size;
+		nr++;
+	}
+
+	if (inputs & _TNL_BIT_NORMAL) {
+		vic_1 |= R300_INPUT_CNTL_NORMAL;
+		rmesa->state.aos[nr].address=rmesa->radeon.radeonScreen->gartTextures.map;
+		rmesa->state.aos[nr].start=region_offset + (vert_size * 4);
+		rmesa->state.aos[nr].aos_offset	= GET_START(&rmesa->state.aos[nr]);
+		rmesa->state.aos[nr].aos_size=VB->NormalPtr->size;
+		rmesa->state.aos[nr].aos_stride=VB->NormalPtr->stride/4;
+		rmesa->state.aos[nr].aos_format = AOS_FORMAT_FLOAT;
+		vert_size+=VB->NormalPtr->size;
+		nr++;
+	}
 
+	if (inputs & _TNL_BIT_COLOR0) {
+		int emitsize=4;
+
+		if (!immd) {
+			if (VB->ColorPtr[0]->size == 4 &&
+			    (VB->ColorPtr[0]->stride != 0 ||
+			     VB->ColorPtr[0]->data[0][3] != 1.0)) {
+				emitsize = 4;
+			} else {
+				emitsize = 3;
+			}
+		}
+		
+		vic_1 |= R300_INPUT_CNTL_COLOR;
+		rmesa->state.aos[nr].address=rmesa->radeon.radeonScreen->gartTextures.map;
+		rmesa->state.aos[nr].start=region_offset + (vert_size * 4);
+		rmesa->state.aos[nr].aos_offset	= GET_START(&rmesa->state.aos[nr]);
+		rmesa->state.aos[nr].aos_size=/*VB->ColorPtr[0]->size;*/ emitsize;
+		rmesa->state.aos[nr].aos_stride=VB->ColorPtr[0]->stride/4;
+		rmesa->state.aos[nr].aos_format = AOS_FORMAT_FLOAT_COLOR;
+		nr++;
+	}
+	//fprintf(stderr, "data=%p\n", data);
+	//fprintf(stderr, "stride=%d\n", stride);
+	
+	/*fprintf(stderr, "address=%08x\n", rvb->address);*/
+	//fprintf(stderr, "aos offset=%08x\n", rvb->aos_offset);
+	//rvb->aos_offset=region_offset;
+	
+	
+#else
+	
 	if (inputs & _TNL_BIT_POS) {
 		CONFIGURE_AOS(i_coords,	AOS_FORMAT_FLOAT,
 						VB->ObjPtr,
@@ -382,7 +631,7 @@
 			vic_1 |= R300_INPUT_CNTL_TC0 << i;
 		}
 	}
-	
+#endif	
 
 int cmd_reserved=0;
 int cmd_written=0;
@@ -518,6 +767,32 @@
 	rmesa->state.aos_count = nr;
 }
 
+#ifdef BENCH_HACK
+
+void r300ReleaseArrays(GLcontext * ctx)
+{
+	/*r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	int i;
+	drm_radeon_mem_free_t memfree;
+	int ret;
+	
+	for (i=0;i<rmesa->state.aos_count;i++) {
+		if(rmesa->state.aos[i].start){
+			memfree.region = RADEON_MEM_REGION_GART;
+			memfree.region_offset = rmesa->state.aos[i].start;
+   	
+			ret = drmCommandWrite( rmesa->radeon.dri.fd,
+					  DRM_RADEON_FREE,
+					  &memfree, sizeof(memfree));
+   	
+			if (ret) 
+				fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret);
+		}
+	}*/
+	/* No-op in BENCH_HACK builds: the DRM_RADEON_FREE loop above and the elt_dma r300ReleaseDmaRegion call are both disabled. */
+}
+#else
+
 void r300ReleaseArrays(GLcontext * ctx)
 {
 	r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -528,3 +803,4 @@
 		r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__);
 	}
 }
+#endif
diff -uNr r300.orig/r300_render.c r300/r300_render.c
--- r300.orig/r300_render.c	Sun Apr  3 23:00:43 2005
+++ r300/r300_render.c	Sun Apr  3 23:00:35 2005
@@ -467,17 +467,23 @@
 	unsigned long addr_a;
 	
 	addr_a = addr & 0x1c;
+	if(addr_a) /* Apparently addr_a has some other meaning... */
+		addr_a=0; /* Avoid lockup. */
+	
+	addr &= ~2; /* Force align to avoid lockup. 2 appears to be the lowest align that still works */
 	
 	check_space(6);
 	
 	start_packet3(RADEON_CP_PACKET3_3D_DRAW_INDX_2, 0);
-	/* TODO: Check if R300_VAP_VF_CNTL__INDEX_SIZE_32bit works. */
+	/* TODO: Check if R300_VAP_VF_CNTL__INDEX_SIZE_32bit works.
+	   R200_SE_VF_MIN_VTX_INDX and R200_SE_VF_MAX_VTX_INDX don't seem to have
+	   any function on r300s. */
 	e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count<<16) | type);
 
 	start_packet3(RADEON_CP_PACKET3_INDX_BUFFER, 2);
 	e32(R300_EB_UNK1 | (addr_a << 16) | R300_EB_UNK2);
-	e32(addr & 0xffffffe3);
-	e32((vertex_count+1)/2 + addr_a/4);
+	e32(addr /*& 0xffffffe3*/);
+	e32((vertex_count+1)/2 + addr_a/4); /* Total number of dwords needed? */
 }
 
 static void r300_render_vb_primitive(r300ContextPtr rmesa,
@@ -499,8 +505,9 @@
    if(type<0 || num_verts <= 0)return;
 
    if(rmesa->state.Elts){
+	static int first=1;
 	r300EmitAOS(rmesa, rmesa->state.aos_count, 0);
-#if 1
+#if 0
 	start_index32_packet(num_verts, type);
 	for(i=0; i < num_verts; i++)
 		e32(rmesa->state.Elts[start+i]); /* start ? */
@@ -516,7 +523,15 @@
 		WARN_ONCE("Too many elts\n");
 		return;
 	}
+#ifdef BENCH_HACK	
+	WARN_ONCE("Elt count %d\n", num_verts);
+	if(first){
+		r300EmitElts(ctx, rmesa->state.Elts+start, num_verts);
+		first=0;
+	}
+#else	
 	r300EmitElts(ctx, rmesa->state.Elts+start, num_verts);
+#endif	
 	fire_EB(PASS_PREFIX GET_START(&(rmesa->state.elt_dma)), num_verts, type);
 #endif
    }else{
@@ -525,6 +540,11 @@
    }
 }
 
+GLcontext *foo_ctx; /* overwritten with ctx on every r300_run_vb_render call */
+struct tnl_pipeline_stage *foo_stage; /* overwritten with stage on every r300_run_vb_render call */
+void *get_foo_ctx(){ return foo_ctx; } /* NOTE(review): looks like a hook for an external benchmark harness — confirm */
+void *get_foo_stage(){ return foo_stage; } /* returns the saved stage as an untyped pointer */
+
 static GLboolean r300_run_vb_render(GLcontext *ctx,
 				 struct tnl_pipeline_stage *stage)
 {
@@ -533,16 +553,28 @@
    struct vertex_buffer *VB = &tnl->vb;
    int i, j;
    LOCAL_VARS
+   static int first=1, first1=1;
    
-	if (RADEON_DEBUG & DEBUG_PRIMS)
+	foo_ctx=ctx;
+	foo_stage=stage;
+   	if (RADEON_DEBUG & DEBUG_PRIMS)
 		fprintf(stderr, "%s\n", __FUNCTION__);
 	
-
+#ifdef BENCH_HACK
+	/* Without this xorg can stop responding for long periods of time... */
+	LOCK_HARDWARE(&(rmesa->radeon));
+	radeonWaitForIdleLocked(&(rmesa->radeon));
+	UNLOCK_HARDWARE(&(rmesa->radeon));
+	if(first){
+		r300ReleaseArrays(ctx);
+		r300EmitArrays(ctx, GL_FALSE);
+		first=0;
+	}
+#else
 	r300ReleaseArrays(ctx);
 	r300EmitArrays(ctx, GL_FALSE);
 	//dump_inputs(ctx, rmesa->state.render_inputs);
-
-//	LOCK_HARDWARE(&(rmesa->radeon));
+#endif	
 
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
 	e32(0x0000000a);
@@ -553,12 +585,21 @@
 
 	rmesa->state.Elts = VB->Elts;
 
-	for(i=0; i < VB->PrimitiveCount; i++){
-		GLuint prim = VB->Primitive[i].mode;
-		GLuint start = VB->Primitive[i].start;
-		GLuint length = VB->Primitive[i].count;
-		
+	static GLuint prim;
+	static GLuint start;
+	static GLuint length;
+	if(first1 == 0){
 		r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
+	}else{
+		for(i=0; i < VB->PrimitiveCount; i++){
+			prim = VB->Primitive[i].mode;
+			start = VB->Primitive[i].start;
+			length = VB->Primitive[i].count;
+			r300_render_vb_primitive(rmesa, ctx, start, start + length, prim);
+		}
+#ifdef BENCH_HACK	
+	first1=0;
+#endif	
 	}
 
 	reg_start(R300_RB3D_DSTCACHE_CTLSTAT,0);
@@ -583,7 +624,7 @@
  * rasterization) or false to indicate that the pipeline has finished
  * after we render something.
  */
-static GLboolean r300_run_render(GLcontext *ctx,
+extern /*static */GLboolean r300_run_render(GLcontext *ctx,
 				 struct tnl_pipeline_stage *stage)
 {
    r300ContextPtr rmesa = R300_CONTEXT(ctx);
@@ -594,6 +635,7 @@
 	if (RADEON_DEBUG & DEBUG_PRIMS)
 		fprintf(stderr, "%s\n", __FUNCTION__);
 
+	r300SetupVertexShader(rmesa);
 
 #if 1
 
diff -uNr r300.orig/r300_state.c r300/r300_state.c
--- r300.orig/r300_state.c	Sun Apr  3 23:00:43 2005
+++ r300/r300_state.c	Sun Apr  3 23:00:35 2005
@@ -2087,7 +2087,9 @@
 	r300UpdateTextureState(ctx);
 
 //	r300_setup_routing(ctx, GL_TRUE);
+#ifndef BENCH_HACK	
 	r300EmitArrays(ctx, GL_TRUE); /* Just do the routing */
+#endif	
 	r300_setup_textures(ctx);
 	r300_setup_rs_unit(ctx);
 
@@ -2102,7 +2104,7 @@
 		/* Initialize magic registers
 		 TODO : learn what they really do, or get rid of
 		 those we don't have to touch */
-	r300->hw.unk2080.cmd[1] = 0x0030045A;
+	r300->hw.unk2080.cmd[1] = 0x0030045A; /* Dangerous */
 
 	r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
 				| R300_VPORT_X_OFFSET_ENA
diff -uNr r300.orig/r300_vertexprog.c r300/r300_vertexprog.c
--- r300.orig/r300_vertexprog.c	Sun Apr  3 23:00:43 2005
+++ r300/r300_vertexprog.c	Sun Apr  3 23:00:35 2005
@@ -452,7 +452,7 @@
 	/* Can we trust mesas opcodes to be in order ? */
 	for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
 		if(op_names[i].opcode == opcode)
-			return op_names[i].ip & OP_MASK;
+			return op_names[i].ip;
 	
 	fprintf(stderr, "op %d not found in op_names\n", opcode);
 	exit(-1);
@@ -514,7 +514,7 @@
 		
 		operands=op_operands(vpi->Opcode);
 		are_srcs_scalar=operands & SCALAR_FLAG;
-		operands &= ~SCALAR_FLAG;
+		operands &= OP_MASK;
 		
 		for(i=0; i < operands; i++)
 			src[i]=vpi->SrcReg[i];
@@ -628,7 +628,7 @@
 			vpi->Opcode=VP_OPCODE_MAX;
 			src[1]=src[0];
 			src[1].Negate=!src[0].Negate;
-			operands=op_operands(vpi->Opcode);
+			operands=op_operands(vpi->Opcode) & OP_MASK;
 			break;
 #else
 			o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, vpi->DstReg.Index,
