We allocate and free dma buffers all the time that accounts for large part
of system time in OGL applications.
First patch fixes allocation problem by keeping buffer objects in linked
list until they have been unused for 100 flushes when they are freed back to
kernel.
2nd patch is simple bug fix where code was missing offset for dma buffer in
tcl code. (r200 only)
3rd patch is Airlied's old patch ported to new code to merge elt buffer to
dma buffers which was bitten by offset bug in tcl. (r200 only)
What are gains after these patches:
glxgears 260->630 and no more 100% cpu
warzone2100 has now playable fps I didn't output any values but feeling is
that maybe 3x fps
xomto main menu 5->30fps (seems like limited to 30 fps)
From 80271169e87be8b58b087f5c32f90be5a18bb2c7 Mon Sep 17 00:00:00 2001
From: Pauli Nieminen <[email protected]>
Date: Fri, 14 Aug 2009 22:10:24 +0300
Subject: [PATCH 1/3] radeon: Optimize memory handling for dma operations.
We keep dma buffer objects in list untill they have been unused for many
draw operations. Current limit of having 100 operations is just rando guess for
goor performance/memory trade off.
Signed-off-by: Pauli Nieminen <[email protected]>
---
src/mesa/drivers/dri/r200/r200_state.c | 12 ++-
src/mesa/drivers/dri/r200/r200_swtcl.c | 3 +-
src/mesa/drivers/dri/r300/r300_swtcl.c | 3 +-
src/mesa/drivers/dri/r300/r300_texstate.c | 3 +-
src/mesa/drivers/dri/r600/r600_texstate.c | 3 +-
src/mesa/drivers/dri/r600/r700_ioctl.c | 3 +-
src/mesa/drivers/dri/radeon/radeon_common.c | 4 +-
.../drivers/dri/radeon/radeon_common_context.c | 5 +-
.../drivers/dri/radeon/radeon_common_context.h | 17 ++-
src/mesa/drivers/dri/radeon/radeon_dma.c | 132 +++++++++++++-------
src/mesa/drivers/dri/radeon/radeon_dma.h | 4 +-
src/mesa/drivers/dri/radeon/radeon_state.c | 3 +-
src/mesa/drivers/dri/radeon/radeon_swtcl.c | 3 +-
13 files changed, 130 insertions(+), 65 deletions(-)
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
index 5a6fd20..ffc1a95 100644
--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -2289,8 +2289,11 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx)
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
+ struct radeon_dma_bo *dma_bo;
int i, ret;
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
rrb = radeon_get_colorbuffer(&rmesa->radeon);
@@ -2323,9 +2326,12 @@ static GLboolean r200ValidateBuffers(GLcontext *ctx)
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
- if (ret)
- return GL_FALSE;
+ dma_bo = first_elem(&rmesa->radeon.dma.reserved);
+ {
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ }
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 83e70b5..1b23891 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -39,6 +39,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/image.h"
#include "main/imports.h"
#include "main/macros.h"
+#include "main/simple_list.h"
#include "swrast/s_context.h"
#include "swrast/s_fog.h"
@@ -275,7 +276,7 @@ void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
radeonEmitState(&rmesa->radeon);
r200EmitVertexAOS( rmesa,
rmesa->radeon.swtcl.vertex_size,
- rmesa->radeon.dma.current,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
current_offset);
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index a634cb5..9d6f756 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -39,6 +39,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_emit.h"
#include "r300_tex.h"
#include "r300_render.h"
+#include "main/simple_list.h"
#define EMIT_ATTR( ATTR, STYLE ) \
do { \
@@ -617,7 +618,7 @@ void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
r300_emit_scissor(ctx);
r300EmitVertexAOS(rmesa,
rmesa->radeon.swtcl.vertex_size,
- rmesa->radeon.dma.current,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
current_offset);
r300EmitVbufPrim(rmesa,
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 6f489ac..f030451 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
+#include "main/simple_list.h"
#include "r300_context.h"
#include "r300_state.h"
@@ -323,7 +324,7 @@ GLboolean r300ValidateBuffers(GLcontext * ctx)
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index ee9b64e..1057d7d 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
+#include "main/simple_list.h"
#include "r600_context.h"
#include "r700_state.h"
@@ -685,7 +686,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx)
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c
index 23cc128..e0e506d 100644
--- a/src/mesa/drivers/dri/r600/r700_ioctl.c
+++ b/src/mesa/drivers/dri/r600/r700_ioctl.c
@@ -31,6 +31,7 @@
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
+#include "main/simple_list.h"
#include "swrast/swrast.h"
#include "radeon_common.h"
@@ -52,7 +53,7 @@ static void r700Flush(GLcontext *ctx)
we have no DMA buffer allocated.
then no point flushing anything at all.
*/
- if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current)
+ if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
return;
if (radeon->dma.flush)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 330c2c8..d0c7355 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -1042,7 +1042,7 @@ void radeonFlush(GLcontext *ctx)
we have no DMA buffer allocated.
then no point flushing anything at all.
*/
- if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current)
+ if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
return;
if (radeon->dma.flush)
@@ -1146,7 +1146,7 @@ int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
{
int ret;
- radeonReleaseDmaRegion(rmesa);
+ radeonReleaseDmaRegions(rmesa);
LOCK_HARDWARE(rmesa);
ret = rcommonFlushCmdBufLocked(rmesa, caller);
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index f71dc1c..9ed19e3 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -263,6 +263,8 @@ GLboolean radeonInitContext(radeonContextPtr radeon,
radeon->texture_compressed_row_align = 64;
}
+ radeon_init_dma(radeon);
+
return GL_TRUE;
}
@@ -307,10 +309,11 @@ void radeonDestroyContext(__DRIcontextPrivate *driContextPriv )
assert(radeon);
if (radeon) {
- if (radeon->dma.current) {
+ if (!is_empty_list(&radeon->dma.reserved)) {
rcommonFlushCmdBuf( radeon, __FUNCTION__ );
}
+ radeonFreeDmaRegions(radeon);
radeonReleaseArrays(radeon->glCtx, ~0);
meta_destroy_metaops(&radeon->meta);
if (radeon->vtbl.free_context)
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index f8e1a25..c4bc319 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -269,12 +269,21 @@ struct radeon_aos {
int count; /** Number of vertices */
};
+#define DMA_BO_FREE_TIME 100
+
+struct radeon_dma_bo {
+ struct radeon_dma_bo *next, *prev;
+ struct radeon_bo *bo;
+ int expire_counter;
+};
+
struct radeon_dma {
/* Active dma region. Allocations for vertices and retained
* regions come from here. Also used for emitting random vertices,
* these may be flushed by calling flush_current();
*/
- struct radeon_bo *current; /** Buffer that DMA memory is allocated from */
+ struct radeon_dma_bo free;
+ struct radeon_dma_bo reserved;
int current_used; /** Number of bytes allocated and forgotten about */
int current_vertexptr; /** End of active vertex region */
@@ -284,12 +293,6 @@ struct radeon_dma {
* performed.
*/
void (*flush) (GLcontext *);
-
- /* Number of "in-flight" DMA buffers, i.e. the number of buffers
- * for which a DISCARD command is currently queued in the command buffer
-.
- */
- GLuint nr_released_bufs;
};
/* radeon_swtcl.c
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
index 48114a0..4af4de8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.c
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -31,6 +31,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
#include "radeon_common.h"
+#include "main/simple_list.h"
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr ) \
@@ -161,9 +162,14 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
}
}
-void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
+void radeon_init_dma(radeonContextPtr rmesa)
{
+ make_empty_list(&rmesa->dma.free);
+ make_empty_list(&rmesa->dma.reserved);
+}
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
+{
size = MAX2(size, MAX_DMA_BUF_SZ);
if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
@@ -173,49 +179,47 @@ void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
rmesa->dma.flush(rmesa->glCtx);
}
- if (rmesa->dma.nr_released_bufs > 4) {
- rcommonFlushCmdBuf(rmesa, __FUNCTION__);
- rmesa->dma.nr_released_bufs = 0;
- }
-
- if (rmesa->dma.current) {
- radeon_bo_unmap(rmesa->dma.current);
- radeon_bo_unref(rmesa->dma.current);
- rmesa->dma.current = 0;
- }
+ if (is_empty_list(&rmesa->dma.free)) {
+ struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
+ assert(dma_bo);
again_alloc:
#ifdef RADEON_DEBUG_BO
- rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
- 0, size, 4, RADEON_GEM_DOMAIN_GTT,
- 0, "dma.current");
+ dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+ 0, size, 4, RADEON_GEM_DOMAIN_GTT,
+ 0, "dma.current");
#else
- rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
- 0, size, 4, RADEON_GEM_DOMAIN_GTT,
- 0);
+ dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+ 0, size, 4, RADEON_GEM_DOMAIN_GTT,
+ 0);
#endif /* RADEON_DEBUG_BO */
- if (!rmesa->dma.current) {
- rcommonFlushCmdBuf(rmesa, __FUNCTION__);
- rmesa->dma.nr_released_bufs = 0;
- goto again_alloc;
+ if (!dma_bo->bo) {
+ rcommonFlushCmdBuf(rmesa, __FUNCTION__);
+ goto again_alloc;
+ }
+ insert_at_head(&rmesa->dma.reserved, dma_bo);
+ } else {
+ struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free);
+ assert(dma_bo->bo->cref == 1);
+ remove_from_list(dma_bo);
+ insert_at_head(&rmesa->dma.reserved, dma_bo);
}
rmesa->dma.current_used = 0;
rmesa->dma.current_vertexptr = 0;
if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
- rmesa->dma.current,
+ first_elem(&rmesa->dma.reserved)->bo,
RADEON_GEM_DOMAIN_GTT, 0))
fprintf(stderr,"failure to revalidate BOs - badness\n");
- if (!rmesa->dma.current) {
+ if (is_empty_list(&rmesa->dma.reserved)) {
/* Cmd buff have been flushed in radeon_revalidate_bos */
- rmesa->dma.nr_released_bufs = 0;
goto again_alloc;
}
- radeon_bo_map(rmesa->dma.current, 1);
+ radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
/* Allocates a region from rmesa->dma.current. If there isn't enough
@@ -236,30 +240,68 @@ void radeonAllocDmaRegion(radeonContextPtr rmesa,
alignment--;
rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
- if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
+ if (is_empty_list(&rmesa->dma.reserved)
+ || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
*poffset = rmesa->dma.current_used;
- *pbo = rmesa->dma.current;
+ *pbo = first_elem(&rmesa->dma.reserved)->bo;
radeon_bo_ref(*pbo);
/* Always align to at least 16 bytes */
rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
rmesa->dma.current_vertexptr = rmesa->dma.current_used;
- assert(rmesa->dma.current_used <= rmesa->dma.current->size);
+ assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
-void radeonReleaseDmaRegion(radeonContextPtr rmesa)
+void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
+ struct radeon_dma_bo *dma_bo;
+ struct radeon_dma_bo *temp;
if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
- if (rmesa->dma.current) {
- rmesa->dma.nr_released_bufs++;
- radeon_bo_unmap(rmesa->dma.current);
- radeon_bo_unref(rmesa->dma.current);
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ foreach_s(dma_bo, temp, &rmesa->dma.free) {
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+
+ foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+ remove_from_list(dma_bo);
+ radeon_bo_unmap(dma_bo->bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
}
- rmesa->dma.current = NULL;
+}
+
+void radeonReleaseDmaRegions(radeonContextPtr rmesa)
+{
+ struct radeon_dma_bo *dma_bo;
+ struct radeon_dma_bo *temp;
+ const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
+ const int time = rmesa->dma.free.expire_counter;
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ /* move reserved to free list */
+ foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+ remove_from_list(dma_bo);
+ dma_bo->expire_counter = expire_at;
+ radeon_bo_unmap(dma_bo->bo);
+ insert_at_tail(&rmesa->dma.free, dma_bo);
+ }
+
+ /* free bos that have been unused for some time */
+ foreach_s(dma_bo, temp, &rmesa->dma.free) {
+ if (dma_bo->expire_counter != time)
+ break;
+ remove_from_list(dma_bo);
+ radeon_bo_unref(dma_bo->bo);
+ FREE(dma_bo);
+ }
+
}
@@ -272,10 +314,10 @@ void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
if (RADEON_DEBUG & DEBUG_IOCTL)
- fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
+ fprintf(stderr, "%s\n", __FUNCTION__);
dma->flush = NULL;
- if (dma->current) {
+ if (!is_empty_list(&dma->reserved)) {
GLuint current_offset = dma->current_used;
assert (dma->current_used +
@@ -298,7 +340,10 @@ rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
GLuint bytes = vsize * nverts;
void *head;
restart:
- if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+ if (is_empty_list(&rmesa->dma.reserved)
+ || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
radeonRefillCurrentDmaRegion(rmesa, bytes);
}
@@ -308,7 +353,7 @@ restart:
rmesa->hw.max_state_size + (20*sizeof(int)),
__FUNCTION__);
/* if cmdbuf flushed DMA restart */
- if (!rmesa->dma.current)
+ if (is_empty_list(&rmesa->dma.reserved))
goto restart;
rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
@@ -320,7 +365,7 @@ restart:
rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
rmesa->dma.current_vertexptr );
- head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
+ head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
rmesa->dma.current_vertexptr += bytes;
rmesa->swtcl.numverts += nverts;
return head;
@@ -330,18 +375,17 @@ void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
radeonContextPtr radeon = RADEON_CONTEXT( ctx );
int i;
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s\n", __FUNCTION__);
if (radeon->dma.flush) {
radeon->dma.flush(radeon->glCtx);
}
- if (radeon->tcl.elt_dma_bo) {
- radeon_bo_unref(radeon->tcl.elt_dma_bo);
- radeon->tcl.elt_dma_bo = NULL;
- }
for (i = 0; i < radeon->tcl.aos_count; i++) {
if (radeon->tcl.aos[i].bo) {
radeon_bo_unref(radeon->tcl.aos[i].bo);
radeon->tcl.aos[i].bo = NULL;
+
}
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
index 06e388f..e066bd1 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.h
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
@@ -40,13 +40,15 @@ void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
GLvoid * data, int size, int stride, int count);
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
+void radeon_init_dma(radeonContextPtr rmesa);
void radeonAllocDmaRegion(radeonContextPtr rmesa,
struct radeon_bo **pbo, int *poffset,
int bytes, int alignment);
-void radeonReleaseDmaRegion(radeonContextPtr rmesa);
+void radeonReleaseDmaRegions(radeonContextPtr rmesa);
void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
+void radeonFreeDmaRegions(radeonContextPtr rmesa);
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
index 0d1728b..56f82bd 100644
--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/state.h"
#include "main/context.h"
#include "main/framebuffer.h"
+#include "main/simple_list.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
@@ -2099,7 +2100,7 @@ static GLboolean r100ValidateBuffers(GLcontext *ctx)
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
- ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
return GL_TRUE;
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index e31f045..58b3be9 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/enums.h"
#include "main/imports.h"
#include "main/macros.h"
+#include "main/simple_list.h"
#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"
@@ -291,7 +292,7 @@ void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
radeonEmitState(&rmesa->radeon);
radeonEmitVertexAOS( rmesa,
rmesa->radeon.swtcl.vertex_size,
- rmesa->radeon.dma.current,
+ first_elem(&rmesa->radeon.dma.reserved)->bo,
current_offset);
--
1.6.3.3
From 9804f8ace2a23d3965e02de20ae61438e9f62a83 Mon Sep 17 00:00:00 2001
From: Pauli Nieminen <[email protected]>
Date: Sat, 15 Aug 2009 01:56:21 +0300
Subject: [PATCH 2/3] r200: Fix missing offset from elt buffer pointer.
Signed-off-by: Pauli Nieminen <[email protected]>
---
src/mesa/drivers/dri/r200/r200_tcl.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index 5803709..ca9a8db 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -146,7 +146,7 @@ static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr )
rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {
GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr +
- rmesa->tcl.elt_used);
+ rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used);
rmesa->tcl.elt_used += nr*2;
--
1.6.3.3
From 9e6517de1d711308ad04ff0cc6f659d5ef90edf9 Mon Sep 17 00:00:00 2001
From: Dave Airlie <[email protected]>
Date: Fri, 26 Jun 2009 15:05:02 +1000
Subject: [PATCH 3/3] r200: make use of DMA buffers for Elts a lot better.
This allows us to return the unused portion of the dma buffer
to the allocator instead of wasting nearly 16k a pop.
Cherry picked and ported to new code by Pauli.
Signed-off-by: Pauli Nieminen <[email protected]>
---
src/mesa/drivers/dri/r200/r200_cmdbuf.c | 22 +++++-----------------
src/mesa/drivers/dri/radeon/radeon_dma.c | 11 +++++++++++
src/mesa/drivers/dri/radeon/radeon_dma.h | 2 ++
3 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
index 5d0f367..e639353 100644
--- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c
+++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
@@ -219,6 +219,9 @@ void r200FlushElts(GLcontext *ctx)
radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo);
rmesa->radeon.tcl.elt_dma_bo = NULL;
+ if (R200_ELT_BUF_SZ > elt_used)
+ radeonReturnDmaRegion(&rmesa->radeon, R200_ELT_BUF_SZ - elt_used);
+
if (R200_DEBUG & DEBUG_SYNC) {
fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
radeonFinish( rmesa->radeon.glCtx );
@@ -240,28 +243,13 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
radeonEmitState(&rmesa->radeon);
-#ifdef RADEON_DEBUG_BO
- rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
- 0, R200_ELT_BUF_SZ, 4,
- RADEON_GEM_DOMAIN_GTT, 0, "ELT");
-#else
- rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom,
- 0, R200_ELT_BUF_SZ, 4,
- RADEON_GEM_DOMAIN_GTT, 0);
-#endif
- rmesa->radeon.tcl.elt_dma_offset = 0;
+ radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
+ &rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4);
rmesa->tcl.elt_used = min_nr * 2;
- ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, rmesa->radeon.tcl.elt_dma_bo,
- RADEON_GEM_DOMAIN_GTT, 0);
- if (ret) {
- fprintf(stderr,"failure to revalidate BOs - badness\n");
- }
-
radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
-
if (R200_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s: header prim %x \n",
__FUNCTION__, primitive);
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
index 4af4de8..eb69a10 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.c
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -276,6 +276,17 @@ void radeonFreeDmaRegions(radeonContextPtr rmesa)
}
}
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
+{
+ if (is_empty_list(&rmesa->dma.reserved))
+ return;
+
+ if (RADEON_DEBUG & DEBUG_IOCTL)
+ fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
+ rmesa->dma.current_used -= return_bytes;
+ rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+}
+
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
struct radeon_dma_bo *dma_bo;
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
index e066bd1..1b8a891 100644
--- a/src/mesa/drivers/dri/radeon/radeon_dma.h
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
@@ -39,8 +39,10 @@ void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count);
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
GLvoid * data, int size, int stride, int count);
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
void radeon_init_dma(radeonContextPtr rmesa);
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
void radeonAllocDmaRegion(radeonContextPtr rmesa,
struct radeon_bo **pbo, int *poffset,
int bytes, int alignment);
--
1.6.3.3
------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day
trial. Simplify your report design, integration and deployment - and focus on
what you do best, core application coding. Discover what's new with
Crystal Reports now. http://p.sf.net/sfu/bobj-july
_______________________________________________
Mesa3d-dev mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev