[Mesa-dev] [PATCH] winsys/radeon: fix nop packet padding.

2014-07-24 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

The GPU packet prefetcher hates the ugly big nop packet, which leads
to prefetching some invalid memory in some cases. Apparently Hawaii
is particularly sensitive to this.

Note this only partially fixes the Hawaii issues; some zbuffer tiling
issues are still present.

Signed-off-by: Jérôme Glisse jgli...@redhat.com
---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a06ecb2..502a550 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -451,8 +451,22 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs,
 while (rcs-cdw  7)
 OUT_CS(cs-base, 0x8000); /* type2 nop packet */
 } else {
-while (rcs-cdw  7)
-OUT_CS(cs-base, 0x1000); /* type3 nop packet */
+switch (rcs-cdw  7) {
+case 0:
+break;
+case 7:
+/* FIXME can this be bad if we are at cs[LAST_DW-1] ? Need to
+ * think of something.
+ */
+OUT_CS(cs-base, 0xc0001000);
+OUT_CS(cs-base, 0xcafedead);
+/* Note we fallthrough as this will add another 7 dwords */
+default:
+OUT_CS(cs-base, 0xc0001000 | (((8 - (rcs-cdw  7)) - 1)  
16));
+while (rcs-cdw  7) {
+OUT_CS(cs-base, 0xcafedead);
+}
+}
 }
 break;
 case RING_UVD:
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: add support for trace buffer.

2014-07-24 Thread j . glisse
From: Jérôme Glisse jgli...@redhat.com

The trace buffer allows dumping a command buffer that is fully replayable
as a standalone C program. This makes debugging lockups immensely
simpler. This patch only plugs in the minimal core and is still
missing the fancier aspects that are in r600g. It has however already
proved useful in debugging Hawaii.

Signed-off-by: Jérôme Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/si_hw_context.c | 2 +-
 src/gallium/drivers/radeonsi/si_pipe.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index 56fa664..c947cd0 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -115,7 +115,7 @@ void si_context_gfx_flush(void *context, unsigned flags,
 #endif
 
/* Flush the CS. */
-   ctx-b.ws-cs_flush(cs, flags, fence, 0);
+   ctx-b.ws-cs_flush(cs, flags, fence, ctx-screen-b.cs_count++);
ctx-b.rings.gfx.flushing = false;
 
 #if SI_TRACE_CS
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 4f19268..2a7049b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -98,7 +98,8 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void *
}
 
sctx-b.rings.gfx.cs = ws-cs_create(ws, RING_GFX, si_context_gfx_flush,
-sctx, NULL);
+sctx, sscreen-b.trace_bo ?
+sscreen-b.trace_bo-cs_buf : 
NULL);
sctx-b.rings.gfx.flush = si_context_gfx_flush;
 
si_init_all_descriptors(sctx);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] winsys/radeon: fix nop packet padding v2.

2014-07-24 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

The ucode we got for Hawaii does not support the 0x1000 special nop
packet type 3, and this leads to the GPU reading invalid memory. As packet
type 2 still exists, just use packet type 2.

Note this only partially fixes the Hawaii issues; some zbuffer tiling
issues are still present.

Changed since v1:
  - use packet type 2 instead of packet 3.

Signed-off-by: Jérôme Glisse jgli...@redhat.com
---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a06ecb2..9ac7d0e 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -447,13 +447,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs,
 /* pad DMA ring to 8 DWs to meet CP fetch alignment requirements
  * r6xx, requires at least 4 dw alignment to avoid a hw bug.
  */
-if (cs-ws-info.chip_class = SI) {
-while (rcs-cdw  7)
-OUT_CS(cs-base, 0x8000); /* type2 nop packet */
-} else {
-while (rcs-cdw  7)
-OUT_CS(cs-base, 0x1000); /* type3 nop packet */
-}
+while (rcs-cdw  7)
+OUT_CS(cs-base, 0x8000); /* type2 nop packet */
 break;
 case RING_UVD:
 while (rcs-cdw  15)
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx: load dri driver with RTLD_LOCAL so dlclose never fails to unload

2014-02-27 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

There is no longer any reason to load with RTLD_GLOBAL, and for some drivers
this even results in dlclose failing to unload, leading to catastrophic
failure with the swrast fallback.

Signed-off-by: Jérôme Glisse jgli...@redhat.com
---
 src/glx/dri_common.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index 8bf4705..e5a3f70 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -45,8 +45,8 @@
 #ifndef RTLD_NOW
 #define RTLD_NOW 0
 #endif
-#ifndef RTLD_GLOBAL
-#define RTLD_GLOBAL 0
+#ifndef RTLD_LOCAL
+#define RTLD_LOCAL 0
 #endif
 
 _X_HIDDEN void
@@ -99,7 +99,7 @@ driOpenDriver(const char *driverName)
int len;
 
/* Attempt to make sure libGL symbols will be visible to the driver */
-   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_GLOBAL);
+   glhandle = dlopen(libGL.so.1, RTLD_NOW | RTLD_LOCAL);
 
libPaths = NULL;
if (geteuid() == getuid()) {
@@ -127,14 +127,14 @@ driOpenDriver(const char *driverName)
   snprintf(realDriverName, sizeof realDriverName,
%.*s/tls/%s_dri.so, len, p, driverName);
   InfoMessageF(OpenDriver: trying %s\n, realDriverName);
-  handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL);
+  handle = dlopen(realDriverName, RTLD_NOW | RTLD_LOCAL);
 #endif
 
   if (handle == NULL) {
  snprintf(realDriverName, sizeof realDriverName,
   %.*s/%s_dri.so, len, p, driverName);
  InfoMessageF(OpenDriver: trying %s\n, realDriverName);
- handle = dlopen(realDriverName, RTLD_NOW | RTLD_GLOBAL);
+ handle = dlopen(realDriverName, RTLD_NOW | RTLD_LOCAL);
   }
 
   if (handle != NULL)
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] winsys/radeon: consolidate tracing into winsys v2

2013-04-24 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This moves the tracing timeout and printing into the winsys and adds
a debug environment variable for it (R600_DEBUG=trace_cs).

Lots of files are touched because of the winsys API changes.

v2: Do not write the lockup file if the IB unique id does not match the last one

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_context.c|  2 +-
 src/gallium/drivers/r300/r300_flush.c  |  6 ++--
 src/gallium/drivers/r600/evergreen_compute.c   |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c | 36 +-
 src/gallium/drivers/r600/r600_pipe.c   | 17 +-
 src/gallium/drivers/r600/r600_pipe.h   | 11 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  4 ---
 src/gallium/drivers/radeon/radeon_uvd.c|  4 +--
 src/gallium/drivers/radeonsi/r600_hw_context.c |  2 +-
 src/gallium/drivers/radeonsi/radeonsi_compute.c|  2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c   |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c  | 19 +++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h  | 11 +++
 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c | 30 +++---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  |  2 +-
 src/gallium/winsys/radeon/drm/radeon_winsys.h  | 13 +---
 16 files changed, 68 insertions(+), 95 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c 
b/src/gallium/drivers/r300/r300_context.c
index 340a7f0..ba1859b 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -379,7 +379,7 @@ struct pipe_context* r300_create_context(struct 
pipe_screen* screen,
  sizeof(struct pipe_transfer), 64,
  UTIL_SLAB_SINGLETHREADED);
 
-r300-cs = rws-cs_create(rws, RING_GFX);
+r300-cs = rws-cs_create(rws, RING_GFX, NULL);
 if (r300-cs == NULL)
 goto fail;
 
diff --git a/src/gallium/drivers/r300/r300_flush.c 
b/src/gallium/drivers/r300/r300_flush.c
index 10c4a30..709fe52 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -52,7 +52,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, 
unsigned flags)
 }
 
 r300-flush_counter++;
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 r300-dirty_hw = 0;
 
 /* New kitchen sink, baby. */
@@ -100,11 +100,11 @@ void r300_flush(struct pipe_context *pipe,
  * and we cannot emit an empty CS. Let's write to some reg. */
 CS_LOCALS(r300);
 OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 } else {
 /* Even if hw is not dirty, we should at least reset the CS in case
  * the space checking failed for the first draw operation. */
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 }
 }
 
diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 189ffac..9393bbe 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -426,7 +426,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
}
 
-   ctx-ws-cs_flush(ctx-rings.gfx.cs, flush_flags);
+   ctx-ws-cs_flush(ctx-rings.gfx.cs, flush_flags, 
ctx-screen-cs_count++);
 
ctx-flags = 0;
 
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index b4fb3bf..3abce1e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -137,21 +137,17 @@ void r600_need_cs_space(struct r600_context *ctx, 
unsigned num_dw,
for (i = 0; i  R600_NUM_ATOMS; i++) {
if (ctx-atoms[i]  ctx-atoms[i]-dirty) {
num_dw += ctx-atoms[i]-num_dw;
-#if R600_TRACE_CS
if (ctx-screen-trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
}
 
/* The upper-bound of how much space a draw command would take. 
*/
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
-#if R600_TRACE_CS
if (ctx-screen-trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
 
/* Count in queries_suspend. */
@@ -339,37 +335,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned 
flags)
}
 
/* Flush the CS. */
-#if R600_TRACE_CS
-   if (ctx-screen-trace_bo) {
-   struct r600_screen *rscreen = 

[Mesa-dev] [PATCH] winsys/radeon: consolidate tracing into winsys

2013-04-23 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This moves the tracing timeout and printing into the winsys and adds
a debug environment variable for it (R600_DEBUG=trace_cs).

Lots of files are touched because of the winsys API changes.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_context.c|  2 +-
 src/gallium/drivers/r300/r300_flush.c  |  6 ++--
 src/gallium/drivers/r600/evergreen_compute.c   |  2 +-
 src/gallium/drivers/r600/r600_hw_context.c | 36 +-
 src/gallium/drivers/r600/r600_pipe.c   | 17 +-
 src/gallium/drivers/r600/r600_pipe.h   | 11 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  4 ---
 src/gallium/drivers/radeon/radeon_uvd.c|  4 +--
 src/gallium/drivers/radeonsi/r600_hw_context.c |  2 +-
 src/gallium/drivers/radeonsi/radeonsi_compute.c|  2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c   |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c  | 19 +++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h  | 11 +++
 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c | 26 +---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  |  2 +-
 src/gallium/winsys/radeon/drm/radeon_winsys.h  | 13 +---
 16 files changed, 64 insertions(+), 95 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c 
b/src/gallium/drivers/r300/r300_context.c
index 340a7f0..ba1859b 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -379,7 +379,7 @@ struct pipe_context* r300_create_context(struct 
pipe_screen* screen,
  sizeof(struct pipe_transfer), 64,
  UTIL_SLAB_SINGLETHREADED);
 
-r300-cs = rws-cs_create(rws, RING_GFX);
+r300-cs = rws-cs_create(rws, RING_GFX, NULL);
 if (r300-cs == NULL)
 goto fail;
 
diff --git a/src/gallium/drivers/r300/r300_flush.c 
b/src/gallium/drivers/r300/r300_flush.c
index 10c4a30..709fe52 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -52,7 +52,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, 
unsigned flags)
 }
 
 r300-flush_counter++;
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 r300-dirty_hw = 0;
 
 /* New kitchen sink, baby. */
@@ -100,11 +100,11 @@ void r300_flush(struct pipe_context *pipe,
  * and we cannot emit an empty CS. Let's write to some reg. */
 CS_LOCALS(r300);
 OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0);
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 } else {
 /* Even if hw is not dirty, we should at least reset the CS in case
  * the space checking failed for the first draw operation. */
-r300-rws-cs_flush(r300-cs, flags);
+r300-rws-cs_flush(r300-cs, flags, 0);
 }
 }
 
diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 189ffac..9393bbe 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -426,7 +426,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
}
 
-   ctx-ws-cs_flush(ctx-rings.gfx.cs, flush_flags);
+   ctx-ws-cs_flush(ctx-rings.gfx.cs, flush_flags, 
ctx-screen-cs_count++);
 
ctx-flags = 0;
 
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index b4fb3bf..3abce1e 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -137,21 +137,17 @@ void r600_need_cs_space(struct r600_context *ctx, 
unsigned num_dw,
for (i = 0; i  R600_NUM_ATOMS; i++) {
if (ctx-atoms[i]  ctx-atoms[i]-dirty) {
num_dw += ctx-atoms[i]-num_dw;
-#if R600_TRACE_CS
if (ctx-screen-trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
}
 
/* The upper-bound of how much space a draw command would take. 
*/
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
-#if R600_TRACE_CS
if (ctx-screen-trace_bo) {
num_dw += R600_TRACE_CS_DWORDS;
}
-#endif
}
 
/* Count in queries_suspend. */
@@ -339,37 +335,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned 
flags)
}
 
/* Flush the CS. */
-#if R600_TRACE_CS
-   if (ctx-screen-trace_bo) {
-   struct r600_screen *rscreen = ctx-screen;
-   unsigned i;
-
-   for (i = 0; i  

[Mesa-dev] [PATCH 1/2] radeonsi: add 2d tiling support for texture v3

2013-04-08 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

v2: Remove left over code
v3: Restage the commit properly so that hunks of the first patch are not in
the second one.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/r600_texture.c | 11 ++--
 src/gallium/drivers/radeonsi/si_state.c | 81 +
 2 files changed, 20 insertions(+), 72 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_texture.c 
b/src/gallium/drivers/radeonsi/r600_texture.c
index 1b8382f..8992f9a 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -47,7 +47,6 @@ static void r600_copy_to_staging_texture(struct pipe_context 
*ctx, struct r600_t
transfer-box);
 }
 
-
 /* Copy from a transfer's staging texture to a full GPU one. */
 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct 
r600_transfer *rtransfer)
 {
@@ -152,12 +151,12 @@ static int r600_init_surface(struct r600_screen *rscreen,
 
if (!is_flushed_depth  is_depth) {
surface-flags |= RADEON_SURF_ZBUFFER;
-
if (is_stencil) {
surface-flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
+   surface-flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
return 0;
 }
 
@@ -530,7 +529,11 @@ struct pipe_resource *si_texture_create(struct pipe_screen 
*screen,
 
if (!(templ-flags  R600_RESOURCE_FLAG_TRANSFER) 
!(templ-bind  PIPE_BIND_SCANOUT)) {
-   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
+   if (util_format_is_compressed(templ-format)) {
+   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
+   } else {
+   array_mode = V_009910_ARRAY_2D_TILED_THIN1;
+   }
}
 
r = r600_init_surface(rscreen, surface, templ, array_mode,
@@ -620,6 +623,8 @@ struct pipe_resource *si_texture_from_handle(struct 
pipe_screen *screen,
if (r) {
return NULL;
}
+   /* always set the scanout flags */
+   surface.flags |= RADEON_SURF_SCANOUT;
return (struct pipe_resource *)r600_texture_create_object(screen, 
templ, array_mode,
  stride, 0, 
buf, FALSE, surface);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ca9e8b4..61ede64 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1541,67 +1541,16 @@ boolean si_is_format_supported(struct pipe_screen 
*screen,
return retval == usage;
 }
 
-static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level)
-{
-   if (util_format_is_depth_or_stencil(rtex-real_format)) {
-   if (rtex-surface.level[level].mode == RADEON_SURF_MODE_1D) {
-   return 4;
-   } else if (rtex-surface.level[level].mode == 
RADEON_SURF_MODE_2D) {
-   switch (rtex-real_format) {
-   case PIPE_FORMAT_Z16_UNORM:
-   return 5;
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-   return 6;
-   default:
-   return 7;
-   }
-   }
-   }
+static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level, bool stencil)
+{
+   unsigned tile_mode_index = 0;
 
-   switch (rtex-surface.level[level].mode) {
-   default:
-   assert(!Invalid surface mode);
-   /* Fall through */
-   case RADEON_SURF_MODE_LINEAR_ALIGNED:
-   return 8;
-   case RADEON_SURF_MODE_1D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT)
-   return 9;
-   else
-   return 13;
-   case RADEON_SURF_MODE_2D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT) {
-   switch (util_format_get_blocksize(rtex-real_format)) {
-   case 1:
-   return 10;
-   case 2:
-   return 11;
-   default:
-   assert(!Invalid block size);
-   /* Fall through */
-   case 4:
-   return 12;
-   }
-   } else {
-   switch (util_format_get_blocksize(rtex-real_format)) {
- 

[Mesa-dev] [PATCH 2/2] radeonsi: add support for compressed texture v2

2013-04-08 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Most tests pass; the remaining issues are with border color and swizzle.

Based on ircnickmaelcum patch.

v2: Restaged commit hunk

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/si_state.c | 71 -
 src/gallium/drivers/radeonsi/sid.h  |  7 
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 61ede64..a39843c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -30,6 +30,7 @@
 #include util/u_helpers.h
 #include util/u_math.h
 #include util/u_pack_color.h
+#include util/u_format_s3tc.h
 #include tgsi/tgsi_parse.h
 #include radeonsi_pipe.h
 #include radeonsi_shader.h
@@ -1164,6 +1165,8 @@ static uint32_t si_translate_texformat(struct pipe_screen 
*screen,
   const struct util_format_description 
*desc,
   int first_non_void)
 {
+   struct r600_screen *rscreen = (struct r600_screen*)screen;
+   bool enable_s3tc = rscreen-info.drm_minor = 31;
boolean uniform = TRUE;
int i;
 
@@ -1205,7 +1208,51 @@ static uint32_t si_translate_texformat(struct 
pipe_screen *screen,
break;
}
 
-   /* TODO compressed formats */
+   if (desc-layout == UTIL_FORMAT_LAYOUT_RGTC) {
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   switch (format) {
+   case PIPE_FORMAT_RGTC1_SNORM:
+   case PIPE_FORMAT_LATC1_SNORM:
+   case PIPE_FORMAT_RGTC1_UNORM:
+   case PIPE_FORMAT_LATC1_UNORM:
+   return V_008F14_IMG_DATA_FORMAT_BC4;
+   case PIPE_FORMAT_RGTC2_SNORM:
+   case PIPE_FORMAT_LATC2_SNORM:
+   case PIPE_FORMAT_RGTC2_UNORM:
+   case PIPE_FORMAT_LATC2_UNORM:
+   return V_008F14_IMG_DATA_FORMAT_BC5;
+   default:
+   goto out_unknown;
+   }
+   }
+
+   if (desc-layout == UTIL_FORMAT_LAYOUT_S3TC) {
+
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   if (!util_format_s3tc_enabled) {
+   goto out_unknown;
+   }
+
+   switch (format) {
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC1;
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC2;
+   case PIPE_FORMAT_DXT5_RGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC3;
+   default:
+   goto out_unknown;
+   }
+   }
 
if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
@@ -2109,7 +2156,27 @@ static struct pipe_sampler_view 
*si_create_sampler_view(struct pipe_context *ctx
break;
default:
if (first_non_void  0) {
-   num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
+   if (util_format_is_compressed(pipe_format)) {
+   switch (pipe_format) {
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+   num_format = 
V_008F14_IMG_NUM_FORMAT_SRGB;
+   break;
+   case PIPE_FORMAT_RGTC1_SNORM:
+   case PIPE_FORMAT_LATC1_SNORM:
+   case PIPE_FORMAT_RGTC2_SNORM:
+   case PIPE_FORMAT_LATC2_SNORM:
+   num_format = 
V_008F14_IMG_NUM_FORMAT_SNORM;
+   break;
+   default:
+   num_format = 
V_008F14_IMG_NUM_FORMAT_UNORM;
+   break;
+   }
+   } else {
+   num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
+   }
} else if (desc-colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
num_format = V_008F14_IMG_NUM_FORMAT_SRGB;
} else {
diff --git a/src/gallium/drivers/radeonsi/sid.h 
b/src/gallium/drivers/radeonsi/sid.h
index 8528981..2722c79 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ 

[Mesa-dev] radeonsi: tiling and compressed texture

2013-04-05 Thread j . glisse
Rebased on top of the latest libdrm patch. With a small modification to the
ddx you can also have tiled front buffer rendering. But again, we need to wait
for the next mesa release before changing the ddx to assume by default that it
is installed with a recent enough mesa.

No regressions, just new tests that pass.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeonsi: add 2d tiling support for texture v2

2013-04-05 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

v2: Remove left over code

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/r600_texture.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_texture.c 
b/src/gallium/drivers/radeonsi/r600_texture.c
index 1b8382f..8992f9a 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -47,7 +47,6 @@ static void r600_copy_to_staging_texture(struct pipe_context 
*ctx, struct r600_t
transfer-box);
 }
 
-
 /* Copy from a transfer's staging texture to a full GPU one. */
 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct 
r600_transfer *rtransfer)
 {
@@ -152,12 +151,12 @@ static int r600_init_surface(struct r600_screen *rscreen,
 
if (!is_flushed_depth  is_depth) {
surface-flags |= RADEON_SURF_ZBUFFER;
-
if (is_stencil) {
surface-flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
}
}
+   surface-flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
return 0;
 }
 
@@ -530,7 +529,11 @@ struct pipe_resource *si_texture_create(struct pipe_screen 
*screen,
 
if (!(templ-flags  R600_RESOURCE_FLAG_TRANSFER) 
!(templ-bind  PIPE_BIND_SCANOUT)) {
-   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
+   if (util_format_is_compressed(templ-format)) {
+   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
+   } else {
+   array_mode = V_009910_ARRAY_2D_TILED_THIN1;
+   }
}
 
r = r600_init_surface(rscreen, surface, templ, array_mode,
@@ -620,6 +623,8 @@ struct pipe_resource *si_texture_from_handle(struct 
pipe_screen *screen,
if (r) {
return NULL;
}
+   /* always set the scanout flags */
+   surface.flags |= RADEON_SURF_SCANOUT;
return (struct pipe_resource *)r600_texture_create_object(screen, 
templ, array_mode,
  stride, 0, 
buf, FALSE, surface);
 }
-- 
1.8.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeonsi: add support for compressed texture

2013-04-05 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Most tests pass; the remaining issues are with border color and swizzle.

Based on ircnickmaelcum patch.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/si_state.c | 165 +---
 src/gallium/drivers/radeonsi/sid.h  |   7 ++
 2 files changed, 96 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ca9e8b4..d968b95 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -30,6 +30,7 @@
 #include util/u_helpers.h
 #include util/u_math.h
 #include util/u_pack_color.h
+#include util/u_format_s3tc.h
 #include tgsi/tgsi_parse.h
 #include radeonsi_pipe.h
 #include radeonsi_shader.h
@@ -1164,6 +1165,8 @@ static uint32_t si_translate_texformat(struct pipe_screen 
*screen,
   const struct util_format_description 
*desc,
   int first_non_void)
 {
+   struct r600_screen *rscreen = (struct r600_screen*)screen;
+   bool enable_s3tc = rscreen-info.drm_minor = 31;
boolean uniform = TRUE;
int i;
 
@@ -1206,6 +1209,51 @@ static uint32_t si_translate_texformat(struct 
pipe_screen *screen,
}
 
/* TODO compressed formats */
+   if (desc-layout == UTIL_FORMAT_LAYOUT_RGTC) {
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   switch (format) {
+   case PIPE_FORMAT_RGTC1_SNORM:
+   case PIPE_FORMAT_LATC1_SNORM:
+   case PIPE_FORMAT_RGTC1_UNORM:
+   case PIPE_FORMAT_LATC1_UNORM:
+   return V_008F14_IMG_DATA_FORMAT_BC4;
+   case PIPE_FORMAT_RGTC2_SNORM:
+   case PIPE_FORMAT_LATC2_SNORM:
+   case PIPE_FORMAT_RGTC2_UNORM:
+   case PIPE_FORMAT_LATC2_UNORM:
+   return V_008F14_IMG_DATA_FORMAT_BC5;
+   default:
+   goto out_unknown;
+   }
+   }
+
+   if (desc-layout == UTIL_FORMAT_LAYOUT_S3TC) {
+
+   if (!enable_s3tc)
+   goto out_unknown;
+
+   if (!util_format_s3tc_enabled) {
+   goto out_unknown;
+   }
+
+   switch (format) {
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT1_SRGB:
+   case PIPE_FORMAT_DXT1_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC1;
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT3_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC2;
+   case PIPE_FORMAT_DXT5_RGBA:
+   case PIPE_FORMAT_DXT5_SRGBA:
+   return V_008F14_IMG_DATA_FORMAT_BC3;
+   default:
+   goto out_unknown;
+   }
+   }
 
if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
@@ -1541,67 +1589,16 @@ boolean si_is_format_supported(struct pipe_screen 
*screen,
return retval == usage;
 }
 
-static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level)
-{
-   if (util_format_is_depth_or_stencil(rtex-real_format)) {
-   if (rtex-surface.level[level].mode == RADEON_SURF_MODE_1D) {
-   return 4;
-   } else if (rtex-surface.level[level].mode == 
RADEON_SURF_MODE_2D) {
-   switch (rtex-real_format) {
-   case PIPE_FORMAT_Z16_UNORM:
-   return 5;
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-   return 6;
-   default:
-   return 7;
-   }
-   }
-   }
+static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level, bool stencil)
+{
+   unsigned tile_mode_index = 0;
 
-   switch (rtex-surface.level[level].mode) {
-   default:
-   assert(!Invalid surface mode);
-   /* Fall through */
-   case RADEON_SURF_MODE_LINEAR_ALIGNED:
-   return 8;
-   case RADEON_SURF_MODE_1D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT)
-   return 9;
-   else
-   return 13;
-   case RADEON_SURF_MODE_2D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT) {
-   switch (util_format_get_blocksize(rtex-real_format)) {
- 

[Mesa-dev] radeonsi 2d tiling

2013-04-03 Thread j . glisse
This is the mesa match for 2d tiling; it is missing the change to configure.ac
to require the proper libdrm. Will respin once I know.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] gallium/radeonsi: add 2d tiling support for texture

2013-04-03 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/r600_texture.c |  4 +-
 src/gallium/drivers/radeonsi/si_state.c | 83 +
 2 files changed, 14 insertions(+), 73 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_texture.c 
b/src/gallium/drivers/radeonsi/r600_texture.c
index 1b8382f..8d0db7f 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -47,7 +47,6 @@ static void r600_copy_to_staging_texture(struct pipe_context 
*ctx, struct r600_t
transfer-box);
 }
 
-
 /* Copy from a transfer's staging texture to a full GPU one. */
 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct 
r600_transfer *rtransfer)
 {
@@ -152,7 +151,6 @@ static int r600_init_surface(struct r600_screen *rscreen,
 
if (!is_flushed_depth  is_depth) {
surface-flags |= RADEON_SURF_ZBUFFER;
-
if (is_stencil) {
surface-flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
@@ -530,7 +528,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen 
*screen,
 
if (!(templ-flags  R600_RESOURCE_FLAG_TRANSFER) 
!(templ-bind  PIPE_BIND_SCANOUT)) {
-   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
+   array_mode = V_009910_ARRAY_2D_TILED_THIN1;
}
 
r = r600_init_surface(rscreen, surface, templ, array_mode,
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index ca9e8b4..9483304 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1541,67 +1541,16 @@ boolean si_is_format_supported(struct pipe_screen 
*screen,
return retval == usage;
 }
 
-static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level)
-{
-   if (util_format_is_depth_or_stencil(rtex-real_format)) {
-   if (rtex-surface.level[level].mode == RADEON_SURF_MODE_1D) {
-   return 4;
-   } else if (rtex-surface.level[level].mode == 
RADEON_SURF_MODE_2D) {
-   switch (rtex-real_format) {
-   case PIPE_FORMAT_Z16_UNORM:
-   return 5;
-   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-   case PIPE_FORMAT_Z32_FLOAT:
-   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-   return 6;
-   default:
-   return 7;
-   }
-   }
-   }
+static unsigned si_tile_mode_index(struct r600_resource_texture *rtex, 
unsigned level, bool stencil)
+{
+   unsigned tile_mode_index = 0;
 
-   switch (rtex-surface.level[level].mode) {
-   default:
-   assert(!Invalid surface mode);
-   /* Fall through */
-   case RADEON_SURF_MODE_LINEAR_ALIGNED:
-   return 8;
-   case RADEON_SURF_MODE_1D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT)
-   return 9;
-   else
-   return 13;
-   case RADEON_SURF_MODE_2D:
-   if (rtex-surface.flags  RADEON_SURF_SCANOUT) {
-   switch (util_format_get_blocksize(rtex-real_format)) {
-   case 1:
-   return 10;
-   case 2:
-   return 11;
-   default:
-   assert(!Invalid block size);
-   /* Fall through */
-   case 4:
-   return 12;
-   }
-   } else {
-   switch (util_format_get_blocksize(rtex-real_format)) {
-   case 1:
-   return 14;
-   case 2:
-   return 15;
-   case 4:
-   return 16;
-   case 8:
-   return 17;
-   default:
-   return 13;
-   }
-   }
+   if (stencil) {
+   tile_mode_index = rtex-surface.stencil_tiling_index[level];
+   } else {
+   tile_mode_index = rtex-surface.tiling_index[level];
}
+   return tile_mode_index;
 }
 
 /*
@@ -1638,7 +1587,7 @@ static void si_cb(struct r600_context *rctx, struct 
si_pm4_state *pm4,
slice = slice - 1;
}
 
-   tile_mode_index = 

[Mesa-dev] [PATCH] winsys/radeon: add command stream replay dump for faulty lockup

2013-03-27 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Build-time option: set RADEON_CS_DUMP_ON_LOCKUP to 1 in radeon_drm_cs.h to
enable it.

When enabled, after each cs submission the code will try to detect a lockup
by waiting for one of the buffers of the cs to become idle. After a timeout
it will consider that the cs triggered a lockup and will write a
radeon_lockup.c file in the current directory that has all the information
needed to replay the cs.

To build this file :
gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/winsys/radeon/drm/Makefile.sources |   1 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c  |  80 ++--
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h  |   2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c  |   4 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h  |   6 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c | 135 +
 6 files changed, 191 insertions(+), 37 deletions(-)
 create mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c

diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources 
b/src/gallium/winsys/radeon/drm/Makefile.sources
index 1d18d61..4ca5ebb 100644
--- a/src/gallium/winsys/radeon/drm/Makefile.sources
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -1,4 +1,5 @@
 C_SOURCES := \
radeon_drm_bo.c \
radeon_drm_cs.c \
+   radeon_drm_cs_dump.c \
radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index f4ac526..5a9493a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -391,14 +391,54 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
 FREE(bo);
 }
 
+void *radeon_bo_do_map(struct radeon_bo *bo)
+{
+struct drm_radeon_gem_mmap args = {0};
+void *ptr;
+
+/* Return the pointer if it's already mapped. */
+if (bo-ptr)
+return bo-ptr;
+
+/* Map the buffer. */
+pipe_mutex_lock(bo-map_mutex);
+/* Return the pointer if it's already mapped (in case of a race). */
+if (bo-ptr) {
+pipe_mutex_unlock(bo-map_mutex);
+return bo-ptr;
+}
+args.handle = bo-handle;
+args.offset = 0;
+args.size = (uint64_t)bo-base.size;
+if (drmCommandWriteRead(bo-rws-fd,
+DRM_RADEON_GEM_MMAP,
+args,
+sizeof(args))) {
+pipe_mutex_unlock(bo-map_mutex);
+fprintf(stderr, radeon: gem_mmap failed: %p 0x%08X\n,
+bo, bo-handle);
+return NULL;
+}
+
+ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+   bo-rws-fd, args.addr_ptr);
+if (ptr == MAP_FAILED) {
+pipe_mutex_unlock(bo-map_mutex);
+fprintf(stderr, radeon: mmap failed, errno: %i\n, errno);
+return NULL;
+}
+bo-ptr = ptr;
+pipe_mutex_unlock(bo-map_mutex);
+
+return bo-ptr;
+}
+
 static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
struct radeon_winsys_cs *rcs,
enum pipe_transfer_usage usage)
 {
 struct radeon_bo *bo = (struct radeon_bo*)buf;
 struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
-struct drm_radeon_gem_mmap args = {0};
-void *ptr;
 
 /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
 if (!(usage  PIPE_TRANSFER_UNSYNCHRONIZED)) {
@@ -461,41 +501,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle 
*buf,
 }
 }
 
-/* Return the pointer if it's already mapped. */
-if (bo-ptr)
-return bo-ptr;
-
-/* Map the buffer. */
-pipe_mutex_lock(bo-map_mutex);
-/* Return the pointer if it's already mapped (in case of a race). */
-if (bo-ptr) {
-pipe_mutex_unlock(bo-map_mutex);
-return bo-ptr;
-}
-args.handle = bo-handle;
-args.offset = 0;
-args.size = (uint64_t)bo-base.size;
-if (drmCommandWriteRead(bo-rws-fd,
-DRM_RADEON_GEM_MMAP,
-args,
-sizeof(args))) {
-pipe_mutex_unlock(bo-map_mutex);
-fprintf(stderr, radeon: gem_mmap failed: %p 0x%08X\n,
-bo, bo-handle);
-return NULL;
-}
-
-ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
-   bo-rws-fd, args.addr_ptr);
-if (ptr == MAP_FAILED) {
-pipe_mutex_unlock(bo-map_mutex);
-fprintf(stderr, radeon: mmap failed, errno: %i\n, errno);
-return NULL;
-}
-bo-ptr = ptr;
-pipe_mutex_unlock(bo-map_mutex);
-
-return bo-ptr;
+return radeon_bo_do_map(bo);
 }
 
 static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 

[Mesa-dev] [PATCH] winsys/radeon: add command stream replay dump for faulty lockup v2

2013-03-27 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Build-time option: set RADEON_CS_DUMP_ON_LOCKUP to 1 in radeon_drm_cs.h to
enable it.

When enabled, after each cs submission the code will try to detect a lockup
by waiting for one of the buffers of the cs to become idle. After a timeout
it will consider that the cs triggered a lockup and will write a
radeon_lockup.c file in the current directory that has all the information
needed to replay the cs.

To build this file :
gcc -O0 -g radeon_lockup.c -ldrm -o radeon_lockup -I/usr/include/libdrm

v2: Add radeon_ctx.h file to mesa git tree

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/winsys/radeon/drm/Makefile.sources |   1 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c  |  80 +++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h  |   2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c  |   4 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h  |   6 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c | 141 
 src/gallium/winsys/radeon/tools/radeon_ctx.h   | 237 +
 7 files changed, 434 insertions(+), 37 deletions(-)
 create mode 100644 src/gallium/winsys/radeon/drm/radeon_drm_cs_dump.c
 create mode 100644 src/gallium/winsys/radeon/tools/radeon_ctx.h

diff --git a/src/gallium/winsys/radeon/drm/Makefile.sources 
b/src/gallium/winsys/radeon/drm/Makefile.sources
index 1d18d61..4ca5ebb 100644
--- a/src/gallium/winsys/radeon/drm/Makefile.sources
+++ b/src/gallium/winsys/radeon/drm/Makefile.sources
@@ -1,4 +1,5 @@
 C_SOURCES := \
radeon_drm_bo.c \
radeon_drm_cs.c \
+   radeon_drm_cs_dump.c \
radeon_drm_winsys.c
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index f4ac526..5a9493a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -391,14 +391,54 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
 FREE(bo);
 }
 
+void *radeon_bo_do_map(struct radeon_bo *bo)
+{
+struct drm_radeon_gem_mmap args = {0};
+void *ptr;
+
+/* Return the pointer if it's already mapped. */
+if (bo-ptr)
+return bo-ptr;
+
+/* Map the buffer. */
+pipe_mutex_lock(bo-map_mutex);
+/* Return the pointer if it's already mapped (in case of a race). */
+if (bo-ptr) {
+pipe_mutex_unlock(bo-map_mutex);
+return bo-ptr;
+}
+args.handle = bo-handle;
+args.offset = 0;
+args.size = (uint64_t)bo-base.size;
+if (drmCommandWriteRead(bo-rws-fd,
+DRM_RADEON_GEM_MMAP,
+args,
+sizeof(args))) {
+pipe_mutex_unlock(bo-map_mutex);
+fprintf(stderr, radeon: gem_mmap failed: %p 0x%08X\n,
+bo, bo-handle);
+return NULL;
+}
+
+ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
+   bo-rws-fd, args.addr_ptr);
+if (ptr == MAP_FAILED) {
+pipe_mutex_unlock(bo-map_mutex);
+fprintf(stderr, radeon: mmap failed, errno: %i\n, errno);
+return NULL;
+}
+bo-ptr = ptr;
+pipe_mutex_unlock(bo-map_mutex);
+
+return bo-ptr;
+}
+
 static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf,
struct radeon_winsys_cs *rcs,
enum pipe_transfer_usage usage)
 {
 struct radeon_bo *bo = (struct radeon_bo*)buf;
 struct radeon_drm_cs *cs = (struct radeon_drm_cs*)rcs;
-struct drm_radeon_gem_mmap args = {0};
-void *ptr;
 
 /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
 if (!(usage  PIPE_TRANSFER_UNSYNCHRONIZED)) {
@@ -461,41 +501,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle 
*buf,
 }
 }
 
-/* Return the pointer if it's already mapped. */
-if (bo-ptr)
-return bo-ptr;
-
-/* Map the buffer. */
-pipe_mutex_lock(bo-map_mutex);
-/* Return the pointer if it's already mapped (in case of a race). */
-if (bo-ptr) {
-pipe_mutex_unlock(bo-map_mutex);
-return bo-ptr;
-}
-args.handle = bo-handle;
-args.offset = 0;
-args.size = (uint64_t)bo-base.size;
-if (drmCommandWriteRead(bo-rws-fd,
-DRM_RADEON_GEM_MMAP,
-args,
-sizeof(args))) {
-pipe_mutex_unlock(bo-map_mutex);
-fprintf(stderr, radeon: gem_mmap failed: %p 0x%08X\n,
-bo, bo-handle);
-return NULL;
-}
-
-ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
-   bo-rws-fd, args.addr_ptr);
-if (ptr == MAP_FAILED) {
-pipe_mutex_unlock(bo-map_mutex);
-fprintf(stderr, radeon: mmap failed, errno: %i\n, errno);
-return NULL;
-}
-bo-ptr = ptr;
-pipe_mutex_unlock(bo-map_mutex);
-
-return bo-ptr;
+return radeon_bo_do_map(bo);
 }
 
 static void 

[Mesa-dev] [PATCH] radeonsi: add cs tracing

2013-03-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Same as on r600, trace cs execution by writing the cs offset after each
state; this allows pinpointing a lockup inside the command stream and
narrowing down the scope of the lockup investigation.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/r600_hw_context.c | 58 ++
 src/gallium/drivers/radeonsi/r600_texture.c|  2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c   | 22 ++
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   | 12 ++
 src/gallium/drivers/radeonsi/radeonsi_pm4.c| 12 ++
 src/gallium/drivers/radeonsi/si_state_draw.c   |  7 +++-
 6 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c 
b/src/gallium/drivers/radeonsi/r600_hw_context.c
index bd348f9..3cd5d0e 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
/* Save 16 dwords for the fence mechanism. */
num_dw += 16;
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   num_dw += R600_TRACE_CS_DWORDS;
+   }
+#endif
+
/* Flush if there's not enough space. */
if (num_dw  RADEON_MAX_CMDBUF_DWORDS) {
radeonsi_flush(ctx-context, NULL, RADEON_FLUSH_ASYNC);
@@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx, unsigned 
flags)
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  cs-cdw; i++) {
+   fprintf(stderr, [%4d] [%5d] 0x%08x\n, 
rscreen-cs_count, i, cs-buf[i]);
+   }
+   rscreen-cs_count++;
+   }
+#endif
+
/* Flush the CS. */
ctx-ws-cs_flush(ctx-cs, flags);
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  10; i++) {
+   usleep(5);
+   if (!ctx-ws-buffer_is_busy(rscreen-trace_bo-buf, 
RADEON_USAGE_READWRITE)) {
+   break;
+   }
+   }
+   if (i == 10) {
+   fprintf(stderr, timeout on cs lockup likely happen at 
cs %d dw %d\n,
+   rscreen-trace_ptr[1], rscreen-trace_ptr[0]);
+   } else {
+   fprintf(stderr, cs %d executed in %dms\n, 
rscreen-trace_ptr[1], i * 5);
+   }
+   }
+#endif
+
ctx-pm4_dirty_cdwords = 0;
ctx-flags = 0;
 
@@ -665,3 +703,23 @@ void r600_context_draw_opaque_count(struct r600_context 
*ctx, struct r600_so_tar
cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, t-filled_size, 
RADEON_USAGE_READ);
 
 }
+
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx)
+{
+   struct r600_screen *rscreen = rctx-screen;
+   struct radeon_winsys_cs *cs = rctx-cs;
+   uint64_t va;
+   uint32_t reloc;
+
+   va = r600_resource_va(rscreen-screen, (void*)rscreen-trace_bo);
+   reloc = r600_context_bo_reloc(rctx, rscreen-trace_bo, 
RADEON_USAGE_READWRITE);
+   cs-buf[cs-cdw++] = PKT3(PKT3_MEM_WRITE, 3, 0);
+   cs-buf[cs-cdw++] = va  0xUL;
+   cs-buf[cs-cdw++] = (va  32UL)  0xFFUL;
+   cs-buf[cs-cdw++] = cs-cdw;
+   cs-buf[cs-cdw++] = rscreen-cs_count;
+   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
+   cs-buf[cs-cdw++] = reloc;
+}
+#endif
diff --git a/src/gallium/drivers/radeonsi/r600_texture.c 
b/src/gallium/drivers/radeonsi/r600_texture.c
index 6cafc3d..3d074a3 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -550,7 +550,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen 
*screen,
 
if (!(templ-flags  R600_RESOURCE_FLAG_TRANSFER) 
!(templ-bind  PIPE_BIND_SCANOUT)) {
-   array_mode = V_009910_ARRAY_2D_TILED_THIN1;
+   array_mode = V_009910_ARRAY_1D_TILED_THIN1;
}
 
r = r600_init_surface(rscreen, surface, templ, array_mode,
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index c5dac29..a370d7e 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
rscreen-ws-buffer_unmap(rscreen-fences.bo-cs_buf);
si_resource_reference(rscreen-fences.bo, NULL);
}
+
+#if R600_TRACE_CS
+   if (rscreen-trace_bo) {
+   rscreen-ws-buffer_unmap(rscreen-trace_bo-cs_buf);
+   pipe_resource_reference((struct 

[Mesa-dev] [PATCH] radeonsi: add cs tracing v2

2013-03-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Same as on r600, trace cs execution by writing the cs offset after each
state; this allows pinpointing a lockup inside the command stream and
narrowing down the scope of the lockup investigation.

v2: Use WRITE_DATA packet instead of WRITE_MEM

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/radeonsi/r600_hw_context.c | 61 ++
 src/gallium/drivers/radeonsi/radeonsi_pipe.c   | 22 ++
 src/gallium/drivers/radeonsi/radeonsi_pipe.h   | 12 +
 src/gallium/drivers/radeonsi/radeonsi_pm4.c| 12 +
 src/gallium/drivers/radeonsi/si_state_draw.c   |  7 ++-
 src/gallium/drivers/radeonsi/sid.h | 14 ++
 6 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c 
b/src/gallium/drivers/radeonsi/r600_hw_context.c
index bd348f9..967f093 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
/* Save 16 dwords for the fence mechanism. */
num_dw += 16;
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   num_dw += R600_TRACE_CS_DWORDS;
+   }
+#endif
+
/* Flush if there's not enough space. */
if (num_dw  RADEON_MAX_CMDBUF_DWORDS) {
radeonsi_flush(ctx-context, NULL, RADEON_FLUSH_ASYNC);
@@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx, unsigned 
flags)
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  cs-cdw; i++) {
+   fprintf(stderr, [%4d] [%5d] 0x%08x\n, 
rscreen-cs_count, i, cs-buf[i]);
+   }
+   rscreen-cs_count++;
+   }
+#endif
+
/* Flush the CS. */
ctx-ws-cs_flush(ctx-cs, flags);
 
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  10; i++) {
+   usleep(5);
+   if (!ctx-ws-buffer_is_busy(rscreen-trace_bo-buf, 
RADEON_USAGE_READWRITE)) {
+   break;
+   }
+   }
+   if (i == 10) {
+   fprintf(stderr, timeout on cs lockup likely happen at 
cs %d dw %d\n,
+   rscreen-trace_ptr[1], rscreen-trace_ptr[0]);
+   } else {
+   fprintf(stderr, cs %d executed in %dms\n, 
rscreen-trace_ptr[1], i * 5);
+   }
+   }
+#endif
+
ctx-pm4_dirty_cdwords = 0;
ctx-flags = 0;
 
@@ -665,3 +703,26 @@ void r600_context_draw_opaque_count(struct r600_context 
*ctx, struct r600_so_tar
cs-buf[cs-cdw++] = r600_context_bo_reloc(ctx, t-filled_size, 
RADEON_USAGE_READ);
 
 }
+
+#if R600_TRACE_CS
+void r600_trace_emit(struct r600_context *rctx)
+{
+   struct r600_screen *rscreen = rctx-screen;
+   struct radeon_winsys_cs *cs = rctx-cs;
+   uint64_t va;
+   uint32_t reloc;
+
+   va = r600_resource_va(rscreen-screen, (void*)rscreen-trace_bo);
+   reloc = r600_context_bo_reloc(rctx, rscreen-trace_bo, 
RADEON_USAGE_READWRITE);
+   cs-buf[cs-cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
+   cs-buf[cs-cdw++] = 
PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
+   PKT3_WRITE_DATA_WR_CONFIRM |
+   
PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
+   cs-buf[cs-cdw++] = va  0xUL;
+   cs-buf[cs-cdw++] = (va  32UL)  0xUL;
+   cs-buf[cs-cdw++] = cs-cdw;
+   cs-buf[cs-cdw++] = rscreen-cs_count;
+   cs-buf[cs-cdw++] = PKT3(PKT3_NOP, 0, 0);
+   cs-buf[cs-cdw++] = reloc;
+}
+#endif
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index c5dac29..a370d7e 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
rscreen-ws-buffer_unmap(rscreen-fences.bo-cs_buf);
si_resource_reference(rscreen-fences.bo, NULL);
}
+
+#if R600_TRACE_CS
+   if (rscreen-trace_bo) {
+   rscreen-ws-buffer_unmap(rscreen-trace_bo-cs_buf);
+   pipe_resource_reference((struct 
pipe_resource**)rscreen-trace_bo, NULL);
+   }
+#endif
+
pipe_mutex_destroy(rscreen-fences.mutex);
 
rscreen-ws-destroy(rscreen-ws);
@@ -727,5 +735,19 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
LIST_INITHEAD(rscreen-fences.blocks);
pipe_mutex_init(rscreen-fences.mutex);
 

[Mesa-dev] [PATCH] r600g: workaround hyperz lockup on evergreen

2013-02-22 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This workaround disables hyperz if writes to the zbuffer are disabled. Somehow
using hyperz when not writing to the zbuffer triggers a GPU lockup. See:

https://bugs.freedesktop.org/show_bug.cgi?id=60848

Candidate for 9.1

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c   | 10 +-
 src/gallium/drivers/r600/r600_pipe.h |  4 +++-
 src/gallium/drivers/r600/r600_state.c|  1 +
 src/gallium/drivers/r600/r600_state_common.c | 10 ++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 4a91942..2e301bc 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -858,6 +858,7 @@ static void *evergreen_create_dsa_state(struct pipe_context 
*ctx,
dsa-valuemask[1] = state-stencil[1].valuemask;
dsa-writemask[0] = state-stencil[0].writemask;
dsa-writemask[1] = state-stencil[1].writemask;
+   dsa-zwritemask = state-depth.writemask;
 
db_depth_control = S_028800_Z_ENABLE(state-depth.enabled) |
S_028800_Z_WRITE_ENABLE(state-depth.writemask) |
@@ -2286,7 +2287,14 @@ static void evergreen_emit_db_misc_state(struct 
r600_context *rctx, struct r600_
}
db_render_override |= S_02800C_NOOP_CULL_DISABLE(1);
}
-   if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
+   /* FIXME we should be able to use hyperz even if we are not writing to
+* zbuffer but somehow this trigger GPU lockup. See :
+*
+* https://bugs.freedesktop.org/show_bug.cgi?id=60848
+*
+* Disable hyperz for now if not writing to zbuffer.
+*/
+   if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled  
rctx-zwritemask) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
/* This is to fix a lockup when hyperz and alpha test are 
enabled at
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index de9c205..5ebf98a 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -298,7 +298,8 @@ struct r600_dsa_state {
unsignedalpha_ref;
ubyte   valuemask[2];
ubyte   writemask[2];
-   unsignedsx_alpha_test_control;
+   unsignedzwritemask;
+   unsignedsx_alpha_test_control;
 };
 
 struct r600_pipe_shader;
@@ -513,6 +514,7 @@ struct r600_context {
boolalpha_to_one;
boolforce_blend_disable;
boolean dual_src_blend;
+   unsignedzwritemask;
 
/* Index buffer. */
struct pipe_index_bufferindex_buffer;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index c6559bb..2d3ec93 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -842,6 +842,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx,
dsa-valuemask[1] = state-stencil[1].valuemask;
dsa-writemask[0] = state-stencil[0].writemask;
dsa-writemask[1] = state-stencil[1].writemask;
+   dsa-zwritemask = state-depth.writemask;
 
db_depth_control = S_028800_Z_ENABLE(state-depth.enabled) |
S_028800_Z_WRITE_ENABLE(state-depth.writemask) |
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index c4bd758..52b7d55 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -284,6 +284,16 @@ static void r600_bind_dsa_state(struct pipe_context *ctx, 
void *state)
ref.valuemask[1] = dsa-valuemask[1];
ref.writemask[0] = dsa-writemask[0];
ref.writemask[1] = dsa-writemask[1];
+   if (rctx-zwritemask != dsa-zwritemask) {
+   rctx-zwritemask = dsa-zwritemask;
+   if (rctx-chip_class = EVERGREEN) {
+   /* work around some issue when not writting to zbuffer
+* we are having lockup on evergreen so do not enable
+* hyperz when not writting zbuffer
+*/
+   rctx-db_misc_state.atom.dirty = true;
+   }
+   }
 
r600_set_stencil_ref(ctx, ref);
 
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: fix lockup when hyperz & alpha test are enabled together. v3

2013-02-12 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

It seems that enabling alpha test confuses the GPU about the order in
which it should perform the Z testing. So force the order programmed
through db shader control.

v2: Only force z order when alpha test is enabled
v3: Update db shader when binding new dsa + spelling fix

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c   | 25 +++--
 src/gallium/drivers/r600/r600_state.c| 22 +-
 src/gallium/drivers/r600/r600_state_common.c |  5 +
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 211c218..29b22ab 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2251,6 +2251,13 @@ static void evergreen_emit_db_misc_state(struct 
r600_context *rctx, struct r600_
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time somehow GPU get confuse on which order to pick 
for
+* z test
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
+   }
} else {
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
}
@@ -3240,7 +3247,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_state *rstate = shader-rstate;
struct r600_shader *rshader = shader-shader;
-   unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, 
spi_ps_in_control_1, db_shader_control;
+   unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, 
spi_ps_in_control_1, db_shader_control = 0;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = 
FALSE;
@@ -3250,7 +3257,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
 
rstate-nregs = 0;
 
-   db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
for (i = 0; i  rshader-ninput; i++) {
/* evergreen NUM_INTERP only contains values interpolated into 
the LDS,
   POSITION goes via GPRs from the SC so isn't counted */
@@ -3484,6 +3490,21 @@ void evergreen_update_db_shader_control(struct 
r600_context * rctx)

V_02880C_EXPORT_DB_FULL) |

S_02880C_ALPHA_TO_MASK_DISABLE(rctx-framebuffer.cb0_is_integer);
 
+   /* When alpha test is enabled we can't trust the hw to make the proper
+* decision on the order in which ztest should be run related to 
fragment
+* shader execution.
+*
+* If alpha test is enabled perform early z rejection (RE_Z) but don't 
early
+* write to the zbuffer. Write to zbuffer is delayed after fragment 
shader
+* execution and thus after alpha test so if discarded by the alpha test
+* the z value is not written.
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_shader_control |= S_02880C_Z_ORDER(V_02880C_RE_Z);
+   } else {
+   db_shader_control |= 
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+   }
+
if (db_shader_control != rctx-db_misc_state.db_shader_control) {
rctx-db_misc_state.db_shader_control = db_shader_control;
rctx-db_misc_state.atom.dirty = true;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 5322850..3f359fb 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1966,6 +1966,13 @@ static void r600_emit_db_misc_state(struct r600_context 
*rctx, struct r600_atom
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time somehow GPU get confuse on which order to pick 
for
+* z test
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_render_override |= S_028D10_FORCE_SHADER_Z_ORDER(1);
+   }
} else {
db_render_override 

[Mesa-dev] [PATCH 2/2] r600g: fix lockup when hyperz & alpha test are enabled together. v2

2013-02-11 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

It seems that enabling alpha test confuses the GPU about the order in
which it should perform the Z testing. So force the order programmed
through db shader control.

v2: Only force z order when alpha test is enabled

Signed-off-by: Jerome Glisse jgli...@redhat.com
Reviewed-by: Marek Olšák mar...@gmail.com
---
 src/gallium/drivers/r600/evergreen_state.c | 25 +++--
 src/gallium/drivers/r600/r600_state.c  | 22 +-
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 211c218..b710131 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2251,6 +2251,13 @@ static void evergreen_emit_db_misc_state(struct 
r600_context *rctx, struct r600_
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time some how GPU get confuse on which order to 
pick for
+* z test
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
+   }
} else {
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
}
@@ -3240,7 +3247,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_state *rstate = shader-rstate;
struct r600_shader *rshader = shader-shader;
-   unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, 
spi_ps_in_control_1, db_shader_control;
+   unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, 
spi_ps_in_control_1, db_shader_control = 0;
int pos_index = -1, face_index = -1;
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = 
FALSE;
@@ -3250,7 +3257,6 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
 
rstate-nregs = 0;
 
-   db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
for (i = 0; i  rshader-ninput; i++) {
/* evergreen NUM_INTERP only contains values interpolated into 
the LDS,
   POSITION goes via GPRs from the SC so isn't counted */
@@ -3484,6 +3490,21 @@ void evergreen_update_db_shader_control(struct 
r600_context * rctx)

V_02880C_EXPORT_DB_FULL) |

S_02880C_ALPHA_TO_MASK_DISABLE(rctx-framebuffer.cb0_is_integer);
 
+   /* When alpha test is enabled we can't antrust the hw to make the proper
+* decision on the order in which ztest should be run related to 
fragment
+* shader execution.
+*
+* If alpha test is enabled perform early z rejection (RE_Z) but don't 
early
+* write to the zbuffer. Write to zbuffer is delayed after fragment 
shader
+* execution and thus after alpha test so if discarded by the alpha test
+* the z value is not written.
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_shader_control |= S_02880C_Z_ORDER(V_02880C_RE_Z);
+   } else {
+   db_shader_control |= 
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+   }
+
if (db_shader_control != rctx-db_misc_state.db_shader_control) {
rctx-db_misc_state.db_shader_control = db_shader_control;
rctx-db_misc_state.atom.dirty = true;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 5322850..8efd4b3 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1966,6 +1966,13 @@ static void r600_emit_db_misc_state(struct r600_context 
*rctx, struct r600_atom
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time some how GPU get confuse on which order to 
pick for
+* z test
+*/
+   if (rctx-alphatest_state.sx_alpha_test_control) {
+   db_render_override |= S_028D10_FORCE_SHADER_Z_ORDER(1);
+   }
} else {
db_render_override |= 
S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
}
@@ 

[Mesa-dev] [PATCH] r600g: fix lockup when hyperz & alpha test are enabled together.

2013-02-08 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

It seems that enabling alpha test confuses the GPU about the order in
which it should perform the Z testing. So force the order programmed
through db shader control.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 5 +
 src/gallium/drivers/r600/r600_state.c  | 5 +
 2 files changed, 10 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 211c218..a610b69 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2251,6 +2251,11 @@ static void evergreen_emit_db_misc_state(struct 
r600_context *rctx, struct r600_
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time some how GPU get confuse on which order to 
pick for
+* z test
+*/
+   db_render_override |= S_02800C_FORCE_SHADER_Z_ORDER(1);
} else {
db_render_override |= 
S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE);
}
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 67c4d99..2760f19 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1966,6 +1966,11 @@ static void r600_emit_db_misc_state(struct r600_context 
*rctx, struct r600_atom
if (rctx-db_state.rsurf  rctx-db_state.rsurf-htile_enabled) {
/* FORCE_OFF means HiZ/HiS are determined by DB_SHADER_CONTROL 
*/
db_render_override |= 
S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_OFF);
+   /* This is to fix a lockup when hyperz and alpha test are 
enabled at
+* the same time some how GPU get confuse on which order to 
pick for
+* z test
+*/
+   db_render_override |= S_028D10_FORCE_SHADER_Z_ORDER(1);
} else {
db_render_override |= 
S_028D10_FORCE_HIZ_ENABLE(V_028D10_FORCE_DISABLE);
}
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: add cs memory usage accounting and limit it v2

2013-01-31 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

We are now seeing cs that can go over the vram+gtt size. To avoid
failing, flush early any cs that goes over 70% (gtt+vram) usage. 70%
is used to allow for some fragmentation.

The idea is to compute a rough estimate of the memory requirement of
each draw call. After each draw call, memory is precisely accounted
for, so the uncertainty is only on the current draw call.
In practice this gave a very good estimate (+/- 10% of the target
memory limit).

v2: Remove left over from testing version, remove useless NULL
checking. Improve commit message.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c|  4 
 src/gallium/drivers/r600/r600_hw_context.c| 12 
 src/gallium/drivers/r600/r600_pipe.h  | 21 +
 src/gallium/drivers/r600/r600_state.c |  3 +++
 src/gallium/drivers/r600/r600_state_common.c  | 13 -
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++
 7 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 0a3861f..5dd8b13 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
surf = (struct r600_surface*)state-cbufs[i];
rtex = (struct r600_texture*)surf-base.texture;
 
+   r600_context_add_resource_size(ctx, state-cbufs[i]-texture);
+
if (!surf-color_initialized) {
evergreen_init_color_surface(rctx, surf);
}
@@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
if (state-zsbuf) {
surf = (struct r600_surface*)state-zsbuf;
 
+   r600_context_add_resource_size(ctx, state-zsbuf-texture);
+
if (!surf-depth_initialized) {
evergreen_init_depth_surface(rctx, surf);
}
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 23f488a..a89f230 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -359,6 +359,16 @@ out_err:
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
 {
+   if (!ctx-ws-cs_memory_below_limit(ctx-rings.gfx.cs, ctx-vram, 
ctx-gtt)) {
+   ctx-gtt = 0;
+   ctx-vram = 0;
+   ctx-rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+   return;
+   }
+   /* all will be accounted once relocation are emited */
+   ctx-gtt = 0;
+   ctx-vram = 0;
+
/* The number of dwords we already used in the CS so far. */
num_dw += ctx-rings.gfx.cs-cdw;
 
@@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
 
ctx-pm4_dirty_cdwords = 0;
ctx-flags = 0;
+   ctx-gtt = 0;
+   ctx-vram = 0;
 
/* Begin a new CS. */
r600_emit_command_buffer(ctx-rings.gfx.cs, ctx-start_cs_cmd);
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 3ff42d3..42b4e7c 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,10 @@ struct r600_context {
unsignedbackend_mask;
unsignedmax_db; /* for OQ */
 
+   /* current unaccounted memory usage */
+   uint64_tvram;
+   uint64_tgtt;
+
/* Miscellaneous state objects. */
void*custom_dsa_flush;
void*custom_blend_resolve;
@@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct pipe_resource 
*r, unsigned level)
}
 }
 
+static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, 
struct pipe_resource *r)
+{
+   struct r600_context *rctx = (struct r600_context *)ctx;
+   struct r600_resource *rr = (struct r600_resource *)r;
+
+   if (r == NULL) {
+   return;
+   }
+
+   if (rr-domains  RADEON_DOMAIN_GTT) {
+   rctx-gtt += rr-buf-size;
+   }
+   if (rr-domains  RADEON_DOMAIN_VRAM) {
+   rctx-vram += rr-buf-size;
+   }
+}
+
 #endif
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index c0bc2a5..44cd00e 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct 
pipe_context *ctx,
 
surf = (struct r600_surface*)state-cbufs[i];
rtex = (struct r600_texture*)surf-base.texture;
+   

[Mesa-dev] [PATCH] r600g: add cs memory usage accounting and limit it

2013-01-30 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

We are now seeing cs that can go over the vram+gtt size. To avoid
failing, flush early any cs that goes over 70% (gtt+vram) usage. 70%
is used to allow for some fragmentation.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c|  4 
 src/gallium/drivers/r600/r600.h   |  1 +
 src/gallium/drivers/r600/r600_buffer.c|  1 +
 src/gallium/drivers/r600/r600_hw_context.c| 12 
 src/gallium/drivers/r600/r600_pipe.c  |  3 +++
 src/gallium/drivers/r600/r600_pipe.h  | 21 +
 src/gallium/drivers/r600/r600_state.c |  3 +++
 src/gallium/drivers/r600/r600_state_common.c  | 17 -
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++
 10 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index be1c427..84f8dce 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
surf = (struct r600_surface*)state-cbufs[i];
rtex = (struct r600_texture*)surf-base.texture;
 
+   r600_context_add_resource_size(ctx, state-cbufs[i]-texture);
+
if (!surf-color_initialized) {
evergreen_init_color_surface(rctx, surf);
}
@@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
if (state-zsbuf) {
surf = (struct r600_surface*)state-zsbuf;
 
+   r600_context_add_resource_size(ctx, state-zsbuf-texture);
+
if (!surf-depth_initialized) {
evergreen_init_depth_surface(rctx, surf);
}
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index a383c90..b9f7d3d 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -50,6 +50,7 @@ struct r600_resource {
 
/* Resource state. */
unsigneddomains;
+   uint64_tsize;
 };
 
 #define R600_BLOCK_MAX_BO  32
diff --git a/src/gallium/drivers/r600/r600_buffer.c 
b/src/gallium/drivers/r600/r600_buffer.c
index 6df0d91..92f549a 100644
--- a/src/gallium/drivers/r600/r600_buffer.c
+++ b/src/gallium/drivers/r600/r600_buffer.c
@@ -250,6 +250,7 @@ bool r600_init_resource(struct r600_screen *rscreen,
break;
}
 
+   res-size = size;
res-buf = rscreen-ws-buffer_create(rscreen-ws, size, alignment,
   use_reusable_pool,
   initial_domain);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index ebafd97..44d3b4d 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -359,6 +359,16 @@ out_err:
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
boolean count_draw_in)
 {
+   if (!ctx-ws-cs_memory_below_limit(ctx-rings.gfx.cs, ctx-vram, 
ctx-gtt)) {
+   ctx-gtt = 0;
+   ctx-vram = 0;
+   ctx-rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
+   return;
+   }
+   /* all will be accounted once relocation are emited */
+   ctx-gtt = 0;
+   ctx-vram = 0;
+
/* The number of dwords we already used in the CS so far. */
num_dw += ctx-rings.gfx.cs-cdw;
 
@@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx)
 
ctx-pm4_dirty_cdwords = 0;
ctx-flags = 0;
+   ctx-gtt = 0;
+   ctx-vram = 0;
 
/* Begin a new CS. */
r600_emit_command_buffer(ctx-rings.gfx.cs, ctx-start_cs_cmd);
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index a59578d..cb50cfe 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -333,6 +333,9 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
rctx-chip_class = rscreen-chip_class;
rctx-keep_tiling_flags = rscreen-info.drm_minor = 12;
 
+   rctx-gtt = 0;
+   rctx-vram = 0;
+
LIST_INITHEAD(rctx-active_nontimer_queries);
LIST_INITHEAD(rctx-dirty);
LIST_INITHEAD(rctx-enable_list);
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 3ff42d3..beb4b33 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -447,6 +447,10 @@ struct r600_context {
unsignedbackend_mask;
unsignedmax_db; /* for OQ */
 
+   

[Mesa-dev] [PATCH 4/4] r600g: only emit gfx cmd when there is actual work in it

2013-01-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c | 2 ++
 src/gallium/drivers/r600/r600_hw_context.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 6 ++
 src/gallium/drivers/r600/r600_pipe.h | 1 +
 src/gallium/drivers/r600/r600_query.c| 2 ++
 src/gallium/drivers/r600/r600_state_common.c | 1 +
 6 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index f4a7905..977595e 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -308,6 +308,8 @@ static void evergreen_emit_direct_dispatch(
r600_write_value(cs, grid_layout[2]);
/* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */
r600_write_value(cs, 1);
+
+   rctx-rings.gfx.cdraw++;
 }
 
 static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout,
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index d7518a5..511a276 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1122,6 +1122,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
size -= byte_count;
src_offset += byte_count;
dst_offset += byte_count;
+   rctx-rings.gfx.cdraw++;
}
 }
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 6767412..af08cff 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -120,6 +120,10 @@ static void r600_flush(struct pipe_context *ctx, unsigned 
flags)
struct pipe_query *render_cond = NULL;
unsigned render_cond_mode = 0;
 
+   if (!rctx-rings.gfx.cdraw) {
+   return;
+   }
+
rctx-rings.gfx.flushing = true;
/* Disable render condition. */
if (rctx-current_render_cond) {
@@ -130,6 +134,7 @@ static void r600_flush(struct pipe_context *ctx, unsigned 
flags)
 
r600_context_flush(rctx, flags);
rctx-rings.gfx.flushing = false;
+   rctx-rings.gfx.cdraw = 0;
r600_begin_new_cs(rctx);
 
/* Re-enable render condition. */
@@ -387,6 +392,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
goto fail;
}
 
+   rctx-rings.gfx.cdraw = 0;
rctx-rings.gfx.cs = rctx-ws-cs_create(rctx-ws, RING_GFX);
rctx-rings.gfx.flush = r600_flush_gfx_ring;
rctx-ws-cs_set_flush_callback(rctx-rings.gfx.cs, 
r600_flush_from_winsys, rctx);
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 31dcd05..5c72756 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -418,6 +418,7 @@ struct r600_fetch_shader {
 struct r600_ring {
struct radeon_winsys_cs *cs;
boolflushing;
+   unsignedcdraw;
void (*flush)(void *ctx, unsigned flags);
 };
 
diff --git a/src/gallium/drivers/r600/r600_query.c 
b/src/gallium/drivers/r600/r600_query.c
index 0335189..7916f2d 100644
--- a/src/gallium/drivers/r600/r600_query.c
+++ b/src/gallium/drivers/r600/r600_query.c
@@ -149,6 +149,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, 
struct r600_query *q
cs-buf[cs-cdw++] = (3  29) | ((va  32UL)  0xFF);
cs-buf[cs-cdw++] = 0;
cs-buf[cs-cdw++] = 0;
+   ctx-rings.gfx.cdraw++;
break;
default:
assert(0);
@@ -201,6 +202,7 @@ static void r600_emit_query_end(struct r600_context *ctx, 
struct r600_query *que
cs-buf[cs-cdw++] = (3  29) | ((va  32UL)  0xFF);
cs-buf[cs-cdw++] = 0;
cs-buf[cs-cdw++] = 0;
+   ctx-rings.gfx.cdraw++;
break;
default:
assert(0);
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index b547d64..d4616ce 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1439,6 +1439,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
r600_trace_emit(rctx);
}
 #endif
+   rctx-rings.gfx.cdraw++;
 
/* Set the depth buffer as dirty. */
if (rctx-framebuffer.state.zsbuf) {
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] r600g async dma support

2013-01-25 Thread j . glisse
The design is mostly the same as previously, with a few changes. First, I use
only one thread to offload all cs submission, whether gfx or dma. The reason is
that using one thread for gfx and one for dma leads to more complex
synchronization with no gain, i.e. when submitting gfx you would need to make
sure previous dma submissions are done and vice versa. So in the end it's just
not a good idea. Moreover, dma submission is a lot faster than gfx submission,
as the dma cs are smaller and simpler for the kernel to parse.

Second, I don't use a stack in r600g to keep track of cs submission
ordering. Instead, any time r600g switches command stream, i.e. starts writing
dma commands after writing gfx ones, we first asynchronously flush the gfx
commands. This ensures that at any point in time the driver is only building
commands for either the gfx or the dma ring, and everything is serialized from
the driver's point of view. It simplifies the implementation, as there is no
need to special-case corner cases such as query/event or streamout buffers.

The last patch is a small optimization that decreases the cpu overhead by
not submitting gfx cmd that do not do anything.

Everything has been tested on r7xx and evergreen and I witnessed no regressions.

Evergreen can be improved by adding support for partial blit but i am not
sure it's worth it.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] radeon/winsys: add dma ring support to winsys v3

2013-01-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Add ring support: you can create a cs for each ring. The DMA ring is
a bit special regarding relocations, as you must emit as many relocations
as there are uses of the buffer.

v2: - Improved comment on relocation changes
- Use a single thread to queue cs submission; this simplifies driver
  code while not impacting performance. The rationale for this is that
  you have to wait for all previous submissions to have completed,
  so there was never a case where we could have 2 different threads
  submitting a command stream at the same time. This code just
  consolidates submission into one single thread per winsys.
v3: - Do not use a semaphore for empty queue signaling; instead use a
  cond var. This is because it's tricky to maintain an even number
  of calls to semaphore wait and semaphore signal (the number of
  cs in the stack would for instance make that number vary).

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_context.c   |   2 +-
 src/gallium/drivers/r600/r600_pipe.c  |   2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c  |   2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |   2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 160 --
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |   8 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  87 
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |  17 +++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  20 ++-
 9 files changed, 218 insertions(+), 82 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c 
b/src/gallium/drivers/r300/r300_context.c
index d8af13f..340a7f0 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -379,7 +379,7 @@ struct pipe_context* r300_create_context(struct 
pipe_screen* screen,
  sizeof(struct pipe_transfer), 64,
  UTIL_SLAB_SINGLETHREADED);
 
-r300-cs = rws-cs_create(rws);
+r300-cs = rws-cs_create(rws, RING_GFX);
 if (r300-cs == NULL)
 goto fail;
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index fda5074..e4a35cf 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
goto fail;
}
 
-   rctx-cs = rctx-ws-cs_create(rctx-ws);
+   rctx-cs = rctx-ws-cs_create(rctx-ws, RING_GFX);
rctx-ws-cs_set_flush_callback(rctx-cs, r600_flush_from_winsys, rctx);
 
rctx-uploader = u_upload_create(rctx-context, 1024 * 1024, 256,
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index cbb3bc4..5792fe2 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
case TAHITI:
si_init_state_functions(rctx);
LIST_INITHEAD(rctx-active_query_list);
-   rctx-cs = rctx-ws-cs_create(rctx-ws);
+   rctx-cs = rctx-ws-cs_create(rctx-ws, RING_GFX);
rctx-max_db = 8;
si_init_config(rctx);
break;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 897e962..6daafc3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle 
*buf,
 } else {
 /* Try to avoid busy-waiting in radeon_bo_wait. */
 if (p_atomic_read(bo-num_active_ioctls))
-radeon_drm_cs_sync_flush(cs);
+radeon_drm_cs_sync_flush(rcs);
 }
 
 radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index c5e7f1e..cab2704 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -90,6 +90,10 @@
 #define RADEON_CS_RING_COMPUTE  1
 #endif
 
+#ifndef RADEON_CS_RING_DMA
+#define RADEON_CS_RING_DMA  2
+#endif
+
 #ifndef RADEON_CS_END_OF_FRAME
 #define RADEON_CS_END_OF_FRAME  0x04
 #endif
@@ -158,10 +162,8 @@ static void radeon_destroy_cs_context(struct 
radeon_cs_context *csc)
 FREE(csc-relocs);
 }
 
-DEBUG_GET_ONCE_BOOL_OPTION(thread, RADEON_THREAD, TRUE)
-static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
 
-static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
+static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys 

[Mesa-dev] [PATCH 3/4] r600g: add async for staging buffer upload

2013-01-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  44 ++
 src/gallium/drivers/r600/evergreen_state.c  | 197 
 src/gallium/drivers/r600/evergreend.h   |  15 ++
 src/gallium/drivers/r600/r600.h |  27 
 src/gallium/drivers/r600/r600_buffer.c  |  25 ++-
 src/gallium/drivers/r600/r600_hw_context.c  |  48 +-
 src/gallium/drivers/r600/r600_pipe.c|   6 +-
 src/gallium/drivers/r600/r600_pipe.h|   9 ++
 src/gallium/drivers/r600/r600_state.c   | 190 +++
 src/gallium/drivers/r600/r600_state_common.c|   6 +-
 src/gallium/drivers/r600/r600_texture.c |  24 ++-
 src/gallium/drivers/r600/r600d.h|  15 ++
 12 files changed, 589 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index fa90c9a..1c30404 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -26,6 +26,7 @@
 #include r600_hw_context_priv.h
 #include evergreend.h
 #include util/u_memory.h
+#include util/u_math.h
 
 static const struct r600_reg cayman_config_reg_list[] = {
{R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | 
REG_FLAG_FLUSH_CHANGE, 0},
@@ -238,3 +239,46 @@ void evergreen_set_streamout_enable(struct r600_context 
*ctx, unsigned buffer_en
r600_write_context_reg(cs, R_028B94_VGT_STRMOUT_CONFIG, 
S_028B94_STREAMOUT_0_EN(0));
}
 }
+
+void evergreen_dma_copy(struct r600_context *rctx,
+   struct pipe_resource *dst,
+   struct pipe_resource *src,
+   unsigned long dst_offset,
+   unsigned long src_offset,
+   unsigned long size)
+{
+   struct radeon_winsys_cs *cs = rctx-rings.dma.cs;
+   unsigned i, ncopy, csize, sub_cmd, shift;
+   struct r600_resource *rdst = (struct r600_resource*)dst;
+   struct r600_resource *rsrc = (struct r600_resource*)src;
+
+   /* make sure that the dma ring is only one active */
+   rctx-rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
+
+   /* see if we use dword or byte copy */
+   if (!(dst_offset  0x3)  !(src_offset  0x3)  !(size  0x3)) {
+   size = 2;
+   sub_cmd = 0x00;
+   shift = 2;
+   } else {
+   sub_cmd = 0x40;
+   shift = 0;
+   }
+   ncopy = (size / 0x000f) + !!(size % 0x000f);
+
+   r600_need_dma_space(rctx, ncopy * 5);
+   for (i = 0; i  ncopy; i++) {
+   csize = size  0x000f ? size : 0x000f;
+   /* emit reloc before writting cs so that cs is always in 
consistent state */
+   r600_context_bo_reloc(rctx, rctx-rings.dma, rsrc, 
RADEON_USAGE_READ);
+   r600_context_bo_reloc(rctx, rctx-rings.dma, rdst, 
RADEON_USAGE_WRITE);
+   cs-buf[cs-cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, 
csize);
+   cs-buf[cs-cdw++] = dst_offset  0x;
+   cs-buf[cs-cdw++] = src_offset  0x;
+   cs-buf[cs-cdw++] = (dst_offset  32UL)  0xff;
+   cs-buf[cs-cdw++] = (src_offset  32UL)  0xff;
+   dst_offset += csize  shift;
+   src_offset += csize  shift;
+   size -= csize;
+   }
+}
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 86e2c81..f0511d8 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -30,6 +30,20 @@
 #include util/u_framebuffer.h
 #include util/u_dual_blend.h
 #include evergreen_compute.h
+#include util/u_math.h
+
+static INLINE unsigned evergreen_array_mode(unsigned mode)
+{
+   switch (mode) {
+   case RADEON_SURF_MODE_LINEAR_ALIGNED:   return 
V_028C70_ARRAY_LINEAR_ALIGNED;
+   break;
+   case RADEON_SURF_MODE_1D:   return 
V_028C70_ARRAY_1D_TILED_THIN1;
+   break;
+   case RADEON_SURF_MODE_2D:   return 
V_028C70_ARRAY_2D_TILED_THIN1;
+   default:
+   case RADEON_SURF_MODE_LINEAR:   return 
V_028C70_ARRAY_LINEAR_GENERAL;
+   }
+}
 
 static uint32_t eg_num_banks(uint32_t nbanks)
 {
@@ -3445,3 +3459,186 @@ void evergreen_update_db_shader_control(struct 
r600_context * rctx)
rctx-db_misc_state.atom.dirty = true;
}
 }
+
+static void evergreen_dma_copy_tile(struct r600_context *rctx,
+   struct pipe_resource *dst,
+   unsigned dst_level,
+   unsigned dst_x,
+   unsigned dst_y,
+   unsigned dst_z,
+   struct pipe_resource *src,
+   unsigned src_level,
+ 

[Mesa-dev] [PATCH 3/4] r600g: add async for staging buffer upload v2

2013-01-25 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

v2: Add virtual address to dma src/dst offset for cayman

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  46 ++
 src/gallium/drivers/r600/evergreen_state.c  | 201 
 src/gallium/drivers/r600/evergreend.h   |  15 ++
 src/gallium/drivers/r600/r600.h |  27 
 src/gallium/drivers/r600/r600_buffer.c  |  25 ++-
 src/gallium/drivers/r600/r600_hw_context.c  |  48 +-
 src/gallium/drivers/r600/r600_pipe.c|   6 +-
 src/gallium/drivers/r600/r600_pipe.h|   9 ++
 src/gallium/drivers/r600/r600_state.c   | 190 ++
 src/gallium/drivers/r600/r600_state_common.c|   6 +-
 src/gallium/drivers/r600/r600_texture.c |  24 ++-
 src/gallium/drivers/r600/r600d.h|  15 ++
 12 files changed, 595 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index fa90c9a..ca4f4b3 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -26,6 +26,7 @@
 #include r600_hw_context_priv.h
 #include evergreend.h
 #include util/u_memory.h
+#include util/u_math.h
 
 static const struct r600_reg cayman_config_reg_list[] = {
{R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS | 
REG_FLAG_FLUSH_CHANGE, 0},
@@ -238,3 +239,48 @@ void evergreen_set_streamout_enable(struct r600_context 
*ctx, unsigned buffer_en
r600_write_context_reg(cs, R_028B94_VGT_STRMOUT_CONFIG, 
S_028B94_STREAMOUT_0_EN(0));
}
 }
+
+void evergreen_dma_copy(struct r600_context *rctx,
+   struct pipe_resource *dst,
+   struct pipe_resource *src,
+   unsigned long dst_offset,
+   unsigned long src_offset,
+   unsigned long size)
+{
+   struct radeon_winsys_cs *cs = rctx-rings.dma.cs;
+   unsigned i, ncopy, csize, sub_cmd, shift;
+   struct r600_resource *rdst = (struct r600_resource*)dst;
+   struct r600_resource *rsrc = (struct r600_resource*)src;
+
+   /* make sure that the dma ring is only one active */
+   rctx-rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
+   dst_offset += r600_resource_va(rctx-screen-screen, dst);
+   src_offset += r600_resource_va(rctx-screen-screen, src);
+
+   /* see if we use dword or byte copy */
+   if (!(dst_offset  0x3)  !(src_offset  0x3)  !(size  0x3)) {
+   size = 2;
+   sub_cmd = 0x00;
+   shift = 2;
+   } else {
+   sub_cmd = 0x40;
+   shift = 0;
+   }
+   ncopy = (size / 0x000f) + !!(size % 0x000f);
+
+   r600_need_dma_space(rctx, ncopy * 5);
+   for (i = 0; i  ncopy; i++) {
+   csize = size  0x000f ? size : 0x000f;
+   /* emit reloc before writting cs so that cs is always in 
consistent state */
+   r600_context_bo_reloc(rctx, rctx-rings.dma, rsrc, 
RADEON_USAGE_READ);
+   r600_context_bo_reloc(rctx, rctx-rings.dma, rdst, 
RADEON_USAGE_WRITE);
+   cs-buf[cs-cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, 
csize);
+   cs-buf[cs-cdw++] = dst_offset  0x;
+   cs-buf[cs-cdw++] = src_offset  0x;
+   cs-buf[cs-cdw++] = (dst_offset  32UL)  0xff;
+   cs-buf[cs-cdw++] = (src_offset  32UL)  0xff;
+   dst_offset += csize  shift;
+   src_offset += csize  shift;
+   size -= csize;
+   }
+}
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 86e2c81..5c22e24 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -30,6 +30,20 @@
 #include util/u_framebuffer.h
 #include util/u_dual_blend.h
 #include evergreen_compute.h
+#include util/u_math.h
+
+static INLINE unsigned evergreen_array_mode(unsigned mode)
+{
+   switch (mode) {
+   case RADEON_SURF_MODE_LINEAR_ALIGNED:   return 
V_028C70_ARRAY_LINEAR_ALIGNED;
+   break;
+   case RADEON_SURF_MODE_1D:   return 
V_028C70_ARRAY_1D_TILED_THIN1;
+   break;
+   case RADEON_SURF_MODE_2D:   return 
V_028C70_ARRAY_2D_TILED_THIN1;
+   default:
+   case RADEON_SURF_MODE_LINEAR:   return 
V_028C70_ARRAY_LINEAR_GENERAL;
+   }
+}
 
 static uint32_t eg_num_banks(uint32_t nbanks)
 {
@@ -3445,3 +3459,190 @@ void evergreen_update_db_shader_control(struct 
r600_context * rctx)
rctx-db_misc_state.atom.dirty = true;
}
 }
+
+static void evergreen_dma_copy_tile(struct r600_context *rctx,
+   struct pipe_resource *dst,
+   unsigned dst_level,
+   unsigned dst_x,
+ 

[Mesa-dev] [RFC] r600g multi ring

2013-01-07 Thread j . glisse
So the first patch is the winsys change, while the second patch implements multi
ring in the r600g driver. It uses a stack to keep track of the order in which
rings must be submitted. It will only pop the necessary entries from the stack
depending on the current request.

I think this addresses all concerns from the previous patch. I am testing it
with dma (third patch not included here).

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeon/winsys: add dma ring support to winsys

2013-01-07 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_context.c   |   2 +-
 src/gallium/drivers/r600/r600_pipe.c  |   2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c  |   2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |   2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 104 +++---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |   2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   6 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  21 -
 8 files changed, 100 insertions(+), 41 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c 
b/src/gallium/drivers/r300/r300_context.c
index b498454..f0d738e 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -376,7 +376,7 @@ struct pipe_context* r300_create_context(struct 
pipe_screen* screen,
  sizeof(struct pipe_transfer), 64,
  UTIL_SLAB_SINGLETHREADED);
 
-r300-cs = rws-cs_create(rws);
+r300-cs = rws-cs_create(rws, RING_GFX);
 if (r300-cs == NULL)
 goto fail;
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 29ef988..7c4ec44 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -289,7 +289,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
goto fail;
}
 
-   rctx-cs = rctx-ws-cs_create(rctx-ws);
+   rctx-cs = rctx-ws-cs_create(rctx-ws, RING_GFX);
rctx-ws-cs_set_flush_callback(rctx-cs, r600_flush_from_winsys, rctx);
 
rctx-uploader = u_upload_create(rctx-context, 1024 * 1024, 256,
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c 
b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index d66e30f..cfa1ff7 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -222,7 +222,7 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
case TAHITI:
si_init_state_functions(rctx);
LIST_INITHEAD(rctx-active_query_list);
-   rctx-cs = rctx-ws-cs_create(rctx-ws);
+   rctx-cs = rctx-ws-cs_create(rctx-ws, RING_GFX);
rctx-max_db = 8;
si_init_config(rctx);
break;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 897e962..6daafc3 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle 
*buf,
 } else {
 /* Try to avoid busy-waiting in radeon_bo_wait. */
 if (p_atomic_read(bo-num_active_ioctls))
-radeon_drm_cs_sync_flush(cs);
+radeon_drm_cs_sync_flush(rcs);
 }
 
 radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index c5e7f1e..5e2c471 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -90,6 +90,10 @@
 #define RADEON_CS_RING_COMPUTE  1
 #endif
 
+#ifndef RADEON_CS_RING_DMA
+#define RADEON_CS_RING_DMA  2
+#endif
+
 #ifndef RADEON_CS_END_OF_FRAME
 #define RADEON_CS_END_OF_FRAME  0x04
 #endif
@@ -161,7 +165,7 @@ static void radeon_destroy_cs_context(struct 
radeon_cs_context *csc)
 DEBUG_GET_ONCE_BOOL_OPTION(thread, RADEON_THREAD, TRUE)
 static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param);
 
-static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
+static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys 
*rws, enum ring_type ring_type)
 {
 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
 struct radeon_drm_cs *cs;
@@ -189,6 +193,7 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct 
radeon_winsys *rws)
 cs-csc = cs-csc1;
 cs-cst = cs-csc2;
 cs-base.buf = cs-csc-buf;
+cs-base.ring_type = ring_type;
 
 p_atomic_inc(ws-num_cs);
 if (cs-ws-num_cpus  1  debug_get_option_thread())
@@ -246,24 +251,34 @@ int radeon_get_reloc(struct radeon_cs_context *csc, 
struct radeon_bo *bo)
 return -1;
 }
 
-static unsigned radeon_add_reloc(struct radeon_cs_context *csc,
+static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
  struct radeon_bo *bo,
  enum radeon_bo_usage usage,
  enum radeon_bo_domain domains,
  enum radeon_bo_domain *added_domains)
 {
+struct radeon_cs_context *csc = cs-csc;
 struct 

[Mesa-dev] [PATCH 1/3] r600g/radeon/winsys: indentation cleanup

2013-01-04 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_pipe.c  | 18 +-
 src/gallium/drivers/r600/r600_pipe.h  |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  3 +--
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  2 +-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 65dcbf8..e9d5e0a 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -290,21 +290,21 @@ static struct pipe_context *r600_create_context(struct 
pipe_screen *screen, void
rctx-cs = rctx-ws-cs_create(rctx-ws);
rctx-ws-cs_set_flush_callback(rctx-cs, r600_flush_from_winsys, rctx);
 
-rctx-uploader = u_upload_create(rctx-context, 1024 * 1024, 256,
- PIPE_BIND_INDEX_BUFFER |
- PIPE_BIND_CONSTANT_BUFFER);
-if (!rctx-uploader)
-goto fail;
+   rctx-uploader = u_upload_create(rctx-context, 1024 * 1024, 256,
+   PIPE_BIND_INDEX_BUFFER |
+   PIPE_BIND_CONSTANT_BUFFER);
+   if (!rctx-uploader)
+   goto fail;
 
rctx-allocator_fetch_shader = u_suballocator_create(rctx-context, 64 
* 1024, 256,
 0, 
PIPE_USAGE_STATIC, FALSE);
-if (!rctx-allocator_fetch_shader)
-goto fail;
+   if (!rctx-allocator_fetch_shader)
+   goto fail;
 
rctx-allocator_so_filled_size = u_suballocator_create(rctx-context, 
4096, 4,
-   0, 
PIPE_USAGE_STATIC, TRUE);
+   0, 
PIPE_USAGE_STATIC, TRUE);
 if (!rctx-allocator_so_filled_size)
-goto fail;
+   goto fail;
 
rctx-blitter = util_blitter_create(rctx-context);
if (rctx-blitter == NULL)
diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 6b7c053..934a6f5 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -408,7 +408,7 @@ struct r600_context {
struct radeon_winsys*ws;
struct radeon_winsys_cs *cs;
struct blitter_context  *blitter;
-   struct u_upload_mgr *uploader;
+   struct u_upload_mgr *uploader;
struct u_suballocator   *allocator_so_filled_size;
struct u_suballocator   *allocator_fetch_shader;
struct util_slab_mempoolpool_transfers;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 07e92c5..897e962 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -802,8 +802,7 @@ static void radeon_bo_set_tiling(struct pb_buffer *_buf,
 sizeof(args));
 }
 
-static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(
-struct pb_buffer *_buf)
+static struct radeon_winsys_cs_handle *radeon_drm_get_cs_handle(struct 
pb_buffer *_buf)
 {
 /* return radeon_bo. */
 return (struct radeon_winsys_cs_handle*)get_radeon_bo(_buf);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 6336d3a..286eb6a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -33,7 +33,7 @@
 struct radeon_cs_context {
 uint32_tbuf[RADEON_MAX_CMDBUF_DWORDS];
 
-int fd;
+int fd;
 struct drm_radeon_cscs;
 struct drm_radeon_cs_chunk  chunks[3];
 uint64_tchunk_array[3];
-- 
1.7.11.7

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeon/winsys: move radeon family/class identification to winsys

2013-01-04 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Upcoming async dma support relies on the winsys knowing about GPU families.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_chipset.c   |  57 +--
 src/gallium/drivers/r300/r300_chipset.h   |  27 --
 src/gallium/drivers/r300/r300_emit.c  |   4 +-
 src/gallium/drivers/r300/r300_query.c |   2 +-
 src/gallium/drivers/r300/r300_texture_desc.c  |  12 +--
 src/gallium/drivers/r600/r600.h   |  37 ---
 src/gallium/drivers/r600/r600_asm.c   |   5 +-
 src/gallium/drivers/r600/r600_pipe.c  |  32 ++-
 src/gallium/drivers/r600/r600_shader.c|   1 +
 src/gallium/drivers/radeonsi/r600.h   |  12 ---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  96 ---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |   6 +-
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 112 ++
 13 files changed, 227 insertions(+), 176 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_chipset.c 
b/src/gallium/drivers/r300/r300_chipset.c
index beaa1f4..11061ed 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include r300_chipset.h
+#include ../../winsys/radeon/drm/radeon_winsys.h
 
 #include util/u_debug.h
 #include util/u_memory.h
@@ -62,7 +63,7 @@ void r300_parse_chipset(uint32_t pci_id, struct 
r300_capabilities* caps)
 switch (pci_id) {
 #define CHIPSET(pci_id, name, chipfamily) \
 case pci_id: \
-caps-family = CHIP_FAMILY_##chipfamily; \
+caps-family = CHIP_##chipfamily; \
 break;
 #include pci_ids/r300_pci_ids.h
 #undef CHIPSET
@@ -81,71 +82,71 @@ void r300_parse_chipset(uint32_t pci_id, struct 
r300_capabilities* caps)
 
 
 switch (caps-family) {
-case CHIP_FAMILY_R300:
-case CHIP_FAMILY_R350:
+case CHIP_R300:
+case CHIP_R350:
 caps-high_second_pipe = TRUE;
 caps-num_vert_fpus = 4;
 caps-hiz_ram = R300_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_RV350:
-case CHIP_FAMILY_RV370:
+case CHIP_RV350:
+case CHIP_RV370:
 caps-high_second_pipe = TRUE;
 caps-num_vert_fpus = 2;
 caps-zmask_ram = RV3xx_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_RV380:
+case CHIP_RV380:
 caps-high_second_pipe = TRUE;
 caps-num_vert_fpus = 2;
 caps-hiz_ram = R300_HIZ_LIMIT;
 caps-zmask_ram = RV3xx_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_RS400:
-case CHIP_FAMILY_RS600:
-case CHIP_FAMILY_RS690:
-case CHIP_FAMILY_RS740:
+case CHIP_RS400:
+case CHIP_RS600:
+case CHIP_RS690:
+case CHIP_RS740:
 break;
 
-case CHIP_FAMILY_RC410:
-case CHIP_FAMILY_RS480:
+case CHIP_RC410:
+case CHIP_RS480:
 caps-zmask_ram = RV3xx_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_R420:
-case CHIP_FAMILY_R423:
-case CHIP_FAMILY_R430:
-case CHIP_FAMILY_R480:
-case CHIP_FAMILY_R481:
-case CHIP_FAMILY_RV410:
+case CHIP_R420:
+case CHIP_R423:
+case CHIP_R430:
+case CHIP_R480:
+case CHIP_R481:
+case CHIP_RV410:
 caps-num_vert_fpus = 6;
 caps-hiz_ram = R300_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_R520:
+case CHIP_R520:
 caps-num_vert_fpus = 8;
 caps-hiz_ram = R300_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_RV515:
+case CHIP_RV515:
 caps-num_vert_fpus = 2;
 caps-hiz_ram = R300_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_RV530:
+case CHIP_RV530:
 caps-num_vert_fpus = 5;
 caps-hiz_ram = RV530_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
 break;
 
-case CHIP_FAMILY_R580:
-case CHIP_FAMILY_RV560:
-case CHIP_FAMILY_RV570:
+case CHIP_R580:
+case CHIP_RV560:
+case CHIP_RV570:
 caps-num_vert_fpus = 8;
 caps-hiz_ram = RV530_HIZ_LIMIT;
 caps-zmask_ram = PIPE_ZMASK_SIZE;
@@ -153,12 +154,12 @@ void r300_parse_chipset(uint32_t pci_id, struct 
r300_capabilities* caps)
 }
 
 caps-num_tex_units = 16;
-caps-is_r400 = caps-family = CHIP_FAMILY_R420  caps-family  
CHIP_FAMILY_RV515;
-caps-is_r500 = caps-family = CHIP_FAMILY_RV515;
-caps-is_rv350 = caps-family = CHIP_FAMILY_RV350;
+caps-is_r400 = caps-family = CHIP_R420  caps-family  CHIP_RV515;
+caps-is_r500 = caps-family = CHIP_RV515;
+caps-is_rv350 = caps-family = CHIP_RV350;
 caps-z_compress = caps-is_rv350 ? R300_ZCOMP_8X8 : R300_ZCOMP_4X4;
 caps-dxtc_swizzle = caps-is_r400 || caps-is_r500;
-caps-has_us_format = caps-family == 

[Mesa-dev] [PATCH 3/3] radeon/winsys: add async dma infrastructure

2013-01-04 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

The design takes advantage of the fact that the kernel will emit a
semaphore when a buffer is referenced by different rings. So the only
thing we need to do to enforce synchronization between the dma and
gfx/compute rings is to make sure that we never reference the same bo
at the same time on the dma and gfx rings.

This is achieved by tracking relocations: when we add a relocation
to the dma ring for a bo, we first check whether the bo has an active
relocation on the gfx ring. If it does, we flush the gfx ring.
We do the same when adding a bo to the gfx ring: we check that it does
not have a relocation on the dma ring, and if it has one we flush the
dma ring.

This patch also simplifies the helper query function used to know whether
a bo has pending write/read commands.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r300/r300_emit.c   |  21 +-
 src/gallium/drivers/r300/r300_flush.c  |   7 +-
 src/gallium/drivers/r600/evergreen_hw_context.c|  39 +++
 src/gallium/drivers/r600/evergreend.h  |  16 ++
 src/gallium/drivers/r600/r600.h|  13 +
 src/gallium/drivers/r600/r600_blit.c   |  94 +--
 src/gallium/drivers/r600/r600_hw_context.c |  44 +++-
 src/gallium/drivers/r600/r600_pipe.c   |  13 +-
 src/gallium/drivers/r600/r600_pipe.h   |   2 +-
 src/gallium/drivers/r600/r600_texture.c|   2 +-
 src/gallium/drivers/r600/r600d.h   |  16 ++
 src/gallium/drivers/radeonsi/r600_hw_context.c |   2 +-
 .../drivers/radeonsi/r600_hw_context_priv.h|   2 +-
 src/gallium/drivers/radeonsi/r600_texture.c|   2 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c   |  13 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c  |  10 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h  |   2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c  | 270 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h  |  40 ++-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  |   6 +
 src/gallium/winsys/radeon/drm/radeon_winsys.h  |  28 ++-
 21 files changed, 509 insertions(+), 133 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_emit.c 
b/src/gallium/drivers/r300/r300_emit.c
index d1ed4b3..c824821 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1184,7 +1184,8 @@ validate:
 assert(tex  tex-buf  cbuf is marked, but NULL!);
 r300-rws-cs_add_reloc(r300-cs, tex-cs_buf,
 RADEON_USAGE_READWRITE,
-r300_surface(fb-cbufs[i])-domain);
+r300_surface(fb-cbufs[i])-domain,
+RADEON_RING_DMA);
 }
 /* ...depth buffer... */
 if (fb-zsbuf) {
@@ -1192,7 +1193,8 @@ validate:
 assert(tex  tex-buf  zsbuf is marked, but NULL!);
 r300-rws-cs_add_reloc(r300-cs, tex-cs_buf,
 RADEON_USAGE_READWRITE,
-r300_surface(fb-zsbuf)-domain);
+r300_surface(fb-zsbuf)-domain,
+RADEON_RING_DMA);
 }
 }
 if (r300-textures_state.dirty) {
@@ -1204,18 +1206,21 @@ validate:
 
 tex = r300_resource(texstate-sampler_views[i]-base.texture);
 r300-rws-cs_add_reloc(r300-cs, tex-cs_buf, RADEON_USAGE_READ,
-tex-domain);
+tex-domain,
+RADEON_RING_DMA);
 }
 }
 /* ...occlusion query buffer... */
 if (r300-query_current)
 r300-rws-cs_add_reloc(r300-cs, r300-query_current-cs_buf,
-RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
+RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT,
+RADEON_RING_DMA);
 /* ...vertex buffer for SWTCL path... */
 if (r300-vbo)
 r300-rws-cs_add_reloc(r300-cs, r300_resource(r300-vbo)-cs_buf,
 RADEON_USAGE_READ,
-r300_resource(r300-vbo)-domain);
+r300_resource(r300-vbo)-domain,
+RADEON_RING_DMA);
 /* ...vertex buffers for HWTCL path... */
 if (do_validate_vertex_buffers  r300-vertex_arrays_dirty) {
 struct pipe_vertex_buffer *vbuf = r300-vertex_buffer;
@@ -1230,14 +1235,16 @@ validate:
 
 r300-rws-cs_add_reloc(r300-cs, r300_resource(buf)-cs_buf,
 RADEON_USAGE_READ,
-r300_resource(buf)-domain);
+r300_resource(buf)-domain,
+RADEON_RING_DMA);
 }
 }
 /* ...and index buffer for HWTCL path. */
 if 

[Mesa-dev] [PATCH] r600g: add cs tracing infrastructure for lockup pin pointing

2012-12-19 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This is a build-time option: you need to set R600_TRACE_CS to 1, and it
will print to stderr every cs along with the cs trace point value, which
gives the last offset into a cs processed by the GPU.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_hw_context.c  | 41 +
 src/gallium/drivers/r600/r600_hw_context_priv.h |  5 +--
 src/gallium/drivers/r600/r600_pipe.c| 20 
 src/gallium/drivers/r600/r600_pipe.h| 16 ++
 src/gallium/drivers/r600/r600_state_common.c| 26 
 5 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index cdd31a4..6c8cb9d 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -27,6 +27,7 @@
 #include r600d.h
 #include util/u_memory.h
 #include errno.h
+#include unistd.h
 
 /* Get backends mask */
 void r600_get_backend_mask(struct r600_context *ctx)
@@ -369,6 +370,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
for (i = 0; i  R600_NUM_ATOMS; i++) {
if (ctx-atoms[i]  ctx-atoms[i]-dirty) {
num_dw += ctx-atoms[i]-num_dw;
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   num_dw += R600_TRACE_CS_DWORDS;
+   }
+#endif
}
}
 
@@ -376,6 +382,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
 
/* The upper-bound of how much space a draw command would take. 
*/
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   num_dw += R600_TRACE_CS_DWORDS;
+   }
+#endif
}
 
/* Count in queries_suspend. */
@@ -717,7 +728,37 @@ void r600_context_flush(struct r600_context *ctx, unsigned 
flags)
}
 
/* Flush the CS. */
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  cs-cdw; i++) {
+   fprintf(stderr, [%4d] [%5d] 0x%08x\n, 
rscreen-cs_count, i, cs-buf[i]);
+   }
+   rscreen-cs_count++;
+   }
+#endif
ctx-ws-cs_flush(ctx-cs, flags);
+#if R600_TRACE_CS
+   if (ctx-screen-trace_bo) {
+   struct r600_screen *rscreen = ctx-screen;
+   unsigned i;
+
+   for (i = 0; i  10; i++) {
+   usleep(5);
+   if (!ctx-ws-buffer_is_busy(rscreen-trace_bo-buf, 
RADEON_USAGE_READWRITE)) {
+   break;
+   }
+   }
+   if (i == 10) {
+   fprintf(stderr, timeout on cs lockup likely happen at 
cs %d dw %d\n,
+   rscreen-trace_ptr[1], rscreen-trace_ptr[0]);
+   } else {
+   fprintf(stderr, cs %d executed in %dms\n, 
rscreen-trace_ptr[1], i * 5);
+   }
+   }
+#endif
 
r600_begin_new_cs(ctx);
 }
diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h 
b/src/gallium/drivers/r600/r600_hw_context_priv.h
index 050c472..692e6ec 100644
--- a/src/gallium/drivers/r600/r600_hw_context_priv.h
+++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
@@ -29,8 +29,9 @@
 #include r600_pipe.h
 
 /* the number of CS dwords for flushing and drawing */
-#define R600_MAX_FLUSH_CS_DWORDS 12
-#define R600_MAX_DRAW_CS_DWORDS 34
+#define R600_MAX_FLUSH_CS_DWORDS   12
+#define R600_MAX_DRAW_CS_DWORDS34
+#define R600_TRACE_CS_DWORDS   7
 
 /* these flags are used in register flags and added into block flags */
 #define REG_FLAG_NEED_BO 1
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index e497744..7990400 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -723,6 +723,12 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
rscreen-ws-buffer_unmap(rscreen-fences.bo-cs_buf);
pipe_resource_reference((struct 
pipe_resource**)rscreen-fences.bo, NULL);
}
+#if R600_TRACE_CS
+   if (rscreen-trace_bo) {
+   rscreen-ws-buffer_unmap(rscreen-trace_bo-cs_buf);
+   pipe_resource_reference((struct 
pipe_resource**)rscreen-trace_bo, NULL);
+   }
+#endif
pipe_mutex_destroy(rscreen-fences.mutex);
 
rscreen-ws-destroy(rscreen-ws);
@@ -1042,5 +1048,19 @@ struct pipe_screen *r600_screen_create(struct 
radeon_winsys *ws)
 
rscreen-global_pool = compute_memory_pool_new(rscreen);
 
+#if R600_TRACE_CS
+   rscreen-cs_count = 0;
+  

[Mesa-dev] [PATCH] r600g: work around ddx over alignment

2012-12-18 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This forces surfaces allocated from the ddx to be considered as height
aligned on 8 and fixes the 1D-2D tiling transition that results from
this.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_texture.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 56e9b64..9373451 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -89,7 +89,8 @@ static int r600_init_surface(struct r600_screen *rscreen,
 struct radeon_surface *surface,
 const struct pipe_resource *ptex,
 unsigned array_mode,
-bool is_flushed_depth)
+bool is_flushed_depth,
+bool from_ddx)
 {
const struct util_format_description *desc =
util_format_description(ptex-format);
@@ -107,6 +108,10 @@ static int r600_init_surface(struct r600_screen *rscreen,
surface-array_size = 1;
surface-last_level = ptex-last_level;
 
+   if (from_ddx) {
+   surface-npix_y = align(surface-npix_y, 8);
+   }
+
if (rscreen-chip_class = EVERGREEN  !is_flushed_depth 
ptex-format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
surface-bpe = 4; /* stencil is allocated separately on 
evergreen */
@@ -539,7 +544,8 @@ struct pipe_resource *r600_texture_create(struct 
pipe_screen *screen,
}
 
r = r600_init_surface(rscreen, surface, templ, array_mode,
- templ-flags  R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+ templ-flags  R600_RESOURCE_FLAG_FLUSHED_DEPTH,
+ false);
if (r) {
return NULL;
}
@@ -627,7 +633,7 @@ struct pipe_resource *r600_texture_from_handle(struct 
pipe_screen *screen,
else
array_mode = V_038000_ARRAY_LINEAR_ALIGNED;
 
-   r = r600_init_surface(rscreen, surface, templ, array_mode, false);
+   r = r600_init_surface(rscreen, surface, templ, array_mode, false, 
true);
if (r) {
return NULL;
}
-- 
1.8.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] hyperz

2012-12-17 Thread j . glisse
So those were tested on evergreen (caicos, redwood, turks, barts) and on
rv740 and did not regress anything. I can't test other r6xx/r7xx as currently
mesa master triggers lockups on anything other than rv740.

I am gonna merge those by the end of this week.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g: rework flusing and synchronization pattern v6

2012-12-17 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization patterns.

v2-v4: history lost somewhere in outer space
v5: Fix compute size of flushing, use define for flags, update
worst case cs size requirement for flush, treat rs780 and
newer as r7xx when it comes to streamout.
v6: Fix num dw computation for framebuffer state, remove dead
code, use define instead of hardcoded value.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c   |   8 +-
 .../drivers/r600/evergreen_compute_internal.c  |   4 +-
 src/gallium/drivers/r600/evergreen_state.c |   4 +-
 src/gallium/drivers/r600/r600.h|  16 +-
 src/gallium/drivers/r600/r600_hw_context.c | 179 +++--
 src/gallium/drivers/r600/r600_hw_context_priv.h|   2 +-
 src/gallium/drivers/r600/r600_state.c  |  20 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  19 +--
 8 files changed, 90 insertions(+), 162 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 66b0cc6..ea75d80 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
 
/* The vertex instructions in the compute shaders use the texture cache,
 * so we need to invalidate it. */
-   rctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   rctx-flags |= R600_CONTEXT_GPU_FLUSH;
state-enabled_mask |= 1  vb_index;
state-dirty_mask |= 1  vb_index;
state-atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 */
r600_emit_command_buffer(ctx-cs, ctx-start_compute_cs_cmd);
 
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
r600_flush_emit(ctx);
 
/* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0x
 */
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_GPU_FLUSH;
r600_flush_emit(ctx);
 
 #if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
r600_write_value(cs, r600_context_bo_reloc(rctx, kernel-code_bo,
RADEON_USAGE_READ));
 
-   rctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   rctx-flags |= R600_CONTEXT_GPU_FLUSH;
 }
 
 static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index f7aebf2..94f556f 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -545,7 +545,7 @@ void evergreen_set_tex_resource(
 
util_format_get_blockwidth(tmp-resource.b.b.format) *
 view-base.texture-width0*height*depth;
 
-   pipe-ctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_GPU_FLUSH;
 
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -604,7 +604,7 @@ void evergreen_set_const_cache(
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = size;
 
-   pipe-ctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_GPU_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 996c1b4..58964c4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
uint32_t i, log_samples;
 
if (rctx-framebuffer.state.nr_cbufs) {
-   rctx-flags |= R600_CONTEXT_CB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
 
if (rctx-framebuffer.state.cbufs[0]-texture-nr_samples  1) {
rctx-flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx-framebuffer.state.zsbuf) {
-   rctx-flags |= R600_CONTEXT_DB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
}
 
util_copy_framebuffer_state(rctx-framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index d15cd52..c351982 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -182,17 +182,11 @@ struct r600_so_target {
unsignedso_index;
 };
 
-#define 

[Mesa-dev] [PATCH 2/2] r600g: add htile support v15

2012-12-17 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. For htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broke the cliprect change, this needs to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa, htile surface need to be initialized
to zero, somehow special casing first clear to not use fast clear
and thus initialize the htile surface with proper value does not
work in all case.
v14 Use resource not texture for htile buffer make the htile buffer
size computation easier and simpler. Disable preload on evergreen
as its still troublesome in some case
v15 Cleanup some comment and remove some left over

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 65 ++
 src/gallium/drivers/r600/evergreend.h  |  2 +
 src/gallium/drivers/r600/r600_blit.c   | 28 +
 src/gallium/drivers/r600/r600_hw_context.c |  1 +
 src/gallium/drivers/r600/r600_pipe.c   |  8 
 src/gallium/drivers/r600/r600_pipe.h   | 26 +++-
 src/gallium/drivers/r600/r600_resource.h   |  9 +
 src/gallium/drivers/r600/r600_state.c  | 57 --
 src/gallium/drivers/r600/r600_texture.c| 38 +
 src/gallium/drivers/r600/r600d.h   |  1 +
 10 files changed, 214 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 58964c4..032af78 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,18 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
 
+   surf-htile_enabled = 0;
+   /* use htile only for first level */
+   if (rtex-htile  !level) {
+   surf-htile_enabled = 1;
+   surf-db_htile_data_base = 0;
+   surf-db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+   S_028ABC_HTILE_HEIGHT(1) |
+   S_028ABC_LINEAR(1);
+   surf-db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   surf-db_preload_control = 0;
+   }
+
surf-depth_initialized = true;
 }
 
@@ -1625,6 +1637,16 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
rctx-poly_offset_state.zs_format = 
state-zsbuf-format;
rctx-poly_offset_state.atom.dirty = true;
}
+
+   if (rctx-db_state.rsurf != surf) {
+   rctx-db_state.rsurf = surf;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
+   }
+   } else if (rctx-db_state.rsurf) {
+   rctx-db_state.rsurf = NULL;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
}
 
if (rctx-cb_misc_state.nr_cbufs != state-nr_cbufs) {
@@ -2081,6 +2103,28 @@ static void evergreen_emit_cb_misc_state(struct 
r600_context *rctx, struct r600_
r600_write_value(cs, 0xf | (a-dual_src_blend ? ps_colormask : 0) | 
fb_colormask); /* R_02823C_CB_SHADER_MASK */
 }
 
+static 

[Mesa-dev] [PATCH 2/2] r600g: add htile support v16

2012-12-17 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. For htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broke the cliprect change, this needs to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa, htile surface need to be initialized
to zero, somehow special casing first clear to not use fast clear
and thus initialize the htile surface with proper value does not
work in all case.
v14 Use resource not texture for htile buffer make the htile buffer
size computation easier and simpler. Disable preload on evergreen
as its still troublesome in some case
v15 Cleanup some comment and remove some left over
v16 Define name for bit 20 of CP_COHER_CNTL

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 65 ++
 src/gallium/drivers/r600/evergreend.h  |  2 +
 src/gallium/drivers/r600/r600_blit.c   | 28 +
 src/gallium/drivers/r600/r600_hw_context.c |  7 ++--
 src/gallium/drivers/r600/r600_pipe.c   |  8 
 src/gallium/drivers/r600/r600_pipe.h   | 26 +++-
 src/gallium/drivers/r600/r600_resource.h   |  9 +
 src/gallium/drivers/r600/r600_state.c  | 57 --
 src/gallium/drivers/r600/r600_texture.c| 38 +
 src/gallium/drivers/r600/r600d.h   |  5 +++
 10 files changed, 221 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 58964c4..032af78 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,18 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
 
+   surf-htile_enabled = 0;
+   /* use htile only for first level */
+   if (rtex-htile  !level) {
+   surf-htile_enabled = 1;
+   surf-db_htile_data_base = 0;
+   surf-db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+   S_028ABC_HTILE_HEIGHT(1) |
+   S_028ABC_LINEAR(1);
+   surf-db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   surf-db_preload_control = 0;
+   }
+
surf-depth_initialized = true;
 }
 
@@ -1625,6 +1637,16 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
rctx-poly_offset_state.zs_format = 
state-zsbuf-format;
rctx-poly_offset_state.atom.dirty = true;
}
+
+   if (rctx-db_state.rsurf != surf) {
+   rctx-db_state.rsurf = surf;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
+   }
+   } else if (rctx-db_state.rsurf) {
+   rctx-db_state.rsurf = NULL;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
}
 
if (rctx-cb_misc_state.nr_cbufs != state-nr_cbufs) {
@@ -2081,6 +2103,28 @@ static void evergreen_emit_cb_misc_state(struct 
r600_context *rctx, struct r600_
r600_write_value(cs, 0xf | (a-dual_src_blend ? ps_colormask : 0) | 

[Mesa-dev] r600g: hyperz support

2012-12-13 Thread j . glisse
Ok, so this time it should be it. The following patches seem to behave properly.
I am still in the process of checking again that they don't regress anything;
I should be done Monday or Tuesday. If there are no objections by then, I
will commit them.

Note that you need the kernel patch for these, and that hyperz is enabled by
default; you can disable it by setting the R600_HYPERZ=0 environment variable.

On the performance side, quick benchmarks show improvements from 2% to 10%,
depending on the benchmark and on the GPU.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g: rework flusing and synchronization pattern v5

2012-12-13 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization patterns.

v2-v4: history lost somewhere in outer space
v5: Fix compute size of flushing, use define for flags, update
worst case cs size requirement for flush, treat rs780 and
newer as r7xx when it comes to streamout.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c   |   8 +-
 .../drivers/r600/evergreen_compute_internal.c  |   4 +-
 src/gallium/drivers/r600/evergreen_state.c |   4 +-
 src/gallium/drivers/r600/r600.h|  16 +-
 src/gallium/drivers/r600/r600_hw_context.c | 174 ++---
 src/gallium/drivers/r600/r600_hw_context_priv.h|   2 +-
 src/gallium/drivers/r600/r600_state.c  |  18 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  19 +--
 8 files changed, 84 insertions(+), 161 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 66b0cc6..ea75d80 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
 
/* The vertex instructions in the compute shaders use the texture cache,
 * so we need to invalidate it. */
-   rctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   rctx-flags |= R600_CONTEXT_GPU_FLUSH;
state-enabled_mask |= 1  vb_index;
state-dirty_mask |= 1  vb_index;
state-atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 */
r600_emit_command_buffer(ctx-cs, ctx-start_compute_cs_cmd);
 
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_WAIT_IDLE | R600_CONTEXT_FLUSH_AND_INV;
r600_flush_emit(ctx);
 
/* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0x
 */
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_GPU_FLUSH;
r600_flush_emit(ctx);
 
 #if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
r600_write_value(cs, r600_context_bo_reloc(rctx, kernel-code_bo,
RADEON_USAGE_READ));
 
-   rctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   rctx-flags |= R600_CONTEXT_GPU_FLUSH;
 }
 
 static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index f7aebf2..94f556f 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -545,7 +545,7 @@ void evergreen_set_tex_resource(
 
util_format_get_blockwidth(tmp-resource.b.b.format) *
 view-base.texture-width0*height*depth;
 
-   pipe-ctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_GPU_FLUSH;
 
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -604,7 +604,7 @@ void evergreen_set_const_cache(
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = size;
 
-   pipe-ctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_GPU_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 996c1b4..58964c4 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
uint32_t i, log_samples;
 
if (rctx-framebuffer.state.nr_cbufs) {
-   rctx-flags |= R600_CONTEXT_CB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
 
if (rctx-framebuffer.state.cbufs[0]-texture-nr_samples  1) {
rctx-flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx-framebuffer.state.zsbuf) {
-   rctx-flags |= R600_CONTEXT_DB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
}
 
util_copy_framebuffer_state(rctx-framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index d15cd52..c351982 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -182,17 +182,11 @@ struct r600_so_target {
unsignedso_index;
 };
 
-#define R600_CONTEXT_PS_PARTIAL_FLUSH  (1  0)
-#define R600_CONTEXT_CB_FLUSH  (1  1)
-#define 

[Mesa-dev] [PATCH 2/2] r600g: add htile support v14

2012-12-13 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. Force htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broke the cliprect change; this needs to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa. The htile surface needs to be initialized
to zero; special casing the first clear to not use fast clear (and
thus initialize the htile surface with proper values) does not
work in all cases.
v14 Use a resource, not a texture, for the htile buffer; this makes the
htile buffer size computation simpler. Disable preload on evergreen
as it's still troublesome in some cases.

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 65 ++
 src/gallium/drivers/r600/evergreend.h  |  2 +
 src/gallium/drivers/r600/r600_blit.c   | 28 +
 src/gallium/drivers/r600/r600_hw_context.c |  1 +
 src/gallium/drivers/r600/r600_pipe.c   | 10 +
 src/gallium/drivers/r600/r600_pipe.h   | 26 +++-
 src/gallium/drivers/r600/r600_resource.h   |  9 +
 src/gallium/drivers/r600/r600_state.c  | 58 --
 src/gallium/drivers/r600/r600_texture.c| 39 ++
 src/gallium/drivers/r600/r600d.h   |  1 +
 10 files changed, 218 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 58964c4..39de55b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,18 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
 
+   surf-htile_enabled = 0;
+   /* use htile only for first level */
+   if (rtex-htile  !level) {
+   surf-htile_enabled = 1;
+   surf-db_htile_data_base = 0;
+   surf-db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+   S_028ABC_HTILE_HEIGHT(1) |
+   S_028ABC_LINEAR(1);
+   surf-db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   surf-db_preload_control = 0;
+   }
+
surf-depth_initialized = true;
 }
 
@@ -1625,6 +1637,16 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
rctx-poly_offset_state.zs_format = 
state-zsbuf-format;
rctx-poly_offset_state.atom.dirty = true;
}
+
+   if (rctx-db_state.rsurf != surf) {
+   rctx-db_state.rsurf = surf;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
+   }
+   } else if (rctx-db_state.rsurf) {
+   rctx-db_state.rsurf = NULL;
+   rctx-db_state.atom.dirty = true;
+   rctx-db_misc_state.atom.dirty = true;
}
 
if (rctx-cb_misc_state.nr_cbufs != state-nr_cbufs) {
@@ -2081,6 +2103,28 @@ static void evergreen_emit_cb_misc_state(struct 
r600_context *rctx, struct r600_
r600_write_value(cs, 0xf | (a-dual_src_blend ? ps_colormask : 0) | 
fb_colormask); /* R_02823C_CB_SHADER_MASK */
 }
 
+static void evergreen_emit_db_state(struct r600_context 

[Mesa-dev] [RFC] r600g hyperz support

2012-12-06 Thread j . glisse
So I finally have something that doesn't seem to lock up (I ran in a loop several
things that used to lock up on various GPUs for over 24 hours without a single
lockup) or regress anything. It's a bundle deal: the first patch is needed for
lockup avoidance. Tested on:
rv610, rv635, rv670, rv710, rv730, rv740, rv770, cedar, redwood, barts
A slightly different version of patch 2 has also been tested on:
rs780, rs880, caicos, turks, barts le

It's not yet fully cooked, as I need to check again the htile buffer size
computation, which seems kind of wrong (I use a modified kernel to not choke on
it and to overallocate things). Otherwise the mesa bits are fully cooked; I am
sending this to get feedback on them.

I will go back to the htile computation, but this will require quite a lot of GPU
swapping.

For the curious, this is the result of looking at more than 12GB of fglrx command
stream ...

Cheers,
Jerome
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g: rework flusing and synchronization pattern v4

2012-12-06 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

This brings r600g almost in line with the closed source driver when
it comes to flushing and synchronization patterns.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c   |   8 +-
 .../drivers/r600/evergreen_compute_internal.c  |   4 +-
 src/gallium/drivers/r600/evergreen_state.c |   4 +-
 src/gallium/drivers/r600/r600.h|  16 +--
 src/gallium/drivers/r600/r600_hw_context.c | 154 -
 src/gallium/drivers/r600/r600_state.c  |  18 ++-
 src/gallium/drivers/r600/r600_state_common.c   |  19 ++-
 7 files changed, 61 insertions(+), 162 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 44831a7..33a5910 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -98,7 +98,7 @@ static void evergreen_cs_set_vertex_buffer(
 
/* The vertex instructions in the compute shaders use the texture cache,
 * so we need to invalidate it. */
-   rctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   rctx-flags |= R600_CONTEXT_FLUSH;
state-enabled_mask |= 1  vb_index;
state-dirty_mask |= 1  vb_index;
state-atom.dirty = true;
@@ -329,7 +329,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 */
r600_emit_command_buffer(ctx-cs, ctx-start_compute_cs_cmd);
 
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
 
/* Emit colorbuffers. */
@@ -409,7 +409,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
 
/* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0x
 */
-   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   ctx-flags |= R600_CONTEXT_FLUSH;
r600_flush_emit(ctx);
 
 #if 0
@@ -468,7 +468,7 @@ void evergreen_emit_cs_shader(
r600_write_value(cs, r600_context_bo_reloc(rctx, kernel-code_bo,
RADEON_USAGE_READ));
 
-   rctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   rctx-flags |= R600_CONTEXT_FLUSH;
 }
 
 static void evergreen_launch_grid(
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 7bc7fb4..187bcf1 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -538,7 +538,7 @@ void evergreen_set_tex_resource(
 
util_format_get_blockwidth(tmp-resource.b.b.format) *
 view-base.texture-width0*height*depth;
 
-   pipe-ctx-flags |= R600_CONTEXT_TEX_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_FLUSH;
 
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -597,7 +597,7 @@ void evergreen_set_const_cache(
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = size;
 
-   pipe-ctx-flags |= R600_CONTEXT_SHADERCONST_FLUSH;
+   pipe-ctx-flags |= R600_CONTEXT_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9b898cb..7bc4772 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1557,14 +1557,14 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
uint32_t i, log_samples;
 
if (rctx-framebuffer.state.nr_cbufs) {
-   rctx-flags |= R600_CONTEXT_CB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
 
if (rctx-framebuffer.state.cbufs[0]-texture-nr_samples  1) {
rctx-flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
}
}
if (rctx-framebuffer.state.zsbuf) {
-   rctx-flags |= R600_CONTEXT_DB_FLUSH;
+   rctx-flags |= R600_CONTEXT_WAIT_IDLE | 
R600_CONTEXT_FLUSH_AND_INV;
}
 
util_copy_framebuffer_state(rctx-framebuffer.state, state);
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7d43416..4060672 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -180,17 +180,11 @@ struct r600_so_target {
unsignedso_index;
 };
 
-#define R600_CONTEXT_PS_PARTIAL_FLUSH  (1  0)
-#define R600_CONTEXT_CB_FLUSH  (1  1)
-#define R600_CONTEXT_DB_FLUSH  (1  2)
-#define R600_CONTEXT_SHADERCONST_FLUSH (1  3)
-#define R600_CONTEXT_TEX_FLUSH (1  4)
-#define R600_CONTEXT_VTX_FLUSH (1  5)
-#define R600_CONTEXT_STREAMOUT_FLUSH   (1  6)
-#define R600_CONTEXT_WAIT_IDLE (1  7)
-#define 

[Mesa-dev] [PATCH 2/2] r600g: add htile support v13

2012-12-06 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. Force htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value
v10 Disable hyperz for texture array making test simpler. Force
db_misc_state update when no depth buffer is bound. Remove
unused variable, rename depth_clearstencil to depth_clear.
Don't allocate htile surface for flushed depth. Something
broke the cliprect change; this needs to be investigated.
v11 Rebase on top of newer mesa
v12 Rebase on top of newer mesa
v13 Rebase on top of newer mesa. The htile surface needs to be initialized
to zero; special casing the first clear to not use fast clear (and
thus initialize the htile surface with proper values) does not
work in all cases.

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 83 +++---
 src/gallium/drivers/r600/evergreend.h  |  2 +
 src/gallium/drivers/r600/r600_blit.c   | 28 ++
 src/gallium/drivers/r600/r600_hw_context.c |  1 +
 src/gallium/drivers/r600/r600_pipe.c   |  9 
 src/gallium/drivers/r600/r600_pipe.h   | 26 ++
 src/gallium/drivers/r600/r600_resource.h   |  9 
 src/gallium/drivers/r600/r600_state.c  | 59 +++--
 src/gallium/drivers/r600/r600_texture.c| 50 ++
 src/gallium/drivers/r600/r600d.h   |  1 +
 10 files changed, 247 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 7bc4772..e7f5c44 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1545,6 +1545,36 @@ static void evergreen_init_depth_surface(struct 
r600_context *rctx,
S_028044_FORMAT(V_028044_STENCIL_8);
}
 
+   surf-htile_enabled = 0;
+   /* use htile only for first level */
+   if (rtex-htile  !level) {
+   unsigned preload_x, preload_y;
+
+   surf-htile_enabled = 1;
+   surf-db_htile_data_base = 
rtex-htile-surface.level[level].offset;
+   surf-db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
+   S_028ABC_HTILE_HEIGHT(1) |
+   S_028ABC_LINEAR(1) |
+   S_028ABC_FULL_CACHE(1);
+   if (rtex-surface.level[level].nblk_x = 512) {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(4);
+   } else if (rtex-surface.level[level].nblk_x = 1024) {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(2);
+   } else {
+   surf-db_htile_surface |= 
S_028ABC_HTILE_USES_PRELOAD_WIN(1);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_WIDTH(16);
+   surf-db_htile_surface |= S_028ABC_PREFETCH_HEIGHT(0);
+   }
+   surf-db_depth_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   preload_x = align(rtex-surface.level[level].nblk_x, 32)  5;
+   preload_y = align(rtex-surface.level[level].nblk_y, 32)  5;
+   surf-db_preload_control = S_028AC8_MAX_X(preload_x) | 
S_028AC8_MAX_Y(preload_y);
+   }
+

[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu v2

2012-10-30 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

On r6xx/r7xx, shader resource management needs to make sure that the
shader does not go over the gpr register limit. Each specific
asic has a maximum number of registers that can be split between shader
stages. For each stage, the shader must not use more registers than the
limit programmed.

v2: Print an error message when discarding draw. Don't add another
boolean to context structure, but rather propagate the discard
boolean through the call chain.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_pipe.h |  2 +-
 src/gallium/drivers/r600/r600_state.c| 67 +++-
 src/gallium/drivers/r600/r600_state_common.c | 27 ++-
 3 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index ff2a5fd..3edef40 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -595,7 +595,7 @@ void *r600_create_db_flush_dsa(struct r600_context *rctx);
 void *r600_create_resolve_blend(struct r600_context *rctx);
 void *r700_create_resolve_blend(struct r600_context *rctx);
 void *r600_create_decompress_blend(struct r600_context *rctx);
-void r600_adjust_gprs(struct r600_context *rctx);
+bool r600_adjust_gprs(struct r600_context *rctx);
 boolean r600_is_format_supported(struct pipe_screen *screen,
 enum pipe_format format,
 enum pipe_texture_target target,
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7d07008..76fe44d 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2187,36 +2187,61 @@ void r600_init_state_functions(struct r600_context 
*rctx)
 }
 
 /* Adjust GPR allocation on R6xx/R7xx */
-void r600_adjust_gprs(struct r600_context *rctx)
+bool r600_adjust_gprs(struct r600_context *rctx)
 {
-   unsigned num_ps_gprs = rctx-default_ps_gprs;
-   unsigned num_vs_gprs = rctx-default_vs_gprs;
+   unsigned num_ps_gprs = rctx-ps_shader-current-shader.bc.ngpr;
+   unsigned num_vs_gprs = rctx-vs_shader-current-shader.bc.ngpr;
+   unsigned new_num_ps_gprs = num_ps_gprs;
+   unsigned new_num_vs_gprs = num_vs_gprs;
+   unsigned cur_num_ps_gprs = 
G_008C04_NUM_PS_GPRS(rctx-config_state.sq_gpr_resource_mgmt_1);
+   unsigned cur_num_vs_gprs = 
G_008C04_NUM_VS_GPRS(rctx-config_state.sq_gpr_resource_mgmt_1);
+   unsigned def_num_ps_gprs = rctx-default_ps_gprs;
+   unsigned def_num_vs_gprs = rctx-default_vs_gprs;
+   unsigned def_num_clause_temp_gprs = rctx-r6xx_num_clause_temp_gprs;
+   /* hardware will reserve twice num_clause_temp_gprs */
+   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
def_num_clause_temp_gprs * 2;
unsigned tmp;
-   int diff;
 
-   if (rctx-ps_shader-current-shader.bc.ngpr  rctx-default_ps_gprs) {
-   diff = rctx-ps_shader-current-shader.bc.ngpr - 
rctx-default_ps_gprs;
-   num_vs_gprs -= diff;
-   num_ps_gprs += diff;
-   }
-
-   if (rctx-vs_shader-current-shader.bc.ngpr  rctx-default_vs_gprs)
-   {
-   diff = rctx-vs_shader-current-shader.bc.ngpr - 
rctx-default_vs_gprs;
-   num_ps_gprs -= diff;
-   num_vs_gprs += diff;
+   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must = to max_gprs 
*/
+   if (new_num_ps_gprs  cur_num_ps_gprs || new_num_vs_gprs  
cur_num_vs_gprs) {
+   /* try to use switch back to default */
+   if (new_num_ps_gprs  def_num_ps_gprs || new_num_vs_gprs  
def_num_vs_gprs) {
+   /* always privilege vs stage so that at worst we have 
the
+* pixel stage producing wrong output (not the vertex
+* stage) */
+   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
def_num_clause_temp_gprs * 2);
+   new_num_vs_gprs = num_vs_gprs;
+   } else {
+   new_num_ps_gprs = def_num_ps_gprs;
+   new_num_vs_gprs = def_num_vs_gprs;
+   }
+   } else {
+   return true;
}
 
-   tmp = 0;
-   tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
-   tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-   tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx-r6xx_num_clause_temp_gprs);
-
-   if (tmp != rctx-config_state.sq_gpr_resource_mgmt_1) {
+   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value =
+* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
+* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
+* it will lockup. So in this case just discard the draw command
+* and don't change the current gprs repartitions.
+*/
+   if (num_ps_gprs  new_num_ps_gprs || num_vs_gprs  

[Mesa-dev] [PATCH] r600g: avoid shader needing too many gpr to lockup the gpu

2012-10-26 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

On r6xx/r7xx, shader resource management needs to make sure that the
shader does not go over the gpr register limit. Each specific
asic has a maximum number of registers that can be split between shader
stages. For each stage, the shader must not use more registers than the
limit programmed.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_pipe.h |  1 +
 src/gallium/drivers/r600/r600_state.c| 60 +++-
 src/gallium/drivers/r600/r600_state_common.c | 22 +-
 3 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index ff2a5fd..2045af3 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -363,6 +363,7 @@ struct r600_context {
enum chip_class chip_class;
boolean has_vertex_cache;
boolean keep_tiling_flags;
+   booldiscard_draw;
unsigneddefault_ps_gprs, default_vs_gprs;
unsignedr6xx_num_clause_temp_gprs;
unsignedbackend_mask;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index 7d07008..43af934 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2189,30 +2189,54 @@ void r600_init_state_functions(struct r600_context 
*rctx)
 /* Adjust GPR allocation on R6xx/R7xx */
 void r600_adjust_gprs(struct r600_context *rctx)
 {
-   unsigned num_ps_gprs = rctx-default_ps_gprs;
-   unsigned num_vs_gprs = rctx-default_vs_gprs;
+   unsigned num_ps_gprs = rctx-ps_shader-current-shader.bc.ngpr;
+   unsigned num_vs_gprs = rctx-vs_shader-current-shader.bc.ngpr;
+   unsigned new_num_ps_gprs = num_ps_gprs;
+   unsigned new_num_vs_gprs = num_vs_gprs;
+   unsigned cur_num_ps_gprs = 
G_008C04_NUM_PS_GPRS(rctx-config_state.sq_gpr_resource_mgmt_1);
+   unsigned cur_num_vs_gprs = 
G_008C04_NUM_VS_GPRS(rctx-config_state.sq_gpr_resource_mgmt_1);
+   unsigned def_num_ps_gprs = rctx-default_ps_gprs;
+   unsigned def_num_vs_gprs = rctx-default_vs_gprs;
+   unsigned def_num_clause_temp_gprs = rctx-r6xx_num_clause_temp_gprs;
+   /* hardware will reserve twice num_clause_temp_gprs */
+   unsigned max_gprs = def_num_ps_gprs + def_num_vs_gprs + 
def_num_clause_temp_gprs * 2;
unsigned tmp;
-   int diff;
 
-   if (rctx-ps_shader-current-shader.bc.ngpr  rctx-default_ps_gprs) {
-   diff = rctx-ps_shader-current-shader.bc.ngpr - 
rctx-default_ps_gprs;
-   num_vs_gprs -= diff;
-   num_ps_gprs += diff;
+   /* the sum of all SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS must = to max_gprs 
*/
+   if (new_num_ps_gprs  cur_num_ps_gprs || new_num_vs_gprs  
cur_num_vs_gprs) {
+   /* try to use switch back to default */
+   if (new_num_ps_gprs  def_num_ps_gprs || new_num_vs_gprs  
def_num_vs_gprs) {
+   /* always privilege vs stage so that at worst we have 
the
+* pixel stage producing wrong output (not the vertex
+* stage) */
+   new_num_ps_gprs = max_gprs - (new_num_vs_gprs + 
def_num_clause_temp_gprs * 2);
+   new_num_vs_gprs = num_vs_gprs;
+   } else {
+   new_num_ps_gprs = def_num_ps_gprs;
+   new_num_vs_gprs = def_num_vs_gprs;
+   }
+   } else {
+   rctx-discard_draw = false;
+   return;
}
 
-   if (rctx-vs_shader-current-shader.bc.ngpr  rctx-default_vs_gprs)
-   {
-   diff = rctx-vs_shader-current-shader.bc.ngpr - 
rctx-default_vs_gprs;
-   num_ps_gprs -= diff;
-   num_vs_gprs += diff;
+   /* SQ_PGM_RESOURCES_*.NUM_GPRS must always be program to a value =
+* SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS otherwise the GPU will lockup
+* Also if a shader use more gpr than SQ_GPR_RESOURCE_MGMT*.NUM_*_GPRS
+* it will lockup. So in this case just discard the draw command
+* and don't change the current gprs repartitions.
+*/
+   rctx-discard_draw = false;
+   if (num_ps_gprs  new_num_ps_gprs || num_vs_gprs  new_num_vs_gprs) {
+   rctx-discard_draw = true;
+   return;
}
 
-   tmp = 0;
-   tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs);
-   tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs);
-   tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(rctx-r6xx_num_clause_temp_gprs);
-
-   if (tmp != rctx-config_state.sq_gpr_resource_mgmt_1) {
+   /* in some case we endup recomputing the current value */
+   tmp = S_008C04_NUM_PS_GPRS(new_num_ps_gprs) |
+   

[Mesa-dev] [PATCH] r600g: avoid GPU doing constant preload from random address

2012-09-07 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

A previous command stream might have set any of the constant buffers,
and the previous address might no longer be valid; thus the GPU might
preload constants from a random invalid address, possibly triggering
a lockup.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c | 20 
 src/gallium/drivers/r600/r600_state.c  | 20 
 2 files changed, 40 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 9a5183e..405d1b0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2602,6 +2602,26 @@ void evergreen_init_common_regs(struct 
r600_command_buffer *cb,
r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, 
S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, 
S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 
+   /* to avoid GPU doing any preloading of constant from random address */
+   r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 8);
+   r600_store_value(cb, 0); /* R_028140_ALU_CONST_BUFFER_SIZE_PS_0 */
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 8);
+   r600_store_value(cb, 0); /* R_028180_ALU_CONST_BUFFER_SIZE_VS_0 */
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+
r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, 
S_028354_SURFACE_SYNC_MASK(0xf));
 
return;
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index ccafdc6..b363dc1 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2381,6 +2381,26 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_0288C4_SQ_REDUC_RING_ITEMSIZE */
r600_store_value(cb, 0); /* R_0288C8_SQ_GS_VERT_ITEMSIZE */
 
+   /* to avoid GPU doing any preloading of constant from random address */
+   r600_store_context_reg_seq(cb, R_028140_ALU_CONST_BUFFER_SIZE_PS_0, 8);
+   r600_store_value(cb, 0); /* R_028140_ALU_CONST_BUFFER_SIZE_PS_0 */
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_context_reg_seq(cb, R_028180_ALU_CONST_BUFFER_SIZE_VS_0, 8);
+   r600_store_value(cb, 0); /* R_028180_ALU_CONST_BUFFER_SIZE_VS_0 */
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+   r600_store_value(cb, 0);
+
r600_store_context_reg_seq(cb, R_028A10_VGT_OUTPUT_PATH_CNTL, 13);
r600_store_value(cb, 0); /* R_028A10_VGT_OUTPUT_PATH_CNTL */
r600_store_value(cb, 0); /* R_028A14_VGT_HOS_CNTL */
-- 
1.7.11.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: order atom emission

2012-09-05 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

To avoid GPU lockups, registers must be emitted in a specific order
(no kidding ...). This patch reworks atom emission so that the order in
which atoms are emitted with respect to each other is always the same.
We don't have any information on what the correct order is, so the
order will need to be inferred from the fglrx command stream.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c |  2 +-
 src/gallium/drivers/r600/evergreen_state.c   | 53 +---
 src/gallium/drivers/r600/r600_hw_context.c   | 10 +++---
 src/gallium/drivers/r600/r600_pipe.c |  1 -
 src/gallium/drivers/r600/r600_pipe.h | 33 +
 src/gallium/drivers/r600/r600_state.c| 43 +-
 src/gallium/drivers/r600/r600_state_common.c | 36 ++-
 7 files changed, 96 insertions(+), 82 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index acf91ba..3533312 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -583,7 +583,7 @@ void evergreen_init_atom_start_compute_cs(struct 
r600_context *ctx)
/* since all required registers are initialised in the
 * start_compute_cs_cmd atom, we can EMIT_EARLY here.
 */
-   r600_init_command_buffer(cb, 256, EMIT_EARLY);
+   r600_init_command_buffer(ctx, cb, 1, 256);
cb-pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
 
switch (ctx-family) {
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index bda8ed5..695c647 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2161,27 +2161,40 @@ static void cayman_emit_sample_mask(struct r600_context 
*rctx, struct r600_atom
 
 void evergreen_init_state_functions(struct r600_context *rctx)
 {
-   r600_init_atom(rctx-cb_misc_state.atom, evergreen_emit_cb_misc_state, 
0, 0);
-   r600_atom_dirty(rctx, rctx-cb_misc_state.atom);
-   r600_init_atom(rctx-db_misc_state.atom, evergreen_emit_db_misc_state, 
7, 0);
-   r600_atom_dirty(rctx, rctx-db_misc_state.atom);
-   r600_init_atom(rctx-vertex_buffer_state.atom, 
evergreen_fs_emit_vertex_buffers, 0, 0);
-   r600_init_atom(rctx-cs_vertex_buffer_state.atom, 
evergreen_cs_emit_vertex_buffers, 0, 0);
-   r600_init_atom(rctx-vs_constbuf_state.atom, 
evergreen_emit_vs_constant_buffers, 0, 0);
-   r600_init_atom(rctx-ps_constbuf_state.atom, 
evergreen_emit_ps_constant_buffers, 0, 0);
-   r600_init_atom(rctx-vs_samplers.views.atom, 
evergreen_emit_vs_sampler_views, 0, 0);
-   r600_init_atom(rctx-ps_samplers.views.atom, 
evergreen_emit_ps_sampler_views, 0, 0);
-   r600_init_atom(rctx-cs_shader_state.atom, evergreen_emit_cs_shader, 
0, 0);
-   r600_init_atom(rctx-vs_samplers.atom_sampler, 
evergreen_emit_vs_sampler, 0, 0);
-   r600_init_atom(rctx-ps_samplers.atom_sampler, 
evergreen_emit_ps_sampler, 0, 0);
-
-   if (rctx-chip_class == EVERGREEN)
-   r600_init_atom(rctx-sample_mask.atom, 
evergreen_emit_sample_mask, 3, 0);
-   else
-   r600_init_atom(rctx-sample_mask.atom, 
cayman_emit_sample_mask, 4, 0);
+   unsigned id = 4;
+
+   /* shader const */
+   r600_init_atom(rctx, rctx-vs_constbuf_state.atom, id++, 
evergreen_emit_vs_constant_buffers, 0);
+   r600_init_atom(rctx, rctx-ps_constbuf_state.atom, id++, 
evergreen_emit_ps_constant_buffers, 0);
+   /* shader program */
+   r600_init_atom(rctx, rctx-cs_shader_state.atom, id++, 
evergreen_emit_cs_shader, 0);
+   /* sampler */
+   r600_init_atom(rctx, rctx-vs_samplers.atom_sampler, id++, 
evergreen_emit_vs_sampler, 0);
+   r600_init_atom(rctx, rctx-ps_samplers.atom_sampler, id++, 
evergreen_emit_ps_sampler, 0);
+   /* resources */
+   r600_init_atom(rctx, rctx-vertex_buffer_state.atom, id++, 
evergreen_fs_emit_vertex_buffers, 0);
+   r600_init_atom(rctx, rctx-cs_vertex_buffer_state.atom, id++, 
evergreen_cs_emit_vertex_buffers, 0);
+   r600_init_atom(rctx, rctx-vs_samplers.views.atom, id++, 
evergreen_emit_vs_sampler_views, 0);
+   r600_init_atom(rctx, rctx-ps_samplers.views.atom, id++, 
evergreen_emit_ps_sampler_views, 0);
+
+   if (rctx-chip_class == EVERGREEN) {
+   r600_init_atom(rctx, rctx-sample_mask.atom, id++, 
evergreen_emit_sample_mask, 3);
+   } else {
+   r600_init_atom(rctx, rctx-sample_mask.atom, id++, 
cayman_emit_sample_mask, 4);
+   }
rctx-sample_mask.sample_mask = ~0;
r600_atom_dirty(rctx, rctx-sample_mask.atom);
 
+   r600_init_atom(rctx, rctx-cb_misc_state.atom, id++, 
evergreen_emit_cb_misc_state, 0);
+   r600_atom_dirty(rctx, rctx-cb_misc_state.atom);
+
+   r600_init_atom(rctx, rctx-alphatest_state.atom, id++, 
r600_emit_alphatest_state, 3);
+  

[Mesa-dev] [PATCH] r600g: atomize sampler state v2

2012-08-03 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Use an atom for sampler state. Does not provide new functionality
or fix any bug. Just a step toward a fully atom-based r600g.

v2: Split seamless on r6xx/r7xx into its own atom. Make sure it's
emitted after the sampler and with a pipeline flush before it, otherwise
it does not take effect.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  117 
 src/gallium/drivers/r600/evergreen_state.c  |  136 +++---
 src/gallium/drivers/r600/r600.h |5 +-
 src/gallium/drivers/r600/r600_hw_context.c  |  149 +---
 src/gallium/drivers/r600/r600_pipe.h|   24 ++-
 src/gallium/drivers/r600/r600_state.c   |  217 +++
 src/gallium/drivers/r600/r600_state_common.c|   66 ++-
 7 files changed, 262 insertions(+), 452 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 199033f..6494786 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -575,37 +575,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
{R_028EAC_CB_COLOR11_DIM, 0, 0},
 };
 
-/* SHADER SAMPLER BORDER EG/CM */
-static int evergreen_state_sampler_border_init(struct r600_context *ctx, 
uint32_t offset, unsigned id)
-{
-   struct r600_reg r600_shader_sampler_border[] = {
-   {R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0, 0},
-   {R_00A404_TD_PS_SAMPLER0_BORDER_RED, 0, 0},
-   {R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0},
-   {R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0},
-   {R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0},
-   };
-   unsigned nreg = Elements(r600_shader_sampler_border);
-   unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) 
* 0x100 + 0x4 + id * 0x1C;
-   struct r600_range *range;
-   struct r600_block *block;
-   int r;
-
-   for (int i = 0; i  nreg; i++) {
-   r600_shader_sampler_border[i].offset -= 
R_00A400_TD_PS_SAMPLER0_BORDER_INDEX;
-   r600_shader_sampler_border[i].offset += fake_offset;
-   }
-   r = r600_context_add_block(ctx, r600_shader_sampler_border, nreg, 
PKT3_SET_CONFIG_REG, 0);
-   if (r) {
-   return r;
-   }
-   /* set proper offset */
-   range = ctx-range[CTX_RANGE_ID(r600_shader_sampler_border[0].offset)];
-   block = 
range-blocks[CTX_BLOCK_ID(r600_shader_sampler_border[0].offset)];
-   block-pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET)  2;
-   return 0;
-}
-
 static int evergreen_loop_const_init(struct r600_context *ctx, uint32_t offset)
 {
unsigned nreg = 32;
@@ -646,32 +615,6 @@ int evergreen_context_init(struct r600_context *ctx)
if (r)
goto out_err;
 
-
-   /* PS SAMPLER */
-   for (int j = 0, offset = 0; j  18; j++, offset += 0xC) {
-   r = r600_state_sampler_init(ctx, offset);
-   if (r)
-   goto out_err;
-   }
-   /* VS SAMPLER */
-   for (int j = 0, offset = 0xD8; j  18; j++, offset += 0xC) {
-   r = r600_state_sampler_init(ctx, offset);
-   if (r)
-   goto out_err;
-   }
-   /* PS SAMPLER BORDER */
-   for (int j = 0; j  18; j++) {
-   r = evergreen_state_sampler_border_init(ctx, 
R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j);
-   if (r)
-   goto out_err;
-   }
-   /* VS SAMPLER BORDER */
-   for (int j = 0; j  18; j++) {
-   r = evergreen_state_sampler_border_init(ctx, 
R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j);
-   if (r)
-   goto out_err;
-   }
-
/* PS loop const */
evergreen_loop_const_init(ctx, 0);
/* VS loop const */
@@ -688,66 +631,6 @@ out_err:
return r;
 }
 
-static inline void evergreen_context_pipe_state_set_sampler_border(struct 
r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
-{
-   unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) 
* 0x100 + 0x4 + id * 0x1C;
-   struct r600_range *range;
-   struct r600_block *block;
-   int i;
-   int dirty;
-
-   range = ctx-range[CTX_RANGE_ID(fake_offset)];
-   block = range-blocks[CTX_BLOCK_ID(fake_offset)];
-   if (state == NULL) {
-   block-status = ~(R600_BLOCK_STATUS_ENABLED | 
R600_BLOCK_STATUS_DIRTY);
-   LIST_DELINIT(block-list);
-   LIST_DELINIT(block-enable_list);
-   return;
-   }
-   if (state-nregs = 3) {
-   return;
-   }
-
-   dirty = block-status  R600_BLOCK_STATUS_DIRTY;
-   if (block-reg[0] != id) {
-   block-reg[0] = id;
-   dirty |= 

[Mesa-dev] r600g atomizing

2012-08-01 Thread j . glisse
This patch atomizes the sampler state. No regressions on evergreen;
I can't really check r6xx/r7xx as they all lock up for me with mesa
master and 3.5.

Plan is to convert everything to atoms and then predefine the atom
emission order.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: atomize sampler state

2012-08-01 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Use an atom for sampler state. Does not provide new functionality
or fix any bug. Just a step toward a fully atom-based r600g.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |  117 -
 src/gallium/drivers/r600/evergreen_state.c  |  136 ---
 src/gallium/drivers/r600/r600.h |5 +-
 src/gallium/drivers/r600/r600_hw_context.c  |  145 +---
 src/gallium/drivers/r600/r600_pipe.h|   15 +-
 src/gallium/drivers/r600/r600_state.c   |  207 +++
 src/gallium/drivers/r600/r600_state_common.c|   66 +++-
 7 files changed, 242 insertions(+), 449 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 199033f..6494786 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -575,37 +575,6 @@ static const struct r600_reg cayman_context_reg_list[] = {
{R_028EAC_CB_COLOR11_DIM, 0, 0},
 };
 
-/* SHADER SAMPLER BORDER EG/CM */
-static int evergreen_state_sampler_border_init(struct r600_context *ctx, 
uint32_t offset, unsigned id)
-{
-   struct r600_reg r600_shader_sampler_border[] = {
-   {R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, 0, 0},
-   {R_00A404_TD_PS_SAMPLER0_BORDER_RED, 0, 0},
-   {R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, 0, 0},
-   {R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, 0, 0},
-   {R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, 0, 0},
-   };
-   unsigned nreg = Elements(r600_shader_sampler_border);
-   unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) 
* 0x100 + 0x4 + id * 0x1C;
-   struct r600_range *range;
-   struct r600_block *block;
-   int r;
-
-   for (int i = 0; i  nreg; i++) {
-   r600_shader_sampler_border[i].offset -= 
R_00A400_TD_PS_SAMPLER0_BORDER_INDEX;
-   r600_shader_sampler_border[i].offset += fake_offset;
-   }
-   r = r600_context_add_block(ctx, r600_shader_sampler_border, nreg, 
PKT3_SET_CONFIG_REG, 0);
-   if (r) {
-   return r;
-   }
-   /* set proper offset */
-   range = ctx-range[CTX_RANGE_ID(r600_shader_sampler_border[0].offset)];
-   block = 
range-blocks[CTX_BLOCK_ID(r600_shader_sampler_border[0].offset)];
-   block-pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET)  2;
-   return 0;
-}
-
 static int evergreen_loop_const_init(struct r600_context *ctx, uint32_t offset)
 {
unsigned nreg = 32;
@@ -646,32 +615,6 @@ int evergreen_context_init(struct r600_context *ctx)
if (r)
goto out_err;
 
-
-   /* PS SAMPLER */
-   for (int j = 0, offset = 0; j  18; j++, offset += 0xC) {
-   r = r600_state_sampler_init(ctx, offset);
-   if (r)
-   goto out_err;
-   }
-   /* VS SAMPLER */
-   for (int j = 0, offset = 0xD8; j  18; j++, offset += 0xC) {
-   r = r600_state_sampler_init(ctx, offset);
-   if (r)
-   goto out_err;
-   }
-   /* PS SAMPLER BORDER */
-   for (int j = 0; j  18; j++) {
-   r = evergreen_state_sampler_border_init(ctx, 
R_00A400_TD_PS_SAMPLER0_BORDER_INDEX, j);
-   if (r)
-   goto out_err;
-   }
-   /* VS SAMPLER BORDER */
-   for (int j = 0; j  18; j++) {
-   r = evergreen_state_sampler_border_init(ctx, 
R_00A414_TD_VS_SAMPLER0_BORDER_INDEX, j);
-   if (r)
-   goto out_err;
-   }
-
/* PS loop const */
evergreen_loop_const_init(ctx, 0);
/* VS loop const */
@@ -688,66 +631,6 @@ out_err:
return r;
 }
 
-static inline void evergreen_context_pipe_state_set_sampler_border(struct 
r600_context *ctx, struct r600_pipe_state *state, unsigned offset, unsigned id)
-{
-   unsigned fake_offset = (offset - R_00A400_TD_PS_SAMPLER0_BORDER_INDEX) 
* 0x100 + 0x4 + id * 0x1C;
-   struct r600_range *range;
-   struct r600_block *block;
-   int i;
-   int dirty;
-
-   range = ctx-range[CTX_RANGE_ID(fake_offset)];
-   block = range-blocks[CTX_BLOCK_ID(fake_offset)];
-   if (state == NULL) {
-   block-status = ~(R600_BLOCK_STATUS_ENABLED | 
R600_BLOCK_STATUS_DIRTY);
-   LIST_DELINIT(block-list);
-   LIST_DELINIT(block-enable_list);
-   return;
-   }
-   if (state-nregs = 3) {
-   return;
-   }
-
-   dirty = block-status  R600_BLOCK_STATUS_DIRTY;
-   if (block-reg[0] != id) {
-   block-reg[0] = id;
-   dirty |= R600_BLOCK_STATUS_DIRTY;
-   }
-
-   for (i = 1; i  5; i++) {
-   if (block-reg[i] != state-regs[i + 2].value) {
-   block-reg[i] = state-regs[i + 

[Mesa-dev] r600g: hyperz

2012-07-17 Thread j . glisse
So this patch series adds hyperz but does not enable it by default. I
think I addressed all comments in v9 for htile. I am also asking to
include the flushing rework, as without it hyperz locks up with things
such as gears.

So with both patches most applications should be fine with hyperz, but
applications that switch depth are the most likely to trigger a lockup.

From a regression point of view, the 2 patches don't seem to regress
anything according to piglit (r6xx, r7xx, evergreen). However, enabling
hyperz leads to some rendering issues with evergreen and lightmark, but as
I can't run the piglit regression tests without lockups I haven't tracked
down exactly what (my guess is that it's related to hyper-stencil).

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] r600g: add htile support v9

2012-07-17 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. For htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.
v9 Fix comments, properly account new register in emit function,
   disable fast zclear if clearing different layer of texture
   array to different value

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |6 +
 src/gallium/drivers/r600/evergreen_state.c  |  102 -
 src/gallium/drivers/r600/evergreend.h   |4 +
 src/gallium/drivers/r600/r600_blit.c|   38 +++
 src/gallium/drivers/r600/r600_hw_context.c  |   25 +
 src/gallium/drivers/r600/r600_pipe.c|8 ++
 src/gallium/drivers/r600/r600_pipe.h|   13 ++-
 src/gallium/drivers/r600/r600_resource.h|7 ++
 src/gallium/drivers/r600/r600_state.c   |  133 ---
 src/gallium/drivers/r600/r600_texture.c |  103 ++
 src/gallium/drivers/r600/r600d.h|6 +
 11 files changed, 399 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 081701f..546c884 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -62,6 +62,9 @@ static const struct r600_reg evergreen_context_reg_list[] = {
{GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_028058_DB_DEPTH_SIZE, 0, 0},
{R_02805C_DB_DEPTH_SLICE, 0, 0},
+   {R_02802C_DB_DEPTH_CLEAR, 0, 0},
+   {R_028ABC_DB_HTILE_SURFACE, 0, 0},
+   {R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -319,6 +322,9 @@ static const struct r600_reg cayman_context_reg_list[] = {
{GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_028058_DB_DEPTH_SIZE, 0, 0},
{R_02805C_DB_DEPTH_SLICE, 0, 0},
+   {R_02802C_DB_DEPTH_CLEAR, 0, 0},
+   {R_028ABC_DB_HTILE_SURFACE, 0, 0},
+   {R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index a66387b..214d76b 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -710,13 +710,15 @@ static void *evergreen_create_blend_state(struct 
pipe_context *ctx,
}
blend-cb_target_mask = target_mask;
 
-   if (target_mask)
+   if (target_mask) {
color_control |= S_028808_MODE(V_028808_CB_NORMAL);
-   else
+   } else {
color_control |= S_028808_MODE(V_028808_CB_DISABLE);
+   }
 
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
color_control);
+
/* only have dual source on MRT0 */
blend-dual_src_blend = util_blend_state_is_dual(state, 0);
for (int i = 0; i  8; i++) {
@@ -1668,6 +1670,26 @@ static void evergreen_db(struct r600_context *rctx, 
struct r600_pipe_state *rsta
}
}
 
+   /* hyperz */
+   if (rtex-hyperz) {
+   uint64_t htile_offset = 
rtex-hyperz-surface.level[level].offset;
+
+   rctx-db_misc_state.hyperz = true;
+   rctx-db_misc_state.db_htile_surface_mask = 0x;
+   r600_atom_dirty(rctx, rctx-db_misc_state.atom);
+   z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+   r600_pipe_state_add_reg_bo(rstate, R_028014_DB_HTILE_DATA_BASE,
+   

[Mesa-dev] [PATCH 2/2] r600g: simplify and fix flushing and synchronization v2

2012-07-17 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Flushing and synchronization only need to happen at the beginning
and end of a cs, and after each draw packet if necessary. This
patch is especially needed for the hyperz/htile feature.

v2: Separate evergreen and r6xx/r7xx flushing/syncing to allow
easier specialization of each function. Fix r6xx/r7xx
regression.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_compute.c   |   23 +--
 .../drivers/r600/evergreen_compute_internal.c  |4 +-
 src/gallium/drivers/r600/evergreen_hw_context.c|  110 ++-
 src/gallium/drivers/r600/evergreen_state.c |   14 +-
 src/gallium/drivers/r600/evergreend.h  |3 +-
 src/gallium/drivers/r600/r600.h|   19 +-
 src/gallium/drivers/r600/r600_buffer.c |2 +-
 src/gallium/drivers/r600/r600_hw_context.c |  203 
 src/gallium/drivers/r600/r600_hw_context_priv.h|3 +-
 src/gallium/drivers/r600/r600_pipe.c   |2 -
 src/gallium/drivers/r600/r600_pipe.h   |6 +-
 src/gallium/drivers/r600/r600_state.c  |   23 +--
 src/gallium/drivers/r600/r600_state_common.c   |   68 ++-
 13 files changed, 297 insertions(+), 183 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 947a328..37c3395 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer(
vb-buffer = buffer;
vb-user_buffer = NULL;
 
-   r600_inval_vertex_cache(rctx);
+   rctx-flags |= R600_CONTEXT_VTX_FLUSH;
state-dirty_mask |= 1  vb_index;
r600_atom_dirty(rctx, state-atom);
 }
@@ -208,8 +208,7 @@ static void evergreen_bind_compute_state(struct 
pipe_context *ctx_, void *state)
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = ctx-cs_shader-bc.ndw*4;
 
-   r600_inval_shader_cache(ctx);
-
+   ctx-flags |= R600_CONTEXT_SH_FLUSH;
 }
 
 /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit
@@ -364,8 +363,11 @@ static void compute_emit_cs(struct r600_context *ctx)
 */
r600_emit_atom(ctx, ctx-start_compute_cs_cmd.atom);
 
+   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   r600_flush_emit(ctx);
+
/* Emit cb_state */
-cb_state = ctx-states[R600_PIPE_STATE_FRAMEBUFFER];
+   cb_state = ctx-states[R600_PIPE_STATE_FRAMEBUFFER];
r600_context_pipe_state_emit(ctx, cb_state, 
RADEON_CP_PACKET3_COMPUTE_MODE);
 
/* Emit vertex buffer state */
@@ -405,15 +407,8 @@ static void compute_emit_cs(struct r600_context *ctx)
}
}
 
-   /* r600_flush_framebuffer() updates the cb_flush_flags and then
-* calls r600_emit_atom() on the ctx-surface_sync_cmd.atom, which emits
-* a SURFACE_SYNC packet via r600_emit_surface_sync().
-*
-* XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to
-* 0x, so we will need to add a field to struct
-* r600_surface_sync_cmd if we want to manually set this value.
-*/
-   r600_flush_framebuffer(ctx, true /* Flush now */);
+   ctx-flags |= R600_CONTEXT_CB_FLUSH;
+   r600_flush_emit(ctx);
 
 #if 0
COMPUTE_DBG(cdw: %i\n, cs-cdw);
@@ -460,6 +455,8 @@ static void evergreen_launch_grid(
evergreen_set_lds(ctx-cs_shader, 0, 0, num_waves);
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
evergreen_direct_dispatch(ctx_, block_layout, grid_layout);
+   /* set draw pending so flush function know we mean business */
+   ctx-flags |= R600_CONTEXT_DRAW_PENDING;
compute_emit_cs(ctx);
 }
 
diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c 
b/src/gallium/drivers/r600/evergreen_compute_internal.c
index 1d11bab..8bb6426 100644
--- a/src/gallium/drivers/r600/evergreen_compute_internal.c
+++ b/src/gallium/drivers/r600/evergreen_compute_internal.c
@@ -559,7 +559,7 @@ void evergreen_set_tex_resource(
 
res-coher_bo_size = tmp-offset[0] + 
util_format_get_blockwidth(tmp-real_format)*view-base.texture-width0*height*depth;
 
-   r600_inval_texture_cache(pipe-ctx);
+   pipe-ctx-flags |= R600_CONTEXT_TEX_FLUSH;
 
evergreen_emit_force_reloc(res);
evergreen_emit_force_reloc(res);
@@ -618,7 +618,7 @@ void evergreen_set_const_cache(
res-usage = RADEON_USAGE_READ;
res-coher_bo_size = size;
 
-   r600_inval_shader_cache(pipe-ctx);
+   pipe-ctx-flags |= R600_CONTEXT_SH_FLUSH;
 }
 
 struct r600_resource* r600_compute_buffer_alloc_vram(
diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 546c884..cf4a225 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ 

[Mesa-dev] r600g: hyperz

2012-07-13 Thread j . glisse
So this patch adds hyperz but does not enable it. I have been working on
that for the last 7 months and I just keep failing to stop it from locking
up. At the same time I would prefer having this code upstream so I don't
have to rebase.

I tried to match the fglrx sync & flush pattern, but that would basically
mean rewriting the whole of r600g. So here is a cleanup of r600g flushing
I have been working on to try to fix the lockups, but no success there
(it slightly improves things, I think, but when it comes to lockups it's
hard to have a metric on how much more likely they are to happen or not)
http://people.freedesktop.org/~glisse/0002-r600g-simplify-and-fix-flushing-and-synchronization.patch

So if the feature doesn't lock up you will see on average a 5% increase
in performance.

Anyway off to work on something else.

Cheers,
Jerome

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] r600g: add htile support v8

2012-07-13 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

htile is used for HiZ and HiS support and fast Z/S clears.
This commit just adds the htile setup and Fast Z clear.
We don't take full advantage of HiS with that patch.

v2 really use fast clear, still random issue with some tiles
   need to try more flush combination, fix depth/stencil
   texture decompression
v3 fix random issue on r6xx/r7xx
v4 rebase on top of latest mesa, disable CB export when clearing
   htile surface to avoid wasting bandwidth
v5 resummarize htile surface when uploading z value. Fix z/stencil
   decompression, the custom blitter with custom dsa is no longer
   needed.
v6 Reorganize render control/override update mechanism, fixing more
   issues in the process.
v7 Add nop after depth surface base update to work around some htile
   flushing issue. For htile to 8x8 on r6xx/r7xx as other combination
   have issue. Do not enable hyperz when flushing/uncompressing
   depth buffer.
v8 Fix htile surface, preload and prefetch setup. Only set preload
   and prefetch on htile surface clear like fglrx. Record depth
   clear value per level. Support several level for the htile
   surface. First depth clear can't be a fast clear.

Signed-off-by: Pierre-Eric Pelloux-Prayer pell...@gmail.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c |8 +-
 src/gallium/drivers/r600/evergreen_state.c  |   97 -
 src/gallium/drivers/r600/evergreend.h   |4 +
 src/gallium/drivers/r600/r600_blit.c|   37 +--
 src/gallium/drivers/r600/r600_hw_context.c  |   25 +
 src/gallium/drivers/r600/r600_pipe.c|1 +
 src/gallium/drivers/r600/r600_pipe.h|   13 ++-
 src/gallium/drivers/r600/r600_resource.h|7 ++
 src/gallium/drivers/r600/r600_state.c   |  133 ---
 src/gallium/drivers/r600/r600_state_common.c|6 +
 src/gallium/drivers/r600/r600_texture.c |   98 +
 src/gallium/drivers/r600/r600d.h|6 +
 12 files changed, 376 insertions(+), 59 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 53d4582..546c884 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -43,7 +43,6 @@ static const struct r600_reg evergreen_ctl_const_list[] = {
 };
 
 static const struct r600_reg evergreen_context_reg_list[] = {
-   {R_028000_DB_RENDER_CONTROL, 0, 0},
{R_028008_DB_DEPTH_VIEW, 0, 0},
{R_028010_DB_RENDER_OVERRIDE2, 0, 0},
{GROUP_FORCE_NEW_BLOCK, 0, 0},
@@ -63,6 +62,9 @@ static const struct r600_reg evergreen_context_reg_list[] = {
{GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_028058_DB_DEPTH_SIZE, 0, 0},
{R_02805C_DB_DEPTH_SLICE, 0, 0},
+   {R_02802C_DB_DEPTH_CLEAR, 0, 0},
+   {R_028ABC_DB_HTILE_SURFACE, 0, 0},
+   {R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
@@ -301,7 +303,6 @@ static const struct r600_reg evergreen_context_reg_list[] = 
{
 };
 
 static const struct r600_reg cayman_context_reg_list[] = {
-   {R_028000_DB_RENDER_CONTROL, 0, 0},
{R_028008_DB_DEPTH_VIEW, 0, 0},
{R_028010_DB_RENDER_OVERRIDE2, 0, 0},
{GROUP_FORCE_NEW_BLOCK, 0, 0},
@@ -321,6 +322,9 @@ static const struct r600_reg cayman_context_reg_list[] = {
{GROUP_FORCE_NEW_BLOCK, 0, 0},
{R_028058_DB_DEPTH_SIZE, 0, 0},
{R_02805C_DB_DEPTH_SLICE, 0, 0},
+   {R_02802C_DB_DEPTH_CLEAR, 0, 0},
+   {R_028ABC_DB_HTILE_SURFACE, 0, 0},
+   {R_028AC8_DB_PRELOAD_CONTROL, 0, 0},
{R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0},
{R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0},
{R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0},
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 404df02..9cadaa1 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -710,13 +710,15 @@ static void *evergreen_create_blend_state(struct 
pipe_context *ctx,
}
blend-cb_target_mask = target_mask;
 
-   if (target_mask)
+   if (target_mask) {
color_control |= S_028808_MODE(V_028808_CB_NORMAL);
-   else
+   } else {
color_control |= S_028808_MODE(V_028808_CB_DISABLE);
+   }
 
r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL,
color_control);
+
/* only have dual source on MRT0 */
blend-dual_src_blend = util_blend_state_is_dual(state, 0);
for (int i = 0; i  8; i++) {
@@ -759,7 +761,6 @@ static void *evergreen_create_dsa_state(struct pipe_context 
*ctx,

[Mesa-dev] [PATCH 1/3] r600g: avoid unnecessary shader exports v2

2012-06-26 Thread j . glisse
From: Vadim Girlin vadimgir...@gmail.com

In some cases TGSI shader has more color outputs than the number of CBs,
so it seems we need to limit the number of color exports. This requires
different shader variants depending on the nr_cbufs, but on the other hand
we are doing less exports, which are very costly.

v2: fix various piglit regressions

Signed-off-by: Vadim Girlin vadimgir...@gmail.com
Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/evergreen_state.c   |   10 +++---
 src/gallium/drivers/r600/r600_shader.c   |   25 ++---
 src/gallium/drivers/r600/r600_shader.h   |7 ++-
 src/gallium/drivers/r600/r600_state.c|2 ++
 src/gallium/drivers/r600/r600_state_common.c |4 ++--
 5 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index b618ca8..3fe95e1 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2641,18 +2641,14 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
db_shader_control |= S_02880C_KILL_ENABLE(1);
 
exports_ps = 0;
-   num_cout = 0;
for (i = 0; i  rshader-noutput; i++) {
if (rshader-output[i].name == TGSI_SEMANTIC_POSITION ||
rshader-output[i].name == TGSI_SEMANTIC_STENCIL)
exports_ps |= 1;
-   else if (rshader-output[i].name == TGSI_SEMANTIC_COLOR) {
-   if (rshader-fs_write_all)
-   num_cout = rshader-nr_cbufs;
-   else
-   num_cout++;
-   }
}
+
+   num_cout = rshader-nr_ps_color_exports;
+
exports_ps |= S_02884C_EXPORT_COLORS(num_cout);
if (!exports_ps) {
/* always at least export 1 component per pixel */
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index d294084..37914eb 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -801,6 +801,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
ctx-cv_output = i;
break;
}
+   } else if (ctx-type == TGSI_PROCESSOR_FRAGMENT) {
+   switch (d-Semantic.Name) {
+   case TGSI_SEMANTIC_COLOR:
+   ctx-shader-nr_ps_max_color_exports++;
+   break;
+   }
}
break;
case TGSI_FILE_CONSTANT:
@@ -1153,8 +1159,10 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
ctx.colors_used = 0;
ctx.clip_vertex_write = 0;
 
+   shader-nr_ps_color_exports = 0;
+   shader-nr_ps_max_color_exports = 0;
+
shader-two_side = (ctx.type == TGSI_PROCESSOR_FRAGMENT)  
rctx-two_side;
-   shader-nr_cbufs = rctx-nr_cbufs;
 
/* register allocations */
/* Values [0,127] correspond to GPR[0..127].
@@ -1289,6 +1297,9 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
}
}
 
+   if (shader-fs_write_all  rctx-chip_class = EVERGREEN)
+   shader-nr_ps_max_color_exports = 8;
+
if (ctx.fragcoord_input = 0) {
if (ctx.bc-chip_class == CAYMAN) {
for (j = 0 ; j  4; j++) {
@@ -1528,10 +1539,17 @@ static int r600_shader_from_tgsi(struct r600_context * 
rctx, struct r600_pipe_sh
break;
case TGSI_PROCESSOR_FRAGMENT:
if (shader-output[i].name == TGSI_SEMANTIC_COLOR) {
+   /* never export more colors than the number of 
CBs */
+   if (next_pixel_base  next_pixel_base = 
(rctx-nr_cbufs + rctx-dual_src_blend * 1)) {
+   /* skip export */
+   j--;
+   continue;
+   }
output[j].array_base = next_pixel_base++;
output[j].type = 
V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+   shader-nr_ps_color_exports++;
if (shader-fs_write_all  (rctx-chip_class 
= EVERGREEN)) {
-   for (k = 1; k  shader-nr_cbufs; k++) {
+   for (k = 1; k  rctx-nr_cbufs; k++) {
j++;
memset(output[j], 0, 
sizeof(struct r600_bytecode_output));
output[j].gpr = 

[Mesa-dev] [PATCH 2/3] r600g: enable DUAL_EXPORT mode when possible

2012-06-26 Thread j . glisse
From: Vadim Girlin vadimgir...@gmail.com

It seems DUAL_EXPORT on evergreen may be enabled when all CBs use 16-bit export
mode (EXPORT_4C_16BPC), also there should be at least one CB, and the PS
shouldn't export depth/stencil.

Signed-off-by: Vadim Girlin vadimgir...@gmail.com
---
 src/gallium/drivers/r600/evergreen_state.c   |   46 ++
 src/gallium/drivers/r600/evergreend.h|7 
 src/gallium/drivers/r600/r600_pipe.h |5 +++
 src/gallium/drivers/r600/r600_state_common.c |3 ++
 4 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 3fe95e1..bddb67e 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1458,7 +1458,6 @@ static void evergreen_cb(struct r600_context *rctx, 
struct r600_pipe_state *rsta
 (desc-channel[i].size  17 
  desc-channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) {
color_info |= S_028C70_SOURCE_FORMAT(V_028C70_EXPORT_4C_16BPC);
-   rctx-export_16bpc = true;
} else {
rctx-export_16bpc = false;
}
@@ -1661,6 +1660,7 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state);
uint32_t tl, br;
+   int i;
 
if (rstate == NULL)
return;
@@ -1674,10 +1674,16 @@ static void evergreen_set_framebuffer_state(struct 
pipe_context *ctx,
 
/* build states */
rctx-have_depth_fb = 0;
+   rctx-export_16bpc = true;
rctx-nr_cbufs = state-nr_cbufs;
-   for (int i = 0; i  state-nr_cbufs; i++) {
+   for (i = 0; i  state-nr_cbufs; i++) {
evergreen_cb(rctx, rstate, state, i);
}
+
+   for (; i  8 ; i++) {
+   r600_pipe_state_add_reg(rstate, R_028C70_CB_COLOR0_INFO + i * 
0x3C, 0);
+   }
+
if (state-zsbuf) {
evergreen_db(rctx, rstate, state);
}
@@ -2585,6 +2591,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
int ninterp = 0;
boolean have_linear = FALSE, have_centroid = FALSE, have_perspective = 
FALSE;
unsigned spi_baryc_cntl, sid, tmp, idx = 0;
+   unsigned z_export = 0, stencil_export = 0;
 
rstate-nregs = 0;
 
@@ -2633,13 +2640,16 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
 
for (i = 0; i  rshader-noutput; i++) {
if (rshader-output[i].name == TGSI_SEMANTIC_POSITION)
-   db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
+   z_export = 1;
if (rshader-output[i].name == TGSI_SEMANTIC_STENCIL)
-   db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(1);
+   stencil_export = 1;
}
if (rshader-uses_kill)
db_shader_control |= S_02880C_KILL_ENABLE(1);
 
+   db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
+   db_shader_control |= S_02880C_STENCIL_EXPORT_ENABLE(stencil_export);
+
exports_ps = 0;
for (i = 0; i  rshader-noutput; i++) {
if (rshader-output[i].name == TGSI_SEMANTIC_POSITION ||
@@ -2711,8 +2721,9 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader
r600_pipe_state_add_reg(rstate,
R_02884C_SQ_PGM_EXPORTS_PS,
exports_ps);
-   r600_pipe_state_add_reg(rstate, R_02880C_DB_SHADER_CONTROL,
-   db_shader_control);
+
+   shader-db_shader_control = db_shader_control;
+   shader-ps_depth_export = z_export | stencil_export;
 
shader-sprite_coord_enable = rctx-sprite_coord_enable;
if (rctx-rasterizer)
@@ -2798,3 +2809,26 @@ void *evergreen_create_db_flush_dsa(struct r600_context 
*rctx)
/* Don't set the 'is_flush' flag in r600_pipe_dsa, evergreen doesn't 
need it. */
return rstate;
 }
+
+void evergreen_update_dual_export_state(struct r600_context * rctx)
+{
+   unsigned dual_export = rctx-export_16bpc  rctx-nr_cbufs 
+   !rctx-ps_shader-ps_depth_export;
+
+   unsigned db_source_format = dual_export ? V_02880C_EXPORT_DB_TWO :
+   V_02880C_EXPORT_DB_FULL;
+
+   unsigned db_shader_control = rctx-ps_shader-db_shader_control |
+   S_02880C_DUAL_EXPORT_ENABLE(dual_export) |
+   S_02880C_DB_SOURCE_FORMAT(db_source_format);
+
+   if (db_shader_control != rctx-db_shader_control) {
+   struct r600_pipe_state rstate;
+
+   rctx-db_shader_control = db_shader_control;
+
+   rstate.nregs = 0;
+   r600_pipe_state_add_reg(rstate, 

[Mesa-dev] [PATCH 3/3] r600g: enable DUAL_EXPORT mode when possible on r6xx/r7xx

2012-06-26 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

DUAL_EXPORT can be enabled on r6xx/r7xx when all CBs use 16-bit export
and there is no depth/stencil export.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_pipe.h |1 +
 src/gallium/drivers/r600/r600_state.c|   45 --
 src/gallium/drivers/r600/r600_state_common.c |   30 +++--
 3 files changed, 57 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.h 
b/src/gallium/drivers/r600/r600_pipe.h
index 2c107c8..4f5cf0e 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -462,6 +462,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen,
 enum pipe_texture_target target,
 unsigned sample_count,
 unsigned usage);
+void r600_update_dual_export_state(struct r600_context * rctx);
 
 /* r600_texture.c */
 void r600_init_screen_texture_functions(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/r600/r600_state.c 
b/src/gallium/drivers/r600/r600_state.c
index b314edc..a38cb53 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1495,8 +1495,11 @@ static void r600_cb(struct r600_context *rctx, struct 
r600_pipe_state *rstate,
 ntype != V_0280A0_NUMBER_UINT 
 ntype != V_0280A0_NUMBER_SINT) 
G_0280A0_BLEND_CLAMP(color_info) 
-   !G_0280A0_BLEND_FLOAT32(color_info))
+   !G_0280A0_BLEND_FLOAT32(color_info)) {
color_info |= 
S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+   } else {
+   rctx-export_16bpc = false;
+   }
} else {
/* EXPORT_NORM can be enabled if:
 * - 11-bit or smaller UNORM/SNORM/SRGB
@@ -1507,8 +1510,11 @@ static void r600_cb(struct r600_context *rctx, struct 
r600_pipe_state *rstate,
  desc-channel[i].type != UTIL_FORMAT_TYPE_FLOAT 
  ntype != V_0280A0_NUMBER_UINT  ntype != 
V_0280A0_NUMBER_SINT) ||
(desc-channel[i].size  17 
-desc-channel[i].type == UTIL_FORMAT_TYPE_FLOAT)))
+desc-channel[i].type == UTIL_FORMAT_TYPE_FLOAT))) {
color_info |= 
S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+   } else {
+   rctx-export_16bpc = false;
+   }
}
 
/* for possible dual-src MRT write color info 1 */
@@ -1640,6 +1646,7 @@ static void r600_set_framebuffer_state(struct 
pipe_context *ctx,
 
/* build states */
rctx-have_depth_fb = 0;
+   rctx-export_16bpc = true;
rctx-nr_cbufs = state-nr_cbufs;
 
for (int i = 0; i  state-nr_cbufs; i++) {
@@ -2234,6 +2241,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct 
r600_pipe_shader *shad
int pos_index = -1, face_index = -1;
unsigned tmp, sid, ufi = 0;
int need_linear = 0;
+   unsigned z_export = 0, stencil_export = 0;
 
rstate-nregs = 0;
 
@@ -2273,23 +2281,23 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, 
struct r600_pipe_shader *shad
db_shader_control = S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
for (i = 0; i  rshader-noutput; i++) {
if (rshader-output[i].name == TGSI_SEMANTIC_POSITION)
-   db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
+   z_export = 1;
if (rshader-output[i].name == TGSI_SEMANTIC_STENCIL)
-   db_shader_control |= 
S_02880C_STENCIL_REF_EXPORT_ENABLE(1);
+   stencil_export = 1;
}
+   db_shader_control |= S_02880C_Z_EXPORT_ENABLE(z_export);
+   db_shader_control |= S_02880C_STENCIL_REF_EXPORT_ENABLE(stencil_export);
if (rshader-uses_kill)
db_shader_control |= S_02880C_KILL_ENABLE(1);
 
exports_ps = 0;
-   num_cout = 0;
for (i = 0; i  rshader-noutput; i++) {
if (rshader-output[i].name == TGSI_SEMANTIC_POSITION ||
-   rshader-output[i].name == TGSI_SEMANTIC_STENCIL)
+   rshader-output[i].name == TGSI_SEMANTIC_STENCIL) {
exports_ps |= 1;
-   else if (rshader-output[i].name == TGSI_SEMANTIC_COLOR) {
-   num_cout++;
}
}
+   num_cout = rshader-nr_ps_color_exports;
exports_ps |= S_028854_EXPORT_COLORS(num_cout);
if (!exports_ps) {
/* always at least export 1 component per pixel */
@@ -2335,8 +2343,8 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct 
r600_pipe_shader *shad
R_028854_SQ_PGM_EXPORTS_PS,
exports_ps);
   

[Mesa-dev] [PATCH] r600g: fix z/stencil texture creation

2012-06-15 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Z or stencil textures should not be created with the z/stencil
surface-creation flags, as they are intended to be bound as
textures.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_texture.c |   34 +-
 1 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 5b15990..517f273 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -237,7 +237,8 @@ static void r600_texture_set_array_mode(struct pipe_screen 
*screen,
 
 static int r600_init_surface(struct radeon_surface *surface,
 const struct pipe_resource *ptex,
-unsigned array_mode)
+unsigned array_mode, unsigned is_transfer,
+unsigned is_texture)
 {
surface-npix_x = ptex-width0;
surface-npix_y = ptex-height0;
@@ -298,7 +299,7 @@ static int r600_init_surface(struct radeon_surface *surface,
if (ptex-bind  PIPE_BIND_SCANOUT) {
surface-flags |= RADEON_SURF_SCANOUT;
}
-   if (util_format_is_depth_and_stencil(ptex-format)) {
+   if (util_format_is_depth_and_stencil(ptex-format)  !is_transfer  
!is_texture) {
surface-flags |= RADEON_SURF_ZBUFFER;
surface-flags |= RADEON_SURF_SBUFFER;
}
@@ -316,11 +317,6 @@ static int r600_setup_surface(struct pipe_screen *screen,
unsigned i;
int r;
 
-   if (util_format_is_depth_or_stencil(rtex-real_format)) {
-   rtex-surface.flags |= RADEON_SURF_ZBUFFER;
-   rtex-surface.flags |= RADEON_SURF_SBUFFER;
-   }
-
r = rscreen-ws-surface_init(rscreen-ws, rtex-surface);
if (r) {
return r;
@@ -572,7 +568,8 @@ r600_texture_create_object(struct pipe_screen *screen,
r600_setup_miptree(screen, rtex, array_mode);
if (rscreen-use_surface_alloc) {
rtex-surface = *surface;
-   r = r600_setup_surface(screen, rtex, array_mode, 
pitch_in_bytes_override);
+   r = r600_setup_surface(screen, rtex, array_mode,
+  pitch_in_bytes_override);
if (r) {
FREE(rtex);
return NULL;
@@ -642,7 +639,9 @@ struct pipe_resource *r600_texture_create(struct 
pipe_screen *screen,
}
}
 
-   r = r600_init_surface(surface, templ, array_mode);
+   r = r600_init_surface(surface, templ, array_mode,
+ templ-flags  R600_RESOURCE_FLAG_TRANSFER,
+ templ-usage  PIPE_BIND_SAMPLER_VIEW);
if (r) {
return NULL;
}
@@ -723,7 +722,7 @@ struct pipe_resource *r600_texture_from_handle(struct 
pipe_screen *screen,
else
array_mode = 0;
 
-   r = r600_init_surface(surface, templ, array_mode);
+   r = r600_init_surface(surface, templ, array_mode, 0, 0);
if (r) {
return NULL;
}
@@ -796,8 +795,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct 
pipe_context *ctx,
 * the CPU is much happier reading out of cached system memory
 * than uncached VRAM.
 */
-   if (R600_TEX_IS_TILED(rtex, level))
+   if (R600_TEX_IS_TILED(rtex, level)) {
use_staging_texture = TRUE;
+   }
 
if ((usage  PIPE_TRANSFER_READ)  u_box_volume(box)  1024)
use_staging_texture = TRUE;
@@ -805,15 +805,18 @@ struct pipe_transfer* r600_texture_get_transfer(struct 
pipe_context *ctx,
/* Use a staging texture for uploads if the underlying BO is busy. */
if (!(usage  PIPE_TRANSFER_READ) 
(rctx-ws-cs_is_buffer_referenced(rctx-cs, rtex-resource.cs_buf, 
RADEON_USAGE_READWRITE) ||
-rctx-ws-buffer_is_busy(rtex-resource.buf, 
RADEON_USAGE_READWRITE)))
+rctx-ws-buffer_is_busy(rtex-resource.buf, 
RADEON_USAGE_READWRITE))) {
use_staging_texture = TRUE;
+   }
 
if (!permit_hardware_blit(ctx-screen, texture) ||
-   (texture-flags  R600_RESOURCE_FLAG_TRANSFER))
+   (texture-flags  R600_RESOURCE_FLAG_TRANSFER)) {
use_staging_texture = FALSE;
+   }
 
-   if (use_staging_texture  (usage  PIPE_TRANSFER_MAP_DIRECTLY))
+   if (use_staging_texture  (usage  PIPE_TRANSFER_MAP_DIRECTLY)) {
return NULL;
+   }
 
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
@@ -898,8 +901,9 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
}
 
if (rtex-is_depth  !rtex-is_flushing_texture) {
-   if ((transfer-usage  PIPE_TRANSFER_WRITE)  
rtex-flushed_depth_texture)
+   if ((transfer-usage  

[Mesa-dev] [PATCH] r600g: fix z/stencil texture creation v2

2012-06-15 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Z or stencil textures should not be created with the z/stencil
surface-creation flags, as they are intended to be bound as
textures.

v2: remove broken code

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 src/gallium/drivers/r600/r600_texture.c |   32 --
 1 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c 
b/src/gallium/drivers/r600/r600_texture.c
index 5b15990..fe9a923 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -237,7 +237,7 @@ static void r600_texture_set_array_mode(struct pipe_screen 
*screen,
 
 static int r600_init_surface(struct radeon_surface *surface,
 const struct pipe_resource *ptex,
-unsigned array_mode)
+unsigned array_mode, bool is_transfer)
 {
surface-npix_x = ptex-width0;
surface-npix_y = ptex-height0;
@@ -298,7 +298,7 @@ static int r600_init_surface(struct radeon_surface *surface,
if (ptex-bind  PIPE_BIND_SCANOUT) {
surface-flags |= RADEON_SURF_SCANOUT;
}
-   if (util_format_is_depth_and_stencil(ptex-format)) {
+   if (util_format_is_depth_and_stencil(ptex-format)  !is_transfer) {
surface-flags |= RADEON_SURF_ZBUFFER;
surface-flags |= RADEON_SURF_SBUFFER;
}
@@ -316,11 +316,6 @@ static int r600_setup_surface(struct pipe_screen *screen,
unsigned i;
int r;
 
-   if (util_format_is_depth_or_stencil(rtex-real_format)) {
-   rtex-surface.flags |= RADEON_SURF_ZBUFFER;
-   rtex-surface.flags |= RADEON_SURF_SBUFFER;
-   }
-
r = rscreen-ws-surface_init(rscreen-ws, rtex-surface);
if (r) {
return r;
@@ -572,7 +567,8 @@ r600_texture_create_object(struct pipe_screen *screen,
r600_setup_miptree(screen, rtex, array_mode);
if (rscreen-use_surface_alloc) {
rtex-surface = *surface;
-   r = r600_setup_surface(screen, rtex, array_mode, 
pitch_in_bytes_override);
+   r = r600_setup_surface(screen, rtex, array_mode,
+  pitch_in_bytes_override);
if (r) {
FREE(rtex);
return NULL;
@@ -642,7 +638,8 @@ struct pipe_resource *r600_texture_create(struct 
pipe_screen *screen,
}
}
 
-   r = r600_init_surface(surface, templ, array_mode);
+   r = r600_init_surface(surface, templ, array_mode,
+ templ-flags  R600_RESOURCE_FLAG_TRANSFER);
if (r) {
return NULL;
}
@@ -723,7 +720,7 @@ struct pipe_resource *r600_texture_from_handle(struct 
pipe_screen *screen,
else
array_mode = 0;
 
-   r = r600_init_surface(surface, templ, array_mode);
+   r = r600_init_surface(surface, templ, array_mode, 0);
if (r) {
return NULL;
}
@@ -796,8 +793,9 @@ struct pipe_transfer* r600_texture_get_transfer(struct 
pipe_context *ctx,
 * the CPU is much happier reading out of cached system memory
 * than uncached VRAM.
 */
-   if (R600_TEX_IS_TILED(rtex, level))
+   if (R600_TEX_IS_TILED(rtex, level)) {
use_staging_texture = TRUE;
+   }
 
if ((usage  PIPE_TRANSFER_READ)  u_box_volume(box)  1024)
use_staging_texture = TRUE;
@@ -805,15 +803,18 @@ struct pipe_transfer* r600_texture_get_transfer(struct 
pipe_context *ctx,
/* Use a staging texture for uploads if the underlying BO is busy. */
if (!(usage  PIPE_TRANSFER_READ) 
(rctx-ws-cs_is_buffer_referenced(rctx-cs, rtex-resource.cs_buf, 
RADEON_USAGE_READWRITE) ||
-rctx-ws-buffer_is_busy(rtex-resource.buf, 
RADEON_USAGE_READWRITE)))
+rctx-ws-buffer_is_busy(rtex-resource.buf, 
RADEON_USAGE_READWRITE))) {
use_staging_texture = TRUE;
+   }
 
if (!permit_hardware_blit(ctx-screen, texture) ||
-   (texture-flags  R600_RESOURCE_FLAG_TRANSFER))
+   (texture-flags  R600_RESOURCE_FLAG_TRANSFER)) {
use_staging_texture = FALSE;
+   }
 
-   if (use_staging_texture  (usage  PIPE_TRANSFER_MAP_DIRECTLY))
+   if (use_staging_texture  (usage  PIPE_TRANSFER_MAP_DIRECTLY)) {
return NULL;
+   }
 
trans = CALLOC_STRUCT(r600_transfer);
if (trans == NULL)
@@ -898,8 +899,9 @@ void r600_texture_transfer_destroy(struct pipe_context *ctx,
}
 
if (rtex-is_depth  !rtex-is_flushing_texture) {
-   if ((transfer-usage  PIPE_TRANSFER_WRITE)  
rtex-flushed_depth_texture)
+   if ((transfer-usage  PIPE_TRANSFER_WRITE)  
rtex-flushed_depth_texture) {
r600_blit_push_depth(ctx, rtex);
+   

[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v10

2012-01-10 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind BOs into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K IB size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman
v9: rebase against latest mesa + improvements from Marek & Michel
v10: fix cut/paste bug

Signed-off-by: Jerome Glisse jgli...@redhat.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c   |9 +-
 src/gallium/drivers/r600/evergreen_state.c|   49 --
 src/gallium/drivers/r600/r600_hw_context.c|   47 --
 src/gallium/drivers/r600/r600_pipe.h  |3 +-
 src/gallium/drivers/r600/r600_resource.c  |   11 ++
 src/gallium/drivers/r600/r600_resource.h  |2 +
 src/gallium/drivers/r600/r600_state_common.c  |   14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  177 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |   21 ++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   10 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |   11 ++
 13 files changed, 312 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index bd1d969..e75eaf2 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
struct r600_block *dirty_block = NULL;
struct r600_block *next_block;
uint32_t *pm4;
+   uint64_t va;
 
if (draw-indices) {
ndwords = 11;
@@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx-predicate_drawing);
pm4[3] = draw-vgt_num_instances;
if (draw-indices) {
-   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
-   pm4[5] = draw-indices_bo_offset;
-   pm4[6] = 0;
+   va = r600_resource_va(ctx-screen-screen, 
(void*)draw-indices);
+   va += draw-indices_bo_offset;
+   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
+   pm4[5] = va;
+   pm4[6] = (va  32UL)  0xFF;
pm4[7] = draw-vgt_num_indices;
pm4[8] = draw-vgt_draw_initiator;
pm4[9] = PKT3(PKT3_NOP, 0, ctx-predicate_drawing);
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 7ded03d..aca6136 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1101,8 +1101,8 @@ static struct pipe_sampler_view 
*evergreen_create_sampler_view(struct pipe_conte
rstate-val[1] = (S_030004_TEX_HEIGHT(height - 1) |
  S_030004_TEX_DEPTH(depth - 1) |
  S_030004_ARRAY_MODE(array_mode));
-   rstate-val[2] = tmp-offset[0]  8;
-   rstate-val[3] = tmp-offset[1]  8;
+   rstate-val[2] = (tmp-offset[0] + r600_resource_va(ctx-screen, 
texture))  8;
+   rstate-val[3] = (tmp-offset[1] + r600_resource_va(ctx-screen, 
texture))  8;
rstate-val[4] = (word4 |
  
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
  S_030010_ENDIAN_SWAP(endian) |
@@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, 
struct r600_pipe_state
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype, endian;
-   unsigned offset;
+   uint64_t offset;
unsigned tile_type;
const struct util_format_description *desc;
int i;
@@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context 
*rctx, struct r600_pipe_state
} else /* workaround for linear buffers */
tile_type = 1;
 
+   offset += r600_resource_va(rctx-context.screen, 
state-cbufs[cb]-texture);
+   offset = 8;
+
/* FIXME handle enabling of CB beyond BASE8 which has different offset 
*/
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-   offset  8, 0x, rtex-resource, 

[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v9

2012-01-09 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind BOs into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K IB size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman
v9: rebase against latest mesa + improvements from Marek & Michel

Signed-off-by: Jerome Glisse jgli...@redhat.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c   |9 +-
 src/gallium/drivers/r600/evergreen_state.c|   49 --
 src/gallium/drivers/r600/r600_hw_context.c|   47 --
 src/gallium/drivers/r600/r600_pipe.h  |3 +-
 src/gallium/drivers/r600/r600_resource.c  |   11 ++
 src/gallium/drivers/r600/r600_resource.h  |2 +
 src/gallium/drivers/r600/r600_state_common.c  |   14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  177 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |   21 ++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   10 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |   11 ++
 13 files changed, 312 insertions(+), 48 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index bd1d969..e75eaf2 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
struct r600_block *dirty_block = NULL;
struct r600_block *next_block;
uint32_t *pm4;
+   uint64_t va;
 
if (draw-indices) {
ndwords = 11;
@@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx-predicate_drawing);
pm4[3] = draw-vgt_num_instances;
if (draw-indices) {
-   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
-   pm4[5] = draw-indices_bo_offset;
-   pm4[6] = 0;
+   va = r600_resource_va(ctx-screen-screen, 
(void*)draw-indices);
+   va += draw-indices_bo_offset;
+   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
+   pm4[5] = va;
+   pm4[6] = (va  32UL)  0xFF;
pm4[7] = draw-vgt_num_indices;
pm4[8] = draw-vgt_draw_initiator;
pm4[9] = PKT3(PKT3_NOP, 0, ctx-predicate_drawing);
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 7ded03d..aca6136 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1101,8 +1101,8 @@ static struct pipe_sampler_view 
*evergreen_create_sampler_view(struct pipe_conte
rstate-val[1] = (S_030004_TEX_HEIGHT(height - 1) |
  S_030004_TEX_DEPTH(depth - 1) |
  S_030004_ARRAY_MODE(array_mode));
-   rstate-val[2] = tmp-offset[0]  8;
-   rstate-val[3] = tmp-offset[1]  8;
+   rstate-val[2] = (tmp-offset[0] + r600_resource_va(ctx-screen, 
texture))  8;
+   rstate-val[3] = (tmp-offset[1] + r600_resource_va(ctx-screen, 
texture))  8;
rstate-val[4] = (word4 |
  
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
  S_030010_ENDIAN_SWAP(endian) |
@@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, 
struct r600_pipe_state
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype, endian;
-   unsigned offset;
+   uint64_t offset;
unsigned tile_type;
const struct util_format_description *desc;
int i;
@@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context 
*rctx, struct r600_pipe_state
} else /* workaround for linear buffers */
tile_type = 1;
 
+   offset += r600_resource_va(rctx-context.screen, 
state-cbufs[cb]-texture);
+   offset = 8;
+
/* FIXME handle enabling of CB beyond BASE8 which has different offset 
*/
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-   offset  8, 0x, rtex-resource, 
RADEON_USAGE_READWRITE);
+  

[Mesa-dev] [PATCH] r600g: add support for virtual address space on cayman v8

2012-01-06 Thread j . glisse
From: Jerome Glisse jgli...@redhat.com

Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind BOs into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K IB size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman

Signed-off-by: Jerome Glisse jgli...@redhat.com
Signed-off-by: Alex Deucher alexander.deuc...@amd.com
---
 src/gallium/drivers/r600/evergreen_hw_context.c   |7 +-
 src/gallium/drivers/r600/evergreen_state.c|   49 --
 src/gallium/drivers/r600/r600_hw_context.c|   47 +--
 src/gallium/drivers/r600/r600_pipe.h  |3 +-
 src/gallium/drivers/r600/r600_resource.c  |   11 ++
 src/gallium/drivers/r600/r600_resource.h  |2 +
 src/gallium/drivers/r600/r600_state_common.c  |   14 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c |  165 +
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |2 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |   29 +++-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |5 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |   18 ++-
 src/gallium/winsys/radeon/drm/radeon_winsys.h |   11 ++
 13 files changed, 316 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c 
b/src/gallium/drivers/r600/evergreen_hw_context.c
index 96e8d18..01764ed 100644
--- a/src/gallium/drivers/r600/evergreen_hw_context.c
+++ b/src/gallium/drivers/r600/evergreen_hw_context.c
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
struct r600_block *dirty_block = NULL;
struct r600_block *next_block;
uint32_t *pm4;
+   uint64_t va;
 
if (draw-indices) {
ndwords = 11;
@@ -1174,8 +1175,10 @@ void evergreen_context_draw(struct r600_context *ctx, 
const struct r600_draw *dr
pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx-predicate_drawing);
pm4[3] = draw-vgt_num_instances;
if (draw-indices) {
-   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
-   pm4[5] = draw-indices_bo_offset;
+   va = r600_resource_va(ctx-screen-screen, 
(void*)draw-indices);
+   va += draw-indices_bo_offset;
+   pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx-predicate_drawing);
+   pm4[5] = va;
pm4[6] = 0;
pm4[7] = draw-vgt_num_indices;
pm4[8] = draw-vgt_draw_initiator;
diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index d0c02d5..678d0db 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1099,8 +1099,8 @@ static struct pipe_sampler_view 
*evergreen_create_sampler_view(struct pipe_conte
rstate-val[1] = (S_030004_TEX_HEIGHT(height - 1) |
  S_030004_TEX_DEPTH(depth - 1) |
  S_030004_ARRAY_MODE(array_mode));
-   rstate-val[2] = tmp-offset[0]  8;
-   rstate-val[3] = tmp-offset[1]  8;
+   rstate-val[2] = (tmp-offset[0] + r600_resource_va(ctx-screen, 
texture))  8;
+   rstate-val[3] = (tmp-offset[1] + r600_resource_va(ctx-screen, 
texture))  8;
rstate-val[4] = (word4 |
  
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
  S_030010_ENDIAN_SWAP(endian) |
@@ -1341,7 +1341,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, 
struct r600_pipe_state
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype, endian;
-   unsigned offset;
+   uint64_t offset;
unsigned tile_type;
const struct util_format_description *desc;
int i;
@@ -1441,10 +1441,13 @@ static void evergreen_cb(struct r600_pipe_context 
*rctx, struct r600_pipe_state
} else /* workaround for linear buffers */
tile_type = 1;
 
+   offset += r600_resource_va(rctx-context.screen, 
state-cbufs[cb]-texture);
+   offset = 8;
+
/* FIXME handle enabling of CB beyond BASE8 which has different offset 
*/
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-   offset  8, 0x, rtex-resource, 
RADEON_USAGE_READWRITE);
+   offset, 0x, rtex-resource, 
RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate,