Commit "drm/nouveau: use bo accessors for push buffers" made OUT_RING()
go through nouveau_bo_wr32(), which made it considerably slower.

The 'x11perf -aa10text' benchmark hits OUT_RING() hard, since user
pushbuffers are first copied from user space to the kernel, and then
from the kernel to the real pushbuffer bo using OUT_RING(). The measured
speed was:
- before nouveau_bo_wr32(): 677k/sec
- after: 475k/sec

This patch implements OUT_RINGp() for copying an arbitrary number of
dwords from an array to the pushbuffer bo. All OUT_RING copy loops are
replaced with calls to OUT_RINGp(). This brings aa10text speed to
785k/sec.

The tests were run on an nv28 with an Athlon64 3000+ (x86_64), with
oprofile running.

Signed-off-by: Pekka Paalanen <[email protected]>
---
 drivers/gpu/drm/nouveau/nouveau_dma.c |   13 +++++++++++++
 drivers/gpu/drm/nouveau/nouveau_dma.h |    3 +++
 drivers/gpu/drm/nouveau/nouveau_gem.c |    3 +--
 drivers/gpu/drm/nouveau/nv04_fbcon.c  |   14 ++++++++------
 drivers/gpu/drm/nouveau/nv50_fbcon.c  |    4 ++--
 5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 183a7d5..0025c3d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -90,6 +90,19 @@ nouveau_dma_init(struct nouveau_channel *chan)
        return 0;
 }
 
+void
+OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
+{
+       bool is_iomem;
+       u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
+       mem = &mem[chan->dma.cur];
+       if (is_iomem)
+               memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
+       else
+               memcpy(mem, data, nr_dwords * 4);
+       chan->dma.cur += nr_dwords;
+}
+
 static inline bool
 READ_GET(struct nouveau_channel *chan, uint32_t *get)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 072d9b9..cdaa37d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -109,6 +109,9 @@ OUT_RING(struct nouveau_channel *chan, int data)
        nouveau_bo_wr32(chan->pushbuf_bo, chan->dma.cur++, data);
 }
 
+extern void
+OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords);
+
 static inline void
 BEGIN_RING(struct nouveau_channel *chan, int subc, int mthd, int size)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 4516058..8b2c9e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -528,8 +528,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
        if (ret)
                goto out;
 
-       for (i = 0; i < req->nr_dwords; i++)
-               OUT_RING (chan, pushbuf[i]);
+       OUT_RINGp(chan, pushbuf, req->nr_dwords);
 
        ret = nouveau_fence_emit(fence);
        if (ret) {
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 648b435..14fc87f 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -94,9 +94,12 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
        struct drm_device *dev = par->dev;
        struct drm_nouveau_private *dev_priv = dev->dev_private;
        struct nouveau_channel *chan = dev_priv->channel;
-       uint32_t fg, bg, mask = ~(~0 >> (32 - info->var.bits_per_pixel));
-       uint32_t dsize, width, *data = (uint32_t *) image->data;
-       int j, k = 0;
+       uint32_t fg;
+       uint32_t bg;
+       uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
+       uint32_t dsize;
+       uint32_t width;
+       uint32_t *data = (uint32_t *)image->data;
 
        if (info->state != FBINFO_STATE_RUNNING)
                return;
@@ -140,9 +143,8 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
                }
 
                BEGIN_RING(chan, NvSubGdiRect, 0x0c00, iter_len);
-               for (j = iter_len; j--;)
-                       OUT_RING(chan, data[k++]);
-
+               OUT_RINGp(chan, data, iter_len);
+               data += iter_len;
                dsize -= iter_len;
        }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index d7af9ed..d3807e3 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -139,8 +139,8 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
                dwords -= push;
 
                BEGIN_RING(chan, NvSub2D, 0x40000860, push);
-               while (push--)
-                       OUT_RING(chan, *data++);
+               OUT_RINGp(chan, data, push);
+               data += push;
        }
 
        FIRE_RING (chan);
-- 
1.6.3.3

_______________________________________________
Nouveau mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/nouveau

Reply via email to