Switching OUT_RING() to nouveau_bo_wr32() in commit "drm/nouveau: use bo accessors for push buffers" made OUT_RING() considerably slower.
The 'x11perf -aa10text' benchmark hits OUT_RING hard, since user pushbuffers are first copied from user to kernel, and then from kernel to the real pushbuffer bo using OUT_RING. The speed before nouveau_bo_wr32() was 677k/sec; after it, 475k/sec. This patch implements OUT_RINGp() for copying an arbitrary number of dwords from an array to the pushbuffer bo. All OUT_RING copy loops are replaced with calls to OUT_RINGp(). This brings aa10text speed to 785k/sec. The tests have been run on nv28, Athlon64 3000+ (x86_64), with oprofile running. Signed-off-by: Pekka Paalanen <[email protected]> --- drivers/gpu/drm/nouveau/nouveau_dma.c | 13 +++++++++++++ drivers/gpu/drm/nouveau/nouveau_dma.h | 3 +++ drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +-- drivers/gpu/drm/nouveau/nv04_fbcon.c | 14 ++++++++------ drivers/gpu/drm/nouveau/nv50_fbcon.c | 4 ++-- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c index 183a7d5..0025c3d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.c +++ b/drivers/gpu/drm/nouveau/nouveau_dma.c @@ -90,6 +90,19 @@ nouveau_dma_init(struct nouveau_channel *chan) return 0; } +void +OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords) +{ + bool is_iomem; + u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem); + mem = &mem[chan->dma.cur]; + if (is_iomem) + memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4); + else + memcpy(mem, data, nr_dwords * 4); + chan->dma.cur += nr_dwords; +} + static inline bool READ_GET(struct nouveau_channel *chan, uint32_t *get) { diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h index 072d9b9..cdaa37d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dma.h +++ b/drivers/gpu/drm/nouveau/nouveau_dma.h @@ -109,6 +109,9 @@ OUT_RING(struct nouveau_channel *chan, int data) nouveau_bo_wr32(chan->pushbuf_bo, chan->dma.cur++, data); } +extern void +OUT_RINGp(struct 
nouveau_channel *chan, const void *data, unsigned nr_dwords); + static inline void BEGIN_RING(struct nouveau_channel *chan, int subc, int mthd, int size) { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 4516058..8b2c9e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -528,8 +528,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, if (ret) goto out; - for (i = 0; i < req->nr_dwords; i++) - OUT_RING (chan, pushbuf[i]); + OUT_RINGp(chan, pushbuf, req->nr_dwords); ret = nouveau_fence_emit(fence); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c index 648b435..14fc87f 100644 --- a/drivers/gpu/drm/nouveau/nv04_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c @@ -94,9 +94,12 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) struct drm_device *dev = par->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; struct nouveau_channel *chan = dev_priv->channel; - uint32_t fg, bg, mask = ~(~0 >> (32 - info->var.bits_per_pixel)); - uint32_t dsize, width, *data = (uint32_t *) image->data; - int j, k = 0; + uint32_t fg; + uint32_t bg; + uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); + uint32_t dsize; + uint32_t width; + uint32_t *data = (uint32_t *)image->data; if (info->state != FBINFO_STATE_RUNNING) return; @@ -140,9 +143,8 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) } BEGIN_RING(chan, NvSubGdiRect, 0x0c00, iter_len); - for (j = iter_len; j--;) - OUT_RING(chan, data[k++]); - + OUT_RINGp(chan, data, iter_len); + data += iter_len; dsize -= iter_len; } diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c index d7af9ed..d3807e3 100644 --- a/drivers/gpu/drm/nouveau/nv50_fbcon.c +++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c @@ -139,8 +139,8 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image 
*image) dwords -= push; BEGIN_RING(chan, NvSub2D, 0x40000860, push); - while (push--) - OUT_RING(chan, *data++); + OUT_RINGp(chan, data, push); + data += push; } FIRE_RING (chan); -- 1.6.3.3 _______________________________________________ Nouveau mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/nouveau
