.gitignore | 1 ChangeLog | 197 ++++++++++++++ configure.ac | 2 debian/changelog | 9 debian/libdrm-intel1.symbols | 1 debian/rules | 2 libdrm/intel/Makefile.am | 2 libdrm/intel/intel_bufmgr.c | 7 libdrm/intel/intel_bufmgr.h | 1 libdrm/intel/intel_bufmgr_gem.c | 28 +- libdrm/intel/intel_bufmgr_priv.h | 8 libdrm/nouveau/Makefile.am | 3 libdrm/nouveau/nouveau_bo.c | 405 +++--------------------------- libdrm/nouveau/nouveau_bo.h | 8 libdrm/nouveau/nouveau_channel.c | 66 ---- libdrm/nouveau/nouveau_class.h | 505 +++++++++++++++++++++++++++++++++----- libdrm/nouveau/nouveau_device.c | 10 libdrm/nouveau/nouveau_dma.c | 217 ---------------- libdrm/nouveau/nouveau_dma.h | 154 ----------- libdrm/nouveau/nouveau_drmif.h | 2 libdrm/nouveau/nouveau_fence.c | 243 ------------------ libdrm/nouveau/nouveau_notifier.c | 4 libdrm/nouveau/nouveau_private.h | 92 ------ libdrm/nouveau/nouveau_pushbuf.c | 209 ++++++++++----- libdrm/radeon/radeon_bo.h | 28 ++ libdrm/radeon/radeon_bo_gem.c | 81 +++++- libdrm/radeon/radeon_cs.h | 9 libdrm/radeon/radeon_cs_gem.c | 12 libdrm/radeon/radeon_cs_space.c | 2 libdrm/radeon/radeon_track.c | 1 shared-core/drm.h | 1 shared-core/nouveau_drm.h | 124 +-------- shared-core/radeon_drm.h | 57 +++- 33 files changed, 1059 insertions(+), 1432 deletions(-)
New commits: commit a209119784450b02794fe94545875f071777c4c0 Author: Julien Cristau <[email protected]> Date: Sat Sep 5 12:57:20 2009 +0200 Prepare changelog for upload diff --git a/debian/changelog b/debian/changelog index c809e49..34bb470 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -libdrm (2.4.13-1) UNRELEASED; urgency=low +libdrm (2.4.13-1) unstable; urgency=low [ Christopher James Halse Rogers ] * debian/control: @@ -9,7 +9,7 @@ libdrm (2.4.13-1) UNRELEASED; urgency=low * New upstream release. * Update libdrm-intel1.symbols. - -- Christopher James Halse Rogers <[email protected]> Wed, 11 Feb 2009 18:12:41 +1100 + -- Julien Cristau <[email protected]> Sat, 05 Sep 2009 13:15:36 +0200 libdrm (2.4.12-1) unstable; urgency=low commit c5927ee69fb74eb278f161de2b334fb1f9862f24 Author: Julien Cristau <[email protected]> Date: Sat Sep 5 12:54:35 2009 +0200 Update changelogs and symbols file for new release diff --git a/ChangeLog b/ChangeLog index 4ef5518..9932baa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,200 @@ +commit 73b59c894380995a2889b98e79acadd2da0bb237 +Author: Eric Anholt <[email protected]> +Date: Fri Aug 28 15:20:22 2009 -0700 + + Bump to version 2.4.13 for release. + +commit 8214a65ad1f4ccd4966e0def0d43f0c4289e4bc6 +Author: Eric Anholt <[email protected]> +Date: Thu Aug 27 18:32:07 2009 -0700 + + Add drm_intel_bo_busy to query whether mapping a BO would block. + +commit 19d6fadfa29993b261ebac2869b2289f6d3091c3 +Author: Michel Dänzer <[email protected]> +Date: Fri Aug 28 11:42:07 2009 +0200 + + Revert "libdrm_radeon: Always wait for BO idle in bo_map." + + This reverts commit 0a732983f059c353b267b6bf877e1f0eea4e033f. + + Paul Nieminen and Dave Airlie pointed out on IRC that this shouldn't be + necessary. I was seeing visual corruption in X before I made this change, but + I can't reproduce that anymore so it was probably an unrelated issue. + +commit 0a732983f059c353b267b6bf877e1f0eea4e033f +Author: Michel Dänzer <[email protected]> +Date: Thu Aug 27 08:36:58 2009 +0200 + + libdrm_radeon: Always wait for BO idle in bo_map. + + This allows users to eliminate explicit bo_wait calls before bo_map calls. + +commit ce6c68dc8a893ed8673f49d381a8500c2ee3c29f +Author: Jakob Bornecrantz <[email protected]> +Date: Fri Aug 21 14:06:51 2009 +0200 + + Kill last remnant of replacefb ioctl + + Kenrels doesn't expose this ioctl + +commit 02a4d22e95de863fe3e01a9f5658ef81417c28cd +Author: Alex Deucher <[email protected]> +Date: Mon Aug 24 18:15:03 2009 -0400 + + radeon: pull in z pipe changes from kernel + +commit caad8d85559709301c00760b9e8707d57f8c6c67 +Author: Pauli Nieminen <[email protected]> +Date: Sat Aug 22 13:16:18 2009 +1000 + + radeon: add support for busy/domain check interface. + + airlied: modified the interface to drop busy return value, just return + it normally, also fixed int->uint32_t for domain + + Signed-off-by: Pauli Nieminen <[email protected]> + +commit cbb3ae3dab9dc82d95524726135b8d6ef86bcf27 +Author: Ben Skeggs <[email protected]> +Date: Wed Aug 19 15:55:05 2009 +1000 + + nouveau: fix a thinko in copyless pushbuf ioctl + + No idea why G80 doesn't hit this, but, this fixes at least one NV40 card. + +commit 8c43b79b21929e9e54e13e892f7787e222e73f39 +Author: Pauli Nieminen <[email protected]> +Date: Tue Aug 18 18:51:38 2009 +0300 + + libdrm_radeon: Optimize copy of table to cs buffer with specialized call. + + Using this call in OUT_BATCH_TABLE reduces radeonEmitState cpu usage from + 9% to 5% and emit_vpu goes from 7% to 1.5%. I did use calgrind to profile + gears for cpu hotspots with r500 card. + + Signed-off-by: Pauli Nieminen <[email protected]> + +commit a474fd978c0dedbed21b5dff24126acb1c7effef +Author: Pauli Nieminen <[email protected]> +Date: Tue Aug 18 18:51:37 2009 +0300 + + libdrm_radeon: Fix loops so that compiler can optimize them. + + GCC did war about optimization not possible because possible forever loop. + + Signed-off-by: Pauli Nieminen <[email protected]> + +commit 64cef1e46554fbf82388acfcfc8051ce956a3dc2 +Author: Pauli Nieminen <[email protected]> +Date: Fri Aug 7 20:03:26 2009 +0300 + + libdrm/radeon: Update head of linked list not to point freed memory. + + Signed-off-by: Pauli Nieminen <[email protected]> + +commit 28f4bfa04b8ad4dfcc55027f4b2385f4dd6c23c5 +Author: Ben Skeggs <[email protected]> +Date: Wed Aug 12 14:21:00 2009 +1000 + + nouveau: support for copy-less pushbuf ioctl + +commit 250ab3a38eb6ef18d747717cabd0195ad04a82e0 +Author: Ben Skeggs <[email protected]> +Date: Tue Aug 18 14:25:50 2009 +1000 + + nouveau: for the moment, assert if we exceed some reloc limits + + Nasty, but nicer than silently not writing into the pushbuf + +commit 1978f6d8d1215a9501882eb074901bcd0dfc0775 +Author: Dave Airlie <[email protected]> +Date: Mon Aug 17 21:21:02 2009 +1000 + + radeon: fix bo wait at map time. + +commit f7996165fffe0a835752e8b9bb6b4d81bba5c91d +Author: Christoph Brill <[email protected]> +Date: Sun Aug 16 08:26:25 2009 +0200 + + Filter radeon pkgconfig file as do intel and nouveau + +commit 1d465178fbab77a9c0e830ea8c47bf61735def71 +Author: Dave Airlie <[email protected]> +Date: Sat Aug 15 21:32:35 2009 +1000 + + radeon: fix GTT writing space check + + Noticed by vehemens on irc. + + Signed-off-by: Dave Airlie <[email protected]> + +commit d74c67fb13d8c3e8c2e5968d827285d147a5dfc0 +Author: Anssi Hannula <[email protected]> +Date: Fri Jul 24 21:58:43 2009 +0300 + + link libdrm_intel with -lrt for new use of clock_gettime(). + + Signed-off-by: Eric Anholt <[email protected]> + +commit 4507863058a10d00c982975daf396f83caee0fe2 +Author: Dave Airlie <[email protected]> +Date: Sat Aug 1 17:19:43 2009 +1000 + + libdrm_radeon: add tiling support + +commit 322cf6cf736b22b62656ac0431936b3cdb784038 +Author: Dave Airlie <[email protected]> +Date: Sat Aug 1 16:47:24 2009 +1000 + + radeon: don't map/unmap explicitly. + + This caches the mapping and just use mapping as a sync point + +commit 5a73f066ba149816cc0fc2de4b97ec4714cf8ebc +Author: Ben Skeggs <[email protected]> +Date: Tue Jul 28 08:12:21 2009 +1000 + + nouveau: don't overwrite user-specified bo size needlessly + +commit 0bf8fb3bce6b4d9b4820d38d37e1ac1e73e22d23 +Author: Ben Skeggs <[email protected]> +Date: Tue Jul 28 07:46:20 2009 +1000 + + nouveau: user buffers need to be mappable + +commit 001331f4f1f094ef02497aa618ae5eeb2febedfb +Author: Ben Skeggs <[email protected]> +Date: Mon Jul 27 07:23:09 2009 +1000 + + nouveau: drm api 0.0.15, update object header, remove fake bo support + +commit 30449829c0347dc7dbe29acb13e49e2f2cb72ae9 +Author: Maarten Maathuis <[email protected]> +Date: Mon Jul 27 19:24:13 2009 +0200 + + libdrm/nouveau: unmap before ufree means nvbo->sysmem != NULL, which inbalances cpu_prep/cpu_finish + + - The bo was mapped with sysmem == NULL, so this means cpu prep is called. + - The bo was unmapped with sysmem != NULL, so this means cpu finish is not called. + - This can lead to a non-zero "cpu writers" count in ttm_bo. + +commit 9aed44beeac4f250a58c792d64a4dee1dde3d086 +Author: Alex Deucher <[email protected]> +Date: Wed Jul 15 14:51:38 2009 -0400 + + Add RADEON_INIT_R600_CP too + + this rounds out the r6xx/r7xx stuff in radeon_drm.h + +commit 0411c37409102c71e8ba980f9afb2d4a8c64189d +Author: Alex Deucher <[email protected]> +Date: Wed Jul 15 14:40:41 2009 -0400 + + Add R600_SCRATCH_REG_OFFSET to radeon_drm.h + + This lets us libdrm from master for r6xx/r7xx 3d + commit eea95ed8af24300e5a5d2489dfe0d73c24300651 Author: Eric Anholt <[email protected]> Date: Wed Jul 15 10:04:44 2009 -0700 diff --git a/debian/changelog b/debian/changelog index ac0fb16..c809e49 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,14 @@ -libdrm (2.4.12-2) UNRELEASED; urgency=low +libdrm (2.4.13-1) UNRELEASED; urgency=low + [ Christopher James Halse Rogers ] * debian/control: + Remove scary 'built from DRM snapshot' warning from long description of libdrm-intel1{,-dbg} + [ Julien Cristau ] + * New upstream release. + * Update libdrm-intel1.symbols. + -- Christopher James Halse Rogers <[email protected]> Wed, 11 Feb 2009 18:12:41 +1100 libdrm (2.4.12-1) unstable; urgency=low diff --git a/debian/libdrm-intel1.symbols b/debian/libdrm-intel1.symbols index 994f032..9910249 100644 --- a/debian/libdrm-intel1.symbols +++ b/debian/libdrm-intel1.symbols @@ -1,6 +1,7 @@ libdrm_intel.so.1 libdrm-intel1 #MINVER# drm_intel_bo_al...@base 2.4.1 drm_intel_bo_alloc_for_ren...@base 2.4.5 + drm_intel_bo_b...@base 2.4.13 drm_intel_bo_disable_re...@base 2.4.10 drm_intel_bo_emit_re...@base 2.4.1 drm_intel_bo_e...@base 2.4.1 diff --git a/debian/rules b/debian/rules index e77bacf..284509d 100755 --- a/debian/rules +++ b/debian/rules @@ -105,7 +105,7 @@ binary-arch: build install dh_compress dh_fixperms dh_makeshlibs -plibdrm2 -V'libdrm2 (>= 2.4.3)' -- -c4 - dh_makeshlibs -plibdrm-intel1 -V'libdrm-intel1 (>= 2.4.11)' -- -c4 + dh_makeshlibs -plibdrm-intel1 -V'libdrm-intel1 (>= 2.4.13)' -- -c4 dh_installdeb dh_shlibdeps dh_gencontrol commit 73b59c894380995a2889b98e79acadd2da0bb237 Author: Eric Anholt <[email protected]> Date: Fri Aug 28 15:20:22 2009 -0700 Bump to version 2.4.13 for release. diff --git a/configure.ac b/configure.ac index 0794a2c..425417e 100644 --- a/configure.ac +++ b/configure.ac @@ -19,7 +19,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. AC_PREREQ(2.60) -AC_INIT([libdrm], 2.4.12, [[email protected]], libdrm) +AC_INIT([libdrm], 2.4.13, [[email protected]], libdrm) AC_USE_SYSTEM_EXTENSIONS AC_CONFIG_SRCDIR([Makefile.am]) AM_INIT_AUTOMAKE([dist-bzip2]) commit 8214a65ad1f4ccd4966e0def0d43f0c4289e4bc6 Author: Eric Anholt <[email protected]> Date: Thu Aug 27 18:32:07 2009 -0700 Add drm_intel_bo_busy to query whether mapping a BO would block. diff --git a/libdrm/intel/intel_bufmgr.c b/libdrm/intel/intel_bufmgr.c index f170e7f..219c761 100644 --- a/libdrm/intel/intel_bufmgr.c +++ b/libdrm/intel/intel_bufmgr.c @@ -220,6 +220,13 @@ int drm_intel_bo_disable_reuse(drm_intel_bo *bo) return 0; } +int drm_intel_bo_busy(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_busy) + return bo->bufmgr->bo_busy(bo); + return 0; +} + int drm_intel_get_pipe_from_crtc_id (drm_intel_bufmgr *bufmgr, int crtc_id) { diff --git a/libdrm/intel/intel_bufmgr.h b/libdrm/intel/intel_bufmgr.h index 758558d..218b759 100644 --- a/libdrm/intel/intel_bufmgr.h +++ b/libdrm/intel/intel_bufmgr.h @@ -107,6 +107,7 @@ int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode, int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t *tiling_mode, uint32_t *swizzle_mode); int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t *name); +int drm_intel_bo_busy(drm_intel_bo *bo); int drm_intel_bo_disable_reuse(drm_intel_bo *bo); diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c index 737ceae..baa0ee6 100644 --- a/libdrm/intel/intel_bufmgr_gem.c +++ b/libdrm/intel/intel_bufmgr_gem.c @@ -314,6 +314,22 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo) return 0; } +static int +drm_intel_gem_bo_busy(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + struct drm_i915_gem_busy busy; + int ret; + + memset(&busy, 0, sizeof(busy)); + busy.handle = bo_gem->gem_handle; + + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + + return (ret == 0 && busy.busy); +} + static drm_intel_bo * drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name, unsigned long size, unsigned int alignment, @@ -344,8 +360,6 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name, pthread_mutex_lock(&bufmgr_gem->lock); /* Get a buffer out of the cache if available */ if (bucket != NULL && bucket->num_entries > 0) { - struct drm_i915_gem_busy busy; - if (for_render) { /* Allocate new render-target BOs from the tail (MRU) * of the list, as it will likely be hot in the GPU cache @@ -364,13 +378,8 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name, */ bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head); - memset(&busy, 0, sizeof(busy)); - busy.handle = bo_gem->gem_handle; - - ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); - alloc_from_cache = (ret == 0 && busy.busy == 0); - - if (alloc_from_cache) { + if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { + alloc_from_cache = 1; DRMLISTDEL(&bo_gem->head); bucket->num_entries--; } @@ -1491,6 +1500,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; + bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; bufmgr_gem->bufmgr.debug = 0; bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space; diff --git a/libdrm/intel/intel_bufmgr_priv.h b/libdrm/intel/intel_bufmgr_priv.h index 0098076..af17c12 100644 --- a/libdrm/intel/intel_bufmgr_priv.h +++ b/libdrm/intel/intel_bufmgr_priv.h @@ -177,6 +177,12 @@ struct _drm_intel_bufmgr { */ int (*bo_flink)(drm_intel_bo *bo, uint32_t *name); + /** + * Returns 1 if mapping the buffer for write could cause the process + * to block, due to the object being active in the GPU. + */ + int (*bo_busy)(drm_intel_bo *bo); + int (*check_aperture_space)(drm_intel_bo **bo_array, int count); /** @@ -200,7 +206,7 @@ struct _drm_intel_bufmgr { * \param crtc_id the crtc identifier */ int (*get_pipe_from_crtc_id)(drm_intel_bufmgr *bufmgr, int crtc_id); - + int debug; /**< Enables verbose debugging printouts */ }; commit 19d6fadfa29993b261ebac2869b2289f6d3091c3 Author: Michel Dänzer <[email protected]> Date: Fri Aug 28 11:42:07 2009 +0200 Revert "libdrm_radeon: Always wait for BO idle in bo_map." This reverts commit 0a732983f059c353b267b6bf877e1f0eea4e033f. Paul Nieminen and Dave Airlie pointed out on IRC that this shouldn't be necessary. I was seeing visual corruption in X before I made this change, but I can't reproduce that anymore so it was probably an unrelated issue. diff --git a/libdrm/radeon/radeon_bo_gem.c b/libdrm/radeon/radeon_bo_gem.c index add55db..76d80e7 100644 --- a/libdrm/radeon/radeon_bo_gem.c +++ b/libdrm/radeon/radeon_bo_gem.c @@ -152,7 +152,10 @@ static int bo_map(struct radeon_bo *bo, int write) int r; void *ptr; - if (bo_gem->map_count++ != 0 || bo_gem->priv_ptr) { + if (bo_gem->map_count++ != 0) { + return 0; + } + if (bo_gem->priv_ptr) { goto wait; } commit 0a732983f059c353b267b6bf877e1f0eea4e033f Author: Michel Dänzer <[email protected]> Date: Thu Aug 27 08:36:58 2009 +0200 libdrm_radeon: Always wait for BO idle in bo_map. This allows users to eliminate explicit bo_wait calls before bo_map calls. diff --git a/libdrm/radeon/radeon_bo_gem.c b/libdrm/radeon/radeon_bo_gem.c index 76d80e7..add55db 100644 --- a/libdrm/radeon/radeon_bo_gem.c +++ b/libdrm/radeon/radeon_bo_gem.c @@ -152,10 +152,7 @@ static int bo_map(struct radeon_bo *bo, int write) int r; void *ptr; - if (bo_gem->map_count++ != 0) { - return 0; - } - if (bo_gem->priv_ptr) { + if (bo_gem->map_count++ != 0 || bo_gem->priv_ptr) { goto wait; } commit ce6c68dc8a893ed8673f49d381a8500c2ee3c29f Author: Jakob Bornecrantz <[email protected]> Date: Fri Aug 21 14:06:51 2009 +0200 Kill last remnant of replacefb ioctl Kenrels doesn't expose this ioctl diff --git a/shared-core/drm.h b/shared-core/drm.h index 42a6c23..d97844f 100644 --- a/shared-core/drm.h +++ b/shared-core/drm.h @@ -795,7 +795,6 @@ struct drm_gem_open { #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, uint32_t) -#define DRM_IOCTL_MODE_REPLACEFB DRM_IOWR(0xB0, struct drm_mode_fb_cmd) /*...@}*/ commit 02a4d22e95de863fe3e01a9f5658ef81417c28cd Author: Alex Deucher <[email protected]> Date: Mon Aug 24 18:15:03 2009 -0400 radeon: pull in z pipe changes from kernel diff --git a/shared-core/radeon_drm.h b/shared-core/radeon_drm.h index 6659de7..47f1952 100644 --- a/shared-core/radeon_drm.h +++ b/shared-core/radeon_drm.h @@ -706,6 +706,7 @@ typedef struct drm_radeon_indirect { #define RADEON_PARAM_FB_LOCATION 14 /* FB location */ #define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */ #define RADEON_PARAM_DEVICE_ID 16 +#define RADEON_PARAM_NUM_Z_PIPES 17 /* num Z pipes */ typedef struct drm_radeon_getparam { int param; @@ -894,6 +895,7 @@ struct drm_radeon_cs { #define RADEON_INFO_DEVICE_ID 0x00 #define RADEON_INFO_NUM_GB_PIPES 0x01 +#define RADEON_INFO_NUM_Z_PIPES 0x02 struct drm_radeon_info { uint32_t request; commit caad8d85559709301c00760b9e8707d57f8c6c67 Author: Pauli Nieminen <[email protected]> Date: Sat Aug 22 13:16:18 2009 +1000 radeon: add support for busy/domain check interface. airlied: modified the interface to drop busy return value, just return it normally, also fixed int->uint32_t for domain Signed-off-by: Pauli Nieminen <[email protected]> diff --git a/libdrm/radeon/radeon_bo.h b/libdrm/radeon/radeon_bo.h index 09ad068..1e2e6c2 100644 --- a/libdrm/radeon/radeon_bo.h +++ b/libdrm/radeon/radeon_bo.h @@ -73,6 +73,7 @@ struct radeon_bo_funcs { uint32_t pitch); int (*bo_get_tiling)(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch); + int (*bo_is_busy)(struct radeon_bo *bo, uint32_t *domain); }; struct radeon_bo_manager { @@ -166,6 +167,15 @@ static inline int _radeon_bo_wait(struct radeon_bo *bo, return bo->bom->funcs->bo_wait(bo); } +static inline int _radeon_bo_is_busy(struct radeon_bo *bo, + uint32_t *domain, + const char *file, + const char *func, + int line) +{ + return bo->bom->funcs->bo_is_busy(bo, domain); +} + static inline int radeon_bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch) { @@ -199,5 +209,7 @@ static inline int radeon_bo_is_static(struct radeon_bo *bo) _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) #define radeon_bo_wait(bo) \ _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) +#define radeon_bo_is_busy(bo, domain) \ + _radeon_bo_is_busy(bo, domain, __FILE__, __func__, __LINE__) #endif diff --git a/libdrm/radeon/radeon_bo_gem.c b/libdrm/radeon/radeon_bo_gem.c index cf59a35..76d80e7 100644 --- a/libdrm/radeon/radeon_bo_gem.c +++ b/libdrm/radeon/radeon_bo_gem.c @@ -209,6 +209,21 @@ static int bo_wait(struct radeon_bo *bo) return ret; } +static int bo_is_busy(struct radeon_bo *bo, uint32_t *domain) +{ + struct drm_radeon_gem_busy args; + int ret; + + args.handle = bo->handle; + args.domain = 0; + + ret = drmCommandWriteRead(bo->bom->fd, DRM_RADEON_GEM_BUSY, + &args, sizeof(args)); + + *domain = args.domain; + return ret; +} + static int bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch) { @@ -257,6 +272,7 @@ static struct radeon_bo_funcs bo_gem_funcs = { NULL, bo_set_tiling, bo_get_tiling, + bo_is_busy, }; struct radeon_bo_manager *radeon_bo_manager_gem_ctor(int fd) diff --git a/shared-core/radeon_drm.h b/shared-core/radeon_drm.h index 3745ac5..6659de7 100644 --- a/shared-core/radeon_drm.h +++ b/shared-core/radeon_drm.h @@ -505,6 +505,7 @@ typedef struct { #define DRM_RADEON_INFO 0x27 #define DRM_RADEON_GEM_SET_TILING 0x28 #define DRM_RADEON_GEM_GET_TILING 0x29 +#define DRM_RADEON_GEM_BUSY 0x2a #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -545,6 +546,7 @@ typedef struct { #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) #define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) #define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) +#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) typedef struct drm_radeon_init { enum { @@ -835,7 +837,7 @@ struct drm_radeon_gem_wait_idle { struct drm_radeon_gem_busy { uint32_t handle; - uint32_t busy; + uint32_t domain; }; struct drm_radeon_gem_pread { commit cbb3ae3dab9dc82d95524726135b8d6ef86bcf27 Author: Ben Skeggs <[email protected]> Date: Wed Aug 19 15:55:05 2009 +1000 nouveau: fix a thinko in copyless pushbuf ioctl No idea why G80 doesn't hit this, but, this fixes at least one NV40 card. diff --git a/libdrm/nouveau/nouveau_pushbuf.c b/libdrm/nouveau/nouveau_pushbuf.c index 86d5a4e..1192e22 100644 --- a/libdrm/nouveau/nouveau_pushbuf.c +++ b/libdrm/nouveau/nouveau_pushbuf.c @@ -267,6 +267,8 @@ nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) *(nvpb->base.cur++) = nvpb->cal_suffix0; *(nvpb->base.cur++) = nvpb->cal_suffix1; + if (nvpb->base.remaining > 2) /* space() will fixup if not */ + nvpb->base.remaining -= 2; req.channel = chan->id; req.handle = nvpb->buffer[nvpb->current]->handle; commit 8c43b79b21929e9e54e13e892f7787e222e73f39 Author: Pauli Nieminen <[email protected]> Date: Tue Aug 18 18:51:38 2009 +0300 libdrm_radeon: Optimize copy of table to cs buffer with specialized call. Using this call in OUT_BATCH_TABLE reduces radeonEmitState cpu usage from 9% to 5% and emit_vpu goes from 7% to 1.5%. I did use calgrind to profile gears for cpu hotspots with r500 card. Signed-off-by: Pauli Nieminen <[email protected]> diff --git a/libdrm/radeon/radeon_cs.h b/libdrm/radeon/radeon_cs.h index 7efec7e..1117a85 100644 --- a/libdrm/radeon/radeon_cs.h +++ b/libdrm/radeon/radeon_cs.h @@ -201,6 +201,15 @@ static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) } } +static inline void radeon_cs_write_table(struct radeon_cs *cs, void *data, uint32_t size) +{ + memcpy(cs->packets + cs->cdw, data, size * 4); + cs->cdw += size; + if (cs->section) { + cs->section_cdw += size; + } +} + static inline void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data) { cs->space_flush_fn = fn; commit a474fd978c0dedbed21b5dff24126acb1c7effef Author: Pauli Nieminen <[email protected]> Date: Tue Aug 18 18:51:37 2009 +0300 libdrm_radeon: Fix loops so that compiler can optimize them. GCC did war about optimization not possible because possible forever loop. Signed-off-by: Pauli Nieminen <[email protected]> diff --git a/libdrm/radeon/radeon_cs_gem.c b/libdrm/radeon/radeon_cs_gem.c index 264b067..a0db53b 100644 --- a/libdrm/radeon/radeon_cs_gem.c +++ b/libdrm/radeon/radeon_cs_gem.c @@ -354,21 +354,21 @@ static void cs_gem_print(struct radeon_cs *cs, FILE *file) unsigned opcode; unsigned reg; unsigned cnt; - int i, j; + unsigned int i, j; for (i = 0; i < cs->cdw;) { - cnt = CP_PACKET_GET_COUNT(cs->packets[i]); + cnt = CP_PACKET_GET_COUNT(cs->packets[i]) + 1; switch (CP_PACKET_GET_TYPE(cs->packets[i])) { case PACKET_TYPE0: - fprintf(file, "Pkt0 at %d (%d dwords):\n", i, cnt + 1); + fprintf(file, "Pkt0 at %d (%d dwords):\n", i, cnt); reg = CP_PACKET0_GET_REG(cs->packets[i]); if (CP_PACKET0_GET_ONE_REG_WR(cs->packets[i++])) { - for (j = 0; j <= cnt; j++) { + for (j = 0; j < cnt; j++) { fprintf(file, " 0x%08X -> 0x%04X\n", cs->packets[i++], reg); } } else { - for (j = 0; j <= cnt; j++) { + for (j = 0; j < cnt; j++) { fprintf(file, " 0x%08X -> 0x%04X\n", cs->packets[i++], reg); reg += 4; @@ -410,7 +410,7 @@ static void cs_gem_print(struct radeon_cs *cs, FILE *file) fprintf(file, "Unknow opcode 0x%02X at %d\n", opcode, i); return; } - for (j = 0; j <= cnt; j++) { + for (j = 0; j < cnt; j++) { fprintf(file, " 0x%08X\n", cs->packets[i++]); } break; commit 64cef1e46554fbf82388acfcfc8051ce956a3dc2 Author: Pauli Nieminen <[email protected]> Date: Fri Aug 7 20:03:26 2009 +0300 libdrm/radeon: Update head of linked list not to point freed memory. Signed-off-by: Pauli Nieminen <[email protected]> diff --git a/libdrm/radeon/radeon_track.c b/libdrm/radeon/radeon_track.c index 1623906..9ab0927 100644 --- a/libdrm/radeon/radeon_track.c +++ b/libdrm/radeon/radeon_track.c @@ -137,4 +137,5 @@ void radeon_tracker_print(struct radeon_tracker *tracker, FILE *file) track = track->next; free(tmp); } + tracker->tracks.next = NULL; } commit 28f4bfa04b8ad4dfcc55027f4b2385f4dd6c23c5 Author: Ben Skeggs <[email protected]> Date: Wed Aug 12 14:21:00 2009 +1000 nouveau: support for copy-less pushbuf ioctl diff --git a/libdrm/nouveau/nouveau_private.h b/libdrm/nouveau/nouveau_private.h index 49dde5e..67144e3 100644 --- a/libdrm/nouveau/nouveau_private.h +++ b/libdrm/nouveau/nouveau_private.h @@ -36,11 +36,17 @@ #include "nouveau_resource.h" #include "nouveau_pushbuf.h" +#define CALPB_BUFFERS 4 +#define CALPB_BUFSZ 16384 struct nouveau_pushbuf_priv { struct nouveau_pushbuf base; int use_cal; - struct nouveau_bo *buffer; + uint32_t cal_suffix0; + uint32_t cal_suffix1; + struct nouveau_bo *buffer[CALPB_BUFFERS]; + int current; + int current_offset; unsigned *pushbuf; unsigned size; diff --git a/libdrm/nouveau/nouveau_pushbuf.c b/libdrm/nouveau/nouveau_pushbuf.c index 480dbd2..86d5a4e 100644 --- a/libdrm/nouveau/nouveau_pushbuf.c +++ b/libdrm/nouveau/nouveau_pushbuf.c @@ -59,7 +59,6 @@ nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, struct nouveau_bo *bo, uint32_t data, uint32_t data2, uint32_t flags, uint32_t vor, uint32_t tor) { - struct nouveau_device_priv *nvdev = nouveau_device(chan->device); struct nouveau_pushbuf_priv *nvpb = nouveau_pushbuf(chan->pushbuf); struct drm_nouveau_gem_pushbuf_reloc *r; struct drm_nouveau_gem_pushbuf_bo *pbbo; @@ -119,11 +118,50 @@ nouveau_pushbuf_emit_reloc(struct nouveau_channel *chan, void *ptr, } static int +nouveau_pushbuf_space_call(struct nouveau_channel *chan, unsigned min) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + struct nouveau_bo *bo; + int ret; + + if (min < PB_MIN_USER_DWORDS) + min = PB_MIN_USER_DWORDS; + + nvpb->current_offset = nvpb->base.cur - nvpb->pushbuf; + if (nvpb->current_offset + min + 2 <= nvpb->size) + return 0; + + nvpb->current++; + if (nvpb->current == CALPB_BUFFERS) + nvpb->current = 0; + bo = nvpb->buffer[nvpb->current]; + + ret = nouveau_bo_map(bo, NOUVEAU_BO_WR); + if (ret) + return ret; + + nvpb->size = (bo->size - 8) / 4; + nvpb->pushbuf = bo->map; + nvpb->current_offset = 0; + + nvpb->base.channel = chan; + nvpb->base.remaining = nvpb->size; + nvpb->base.cur = nvpb->pushbuf; + + nouveau_bo_unmap(bo); + return 0; +} + +static int nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) { struct nouveau_channel_priv *nvchan = nouveau_channel(chan); struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + if (nvpb->use_cal) + return nouveau_pushbuf_space_call(chan, min); + if (nvpb->pushbuf) { free(nvpb->pushbuf); nvpb->pushbuf = NULL; @@ -139,13 +177,69 @@ nouveau_pushbuf_space(struct nouveau_channel *chan, unsigned min) return 0; } +static void +nouveau_pushbuf_fini_call(struct nouveau_channel *chan) +{ + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + int i; + + for (i = 0; i < CALPB_BUFFERS; i++) + nouveau_bo_ref(NULL, &nvpb->buffer[i]); + nvpb->use_cal = 0; + nvpb->pushbuf = NULL; +} + +static void +nouveau_pushbuf_init_call(struct nouveau_channel *chan) +{ + struct drm_nouveau_gem_pushbuf_call req; + struct nouveau_channel_priv *nvchan = nouveau_channel(chan); + struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + struct nouveau_device *dev = chan->device; + int i, ret; + + req.channel = chan->id; + req.handle = 0; + ret = drmCommandWriteRead(nouveau_device(dev)->fd, + DRM_NOUVEAU_GEM_PUSHBUF_CALL, + &req, sizeof(req)); + if (ret) + return; + + for (i = 0; i < CALPB_BUFFERS; i++) { + ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, + 0, CALPB_BUFSZ, &nvpb->buffer[i]); + if (ret) { + nouveau_pushbuf_fini_call(chan); + return; + } + } + + nvpb->use_cal = 1; + nvpb->cal_suffix0 = req.suffix0; + nvpb->cal_suffix1 = req.suffix1; +} + int nouveau_pushbuf_init(struct nouveau_channel *chan) { struct nouveau_channel_priv *nvchan = nouveau_channel(chan); struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; + int ret; - nouveau_pushbuf_space(chan, 0); + nouveau_pushbuf_init_call(chan); + + ret = nouveau_pushbuf_space(chan, 0); + if (ret) { + if (nvpb->use_cal) { + nouveau_pushbuf_fini_call(chan); + ret = nouveau_pushbuf_space(chan, 0); + } + + if (ret) + return ret; + } nvpb->buffers = calloc(NOUVEAU_GEM_MAX_BUFFERS, sizeof(struct drm_nouveau_gem_pushbuf_bo)); @@ -162,24 +256,49 @@ nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min) struct nouveau_device_priv *nvdev = nouveau_device(chan->device); struct nouveau_channel_priv *nvchan = nouveau_channel(chan); struct nouveau_pushbuf_priv *nvpb = &nvchan->pb; - struct drm_nouveau_gem_pushbuf req; unsigned i; int ret; if (nvpb->base.remaining == nvpb->size) return 0; - nvpb->size -= nvpb->base.remaining; - req.channel = chan->id; - req.nr_dwords = nvpb->size; - req.dwords = (uint64_t)(unsigned long)nvpb->pushbuf; - req.nr_buffers = nvpb->nr_buffers; - req.buffers = (uint64_t)(unsigned long)nvpb->buffers; - req.nr_relocs = nvpb->nr_relocs; - req.relocs = (uint64_t)(unsigned long)nvpb->relocs; - ret = drmCommandWrite(nvdev->fd, DRM_NOUVEAU_GEM_PUSHBUF, - &req, sizeof(req)); - assert(ret == 0); + if (nvpb->use_cal) { + struct drm_nouveau_gem_pushbuf_call req; + + *(nvpb->base.cur++) = nvpb->cal_suffix0; + *(nvpb->base.cur++) = nvpb->cal_suffix1; + + req.channel = chan->id; + req.handle = nvpb->buffer[nvpb->current]->handle; + req.offset = nvpb->current_offset * 4; + req.nr_buffers = nvpb->nr_buffers; + req.buffers = (uint64_t)(unsigned long)nvpb->buffers; + req.nr_relocs = nvpb->nr_relocs; + req.relocs = (uint64_t)(unsigned long)nvpb->relocs; + req.nr_dwords = (nvpb->base.cur - nvpb->pushbuf) - + nvpb->current_offset; -- To UNSUBSCRIBE, email to [email protected] with a subject of "unsubscribe". Trouble? Contact [email protected]

