[Intel-gfx] [PATCH 2/2] agp/intel: make intel-gtt.c into a real source file
Now that the disentangling is complete, stop including intel-gtt.c from intel-agp.c. The linux build system _really_ doesn't allow .c source files with the same name as the module. It fails with the following message when trying to build such a bugger: make[3]: Circular drivers/char/agp/intel-agp.o - drivers/char/agp/intel-agp.o dependency dropped. Instead of renaming intel-agp.c I've simply created a new module out of intel-gtt.c. Renaming intel-agp.ko to something else is not an option for it will surely kill someones boot process. Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch --- drivers/char/agp/Makefile|1 + drivers/char/agp/intel-agp.c |2 -- drivers/char/agp/intel-agp.h |4 drivers/char/agp/intel-gtt.c | 19 +-- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile index 627f542..8eb56e2 100644 --- a/drivers/char/agp/Makefile +++ b/drivers/char/agp/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_AGP_HP_ZX1) += hp-agp.o obj-$(CONFIG_AGP_PARISC) += parisc-agp.o obj-$(CONFIG_AGP_I460) += i460-agp.o obj-$(CONFIG_AGP_INTEL)+= intel-agp.o +obj-$(CONFIG_AGP_INTEL)+= intel-gtt.o obj-$(CONFIG_AGP_NVIDIA) += nvidia-agp.o obj-$(CONFIG_AGP_SGI_TIOCA)+= sgi-agp.o obj-$(CONFIG_AGP_SIS) += sis-agp.o diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index e1a5770..d210ff9 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -13,8 +13,6 @@ #include agp.h #include intel-agp.h -#include intel-gtt.c - int intel_agp_enabled; EXPORT_SYMBOL(intel_agp_enabled); diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index 2547465..7d8a4ee 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -237,3 +237,7 @@ agp_bridge-dev-device == PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB || \ agp_bridge-dev-device == PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB || \ IS_SNB) + +int intel_gmch_probe(struct pci_dev *pdev, + struct agp_bridge_data *bridge); +void intel_gmch_remove(struct pci_dev *pdev); diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index c53436f..f7793ec 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -15,6 +15,16 @@ * /fairy-tale-mode off */ +#include linux/module.h +#include linux/pci.h +#include linux/init.h +#include linux/kernel.h +#include linux/pagemap.h +#include linux/agp_backend.h +#include asm/smp.h +#include agp.h +#include intel-agp.h + /* * If we have Intel graphics, we're not going to have anything other than * an Intel IOMMU. So make the correct use of the PCI DMA API contingent @@ -1588,7 +1598,7 @@ static int find_gmch(u16 device) return 1; } -int __devinit intel_gmch_probe(struct pci_dev *pdev, +int intel_gmch_probe(struct pci_dev *pdev, struct agp_bridge_data *bridge) { int i; @@ -1621,9 +1631,14 @@ int __devinit intel_gmch_probe(struct pci_dev *pdev, return 1; } +EXPORT_SYMBOL(intel_gmch_probe); -void __devexit intel_gmch_remove(struct pci_dev *pdev) +void intel_gmch_remove(struct pci_dev *pdev) { if (intel_private.pcidev) pci_dev_put(intel_private.pcidev); } +EXPORT_SYMBOL(intel_gmch_remove); + +MODULE_AUTHOR(Dave Jones da...@redhat.com); +MODULE_LICENSE(GPL and additional rights); -- 1.7.0.4 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] libdrm changes to allow exact fence usage accounting
Hi all, Currently libdrm pessimistically assumes that every tiled bo needs a fence on 2nd and 3rd gen hw on _every_ aperture space checks. Especially with execbuf2 this results in needlessly small batchbuffers. My libdrm patch solves this problem by adding a function to libdrm to disable all fence accounting. Fence usage can then be checked in the client, where enough information is available to do it exactly. Comments highly welcome. And if you like this please merge it - I don't have commit rights on mesa/libdrm and I'd like someone else's explicit sign-off on interface changes, anyway. As an example I've also included a patch to convert xf86-video-intel to do its own fence accounting. This is just for illustration, that patch is in no way merge-ready - it's totally killing performance on some cairo traces. On gvim-0 it even hits the brick-wall and slows it down by a factor of ten :( Yours, Daniel Daniel Vetter (1): i830 uxa: track fence reg usage exactly src/i830.h | 13 ++- src/i830_batchbuffer.c |2 + src/i830_driver.c | 32 src/i830_render.c | 12 +++--- src/i830_uxa.c | 95 +--- src/i915_render.c | 13 --- 6 files changed, 139 insertions(+), 28 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915: check execbuffer for validity earlier to save on work.
Before, we were waiting until we knew the batch object's gtt offset before we checked for validity. However, since this is purely an alignment check (it is impossible for the batch object to get to execbuffer stage without being pinned and bound) we can check alignment before we do any other expensive work. Additionally, check that batch_start_offset + batch_len is = the size of the batchbuffer object. And that batch_len and batch_start_offset do not overflow a u_int32_t. Signed-off-by: Owain G. Ainsworth o...@openbsd.org --- drivers/gpu/drm/i915/i915_gem.c | 42 ++ 1 files changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b85727c..a9da182 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3656,23 +3656,6 @@ err: return ret; } -static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, - uint64_t exec_offset) -{ - uint32_t exec_start, exec_len; - - exec_start = (uint32_t) exec_offset + exec-batch_start_offset; - exec_len = (uint32_t) exec-batch_len; - - if ((exec_start | exec_len) 0x7) - return -EINVAL; - - if (!exec_start) - return -EINVAL; - - return 0; -} static int i915_gem_wait_for_pending_flip(struct drm_device *dev, @@ -3722,7 +3705,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs = NULL; int ret = 0, ret2, i, pinned = 0; - uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3730,6 +3712,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, DRM_INFO(buffers_ptr %d buffer_count %d len %08x\n, (int) args-buffers_ptr, args-buffer_count, args-batch_len); #endif + /* +* check for valid execbuffer offset. We can do this early because +* bound objects are always page aligned, so only the start offset +* matters. Also check for overflow. +*/ + if ((args-batch_start_offset | args-batch_len) 0x7 || + args-batch_start_offset + args-batch_len args-batch_len || + args-batch_start_offset + args-batch_len + args-batch_start_offset) + return -EINVAL; if (args-buffer_count 1) { DRM_ERROR(execbuf with %d buffers\n, args-buffer_count); @@ -3878,15 +3870,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = -EINVAL; goto err; } - batch_obj-pending_read_domains |= I915_GEM_DOMAIN_COMMAND; - - /* Sanity check the batch buffer, prior to moving objects */ - exec_offset = exec_list[args-buffer_count - 1].offset; - ret = i915_gem_check_execbuffer (args, exec_offset); - if (ret != 0) { - DRM_ERROR(execbuf with invalid offset/length\n); + if (args-batch_start_offset + args-batch_len batch_obj-size) { + DRM_ERROR(batchbuffer shorter than program length\n); + ret = EINVAL; goto err; } + batch_obj-pending_read_domains |= I915_GEM_DOMAIN_COMMAND; i915_verify_inactive(dev, __FILE__, __LINE__); @@ -3955,7 +3944,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, #endif /* Exec the batchbuffer */ - ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset); + ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, + exec_list[args-buffer_count - 1].offset); if (ret) { DRM_ERROR(dispatch failed %d\n, ret); goto err; -- 1.6.5.7 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915: check execbuffer for validity earlier to save on work.
On Thu, 22 Apr 2010 22:38:41 +0100, Owain G. Ainsworth zer...@googlemail.com wrote: Before, we were waiting until we knew the batch object's gtt offset before we checked for validity. However, since this is purely an alignment check (it is impossible for the batch object to get to execbuffer stage without being pinned and bound) we can check alignment before we do any other expensive work. Additionally, check that batch_start_offset + batch_len is = the size of the batchbuffer object. And that batch_len and batch_start_offset do not overflow a u_int32_t. Signed-off-by: Owain G. Ainsworth o...@openbsd.org Minor comment inline. Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 42 ++ 1 files changed, 16 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b85727c..a9da182 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3656,23 +3656,6 @@ err: return ret; } -static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, -uint64_t exec_offset) -{ - uint32_t exec_start, exec_len; - - exec_start = (uint32_t) exec_offset + exec-batch_start_offset; - exec_len = (uint32_t) exec-batch_len; - - if ((exec_start | exec_len) 0x7) - return -EINVAL; - - if (!exec_start) - return -EINVAL; - - return 0; -} static int i915_gem_wait_for_pending_flip(struct drm_device *dev, @@ -3722,7 +3705,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs = NULL; int ret = 0, ret2, i, pinned = 0; - uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3730,6 +3712,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, DRM_INFO(buffers_ptr %d buffer_count %d len %08x\n, (int) args-buffers_ptr, args-buffer_count, args-batch_len); #endif + /* + * check for valid execbuffer offset. We can do this early because + * bound objects are always page aligned, so only the start offset + * matters. Also check for overflow. + */ + if ((args-batch_start_offset | args-batch_len) 0x7 || + args-batch_start_offset + args-batch_len args-batch_len || + args-batch_start_offset + args-batch_len + args-batch_start_offset) + return -EINVAL; A minor critique, can we move this predicate into an inline for the sake of the reader? i915_gem_check_execbuffer_offset(args) perhaps? -ickle -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Remove the bind in mmap_gtt_ioctl
On Thu, 22 Apr 2010 22:38:42 +0100, Owain G. Ainsworth zer...@googlemail.com wrote: It really won't affect faulting speed at all, and in rare cases may do more than we needed in the first place. If we do need to map and we aren't bound it will take *just as long* when the mmap happens as it would on the ioctl being called. Discussed with danvet and jbarnes. Signed-Off-By: Owain G. Ainsworth o...@openbsd.org Acked-by: Chris Wilson ch...@chris-wilson.co.uk -ickle -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: adjust fence registers asynchronously on tiling changes v2
This avoids stalling on the gpu. With the preparation from the previous patch, this is really just a small change. Thanks to Owain Ainsworth zer...@googlemail.com for coming up with the idea for this patch and hashing out the implementation with me on irc. v2: Reset obj_priv-fence_invalid in put_fence. No functional change because put_fence checks whether there is actually a fence allocated, so the put_fence in adjust_fencing won't harm. But IMHO the code makes slightly more sense this way around. Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch --- drivers/gpu/drm/i915/i915_drv.h|6 ++ drivers/gpu/drm/i915/i915_gem.c| 10 ++ drivers/gpu/drm/i915/i915_gem_tiling.c | 26 ++ 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7aec3ec..174269c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -698,6 +698,12 @@ struct drm_i915_gem_object { */ int fence_reg; + /** +* Tiling changes can happen asynchronously to avoid gpu stalls. +* This will be set if the current fencing may be invalid. +*/ + int fence_invalid; + /** How many users have pinned this object in GTT space */ int pin_count; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8e73a12..023f4db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2485,6 +2485,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) struct drm_i915_fence_reg *reg = NULL; int ret; + BUG_ON(obj_priv-fence_invalid); + /* Just update our place in the LRU if our fence is getting used. */ if (obj_priv-fence_reg != I915_FENCE_REG_NONE) { list_move_tail(obj_priv-fence_list, dev_priv-mm.fence_list); @@ -2554,6 +2556,12 @@ i915_gem_object_adjust_fencing(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); int ret; + if (obj_priv-fence_invalid) { + ret = i915_gem_object_put_fence_reg(obj); + if (ret != 0) + return ret; + } + if (!i915_gem_object_fence_offset_ok(obj, obj_priv-tiling_mode)) { ret = i915_gem_object_unbind(obj); if (ret != 0) @@ -2621,6 +2629,8 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj) struct drm_device *dev = obj-dev; struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); + obj_priv-fence_invalid = 0; + if (obj_priv-fence_reg == I915_FENCE_REG_NONE) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 00f264d..aadaaa6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -322,30 +322,16 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, mutex_lock(dev-struct_mutex); if (args-tiling_mode != obj_priv-tiling_mode || args-stride != obj_priv-stride) { - /* We need to rebind the object if its current allocation -* no longer meets the alignment restrictions for its new -* tiling mode. Otherwise we can just leave it alone, but -* need to ensure that any fence register is cleared. -*/ - if (!i915_gem_object_fence_offset_ok(obj, args-tiling_mode)) - ret = i915_gem_object_unbind(obj); - else if (obj_priv-fence_reg != I915_FENCE_REG_NONE) - ret = i915_gem_object_put_fence_reg(obj); - else - i915_gem_release_mmap(obj); - - if (ret != 0) { - WARN(ret != -ERESTARTSYS, -failed to reset object for tiling switch); - args-tiling_mode = obj_priv-tiling_mode; - args-stride = obj_priv-stride; - goto err; - } + /* Change tiling parameter asynchronously to avoid +* gpu stalls. */ + obj_priv-fence_invalid = 1; + + i915_gem_release_mmap(obj); obj_priv-tiling_mode = args-tiling_mode; obj_priv-stride = args-stride; } -err: + drm_gem_object_unreference(obj); mutex_unlock(dev-struct_mutex); -- 1.6.6.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx