[Intel-gfx] [PATCH 2/2] agp/intel: make intel-gtt.c into a real source file

2010-04-22 Thread Daniel Vetter
Now that the disentangling is complete, stop including intel-gtt.c
from intel-agp.c.

The linux build system _really_ doesn't allow .c source files with the
same name as the module. It fails with the following message when trying
to build such a bugger:

make[3]: Circular drivers/char/agp/intel-agp.o - drivers/char/agp/intel-agp.o
dependency dropped.

Instead of renaming intel-agp.c I've simply created a new module out
of intel-gtt.c. Renaming intel-agp.ko to something else is not an option
for it will surely kill someones boot process.

Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 drivers/char/agp/Makefile|1 +
 drivers/char/agp/intel-agp.c |2 --
 drivers/char/agp/intel-agp.h |4 
 drivers/char/agp/intel-gtt.c |   19 +--
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile
index 627f542..8eb56e2 100644
--- a/drivers/char/agp/Makefile
+++ b/drivers/char/agp/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_AGP_HP_ZX1)  += hp-agp.o
 obj-$(CONFIG_AGP_PARISC)   += parisc-agp.o
 obj-$(CONFIG_AGP_I460) += i460-agp.o
 obj-$(CONFIG_AGP_INTEL)+= intel-agp.o
+obj-$(CONFIG_AGP_INTEL)+= intel-gtt.o
 obj-$(CONFIG_AGP_NVIDIA)   += nvidia-agp.o
 obj-$(CONFIG_AGP_SGI_TIOCA)+= sgi-agp.o
 obj-$(CONFIG_AGP_SIS)  += sis-agp.o
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index e1a5770..d210ff9 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -13,8 +13,6 @@
 #include agp.h
 #include intel-agp.h
 
-#include intel-gtt.c
-
 int intel_agp_enabled;
 EXPORT_SYMBOL(intel_agp_enabled);
 
diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h
index 2547465..7d8a4ee 100644
--- a/drivers/char/agp/intel-agp.h
+++ b/drivers/char/agp/intel-agp.h
@@ -237,3 +237,7 @@
agp_bridge-dev-device == PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 
|| \
agp_bridge-dev-device == PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB 
|| \
IS_SNB)
+
+int intel_gmch_probe(struct pci_dev *pdev,
+  struct agp_bridge_data *bridge);
+void intel_gmch_remove(struct pci_dev *pdev);
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index c53436f..f7793ec 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -15,6 +15,16 @@
  * /fairy-tale-mode off
  */
 
+#include linux/module.h
+#include linux/pci.h
+#include linux/init.h
+#include linux/kernel.h
+#include linux/pagemap.h
+#include linux/agp_backend.h
+#include asm/smp.h
+#include agp.h
+#include intel-agp.h
+
 /*
  * If we have Intel graphics, we're not going to have anything other than
  * an Intel IOMMU. So make the correct use of the PCI DMA API contingent
@@ -1588,7 +1598,7 @@ static int find_gmch(u16 device)
return 1;
 }
 
-int __devinit intel_gmch_probe(struct pci_dev *pdev,
+int intel_gmch_probe(struct pci_dev *pdev,
  struct agp_bridge_data *bridge)
 {
int i;
@@ -1621,9 +1631,14 @@ int __devinit intel_gmch_probe(struct pci_dev *pdev,
 
return 1;
 }
+EXPORT_SYMBOL(intel_gmch_probe);
 
-void __devexit intel_gmch_remove(struct pci_dev *pdev)
+void intel_gmch_remove(struct pci_dev *pdev)
 {
if (intel_private.pcidev)
pci_dev_put(intel_private.pcidev);
 }
+EXPORT_SYMBOL(intel_gmch_remove);
+
+MODULE_AUTHOR(Dave Jones da...@redhat.com);
+MODULE_LICENSE(GPL and additional rights);
-- 
1.7.0.4

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] libdrm changes to allow exact fence usage accounting

2010-04-22 Thread Daniel Vetter
Hi all,

Currently libdrm pessimistically assumes that every tiled bo needs a fence
on 2nd and 3rd gen hw on _every_ aperture space checks. Especially with
execbuf2 this results in needlessly small batchbuffers.

My libdrm patch solves this problem by adding a function to libdrm to
disable all fence accounting. Fence usage can then be checked in the
client, where enough information is available to do it exactly.

Comments highly welcome. And if you like this please merge it - I don't
have commit rights on mesa/libdrm and I'd like someone else's explicit
sign-off on interface changes, anyway.

As an example I've also included a patch to convert xf86-video-intel to
do its own fence accounting. This is just for illustration, that patch is
in no way merge-ready - it's totally killing performance on some cairo
traces. On gvim-0 it even hits the brick-wall and slows it down by a factor
of ten :(

Yours, Daniel

Daniel Vetter (1):
  i830 uxa: track fence reg usage exactly

 src/i830.h |   13 ++-
 src/i830_batchbuffer.c |2 +
 src/i830_driver.c  |   32 
 src/i830_render.c  |   12 +++---
 src/i830_uxa.c |   95 +---
 src/i915_render.c  |   13 ---
 6 files changed, 139 insertions(+), 28 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915: check execbuffer for validity earlier to save on work.

2010-04-22 Thread Owain G. Ainsworth
Before, we were waiting until we knew the batch object's gtt offset
before we checked for validity. However, since this is purely an
alignment check (it is impossible for the batch object to get to
execbuffer stage without being pinned and bound) we can check alignment
before we do any other expensive work.

Additionally, check that batch_start_offset +  batch_len is = the size
of the batchbuffer object. And that batch_len and batch_start_offset do
not overflow a u_int32_t.

Signed-off-by: Owain G. Ainsworth o...@openbsd.org
---
 drivers/gpu/drm/i915/i915_gem.c |   42 ++
 1 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b85727c..a9da182 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3656,23 +3656,6 @@ err:
return ret;
 }
 
-static int
-i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
-  uint64_t exec_offset)
-{
-   uint32_t exec_start, exec_len;
-
-   exec_start = (uint32_t) exec_offset + exec-batch_start_offset;
-   exec_len = (uint32_t) exec-batch_len;
-
-   if ((exec_start | exec_len)  0x7)
-   return -EINVAL;
-
-   if (!exec_start)
-   return -EINVAL;
-
-   return 0;
-}
 
 static int
 i915_gem_wait_for_pending_flip(struct drm_device *dev,
@@ -3722,7 +3705,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_clip_rect *cliprects = NULL;
struct drm_i915_gem_relocation_entry *relocs = NULL;
int ret = 0, ret2, i, pinned = 0;
-   uint64_t exec_offset;
uint32_t seqno, flush_domains, reloc_index;
int pin_tries, flips;
 
@@ -3730,6 +3712,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
DRM_INFO(buffers_ptr %d buffer_count %d len %08x\n,
  (int) args-buffers_ptr, args-buffer_count, args-batch_len);
 #endif
+   /*
+* check for valid execbuffer offset. We can do this early because
+* bound objects are always page aligned, so only the start offset
+* matters. Also check for overflow.
+*/
+   if ((args-batch_start_offset | args-batch_len)  0x7 ||
+   args-batch_start_offset + args-batch_len  args-batch_len ||
+   args-batch_start_offset + args-batch_len 
+   args-batch_start_offset)
+   return -EINVAL;
 
if (args-buffer_count  1) {
DRM_ERROR(execbuf with %d buffers\n, args-buffer_count);
@@ -3878,15 +3870,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
ret = -EINVAL;
goto err;
}
-   batch_obj-pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
-
-   /* Sanity check the batch buffer, prior to moving objects */
-   exec_offset = exec_list[args-buffer_count - 1].offset;
-   ret = i915_gem_check_execbuffer (args, exec_offset);
-   if (ret != 0) {
-   DRM_ERROR(execbuf with invalid offset/length\n);
+   if (args-batch_start_offset + args-batch_len  batch_obj-size) {
+   DRM_ERROR(batchbuffer shorter than program length\n);
+   ret = EINVAL;
goto err;
}
+   batch_obj-pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
i915_verify_inactive(dev, __FILE__, __LINE__);
 
@@ -3955,7 +3944,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 #endif
 
/* Exec the batchbuffer */
-   ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
+   ret = i915_dispatch_gem_execbuffer(dev, args, cliprects,
+   exec_list[args-buffer_count - 1].offset);
if (ret) {
DRM_ERROR(dispatch failed %d\n, ret);
goto err;
-- 
1.6.5.7

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915: check execbuffer for validity earlier to save on work.

2010-04-22 Thread Chris Wilson
On Thu, 22 Apr 2010 22:38:41 +0100, Owain G. Ainsworth 
zer...@googlemail.com wrote:
 Before, we were waiting until we knew the batch object's gtt offset
 before we checked for validity. However, since this is purely an
 alignment check (it is impossible for the batch object to get to
 execbuffer stage without being pinned and bound) we can check alignment
 before we do any other expensive work.
 
 Additionally, check that batch_start_offset +  batch_len is = the size
 of the batchbuffer object. And that batch_len and batch_start_offset do
 not overflow a u_int32_t.
 
 Signed-off-by: Owain G. Ainsworth o...@openbsd.org

Minor comment inline.
Reviewed-by: Chris Wilson ch...@chris-wilson.co.uk

 ---
  drivers/gpu/drm/i915/i915_gem.c |   42 ++
  1 files changed, 16 insertions(+), 26 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
 index b85727c..a9da182 100644
 --- a/drivers/gpu/drm/i915/i915_gem.c
 +++ b/drivers/gpu/drm/i915/i915_gem.c
 @@ -3656,23 +3656,6 @@ err:
   return ret;
  }
  
 -static int
 -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
 -uint64_t exec_offset)
 -{
 - uint32_t exec_start, exec_len;
 -
 - exec_start = (uint32_t) exec_offset + exec-batch_start_offset;
 - exec_len = (uint32_t) exec-batch_len;
 -
 - if ((exec_start | exec_len)  0x7)
 - return -EINVAL;
 -
 - if (!exec_start)
 - return -EINVAL;
 -
 - return 0;
 -}
  
  static int
  i915_gem_wait_for_pending_flip(struct drm_device *dev,
 @@ -3722,7 +3705,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
 *data,
   struct drm_clip_rect *cliprects = NULL;
   struct drm_i915_gem_relocation_entry *relocs = NULL;
   int ret = 0, ret2, i, pinned = 0;
 - uint64_t exec_offset;
   uint32_t seqno, flush_domains, reloc_index;
   int pin_tries, flips;
  
 @@ -3730,6 +3712,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
 *data,
   DRM_INFO(buffers_ptr %d buffer_count %d len %08x\n,
 (int) args-buffers_ptr, args-buffer_count, args-batch_len);
  #endif
 + /*
 +  * check for valid execbuffer offset. We can do this early because
 +  * bound objects are always page aligned, so only the start offset
 +  * matters. Also check for overflow.
 +  */
 + if ((args-batch_start_offset | args-batch_len)  0x7 ||
 + args-batch_start_offset + args-batch_len  args-batch_len ||
 + args-batch_start_offset + args-batch_len 
 + args-batch_start_offset)
 + return -EINVAL;

A minor critique, can we move this predicate into an inline for the sake
of the reader? i915_gem_check_execbuffer_offset(args) perhaps?
-ickle

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Remove the bind in mmap_gtt_ioctl

2010-04-22 Thread Chris Wilson
On Thu, 22 Apr 2010 22:38:42 +0100, Owain G. Ainsworth 
zer...@googlemail.com wrote:
 It really won't affect faulting speed at all, and in rare cases may do
 more than we needed in the first place. If we do need to map and we
 aren't bound it will take *just as long* when the mmap happens as it
 would on the ioctl being called.
 
 Discussed with danvet and jbarnes.
 
 Signed-Off-By: Owain G. Ainsworth o...@openbsd.org
Acked-by: Chris Wilson ch...@chris-wilson.co.uk
-ickle

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: adjust fence registers asynchronously on tiling changes v2

2010-04-22 Thread Daniel Vetter
This avoids stalling on the gpu. With the preparation from the
previous patch, this is really just a small change.

Thanks to Owain Ainsworth zer...@googlemail.com for coming up
with the idea for this patch and hashing out the implementation
with me on irc.

v2: Reset obj_priv-fence_invalid in put_fence. No functional
change because put_fence checks whether there is actually a
fence allocated, so the put_fence in adjust_fencing won't harm.
But IMHO the code makes slightly more sense this way around.

Signed-off-by: Daniel Vetter daniel.vet...@ffwll.ch
---
 drivers/gpu/drm/i915/i915_drv.h|6 ++
 drivers/gpu/drm/i915/i915_gem.c|   10 ++
 drivers/gpu/drm/i915/i915_gem_tiling.c |   26 ++
 3 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7aec3ec..174269c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -698,6 +698,12 @@ struct drm_i915_gem_object {
 */
int fence_reg;
 
+   /**
+* Tiling changes can happen asynchronously to avoid gpu stalls.
+* This will be set if the current fencing may be invalid.
+*/
+   int fence_invalid;
+
/** How many users have pinned this object in GTT space */
int pin_count;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e73a12..023f4db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2485,6 +2485,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
struct drm_i915_fence_reg *reg = NULL;
int ret;
 
+   BUG_ON(obj_priv-fence_invalid);
+
/* Just update our place in the LRU if our fence is getting used. */
if (obj_priv-fence_reg != I915_FENCE_REG_NONE) {
list_move_tail(obj_priv-fence_list, dev_priv-mm.fence_list);
@@ -2554,6 +2556,12 @@ i915_gem_object_adjust_fencing(struct drm_gem_object 
*obj)
struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
int ret;
 
+   if (obj_priv-fence_invalid) {
+   ret = i915_gem_object_put_fence_reg(obj);
+   if (ret != 0)
+   return ret;
+   }
+
if (!i915_gem_object_fence_offset_ok(obj, obj_priv-tiling_mode)) {
ret = i915_gem_object_unbind(obj);
if (ret != 0)
@@ -2621,6 +2629,8 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
struct drm_device *dev = obj-dev;
struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 
+   obj_priv-fence_invalid = 0;
+
if (obj_priv-fence_reg == I915_FENCE_REG_NONE)
return 0;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c 
b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 00f264d..aadaaa6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -322,30 +322,16 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
mutex_lock(dev-struct_mutex);
if (args-tiling_mode != obj_priv-tiling_mode ||
args-stride != obj_priv-stride) {
-   /* We need to rebind the object if its current allocation
-* no longer meets the alignment restrictions for its new
-* tiling mode. Otherwise we can just leave it alone, but
-* need to ensure that any fence register is cleared.
-*/
-   if (!i915_gem_object_fence_offset_ok(obj, args-tiling_mode))
-   ret = i915_gem_object_unbind(obj);
-   else if (obj_priv-fence_reg != I915_FENCE_REG_NONE)
-   ret = i915_gem_object_put_fence_reg(obj);
-   else
-   i915_gem_release_mmap(obj);
-
-   if (ret != 0) {
-   WARN(ret != -ERESTARTSYS,
-failed to reset object for tiling switch);
-   args-tiling_mode = obj_priv-tiling_mode;
-   args-stride = obj_priv-stride;
-   goto err;
-   }
+   /* Change tiling parameter asynchronously to avoid
+* gpu stalls. */
+   obj_priv-fence_invalid = 1;
+
+   i915_gem_release_mmap(obj);
 
obj_priv-tiling_mode = args-tiling_mode;
obj_priv-stride = args-stride;
}
-err:
+
drm_gem_object_unreference(obj);
mutex_unlock(dev-struct_mutex);
 
-- 
1.6.6.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx