On Thursday, October 23, 2008 3:51 pm Jesse Barnes wrote:
> On Wednesday, October 22, 2008 2:42 pm Jesse Barnes wrote:
> > Ok, this one actually works w/o corrupting your display.  However, after
> > running for awhile I end up hitting the BUG_ON at i915_gem.c:1061, so I'm
> > hoping someone can tell me where I messed up the list handling (the
> > change in i915_gem_retire_request tries to deal with that, since the
> > flushing list is also non-empty at leavevt time for some reason).
>
> Ok, some more details on this.  It looks like we're getting to unbind with
> an active object, generally from get_fence but sometimes from other paths
> too. The object is active so i915_gem_wait_rendering(obj) ends up calling
> i915_wait_request() on it.  But the object itself isn't on any of the
> lists: inactive, flushing, or active, which is probably why wait_request
> doesn't end up clearing its active flag (even though its seqno has passed).
>
> Also, I confirmed that i915_gem_object_set_domain wasn't queuing a request
> with the object by putting the BUG_ON(obj->active) up above it, so this
> seems to be just an object retirement bug of some kind...

Here's the latest patch I'm playing with, which includes some fixes for things 
Eric noticed today:
  - fixup error paths in mmap_gtt_ioctl (don't leak refs, make sure to unlock)
  - remove mapped list, just add gtt mapped stuff to the inactive list
    (assuming this is the first time it's bound)
  - don't allocate a fence register for every GTT bind, only do it for faulted
    objects (otherwise we end up just wasting them all)
  - play with just zapping the page range from get_fence (this somewhat
    works--it's more stable but there's some corruption, so either I'm not
    zapping the previous map correctly or I'm setting the fences up wrong)

As for the lists, I realized that the pinned front buffer probably won't be on 
any lists, at least to begin with, so that could explain what I saw earlier.  
However, it should have a nonzero pin count, so I don't know why we'd try to 
evict it...
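
For testing, the intended userspace flow looks roughly like the sketch below.
This is illustration only, not part of the patch: it assumes an already-open
DRM fd, a GEM handle plus its size, and the ioctl/struct definitions added
below.  Also note the fake offsets start at 4GB, so 32-bit apps would need
mmap64() or -D_FILE_OFFSET_BITS=64.

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include "i915_drm.h"	/* new DRM_IOCTL_I915_GEM_MMAP_GTT bits */

	/* Map a GEM object through the GTT: fetch the fake mmap offset with
	 * the new ioctl, then mmap() that offset on the DRM fd.  Faults on
	 * the resulting mapping land in i915_gem_fault(), which binds the
	 * object and sets up a fence reg for tiled buffers as needed. */
	static void *gtt_map(int fd, uint32_t handle, size_t size)
	{
		struct drm_i915_gem_mmap_gtt arg;
		void *ptr;

		memset(&arg, 0, sizeof(arg));
		arg.handle = handle;
		arg.alignment = 0;	/* let the kernel choose */

		if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
			return NULL;

		/* addr_ptr returns the fake offset, not a pointer */
		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			   fd, arg.addr_ptr);
		return ptr == MAP_FAILED ? NULL : ptr;
	}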

Jesse
diff --git a/Makefile b/Makefile
index 16e3fbb..fac635f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 27
-EXTRAVERSION =
+EXTRAVERSION = -drm-gtt
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index bde64b8..9916366 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -262,6 +262,9 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
 		DRM_DEBUG("AGP offset = 0x%08lx, size = 0x%08lx\n", map->offset, map->size);
 
 		break;
+	case _DRM_GEM:
+		DRM_ERROR("tried to addmap GEM object\n");
+		break;
 	}
 	case _DRM_SCATTER_GATHER:
 		if (!dev->sg) {
@@ -419,6 +422,9 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
 		dmah.size = map->size;
 		__drm_pci_free(dev, &dmah);
 		break;
+	case _DRM_GEM:
+		DRM_ERROR("tried to rmmap GEM object\n");
+		break;
 	}
 	drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 96f416a..aad8d76 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -236,6 +236,7 @@ int drm_lastclose(struct drm_device * dev)
 		dev->lock.file_priv = NULL;
 		wake_up_interruptible(&dev->lock.lock_queue);
 	}
+	dev->dev_mapping = NULL;
 	mutex_unlock(&dev->struct_mutex);
 
 	DRM_DEBUG("lastclose completed\n");
@@ -290,6 +291,8 @@ EXPORT_SYMBOL(drm_init);
  */
 static void drm_cleanup(struct drm_device * dev)
 {
+	struct drm_driver *driver = dev->driver;
+
 	DRM_DEBUG("\n");
 
 	if (!dev) {
@@ -319,6 +322,9 @@ static void drm_cleanup(struct drm_device * dev)
 	drm_ht_remove(&dev->map_hash);
 	drm_ctxbitmap_cleanup(dev);
 
+	if (driver->driver_features & DRIVER_GEM)
+		drm_gem_destroy(dev);
+
 	drm_put_minor(&dev->primary);
 	if (drm_put_dev(dev))
 		DRM_ERROR("Cannot unload module\n");
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 0d46627..0958cf6 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -147,11 +147,21 @@ int drm_open(struct inode *inode, struct file *filp)
 		spin_lock(&dev->count_lock);
 		if (!dev->open_count++) {
 			spin_unlock(&dev->count_lock);
-			return drm_setup(dev);
+			retcode = drm_setup(dev);
+			goto out;
 		}
 		spin_unlock(&dev->count_lock);
 	}
 
+out:
+	mutex_lock(&dev->struct_mutex);
+	if (dev->dev_mapping == NULL)
+		dev->dev_mapping = inode->i_mapping;
+	else if (dev->dev_mapping != inode->i_mapping)
+		WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
+		     dev->dev_mapping, inode->i_mapping);
+	mutex_unlock(&dev->struct_mutex);
+
 	return retcode;
 }
 EXPORT_SYMBOL(drm_open);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ccd1afd..431dc3c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -64,6 +64,13 @@
  * up at a later date, and as our interface with shmfs for memory allocation.
  */
 
+/*
+ * We make up offsets for buffer objects so we can recognize them at
+ * mmap time.
+ */
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
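+
+/*
+ * Note that the offsets start just past the largest 32-bit page offset,
+ * so a GEM fake offset can never collide with a legacy map offset handled
+ * by drm_mmap().  The resulting byte offsets are >= 4GB, so 32-bit
+ * userspace needs a 64-bit off_t to mmap them.
+ */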
+
 /**
  * Initialize the GEM device fields
  */
@@ -71,6 +78,8 @@
 int
 drm_gem_init(struct drm_device *dev)
 {
+	struct drm_gem_mm *mm;
+
 	spin_lock_init(&dev->object_name_lock);
 	idr_init(&dev->object_name_idr);
 	atomic_set(&dev->object_count, 0);
@@ -79,9 +88,41 @@ drm_gem_init(struct drm_device *dev)
 	atomic_set(&dev->pin_memory, 0);
 	atomic_set(&dev->gtt_count, 0);
 	atomic_set(&dev->gtt_memory, 0);
+
+	mm = drm_calloc(1, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	if (!mm) {
+		DRM_ERROR("out of memory\n");
+		return -ENOMEM;
+	}
+
+	dev->mm_private = mm;
+
+	if (drm_ht_create(&mm->offset_hash, 19)) {
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		return -ENOMEM;
+	}
+
+	if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
+			DRM_FILE_PAGE_OFFSET_SIZE)) {
+		drm_ht_remove(&mm->offset_hash);
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
+void
+drm_gem_destroy(struct drm_device *dev)
+{
+	struct drm_gem_mm *mm = dev->mm_private;
+
+	drm_mm_takedown(&mm->offset_manager);
+	drm_ht_remove(&mm->offset_hash);
+	drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	dev->mm_private = NULL;
+}
+
 /**
  * Allocate a GEM object of the specified size with shmfs backing store
  */
@@ -419,3 +460,58 @@ drm_gem_object_handle_free(struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_handle_free);
 
+int
+drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map *map = NULL;
+	struct drm_gem_object *obj;
+	struct drm_hash_item *hash;
+	unsigned long prot;
+	int ret = 0;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+		mutex_unlock(&dev->struct_mutex);
+		return drm_mmap(filp, vma);
+	}
+
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	if (!map ||
+	    ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
+	/* Check for valid size. */
+	if (map->size < vma->vm_end - vma->vm_start) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	obj = map->handle;
+	if (!obj->dev->driver->gem_vm_ops) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_ops = obj->dev->driver->gem_vm_ops;
+	vma->vm_private_data = map->handle;
+	/* FIXME: use pgprot_writecombine when available */
+	prot = pgprot_val(vma->vm_page_prot);
+	prot |= _PAGE_CACHE_WC;
+	vma->vm_page_prot = __pgprot(prot);
+
+	vma->vm_file = filp;	/* Needed for drm_vm_open() */
+	drm_vm_open_locked(vma);
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_mmap);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 3316067..af539f7 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -127,6 +127,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_insert_item);
 
 /*
  * Just insert an item and return any "bits" bit key that hasn't been
@@ -188,6 +189,7 @@ int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	ht->fill--;
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_remove_item);
 
 void drm_ht_remove(struct drm_open_hash *ht)
 {
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index c234c6f..3ffae02 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -267,6 +267,9 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 				dmah.size = map->size;
 				__drm_pci_free(dev, &dmah);
 				break;
+			case _DRM_GEM:
+				DRM_ERROR("tried to rmmap GEM object\n");
+				break;
 			}
 			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		}
@@ -399,7 +402,7 @@ static struct vm_operations_struct drm_vm_sg_ops = {
  * Create a new drm_vma_entry structure as the \p vma private data entry and
  * add it to drm_device::vmalist.
  */
-static void drm_vm_open_locked(struct vm_area_struct *vma)
+void drm_vm_open_locked(struct vm_area_struct *vma)
 {
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
@@ -540,7 +543,7 @@ EXPORT_SYMBOL(drm_core_get_reg_ofs);
  * according to the mapping type and remaps the pages. Finally sets the file
  * pointer and calls vm_open().
  */
-static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
+int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *priv = filp->private_data;
 	struct drm_device *dev = priv->minor->dev;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index db34780..9e9b76b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -953,6 +953,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0),
+	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a80ead2..1c87509 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -81,6 +81,10 @@ static int i915_resume(struct drm_device *dev)
 	return 0;
 }
 
+static struct vm_operations_struct i915_gem_vm_ops = {
+	.fault = i915_gem_fault,
+};
+
 static struct drm_driver driver = {
 	/* don't use mtrr's here, the Xserver or user space app should
 	 * deal with them for intel hardware.
@@ -111,13 +115,14 @@ static struct drm_driver driver = {
 	.proc_cleanup = i915_gem_proc_cleanup,
 	.gem_init_object = i915_gem_init_object,
 	.gem_free_object = i915_gem_free_object,
+	.gem_vm_ops = &i915_gem_vm_ops,
 	.ioctls = i915_ioctls,
 	.fops = {
 		 .owner = THIS_MODULE,
 		 .open = drm_open,
 		 .release = drm_release,
 		 .ioctl = drm_ioctl,
-		 .mmap = drm_mmap,
+		 .mmap = drm_gem_mmap,
 		 .poll = drm_poll,
 		 .fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eae4ed3..65acc42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -107,6 +107,13 @@ struct intel_opregion {
 	int enabled;
 };
 
+#define I915_FENCE_REG_NONE -1
+
+struct drm_i915_fence_reg {
+	uint64_t val;
+	struct drm_gem_object *obj;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -151,6 +158,8 @@ typedef struct drm_i915_private {
 
 	struct intel_opregion opregion;
 
+	struct drm_i915_fence_reg fence_regs[16];
+
 	/* Register state */
 	u8 saveLBB;
 	u32 saveDSPACNTR;
@@ -360,6 +369,21 @@ struct drm_i915_gem_object {
 	 * This is the same as gtt_space->start
 	 */
 	uint32_t gtt_offset;
+	/**
+	 * Required alignment for the object
+	 */
+	uint32_t gtt_alignment;
+	/**
+	 * Fake offset for use by mmap(2)
+	 */
+	uint64_t mmap_offset;
+
+	/**
+	 * Fence register bits (if any) for this object.  Will be set
+	 * as needed when mapped into the GTT.
+	 * Protected by dev->struct_mutex.
+	 */
+	int fence_reg;
 
 	/** Boolean whether this object has a valid gtt offset. */
 	int gtt_bound;
@@ -372,6 +396,7 @@ struct drm_i915_gem_object {
 
 	/** Current tiling mode for the object. */
 	uint32_t tiling_mode;
+	uint32_t stride;
 
 	/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
 	uint32_t agp_type;
@@ -480,6 +505,8 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
 int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
 int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -514,6 +541,7 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
@@ -558,6 +586,7 @@ extern void opregion_enable_asle(struct drm_device *dev);
 
 #define I915_READ(reg)          readl(dev_priv->regs + (reg))
 #define I915_WRITE(reg, val)     writel(val, dev_priv->regs + (reg))
+#define I915_WRITE64(reg, val)	writeq(val, dev_priv->regs + (reg))
 #define I915_READ16(reg)	readw(dev_priv->regs + (reg))
 #define I915_WRITE16(reg, val)	writel(val, dev_priv->regs + (reg))
 #define I915_READ8(reg)		readb(dev_priv->regs + (reg))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd..3b5b230 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,10 @@ i915_gem_set_domain(struct drm_gem_object *obj,
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
+					   unsigned alignment);
+static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_evict_something(struct drm_device *dev);
 
 static void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev);
@@ -476,6 +480,133 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * vma: VMA in question
+ * vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ */
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	pgoff_t page_offset;
+	unsigned long pfn;
+	int ret = 0;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
+		PAGE_SHIFT;
+
+	/* Now bind it into the GTT if needed */
+	mutex_lock(&dev->struct_mutex);
+	if (!obj_priv->gtt_space) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return VM_FAULT_SIGBUS;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	/* Need a new fence register? */
+	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+	    obj_priv->tiling_mode != I915_TILING_NONE)
+		i915_gem_object_get_fence_reg(obj);
+
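+	/* Physical page backing the fault: aperture base plus the object's
+	 * GTT offset, in pages, plus the page index within the object */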
+	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
+		page_offset;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case -ENOMEM:
+	case -EAGAIN:
+		return VM_FAULT_OOM;
+	case -EFAULT:
+	case -EBUSY:
+		DRM_ERROR("can't insert pfn??  fault or busy...\n");
+		return VM_FAULT_SIGBUS;
+	default:
+		return VM_FAULT_NOPAGE;
+	}
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file_priv: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj;
+	struct drm_i915_gem_object *obj_priv;
+	int ret;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	mutex_lock(&dev->struct_mutex);
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL) {
+		/* lookup failed, nothing to unreference */
+		mutex_unlock(&dev->struct_mutex);
+		return -EBADF;
+	}
+
+	obj_priv = obj->driver_private;
+	obj_priv->gtt_alignment = args->alignment;
+	args->addr_ptr = obj_priv->mmap_offset;
+
+	/*
+	 * Pull it into the GTT so that we have a page list (makes the
+	 * initial fault faster and any subsequent flushing possible).
+	 */
+	if (!obj_priv->agp_mem) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
 static void
 i915_gem_object_free_page_list(struct drm_gem_object *obj)
 {
@@ -642,10 +773,6 @@ i915_gem_retire_request(struct drm_device *dev,
 		 */
 		if (obj_priv->last_rendering_seqno != request->seqno)
 			return;
-#if WATCH_LRU
-		DRM_INFO("%s: retire %d moves to inactive list %p\n",
-			 __func__, request->seqno, obj);
-#endif
 
 		if (obj->write_domain != 0) {
 			list_move_tail(&obj_priv->list,
@@ -774,7 +901,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 	 * buffer to have made it to the inactive list, and we would need
 	 * a separate wait queue to handle that.
 	 */
-	if (ret == 0)
+	if (ret == 0 || ret == -ERESTARTSYS)
 		i915_gem_retire_requests(dev);
 
 	return ret;
@@ -906,7 +1033,9 @@ static int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	loff_t offset;
 	int ret = 0;
 
 #if WATCH_BUF
@@ -950,6 +1079,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	BUG_ON(obj_priv->active);
 
+	/* blow away mappings if mapped through GTT */
+	offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
+	unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
+
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
+		dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
+		obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	}
+
 	i915_gem_object_free_page_list(obj);
 
 	if (obj_priv->gtt_space) {
@@ -1093,6 +1232,100 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj)
 	return 0;
 }
 
+static int fence_reg_start = 4; /* 4 if userland hasn't ioctl'd us yet */
+static int num_fence_regs = 16; /* 8 on pre-965, 16 otherwise */
+
+/**
+ * i915_gem_object_get_fence_reg - set up a fence reg for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ *
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ */
+static void
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	struct drm_i915_fence_reg *reg = NULL;
+	int i;
+
+	switch (obj_priv->tiling_mode) {
+	case I915_TILING_NONE:
+		WARN(1, "allocating a fence for non-tiled object?\n");
+		break;
+	case I915_TILING_X:
+		WARN(obj_priv->stride & (512 - 1),
+		     "object is X tiled but has non-512B pitch\n");
+		break;
+	case I915_TILING_Y:
+		WARN(obj_priv->stride & (128 - 1),
+		     "object is Y tiled but has non-128B pitch\n");
+		break;
+	}
+
+	/* First try to find a free reg */
+	/* FIXME: use all fence regs if userland isn't using them */
+	for (i = fence_reg_start; i < num_fence_regs; i++) {
+		if (!dev_priv->fence_regs[i].obj) {
+			reg = &dev_priv->fence_regs[i];
+			break;
+		}
+	}
+
+	/* None available, just take the first available one */
+	if (!reg) {
+		struct drm_i915_gem_object *old_obj_priv;
+		loff_t offset;
+
+		for (i = fence_reg_start; i < num_fence_regs; i++) {
+			old_obj_priv = dev_priv->fence_regs[i].obj->driver_private;
+			if (!old_obj_priv->pin_count) {
+				reg = &dev_priv->fence_regs[i];
+				break;
+			}
+		}
+
+		/* Should have one by now unless userspace abused us */
+		BUG_ON(!reg);
+#if 0
+		/*
+		 * Zap this virtual mapping so we can set up a fence again
+		 * for this object next time we need it.
+		 */
+		offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
+		unmap_mapping_range(dev->dev_mapping, offset,
+				    reg->obj->size, 1);
+#else
+		/*
+		 * Unbinding the object fully shouldn't be necessary, but
+		 * it should work.  We can use the above instead to just
+		 * zap the virtual mapping.
+		 */
+		i915_gem_object_unbind(reg->obj);
+#endif
+	}
+
+	obj_priv->fence_reg = i;
+	reg->obj = obj;
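+	/* 965 fence layout: end address (last page of the object) in the
+	 * upper dword, start address in the lower dword, pitch in 128 byte
+	 * units minus one at bit 2, Y-tile flag at bit 1, valid at bit 0.
+	 */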
+	reg->val = ((uint64_t) ((obj_priv->gtt_offset + obj->size - 4096) &
+				0xfffff000)) << 32;
+	reg->val |= obj_priv->gtt_offset & 0xfffff000;
+	reg->val |= ((obj_priv->stride / 128) - 1) << 2;
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		reg->val |= 1 << 1;
+	reg->val |= 1; /* valid */
+
+	I915_WRITE64(FENCE_REG_965_0 + (i * 8), reg->val);
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
@@ -2132,7 +2365,12 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 
 int i915_gem_init_object(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
 	struct drm_i915_gem_object *obj_priv;
+	struct drm_map_list *list;
+	struct drm_map *map;
+	int ret = 0;
 
 	obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
 	if (obj_priv == NULL)
@@ -2151,12 +2389,63 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 
 	obj->driver_private = obj_priv;
 	obj_priv->obj = obj;
+	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+
+	/* Set the object up for mmap'ing */
+	list = &obj->map_list;
+	list->map = drm_calloc(1, sizeof(struct drm_map_list),
+			       DRM_MEM_DRIVER);
+	if (!list->map)
+		return -ENOMEM;
+
+	map = list->map;
+	map->type = _DRM_GEM;
+	map->size = obj->size;
+	map->handle = obj;
+
+	/* Get a DRM GEM mmap offset allocated... */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+						    obj->size / PAGE_SIZE, 0, 0);
+	if (!list->file_offset_node) {
+		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+						  obj->size / PAGE_SIZE, 0);
+	if (!list->file_offset_node) {
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
+		DRM_ERROR("failed to add to map hash\n");
+		ret = -ENOMEM;
+		goto out_free_mm;
+	}
+
+	/* By now we should be all set, any drm_mmap request on the offset
+	 * below will get to our mmap & fault handler */
+	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
 	return 0;
+
+out_free_mm:
+	drm_mm_put_block(list->file_offset_node);
+out_free_list:
+	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+
+	return ret;
 }
 
 void i915_gem_free_object(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list;
+	struct drm_map *map;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
 	while (obj_priv->pin_count > 0)
@@ -2164,6 +2453,20 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
 	i915_gem_object_unbind(obj);
 
+	list = &obj->map_list;
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+
+	if (list->file_offset_node) {
+		drm_mm_put_block(list->file_offset_node);
+		list->file_offset_node = NULL;
+	}
+
+	map = list->map;
+	if (map) {
+		drm_free(map, sizeof(*map), DRM_MEM_DRIVER);
+		list->map = NULL;
+	}
+
 	drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
 	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
@@ -2289,7 +2592,15 @@ i915_gem_idle(struct drm_device *dev)
 	 * waited for a sequence higher than any pending execbuffer
 	 */
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+	if (!list_empty(&dev_priv->mm.flushing_list)) {
+		struct drm_i915_gem_object *obj_priv;
+		DRM_ERROR("flushing list not empty, still has:\n");
+		list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list,
+				    list) {
+			DRM_ERROR("    %p: %d\n", obj_priv,
+				  obj_priv->last_rendering_seqno);
+		}
+	}
 
 	/* Request should now be empty as we've also waited
 	 * for the last request in the list
@@ -2304,7 +2615,6 @@ i915_gem_idle(struct drm_device *dev)
 	}
 
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
 	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
 	BUG_ON(!list_empty(&dev_priv->mm.request_list));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index e8b85ac..1101aed 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -207,6 +207,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		}
 	}
 	obj_priv->tiling_mode = args->tiling_mode;
+	obj_priv->stride = args->stride;
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5c2d9f2..d39508f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -177,7 +177,7 @@
 /*
  * Instruction and interrupt control regs
  */
-
+#define FENCE_REG_830_0	0x2000
 #define PRB0_TAIL	0x02030
 #define PRB0_HEAD	0x02034
 #define PRB0_START	0x02038
@@ -244,6 +244,7 @@
 #define   CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
+#define FENCE_REG_965_0	0x03000
 
 /*
  * Framebuffer compression (915+ only)
diff --git a/include/drm/drm.h b/include/drm/drm.h
index f46ba4b..ec5daa6 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -173,6 +173,7 @@ enum drm_map_type {
 	_DRM_AGP = 3,		  /**< AGP/GART */
 	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
 	_DRM_CONSISTENT = 5,	  /**< Consistent memory for PCI DMA */
+	_DRM_GEM = 6,		  /**< GEM object */
 };
 
 /**
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 59c796b..aa29b5a 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -523,6 +523,7 @@ struct drm_map_list {
 	struct drm_hash_item hash;
 	struct drm_map *map;			/**< mapping */
 	uint64_t user_token;
+	struct drm_mm_node *file_offset_node;	/**< fake offset */
 };
 
 typedef struct drm_map drm_local_map_t;
@@ -563,6 +564,14 @@ struct drm_ati_pcigart_info {
 };
 
 /**
+ * GEM specific mm private for tracking GEM objects
+ */
+struct drm_gem_mm {
+	struct drm_mm offset_manager;	/**< Offset mgmt for buffer objects */
+	struct drm_open_hash offset_hash; /**< User token hash table for maps */
+};
+
+/**
  * This structure defines the drm_mm memory object, which will be used by the
  * DRM for its buffer objects.
  */
@@ -579,6 +588,9 @@ struct drm_gem_object {
 	/** File representing the shmem storage */
 	struct file *filp;
 
+	/* Mapping info for this object */
+	struct drm_map_list map_list;
+
 	/**
 	 * Size of the object, in bytes.  Immutable over the object's
 	 * lifetime.
@@ -724,6 +736,9 @@ struct drm_driver {
 	int (*gem_init_object) (struct drm_gem_object *obj);
 	void (*gem_free_object) (struct drm_gem_object *obj);
 
+	/* Driver private ops for this object */
+	struct vm_operations_struct *gem_vm_ops;
+
 	int major;
 	int minor;
 	int patchlevel;
@@ -883,6 +898,8 @@ struct drm_device {
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	int num_crtcs;                  /**< Number of CRTCs on this device */
 	void *dev_private;		/**< device private data */
+	void *mm_private;
+	struct address_space *dev_mapping;
 	struct drm_sigdata sigdata;	   /**< For block_all_signals */
 	sigset_t sigmask;
 
@@ -999,6 +1016,8 @@ extern int drm_release(struct inode *inode, struct file *filp);
 
 				/* Mapping support (drm_vm.h) */
 extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
+extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
+extern void drm_vm_open_locked(struct vm_area_struct *vma);
 extern unsigned long drm_core_get_map_ofs(struct drm_map * map);
 extern unsigned long drm_core_get_reg_ofs(struct drm_device *dev);
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -1255,10 +1274,12 @@ extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
 
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
+void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_free(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 void drm_gem_object_handle_free(struct kref *kref);
+int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index eb4b350..fed1d7c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -159,6 +159,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SW_FINISH	0x20
 #define DRM_I915_GEM_SET_TILING	0x21
 #define DRM_I915_GEM_GET_TILING	0x22
+#define DRM_I915_GEM_MMAP_GTT	0x23
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -186,6 +187,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_PREAD	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
+#define DRM_IOCTL_I915_GEM_MMAP_GTT	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
 #define DRM_IOCTL_I915_GEM_SET_DOMAIN	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
 #define DRM_IOCTL_I915_GEM_SW_FINISH	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
 #define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -380,6 +382,24 @@ struct drm_i915_gem_mmap {
 	uint64_t addr_ptr;
 };
 
+struct drm_i915_gem_mmap_gtt {
+	/** Handle for the object being mapped. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset in the object to map. */
+	uint64_t offset;
+	/**
+	 * Length of data to map.
+	 *
+	 * The value will be page-aligned.
+	 */
+	uint64_t size;
+	/** Returned fake offset, to be passed to mmap(2) */
+	uint64_t addr_ptr;	/* void *, but pointers are not 32/64 compatible */
+	uint32_t flags;
+	uint32_t alignment;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	uint32_t handle;