On Thursday, October 23, 2008 5:20 pm Jesse Barnes wrote:
> Ugg, get_fence was pretty broken in the !reg case... fixing that makes
> things a bit more stable...

And here's one that's actually pretty solid.  It doesn't try to unbind from 
get_fence (should be safe, but we don't want to do it anyway, and it seems 
broken); instead it just zaps the virtual mapping (waiting if necessary).
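
The zap itself is just unmap_mapping_range() over the object's fake
mmap offset, which forces the next CPU access to refault through
i915_gem_fault() and pick up the new GTT/fence state.  Roughly this
(illustrative sketch only; the patch open-codes it in the unbind and
fence-steal paths rather than adding a helper):

static void
i915_gem_zap_mappings(struct drm_device *dev, struct drm_gem_object *obj)
{
	/* hash.key holds the fake offset in pages, allocated at
	 * object init time */
	loff_t offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;

	/* even_cows = 1: also zap private COW copies of the range */
	unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
}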

The flushing list is still a little busted; it has all sorts of entries left 
on it at leavevt time, so somehow the fact that a bunch of stuff is sitting in 
the GTT domain is breaking things.  I'm still debugging that part.
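
For reference, the userspace side of the new ioctl should end up
looking something like this (illustrative sketch, not part of the
patch; error handling elided, and the GEM handle is assumed to come
from an earlier create/open):

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "i915_drm.h"	/* patched header with the new ioctl */

static void *
gtt_map(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };

	/* the kernel hands back the fake offset in addr_ptr */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return MAP_FAILED;

	/* faults on this mapping go through i915_gem_fault(), which
	 * binds the object and sets up a fence register if needed */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    fd, arg.addr_ptr);
}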

-- 
Jesse Barnes, Intel Open Source Technology Center

diff --git a/Makefile b/Makefile
index 16e3fbb..fac635f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 27
-EXTRAVERSION =
+EXTRAVERSION = -drm-gtt
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index bde64b8..9916366 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -262,6 +262,9 @@ static int drm_addmap_core(struct drm_device * dev, unsigned int offset,
 		DRM_DEBUG("AGP offset = 0x%08lx, size = 0x%08lx\n", map->offset, map->size);
 
 		break;
 	}
+	case _DRM_GEM:
+		DRM_ERROR("tried to addmap GEM object\n");
+		break;
 	case _DRM_SCATTER_GATHER:
 		if (!dev->sg) {
@@ -419,6 +422,9 @@ int drm_rmmap_locked(struct drm_device *dev, drm_local_map_t *map)
 		dmah.size = map->size;
 		__drm_pci_free(dev, &dmah);
 		break;
+	case _DRM_GEM:
+		DRM_ERROR("tried to rmmap GEM object\n");
+		break;
 	}
 	drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 96f416a..aad8d76 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -236,6 +236,7 @@ int drm_lastclose(struct drm_device * dev)
 		dev->lock.file_priv = NULL;
 		wake_up_interruptible(&dev->lock.lock_queue);
 	}
+	dev->dev_mapping = NULL;
 	mutex_unlock(&dev->struct_mutex);
 
 	DRM_DEBUG("lastclose completed\n");
@@ -290,6 +291,8 @@ EXPORT_SYMBOL(drm_init);
  */
 static void drm_cleanup(struct drm_device * dev)
 {
+	struct drm_driver *driver = dev->driver;
+
 	DRM_DEBUG("\n");
 
 	if (!dev) {
@@ -319,6 +322,9 @@ static void drm_cleanup(struct drm_device * dev)
 	drm_ht_remove(&dev->map_hash);
 	drm_ctxbitmap_cleanup(dev);
 
+	if (driver->driver_features & DRIVER_GEM)
+		drm_gem_destroy(dev);
+
 	drm_put_minor(&dev->primary);
 	if (drm_put_dev(dev))
 		DRM_ERROR("Cannot unload module\n");
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 0d46627..0958cf6 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -147,11 +147,21 @@ int drm_open(struct inode *inode, struct file *filp)
 		spin_lock(&dev->count_lock);
 		if (!dev->open_count++) {
 			spin_unlock(&dev->count_lock);
-			return drm_setup(dev);
+			retcode = drm_setup(dev);
+			goto out;
 		}
 		spin_unlock(&dev->count_lock);
 	}
 
+out:
+	mutex_lock(&dev->struct_mutex);
+	if (dev->dev_mapping == NULL)
+		dev->dev_mapping = inode->i_mapping;
+	else if (dev->dev_mapping != inode->i_mapping)
+		WARN(1, "dev->dev_mapping not inode mapping (%p expected %p)\n",
+		     dev->dev_mapping, inode->i_mapping);
+	mutex_unlock(&dev->struct_mutex);
+
 	return retcode;
 }
 EXPORT_SYMBOL(drm_open);
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ccd1afd..431dc3c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -64,6 +64,13 @@
  * up at a later date, and as our interface with shmfs for memory allocation.
  */
 
+/*
+ * We make up offsets for buffer objects so we can recognize them at
+ * mmap time.
+ */
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
+
 /**
  * Initialize the GEM device fields
  */
@@ -71,6 +78,8 @@
 int
 drm_gem_init(struct drm_device *dev)
 {
+	struct drm_gem_mm *mm;
+
 	spin_lock_init(&dev->object_name_lock);
 	idr_init(&dev->object_name_idr);
 	atomic_set(&dev->object_count, 0);
@@ -79,9 +88,41 @@ drm_gem_init(struct drm_device *dev)
 	atomic_set(&dev->pin_memory, 0);
 	atomic_set(&dev->gtt_count, 0);
 	atomic_set(&dev->gtt_memory, 0);
+
+	mm = drm_calloc(1, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	if (!mm) {
+		DRM_ERROR("out of memory\n");
+		return -ENOMEM;
+	}
+
+	dev->mm_private = mm;
+
+	if (drm_ht_create(&mm->offset_hash, 19)) {
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		return -ENOMEM;
+	}
+
+	if (drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
+			DRM_FILE_PAGE_OFFSET_SIZE)) {
+		drm_ht_remove(&mm->offset_hash);
+		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
+void
+drm_gem_destroy(struct drm_device *dev)
+{
+	struct drm_gem_mm *mm = dev->mm_private;
+
+	drm_mm_takedown(&mm->offset_manager);
+	drm_ht_remove(&mm->offset_hash);
+	drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
+	dev->mm_private = NULL;
+}
+
 /**
  * Allocate a GEM object of the specified size with shmfs backing store
  */
@@ -419,3 +460,58 @@ drm_gem_object_handle_free(struct kref *kref)
 }
 EXPORT_SYMBOL(drm_gem_object_handle_free);
 
+int
+drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map *map = NULL;
+	struct drm_gem_object *obj;
+	struct drm_hash_item *hash;
+	unsigned long prot;
+	int ret = 0;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+		mutex_unlock(&dev->struct_mutex);
+		return drm_mmap(filp, vma);
+	}
+
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	if (!map ||
+	    ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
+	/* Check for valid size. */
+	if (map->size < vma->vm_end - vma->vm_start) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	obj = map->handle;
+	if (!obj->dev->driver->gem_vm_ops) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_ops = obj->dev->driver->gem_vm_ops;
+	vma->vm_private_data = map->handle;
+	/* FIXME: use pgprot_writecombine when available */
+	prot = pgprot_val(vma->vm_page_prot);
+	prot |= _PAGE_CACHE_WC;
+	vma->vm_page_prot = __pgprot(prot);
+
+	vma->vm_file = filp;	/* Needed for drm_vm_open() */
+	drm_vm_open_locked(vma);
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_mmap);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index 3316067..af539f7 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -127,6 +127,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_insert_item);
 
 /*
  * Just insert an item and return any "bits" bit key that hasn't been
@@ -188,6 +189,7 @@ int drm_ht_remove_item(struct drm_open_hash *ht, struct drm_hash_item *item)
 	ht->fill--;
 	return 0;
 }
+EXPORT_SYMBOL(drm_ht_remove_item);
 
 void drm_ht_remove(struct drm_open_hash *ht)
 {
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index c234c6f..3ffae02 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -267,6 +267,9 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 				dmah.size = map->size;
 				__drm_pci_free(dev, &dmah);
 				break;
+			case _DRM_GEM:
+				DRM_ERROR("tried to rmmap GEM object\n");
+				break;
 			}
 			drm_free(map, sizeof(*map), DRM_MEM_MAPS);
 		}
@@ -399,7 +402,7 @@ static struct vm_operations_struct drm_vm_sg_ops = {
  * Create a new drm_vma_entry structure as the \p vma private data entry and
  * add it to drm_device::vmalist.
  */
-static void drm_vm_open_locked(struct vm_area_struct *vma)
+void drm_vm_open_locked(struct vm_area_struct *vma)
 {
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
@@ -540,7 +543,7 @@ EXPORT_SYMBOL(drm_core_get_reg_ofs);
  * according to the mapping type and remaps the pages. Finally sets the file
  * pointer and calls vm_open().
  */
-static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
+int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *priv = filp->private_data;
 	struct drm_device *dev = priv->minor->dev;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index db34780..9e9b76b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -953,6 +953,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF(DRM_I915_GEM_PREAD, i915_gem_pread_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_PWRITE, i915_gem_pwrite_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP, i915_gem_mmap_ioctl, 0),
+	DRM_IOCTL_DEF(DRM_I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_I915_GEM_SET_TILING, i915_gem_set_tiling, 0),
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a80ead2..1c87509 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -81,6 +81,10 @@ static int i915_resume(struct drm_device *dev)
 	return 0;
 }
 
+static struct vm_operations_struct i915_gem_vm_ops = {
+	.fault = i915_gem_fault,
+};
+
 static struct drm_driver driver = {
 	/* don't use mtrr's here, the Xserver or user space app should
 	 * deal with them for intel hardware.
@@ -111,13 +115,14 @@ static struct drm_driver driver = {
 	.proc_cleanup = i915_gem_proc_cleanup,
 	.gem_init_object = i915_gem_init_object,
 	.gem_free_object = i915_gem_free_object,
+	.gem_vm_ops = &i915_gem_vm_ops,
 	.ioctls = i915_ioctls,
 	.fops = {
 		 .owner = THIS_MODULE,
 		 .open = drm_open,
 		 .release = drm_release,
 		 .ioctl = drm_ioctl,
-		 .mmap = drm_mmap,
+		 .mmap = drm_gem_mmap,
 		 .poll = drm_poll,
 		 .fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eae4ed3..65acc42 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -107,6 +107,13 @@ struct intel_opregion {
 	int enabled;
 };
 
+#define I915_FENCE_REG_NONE -1
+
+struct drm_i915_fence_reg {
+	uint64_t val;
+	struct drm_gem_object *obj;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 
@@ -151,6 +158,8 @@ typedef struct drm_i915_private {
 
 	struct intel_opregion opregion;
 
+	struct drm_i915_fence_reg fence_regs[16];
+
 	/* Register state */
 	u8 saveLBB;
 	u32 saveDSPACNTR;
@@ -360,6 +369,21 @@ struct drm_i915_gem_object {
 	 * This is the same as gtt_space->start
 	 */
 	uint32_t gtt_offset;
+	/**
+	 * Required alignment for the object
+	 */
+	uint32_t gtt_alignment;
+	/**
+	 * Fake offset for use by mmap(2)
+	 */
+	uint64_t mmap_offset;
+
+	/**
+	 * Fence register bits (if any) for this object.  Will be set
+	 * as needed when mapped into the GTT.
+	 * Protected by dev->struct_mutex.
+	 */
+	int fence_reg;
 
 	/** Boolean whether this object has a valid gtt offset. */
 	int gtt_bound;
@@ -372,6 +396,7 @@ struct drm_i915_gem_object {
 
 	/** Current tiling mode for the object. */
 	uint32_t tiling_mode;
+	uint32_t stride;
 
 	/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
 	uint32_t agp_type;
@@ -480,6 +505,8 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv);
 int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
 int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
@@ -514,6 +541,7 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev);
 void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
@@ -558,6 +586,7 @@ extern void opregion_enable_asle(struct drm_device *dev);
 
 #define I915_READ(reg)          readl(dev_priv->regs + (reg))
 #define I915_WRITE(reg, val)     writel(val, dev_priv->regs + (reg))
+#define I915_WRITE64(reg, val)	writeq(val, dev_priv->regs + (reg))
 #define I915_READ16(reg)	readw(dev_priv->regs + (reg))
 #define I915_WRITE16(reg, val)	writel(val, dev_priv->regs + (reg))
 #define I915_READ8(reg)		readb(dev_priv->regs + (reg))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9ac73dd..9558b93 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -49,6 +49,10 @@ i915_gem_set_domain(struct drm_gem_object *obj,
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
+static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
+					   unsigned alignment);
+static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
+static int i915_gem_evict_something(struct drm_device *dev);
 
 static void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev);
@@ -476,6 +480,133 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/**
+ * i915_gem_fault - fault a page into the GTT
+ * @vma: VMA in question
+ * @vmf: fault info
+ *
+ * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
+ * from userspace.  The fault handler takes care of binding the object to
+ * the GTT (if needed), allocating and programming a fence register (again,
+ * only if needed based on whether the old reg is still valid or the object
+ * is tiled) and inserting a new PTE into the faulting process.
+ *
+ * Note that the faulting process may involve evicting existing objects
+ * from the GTT and/or fence registers to make room.  So performance may
+ * suffer if the GTT working set is large or there are few fence registers
+ * left.
+ */
+int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	pgoff_t page_offset;
+	unsigned long pfn;
+	int ret = 0;
+
+	/* We don't use vmf->pgoff since that has the fake offset */
+	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
+		PAGE_SHIFT;
+
+	/* Now bind it into the GTT if needed */
+	mutex_lock(&dev->struct_mutex);
+	if (!obj_priv->gtt_space) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			mutex_unlock(&dev->struct_mutex);
+			return VM_FAULT_SIGBUS;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	/* Need a new fence register? */
+	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
+	    obj_priv->tiling_mode != I915_TILING_NONE)
+		i915_gem_object_get_fence_reg(obj);
+
+	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
+		page_offset;
+
+	/* Finally, remap it using the new GTT offset */
+	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case -ENOMEM:
+	case -EAGAIN:
+		return VM_FAULT_OOM;
+	case -EFAULT:
+	case -EBUSY:
+		DRM_ERROR("can't insert pfn??  fault or busy...\n");
+		return VM_FAULT_SIGBUS;
+	default:
+		return VM_FAULT_NOPAGE;
+	}
+}
+
+/**
+ * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file_priv: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	struct drm_i915_gem_mmap_gtt *args = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj;
+	struct drm_i915_gem_object *obj_priv;
+	int ret;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	mutex_lock(&dev->struct_mutex);
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL) {
+		/* lookup failed, so there is no reference to drop */
+		mutex_unlock(&dev->struct_mutex);
+		return -EBADF;
+	}
+
+	obj_priv = obj->driver_private;
+	obj_priv->gtt_alignment = args->alignment;
+	args->addr_ptr = obj_priv->mmap_offset;
+
+	/*
+	 * Pull it into the GTT so that we have a page list (makes the
+	 * initial fault faster and any subsequent flushing possible).
+	 */
+	if (!obj_priv->agp_mem) {
+		ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+		if (ret) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+		list_add(&obj_priv->list, &dev_priv->mm.inactive_list);
+	}
+
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
 static void
 i915_gem_object_free_page_list(struct drm_gem_object *obj)
 {
@@ -642,12 +773,9 @@ i915_gem_retire_request(struct drm_device *dev,
 		 */
 		if (obj_priv->last_rendering_seqno != request->seqno)
 			return;
-#if WATCH_LRU
-		DRM_INFO("%s: retire %d moves to inactive list %p\n",
-			 __func__, request->seqno, obj);
-#endif
 
-		if (obj->write_domain != 0) {
+		if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|
+					  I915_GEM_DOMAIN_GTT)) {
 			list_move_tail(&obj_priv->list,
 				       &dev_priv->mm.flushing_list);
 		} else {
@@ -774,7 +902,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 	 * buffer to have made it to the inactive list, and we would need
 	 * a separate wait queue to handle that.
 	 */
-	if (ret == 0)
+	if (ret == 0 || ret == -ERESTARTSYS)
 		i915_gem_retire_requests(dev);
 
 	return ret;
@@ -906,7 +1034,9 @@ static int
 i915_gem_object_unbind(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	loff_t offset;
 	int ret = 0;
 
 #if WATCH_BUF
@@ -950,6 +1080,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	BUG_ON(obj_priv->active);
 
+	/* blow away mappings if mapped through GTT */
+	offset = ((loff_t) obj->map_list.hash.key) << PAGE_SHIFT;
+	unmap_mapping_range(dev->dev_mapping, offset, obj->size, 1);
+
+	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
+		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
+		dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
+		obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	}
+
 	i915_gem_object_free_page_list(obj);
 
 	if (obj_priv->gtt_space) {
@@ -1093,6 +1233,101 @@ i915_gem_object_get_page_list(struct drm_gem_object *obj)
 	return 0;
 }
 
+static int fence_reg_start = 4; /* 4 if userland hasn't ioctl'd us yet */
+static int num_fence_regs = 16; /* 8 on pre-965, 16 otherwise */
+
+/**
+ * i915_gem_object_get_fence_reg - set up a fence reg for an object
+ * @obj: object to map through a fence reg
+ *
+ * When mapping objects through the GTT, userspace wants to be able to write
+ * to them without having to worry about swizzling if the object is tiled.
+ *
+ * This function walks the fence regs looking for a free one for @obj,
+ * stealing one if it can't find any.
+ *
+ * It then sets up the reg based on the object's properties: address, pitch
+ * and tiling format.
+ */
+static void
+i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	struct drm_i915_fence_reg *reg = NULL;
+	int i, ret;
+
+	switch (obj_priv->tiling_mode) {
+	case I915_TILING_NONE:
+		WARN(1, "allocating a fence for non-tiled object?\n");
+		break;
+	case I915_TILING_X:
+		WARN(obj_priv->stride & (512 - 1),
+		     "object is X tiled but has non-512B pitch\n");
+		break;
+	case I915_TILING_Y:
+		WARN(obj_priv->stride & (128 - 1),
+		     "object is Y tiled but has non-128B pitch\n");
+		break;
+	}
+
+	/* First try to find a free reg */
+	/* FIXME: use all fence regs if userland isn't using them */
+	for (i = fence_reg_start; i < num_fence_regs; i++) {
+		reg = &dev_priv->fence_regs[i];
+		if (!reg->obj)
+			break;
+	}
+
+	/* None available, try to steal one or wait for a user to finish */
+	if (i == num_fence_regs) {
+		struct drm_i915_gem_object *old_obj_priv;
+		loff_t offset;
+
+try_again:
+		/* Could try to use LRU here instead... */
+		for (i = fence_reg_start; i < num_fence_regs; i++) {
+			reg = &dev_priv->fence_regs[i];
+			old_obj_priv = reg->obj->driver_private;
+			if (!old_obj_priv->pin_count)
+				break;
+		}
+
+		/*
+		 * Now things get ugly... we have to wait for one of the
+		 * objects to finish before trying again.
+		 */
+		if (i == num_fence_regs) {
+			ret = i915_gem_object_wait_rendering(reg->obj);
+			if (!ret)
+				goto try_again;
+			WARN(ret, "wait_rendering failed: %d\n", ret);
+		}
+
+		/*
+		 * Zap this virtual mapping so we can set up a fence again
+		 * for this object next time we need it.
+		 */
+		offset = ((loff_t) reg->obj->map_list.hash.key) << PAGE_SHIFT;
+		unmap_mapping_range(dev->dev_mapping, offset,
+				    reg->obj->size, 1);
+		old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
+	}
+
+	obj_priv->fence_reg = i;
+	reg->obj = obj;
+	reg->val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+			      0xfffff000) << 32;
+	reg->val |= obj_priv->gtt_offset & 0xfffff000;
+	reg->val |= ((obj_priv->stride / 128) - 1) << 2;
+	if (obj_priv->tiling_mode == I915_TILING_Y)
+		reg->val |= 1 << 1;
+	reg->val |= 1; /* valid */
+
+	I915_WRITE64(FENCE_REG_965_0 + (i * 8), reg->val);
+}
+
 /**
  * Finds free space in the GTT aperture and binds the object there.
  */
@@ -2132,7 +2367,12 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 
 int i915_gem_init_object(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
 	struct drm_i915_gem_object *obj_priv;
+	struct drm_map_list *list;
+	struct drm_map *map;
+	int ret = 0;
 
 	obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER);
 	if (obj_priv == NULL)
@@ -2151,12 +2391,64 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 
 	obj->driver_private = obj_priv;
 	obj_priv->obj = obj;
+	obj_priv->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj_priv->list);
+
+	/* Set the object up for mmap'ing */
+	list = &obj->map_list;
+	list->map = drm_calloc(1, sizeof(struct drm_map_list),
+			       DRM_MEM_DRIVER);
+	if (!list->map)
+		return -ENOMEM;
+
+	map = list->map;
+	map->type = _DRM_GEM;
+	map->size = obj->size;
+	map->handle = obj;
+
+	/* Get a DRM GEM mmap offset allocated... */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+						    obj->size / PAGE_SIZE, 0, 0);
+	if (!list->file_offset_node) {
+		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+						  obj->size / PAGE_SIZE, 0);
+	if (!list->file_offset_node) {
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
+		DRM_ERROR("failed to add to map hash\n");
+		goto out_free_mm;
+	}
+
+	/* By now we should be all set, any drm_mmap request on the offset
+	 * below will get to our mmap & fault handler */
+	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
 	return 0;
+
+out_free_mm:
+	drm_mm_put_block(list->file_offset_node);
+out_free_list:
+	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);
+
+	return ret;
 }
 
 void i915_gem_free_object(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list;
+	struct drm_map *map;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
 	while (obj_priv->pin_count > 0)
@@ -2164,6 +2456,20 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
 	i915_gem_object_unbind(obj);
 
+	list = &obj->map_list;
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+
+	if (list->file_offset_node) {
+		drm_mm_put_block(list->file_offset_node);
+		list->file_offset_node = NULL;
+	}
+
+	map = list->map;
+	if (map) {
+		drm_free(map, sizeof(*map), DRM_MEM_DRIVER);
+		list->map = NULL;
+	}
+
 	drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER);
 	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
@@ -2289,7 +2595,15 @@ i915_gem_idle(struct drm_device *dev)
 	 * waited for a sequence higher than any pending execbuffer
 	 */
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
+	if (!list_empty(&dev_priv->mm.flushing_list)) {
+		struct drm_i915_gem_object *obj_priv;
+		DRM_ERROR("flushing list not empty, still has:\n");
+		list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list,
+				    list) {
+			DRM_ERROR("    %p: %d\n", obj_priv,
+				  obj_priv->last_rendering_seqno);
+		}
+	}
 
 	/* Request should now be empty as we've also waited
 	 * for the last request in the list
@@ -2304,7 +2618,6 @@ i915_gem_idle(struct drm_device *dev)
 	}
 
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
-	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
 	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
 	BUG_ON(!list_empty(&dev_priv->mm.request_list));
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index e8b85ac..1101aed 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -207,6 +207,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		}
 	}
 	obj_priv->tiling_mode = args->tiling_mode;
+	obj_priv->stride = args->stride;
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5c2d9f2..d39508f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -177,7 +177,7 @@
 /*
  * Instruction and interrupt control regs
  */
-
+#define FENCE_REG_830_0	0x2000
 #define PRB0_TAIL	0x02030
 #define PRB0_HEAD	0x02034
 #define PRB0_START	0x02038
@@ -244,6 +244,7 @@
 #define   CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define   CM0_RC_OP_FLUSH_DISABLE (1<<0)
 #define GFX_FLSH_CNTL	0x02170 /* 915+ only */
+#define FENCE_REG_965_0	0x03000
 
 /*
  * Framebuffer compression (915+ only)
diff --git a/include/drm/drm.h b/include/drm/drm.h
index f46ba4b..ec5daa6 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -173,6 +173,7 @@ enum drm_map_type {
 	_DRM_AGP = 3,		  /**< AGP/GART */
 	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
 	_DRM_CONSISTENT = 5,	  /**< Consistent memory for PCI DMA */
+	_DRM_GEM = 6,		  /**< GEM object */
 };
 
 /**
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 59c796b..aa29b5a 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -523,6 +523,7 @@ struct drm_map_list {
 	struct drm_hash_item hash;
 	struct drm_map *map;			/**< mapping */
 	uint64_t user_token;
+	struct drm_mm_node *file_offset_node;	/**< fake offset */
 };
 
 typedef struct drm_map drm_local_map_t;
@@ -563,6 +564,14 @@ struct drm_ati_pcigart_info {
 };
 
 /**
+ * GEM specific mm private for tracking GEM objects
+ */
+struct drm_gem_mm {
+	struct drm_mm offset_manager;	/**< Offset mgmt for buffer objects */
+	struct drm_open_hash offset_hash; /**< User token hash table for maps */
+};
+
+/**
  * This structure defines the drm_mm memory object, which will be used by the
  * DRM for its buffer objects.
  */
@@ -579,6 +588,9 @@ struct drm_gem_object {
 	/** File representing the shmem storage */
 	struct file *filp;
 
+	/* Mapping info for this object */
+	struct drm_map_list map_list;
+
 	/**
 	 * Size of the object, in bytes.  Immutable over the object's
 	 * lifetime.
@@ -724,6 +736,9 @@ struct drm_driver {
 	int (*gem_init_object) (struct drm_gem_object *obj);
 	void (*gem_free_object) (struct drm_gem_object *obj);
 
+	/* Driver private ops for this object */
+	struct vm_operations_struct *gem_vm_ops;
+
 	int major;
 	int minor;
 	int patchlevel;
@@ -883,6 +898,8 @@ struct drm_device {
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	int num_crtcs;                  /**< Number of CRTCs on this device */
 	void *dev_private;		/**< device private data */
+	void *mm_private;
+	struct address_space *dev_mapping;
 	struct drm_sigdata sigdata;	   /**< For block_all_signals */
 	sigset_t sigmask;
 
@@ -999,6 +1016,8 @@ extern int drm_release(struct inode *inode, struct file *filp);
 
 				/* Mapping support (drm_vm.h) */
 extern int drm_mmap(struct file *filp, struct vm_area_struct *vma);
+extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
+extern void drm_vm_open_locked(struct vm_area_struct *vma);
 extern unsigned long drm_core_get_map_ofs(struct drm_map * map);
 extern unsigned long drm_core_get_reg_ofs(struct drm_device *dev);
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
@@ -1255,10 +1274,12 @@ extern int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size);
 
 /* Graphics Execution Manager library functions (drm_gem.c) */
 int drm_gem_init(struct drm_device *dev);
+void drm_gem_destroy(struct drm_device *dev);
 void drm_gem_object_free(struct kref *kref);
 struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev,
 					    size_t size);
 void drm_gem_object_handle_free(struct kref *kref);
+int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index eb4b350..fed1d7c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -159,6 +159,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SW_FINISH	0x20
 #define DRM_I915_GEM_SET_TILING	0x21
 #define DRM_I915_GEM_GET_TILING	0x22
+#define DRM_I915_GEM_MMAP_GTT	0x23
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -186,6 +187,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_PREAD	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
 #define DRM_IOCTL_I915_GEM_PWRITE	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
 #define DRM_IOCTL_I915_GEM_MMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
+#define DRM_IOCTL_I915_GEM_MMAP_GTT	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
 #define DRM_IOCTL_I915_GEM_SET_DOMAIN	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
 #define DRM_IOCTL_I915_GEM_SW_FINISH	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
 #define DRM_IOCTL_I915_GEM_SET_TILING	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
@@ -380,6 +382,24 @@ struct drm_i915_gem_mmap {
 	uint64_t addr_ptr;
 };
 
+struct drm_i915_gem_mmap_gtt {
+	/** Handle for the object being mapped. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset in the object to map. */
+	uint64_t offset;
+	/**
+	 * Length of data to map.
+	 *
+	 * The value will be page-aligned.
+	 */
+	uint64_t size;
+	/** Returned fake offset, to be passed to mmap(2) as the offset */
+	uint64_t addr_ptr;	/* 64-bit for 32/64 compatibility */
+	uint32_t flags;
+	uint32_t alignment;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	uint32_t handle;