Reintroduce priority for the TRIM ZIOs instead of using the "NOW" priority

This is based off avg's original work which can be found here:-
http://people.freebsd.org/~avg/zfs-trim-priority.diff

This changes how TRIM requests are generated to use ZIO_TYPE_FREE + a priority
instead of ZIO_TYPE_IOCTL, until processed by vdev_geom; only then is it
translated to the required GEOM values. This reduces the amount of changes
required for FREE requests to be supported by the new IO scheduler.

This eliminates the need for DKIOCTRIM, which has hence been removed.

Also fixed FREE vdev child IO's from running ZIO_STAGE_VDEV_IO_DONE as part
of their schedule.

As the new IO scheduler can result in a request to execute one type of IO
actually running a different type of IO, zio_trim requests must be
processed without holding the trim map lock (tm->tm_lock): the free request
execute call may result in a write request running, hence triggering a
trim_map_write_start call, which takes the trim map lock and would therefore
result in recursion on a non-recursive sx lock.
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c.orig	2014-04-25 22:52:16.843218967 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	2014-04-25 22:57:23.297197323 +0000
@@ -40,9 +40,9 @@
  *
  * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios.  The
  * I/O scheduler determines when and in what order those operations are
- * issued.  The I/O scheduler divides operations into five I/O classes
+ * issued.  The I/O scheduler divides operations into six I/O classes
  * prioritized in the following order: sync read, sync write, async read,
- * async write, and scrub/resilver.  Each queue defines the minimum and
+ * async write, scrub/resilver and trim.  Each queue defines the minimum and
  * maximum number of concurrent operations that may be issued to the device.
  * In addition, the device has an aggregate maximum. Note that the sum of the
  * per-queue minimums must not exceed the aggregate maximum, and if the
@@ -61,7 +61,7 @@
  * done in the order specified above. No further operations are issued if the
  * aggregate maximum number of concurrent operations has been hit or if there
  * are no operations queued for an I/O class that has not hit its maximum.
- * Every time an i/o is queued or an operation completes, the I/O scheduler
+ * Every time an I/O is queued or an operation completes, the I/O scheduler
  * looks for new operations to issue.
  *
  * All I/O classes have a fixed maximum number of outstanding operations
@@ -70,7 +70,7 @@
  * transaction groups (see txg.c). Transaction groups enter the syncing state
  * periodically so the number of queued async writes will quickly burst up and
  * then bleed down to zero. Rather than servicing them as quickly as possible,
- * the I/O scheduler changes the maximum number of active async write i/os
+ * the I/O scheduler changes the maximum number of active async write I/Os
  * according to the amount of dirty data in the pool (see dsl_pool.c). Since
  * both throughput and latency typically increase with the number of
  * concurrent operations issued to physical devices, reducing the burstiness
@@ -113,14 +113,14 @@
  */
 
 /*
- * The maximum number of i/os active to each device.  Ideally, this will be >=
+ * The maximum number of I/Os active to each device.  Ideally, this will be >=
  * the sum of each queue's max_active.  It must be at least the sum of each
  * queue's min_active.
  */
 uint32_t zfs_vdev_max_active = 1000;
 
 /*
- * Per-queue limits on the number of i/os active to each device.  If the
+ * Per-queue limits on the number of I/Os active to each device.  If the
  * sum of the queue's max_active is < zfs_vdev_max_active, then the
  * min_active comes into play.  We will send min_active from each queue,
  * and then select from queues in the order defined by zio_priority_t.
@@ -145,6 +145,14 @@ uint32_t zfs_vdev_async_write_min_active
 uint32_t zfs_vdev_async_write_max_active = 10;
 uint32_t zfs_vdev_scrub_min_active = 1;
 uint32_t zfs_vdev_scrub_max_active = 2;
+uint32_t zfs_vdev_trim_min_active = 1;
+/*
+ * TRIM max active is large in comparison to the other values due to the fact
+ * that TRIM IOs are coalesced at the device layer. This value is set such
+ * that a typical SSD can process the queued IOs in a single request.
+ */
+uint32_t zfs_vdev_trim_max_active = 64;
+
 
 /*
  * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -171,7 +179,7 @@ SYSCTL_DECL(_vfs_zfs_vdev);
 TUNABLE_INT("vfs.zfs.vdev.max_active", &zfs_vdev_max_active);
 SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RW,
     &zfs_vdev_max_active, 0,
-    "The maximum number of i/os of all types active for each device.");
+    "The maximum number of I/Os of all types active for each device.");
 
 #define ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
 TUNABLE_INT("vfs.zfs.vdev." #name "_min_active",			\
@@ -199,6 +207,8 @@ ZFS_VDEV_QUEUE_KNOB_MIN(async_write);
 ZFS_VDEV_QUEUE_KNOB_MAX(async_write);
 ZFS_VDEV_QUEUE_KNOB_MIN(scrub);
 ZFS_VDEV_QUEUE_KNOB_MAX(scrub);
+ZFS_VDEV_QUEUE_KNOB_MIN(trim);
+ZFS_VDEV_QUEUE_KNOB_MAX(trim);
 
 #undef ZFS_VDEV_QUEUE_KNOB
 
@@ -297,6 +307,7 @@ static void
 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
 
@@ -313,6 +324,7 @@ static void
 vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
 	avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
 
@@ -401,6 +413,8 @@ vdev_queue_class_min_active(zio_priority
 		return (zfs_vdev_async_write_min_active);
 	case ZIO_PRIORITY_SCRUB:
 		return (zfs_vdev_scrub_min_active);
+	case ZIO_PRIORITY_TRIM:
+		return (zfs_vdev_trim_min_active);
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -452,6 +466,8 @@ vdev_queue_class_max_active(spa_t *spa, 
 		    spa->spa_dsl_pool->dp_dirty_total));
 	case ZIO_PRIORITY_SCRUB:
 		return (zfs_vdev_scrub_max_active);
+	case ZIO_PRIORITY_TRIM:
+		return (zfs_vdev_trim_max_active);
 	default:
 		panic("invalid priority %u", p);
 		return (0);
@@ -468,6 +484,8 @@ vdev_queue_class_to_issue(vdev_queue_t *
 	spa_t *spa = vq->vq_vdev->vdev_spa;
 	zio_priority_t p;
 
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
+
 	if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
 		return (ZIO_PRIORITY_NUM_QUEUEABLE);
 
@@ -509,10 +527,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	zio_t *first, *last, *aio, *dio, *mandatory, *nio;
 	uint64_t maxgap = 0;
 	uint64_t size;
-	boolean_t stretch = B_FALSE;
-	vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
-	avl_tree_t *t = &vqc->vqc_queued_tree;
-	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+	boolean_t stretch;
+	avl_tree_t *t;
+	enum zio_flag flags;
+
+	ASSERT(MUTEX_HELD(&vq->vq_lock));
 
 	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
 		return (NULL);
@@ -550,6 +569,8 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	 * Walk backwards through sufficiently contiguous I/Os
 	 * recording the last non-option I/O.
 	 */
+	flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
+	t = &vq->vq_class[zio->io_priority].vqc_queued_tree;
 	while ((dio = AVL_PREV(t, first)) != NULL &&
 	    (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
 	    IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
@@ -589,6 +610,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	 * non-optional I/O is close enough to make aggregation
 	 * worthwhile.
 	 */
+	stretch = B_FALSE;
 	if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) {
 		zio_t *nio = last;
 		while ((dio = AVL_NEXT(t, nio)) != NULL &&
@@ -729,11 +751,13 @@ vdev_queue_io(zio_t *zio)
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
 		    zio->io_priority != ZIO_PRIORITY_SCRUB)
 			zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
-	} else {
-		ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+	} else if (zio->io_type == ZIO_TYPE_WRITE) {
 		if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
 		    zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE)
 			zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
+	} else {
+		ASSERT(zio->io_type == ZIO_TYPE_FREE);
+		zio->io_priority = ZIO_PRIORITY_TRIM;
 	}
 
 	zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE;
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c.orig	2014-04-25 22:52:16.974217517 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	2014-04-25 22:52:20.235219441 +0000
@@ -759,10 +759,11 @@ vdev_geom_io_start(zio_t *zio)
 
 	vd = zio->io_vd;
 
-	if (zio->io_type == ZIO_TYPE_IOCTL) {
+	switch (zio->io_type) {
+	case ZIO_TYPE_IOCTL:
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
-			zio->io_error = ENXIO;
+			zio->io_error = SET_ERROR(ENXIO);
 			return (ZIO_PIPELINE_CONTINUE);
 		}
 
@@ -771,28 +772,28 @@ vdev_geom_io_start(zio_t *zio)
 			if (zfs_nocacheflush || vdev_geom_bio_flush_disable)
 				break;
 			if (vd->vdev_nowritecache) {
-				zio->io_error = ENOTSUP;
-				break;
-			}
-			goto sendreq;
-		case DKIOCTRIM:
-			if (vdev_geom_bio_delete_disable)
-				break;
-			if (vd->vdev_notrim) {
-				zio->io_error = ENOTSUP;
+				zio->io_error = SET_ERROR(ENOTSUP);
 				break;
 			}
 			goto sendreq;
 		default:
-			zio->io_error = ENOTSUP;
+			zio->io_error = SET_ERROR(ENOTSUP);
 		}
 
 		return (ZIO_PIPELINE_CONTINUE);
+	case ZIO_TYPE_FREE:
+		if (vdev_geom_bio_delete_disable)
+			return (ZIO_PIPELINE_CONTINUE);
+
+		if (vd->vdev_notrim) {
+			zio->io_error = SET_ERROR(ENOTSUP);
+			return (ZIO_PIPELINE_CONTINUE);
+		}
 	}
 sendreq:
 	cp = vd->vdev_tsd;
 	if (cp == NULL) {
-		zio->io_error = ENXIO;
+		zio->io_error = SET_ERROR(ENXIO);
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 	bp = g_alloc_bio();
@@ -805,21 +806,20 @@ sendreq:
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
 		break;
+	case ZIO_TYPE_FREE:
+		bp->bio_cmd = BIO_DELETE;
+		bp->bio_data = NULL;
+		bp->bio_offset = zio->io_offset;
+		bp->bio_length = zio->io_size;
+		break;
 	case ZIO_TYPE_IOCTL:
-		switch (zio->io_cmd) {
-		case DKIOCFLUSHWRITECACHE:
+		if (zio->io_cmd == DKIOCFLUSHWRITECACHE) {
 			bp->bio_cmd = BIO_FLUSH;
 			bp->bio_flags |= BIO_ORDERED;
 			bp->bio_data = NULL;
 			bp->bio_offset = cp->provider->mediasize;
 			bp->bio_length = 0;
 			break;
-		case DKIOCTRIM:
-			bp->bio_cmd = BIO_DELETE;
-			bp->bio_data = NULL;
-			bp->bio_offset = zio->io_offset;
-			bp->bio_length = zio->io_size;
-			break;
 		}
 		break;
 	}
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h.orig	2014-03-26 17:52:33.000000000 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_impl.h	2014-04-25 22:52:20.236219020 +0000
@@ -215,6 +215,10 @@ enum zio_stage {
 	ZIO_STAGE_FREE_BP_INIT |		\
 	ZIO_STAGE_DVA_FREE)
 
+#define	ZIO_FREE_PHYS_PIPELINE			\
+	(ZIO_INTERLOCK_STAGES |			\
+	ZIO_VDEV_IO_STAGES)
+
 #define	ZIO_DDT_FREE_PIPELINE			\
 	(ZIO_INTERLOCK_STAGES |			\
 	ZIO_STAGE_FREE_BP_INIT |		\
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h.orig	2014-04-25 22:52:16.916217797 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	2014-04-25 22:52:20.237217275 +0000
@@ -135,9 +135,10 @@ typedef enum zio_priority {
 	ZIO_PRIORITY_ASYNC_READ,	/* prefetch */
 	ZIO_PRIORITY_ASYNC_WRITE,	/* spa_sync() */
 	ZIO_PRIORITY_SCRUB,		/* asynchronous scrub/resilver reads */
+	ZIO_PRIORITY_TRIM,		/* free requests used for TRIM */
 	ZIO_PRIORITY_NUM_QUEUEABLE,
 
-	ZIO_PRIORITY_NOW		/* non-queued i/os (e.g. free) */
+	ZIO_PRIORITY_NOW		/* non-queued I/Os (e.g. ioctl) */
 } zio_priority_t;
 
 #define	ZIO_PIPELINE_CONTINUE		0x100
@@ -508,7 +509,7 @@ extern zio_t *zio_claim(zio_t *pio, spa_
 
 extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
     uint64_t offset, uint64_t size, zio_done_func_t *done, void *priv,
-    enum zio_flag flags);
+    zio_priority_t priority, enum zio_flag flags);
 
 extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
     uint64_t size, void *data, int checksum,
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c.orig	2014-03-26 17:52:34.000000000 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c	2014-04-25 22:52:20.239217499 +0000
@@ -449,7 +449,7 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 {
 	trim_map_t *tm = vd->vdev_trimmap;
 	trim_seg_t *ts;
-	uint64_t size, txgtarget, txgsafe;
+	uint64_t size, offset, txgtarget, txgsafe;
 	hrtime_t timelimit;
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
@@ -477,9 +477,20 @@ trim_map_vdev_commit(spa_t *spa, zio_t *
 		avl_remove(&tm->tm_queued_frees, ts);
 		avl_add(&tm->tm_inflight_frees, ts);
 		size = ts->ts_end - ts->ts_start;
-		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
+		offset = ts->ts_start;
 		TRIM_MAP_SDEC(tm, size);
 		TRIM_MAP_QDEC(tm);
+		/*
+		 * We drop the lock while we call zio_nowait as the IO
+		 * scheduler can result in a different IO being run e.g.
+		 * a write which would result in a recursive lock.
+		 */
+		mutex_exit(&tm->tm_lock);
+
+		zio_nowait(zio_trim(zio, spa, vd, offset, size));
+
+		mutex_enter(&tm->tm_lock);
+		ts = trim_map_first(tm, txgtarget, txgsafe, timelimit);
 	}
 	mutex_exit(&tm->tm_lock);
 }
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c.orig	2014-04-25 22:52:16.921217901 +0000
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c	2014-04-25 22:54:10.302212062 +0000
@@ -793,6 +793,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, ui
 	else if (BP_IS_GANG(bp) || BP_GET_DEDUP(bp))
 		stage |= ZIO_STAGE_ISSUE_ASYNC;
 
+	flags |= ZIO_FLAG_DONT_QUEUE;
+
 	zio = zio_create(pio, spa, txg, bp, NULL, size,
 	    NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
 	    NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
@@ -832,14 +834,14 @@ zio_claim(zio_t *pio, spa_t *spa, uint64
 zio_t *
 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, uint64_t offset,
     uint64_t size, zio_done_func_t *done, void *private,
-    enum zio_flag flags)
+    zio_priority_t priority, enum zio_flag flags)
 {
 	zio_t *zio;
 	int c;
 
 	if (vd->vdev_children == 0) {
 		zio = zio_create(pio, spa, 0, NULL, NULL, size, done, private,
-		    ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, offset, NULL,
+		    ZIO_TYPE_IOCTL, priority, flags, vd, offset, NULL,
 		    ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
 
 		zio->io_cmd = cmd;
@@ -848,7 +850,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t
 
 		for (c = 0; c < vd->vdev_children; c++)
 			zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
-			    offset, size, done, private, flags));
+			    offset, size, done, private, priority, flags));
 	}
 
 	return (zio);
@@ -933,6 +935,10 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *
 		pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
 	}
 
+	/* Not all IO types require vdev io done stage e.g. free */
+	if (!(pio->io_pipeline & ZIO_STAGE_VDEV_IO_DONE))
+		pipeline &= ~ZIO_STAGE_VDEV_IO_DONE;
+
 	if (vd->vdev_children == 0)
 		offset += VDEV_LABEL_START_SIZE;
 
@@ -978,7 +984,7 @@ void
 zio_flush(zio_t *zio, vdev_t *vd)
 {
 	zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE, 0, 0,
-	    NULL, NULL,
+	    NULL, NULL, ZIO_PRIORITY_NOW,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
 }
 
@@ -988,9 +994,10 @@ zio_trim(zio_t *zio, spa_t *spa, vdev_t 
 
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
-	return zio_ioctl(zio, spa, vd, DKIOCTRIM, offset, size,
-	    NULL, NULL,
-	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY);
+	return (zio_create(zio, spa, 0, NULL, NULL, size, NULL, NULL,
+	    ZIO_TYPE_FREE, ZIO_PRIORITY_TRIM, ZIO_FLAG_DONT_AGGREGATE |
+	    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY,
+	    vd, offset, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PHYS_PIPELINE));
 }
 
 void
@@ -2527,7 +2534,8 @@ zio_vdev_io_start(zio_t **ziop)
 		return (vdev_mirror_ops.vdev_op_io_start(zio));
 	}
 
-	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE) {
+	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_FREE &&
+	    zio->io_priority == ZIO_PRIORITY_NOW) {
 		trim_map_free(vd, zio->io_offset, zio->io_size, zio->io_txg);
 		return (ZIO_PIPELINE_CONTINUE);
 	}
@@ -2595,31 +2603,33 @@ zio_vdev_io_start(zio_t **ziop)
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 
-	if (vd->vdev_ops->vdev_op_leaf &&
-	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
+	if (vd->vdev_ops->vdev_op_leaf) {
+		switch (zio->io_type) {
+		case ZIO_TYPE_READ:
+			if (vdev_cache_read(zio) == 0)
+				return (ZIO_PIPELINE_CONTINUE);
+			/* FALLTHROUGH */
+		case ZIO_TYPE_WRITE:
+		case ZIO_TYPE_FREE:
+			if ((zio = vdev_queue_io(zio)) == NULL)
+				return (ZIO_PIPELINE_STOP);
+			*ziop = zio;
 
-		if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
-			return (ZIO_PIPELINE_CONTINUE);
-
-		if ((zio = vdev_queue_io(zio)) == NULL)
-			return (ZIO_PIPELINE_STOP);
-		*ziop = zio;
-
-		if (!vdev_accessible(vd, zio)) {
-			zio->io_error = SET_ERROR(ENXIO);
-			zio_interrupt(zio);
-			return (ZIO_PIPELINE_STOP);
+			if (!vdev_accessible(vd, zio)) {
+				zio->io_error = SET_ERROR(ENXIO);
+				zio_interrupt(zio);
+				return (ZIO_PIPELINE_STOP);
+			}
+			break;
 		}
-	}
-
-	/*
-	 * Note that we ignore repair writes for TRIM because they can conflict
-	 * with normal writes. This isn't an issue because, by definition, we
-	 * only repair blocks that aren't freed.
-	 */
-	if (vd->vdev_ops->vdev_op_leaf && zio->io_type == ZIO_TYPE_WRITE &&
-	    !(zio->io_flags & ZIO_FLAG_IO_REPAIR)) {
-		if (!trim_map_write_start(zio))
+		/*
+		 * Note that we ignore repair writes for TRIM because they can
+		 * conflict with normal writes. This isn't an issue because, by
+		 * definition, we only repair blocks that aren't freed.
+		 */
+		if (zio->io_type == ZIO_TYPE_WRITE &&
+		    !(zio->io_flags & ZIO_FLAG_IO_REPAIR) &&
+		    !trim_map_write_start(zio))
 			return (ZIO_PIPELINE_STOP);
 	}
 
@@ -2641,7 +2651,8 @@ zio_vdev_io_done(zio_t **ziop)
 	    zio->io_type == ZIO_TYPE_WRITE || zio->io_type == ZIO_TYPE_FREE);
 
 	if (vd != NULL && vd->vdev_ops->vdev_op_leaf &&
-	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE)) {
+	    (zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE ||
+	    zio->io_type == ZIO_TYPE_FREE)) {
 
 		if (zio->io_type == ZIO_TYPE_WRITE &&
 		    !(zio->io_flags & ZIO_FLAG_IO_REPAIR))
@@ -2722,7 +2733,8 @@ zio_vdev_io_assess(zio_t **ziop)
 	if (zio_injection_enabled && zio->io_error == 0)
 		zio->io_error = zio_handle_fault_injection(zio, EIO);
 
-	if (zio->io_type == ZIO_TYPE_IOCTL && zio->io_cmd == DKIOCTRIM)
+	if (zio->io_type == ZIO_TYPE_FREE &&
+	    zio->io_priority != ZIO_PRIORITY_NOW) {
 		switch (zio->io_error) {
 		case 0:
 			ZIO_TRIM_STAT_INCR(bytes, zio->io_size);
@@ -2735,6 +2747,7 @@ zio_vdev_io_assess(zio_t **ziop)
 			ZIO_TRIM_STAT_BUMP(failed);
 			break;
 		}
+	}
 
 	/*
 	 * If the I/O failed, determine whether we should attempt to retry it.
--- sys/cddl/compat/opensolaris/sys/dkio.h.orig	2014-03-26 17:52:29.000000000 +0000
+++ sys/cddl/compat/opensolaris/sys/dkio.h	2014-04-25 22:52:20.244217558 +0000
@@ -75,8 +75,6 @@ extern "C" {
  */
 #define	DKIOCFLUSHWRITECACHE	(DKIOC|34)	/* flush cache to phys medium */
 
-#define	DKIOCTRIM		(DKIOC|35)	/* TRIM a block */
-
 struct dk_callback {
 	void (*dkc_callback)(void *dkc_cookie, int error);
 	void *dkc_cookie;
