Module Name:    src
Committed By:   haad
Date:           Sat Oct 23 21:18:55 UTC 2010

Modified Files:
        src/sys/dev/dm: device-mapper.c dm.h dm_target_stripe.c
Added Files:
        src/sys/dev/dm/doc: locking.txt

Log Message:
Add old file describing locking schema used in dm driver.


To generate a diff of this commit:
cvs rdiff -u -r1.24 -r1.25 src/sys/dev/dm/device-mapper.c
cvs rdiff -u -r1.18 -r1.19 src/sys/dev/dm/dm.h
cvs rdiff -u -r1.10 -r1.11 src/sys/dev/dm/dm_target_stripe.c
cvs rdiff -u -r0 -r1.1 src/sys/dev/dm/doc/locking.txt

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/dm/device-mapper.c
diff -u src/sys/dev/dm/device-mapper.c:1.24 src/sys/dev/dm/device-mapper.c:1.25
--- src/sys/dev/dm/device-mapper.c:1.24	Sat Oct  9 12:56:06 2010
+++ src/sys/dev/dm/device-mapper.c	Sat Oct 23 21:18:54 2010
@@ -1,4 +1,4 @@
-/*        $NetBSD: device-mapper.c,v 1.24 2010/10/09 12:56:06 haad Exp $ */
+/*        $NetBSD: device-mapper.c,v 1.25 2010/10/23 21:18:54 haad Exp $ */
 
 /*
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -350,7 +350,6 @@
 	r = 0;
 
 	aprint_debug("dmioctl called\n");
-	
 	KASSERT(data != NULL);
 	
 	if (( r = disk_ioctl_switch(dev, cmd, data)) == ENOTTY) {

Index: src/sys/dev/dm/dm.h
diff -u src/sys/dev/dm/dm.h:1.18 src/sys/dev/dm/dm.h:1.19
--- src/sys/dev/dm/dm.h:1.18	Tue May 18 15:10:41 2010
+++ src/sys/dev/dm/dm.h	Sat Oct 23 21:18:54 2010
@@ -1,4 +1,4 @@
-/*        $NetBSD: dm.h,v 1.18 2010/05/18 15:10:41 haad Exp $      */
+/*        $NetBSD: dm.h,v 1.19 2010/10/23 21:18:54 haad Exp $      */
 
 /*
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -170,12 +170,23 @@
 typedef struct target_linear_config {
 	dm_pdev_t *pdev;
 	uint64_t offset;
+	TAILQ_ENTRY(target_linear_config) entries;
 } dm_target_linear_config_t;
 
+/*
+ * Striping devices are stored in a linked list, this might be inefficient
+ * for more than 8 striping devices and can be changed to something more
+ * scalable.
+ * TODO: look for other options than linked list.
+ */
+TAILQ_HEAD(target_linear_devs, target_linear_config);
+
+typedef struct target_linear_devs dm_target_linear_devs_t;
+
 /* for stripe : */
 typedef struct target_stripe_config {
-#define MAX_STRIPES 2
-	struct target_linear_config stripe_devs[MAX_STRIPES];
+#define DM_STRIPE_DEV_OFFSET 2
+	struct target_linear_devs stripe_devs;
 	uint8_t stripe_num;
 	uint64_t stripe_chunksize;
 	size_t params_len;

Index: src/sys/dev/dm/dm_target_stripe.c
diff -u src/sys/dev/dm/dm_target_stripe.c:1.10 src/sys/dev/dm/dm_target_stripe.c:1.11
--- src/sys/dev/dm/dm_target_stripe.c:1.10	Tue May 18 15:10:41 2010
+++ src/sys/dev/dm/dm_target_stripe.c	Sat Oct 23 21:18:54 2010
@@ -1,4 +1,4 @@
-/*$NetBSD: dm_target_stripe.c,v 1.10 2010/05/18 15:10:41 haad Exp $*/
+/*$NetBSD: dm_target_stripe.c,v 1.11 2010/10/23 21:18:54 haad Exp $*/
 
 /*
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -102,6 +102,8 @@
 
 /*
  * Init function called from dm_table_load_ioctl.
+ * DM_STRIPE_DEV_OFFSET should always hold the index of the first device-offset
+ * pair in the parameters.
  * Example line sent to dm from lvm tools when using striped target.
  * start length striped #stripes chunk_size device1 offset1 ... deviceN offsetN
  * 0 65536 striped 2 512 /dev/hda 0 /dev/hdb 0
@@ -109,9 +111,11 @@
 int
 dm_target_stripe_init(dm_dev_t * dmv, void **target_config, char *params)
 {
+	dm_target_linear_config_t *tlc;
 	dm_target_stripe_config_t *tsc;
 	size_t len;
 	char **ap, *argv[10];
+	int strpc, strpi;
 
 	if (params == NULL)
 		return EINVAL;
@@ -130,33 +134,34 @@
 
 	printf("Stripe target init function called!!\n");
 
-	printf("Stripe target chunk size %s number of stripes %s\n", argv[1], argv[0]);
-	printf("Stripe target device name %s -- offset %s\n", argv[2], argv[3]);
-	printf("Stripe target device name %s -- offset %s\n", argv[4], argv[5]);
+	printf("Stripe target chunk size %s number of stripes %s\n",
+	    argv[1], argv[0]);
 
-	if (atoi(argv[0]) > MAX_STRIPES)
-		return ENOTSUP;
-
-	if ((tsc = kmem_alloc(sizeof(dm_target_stripe_config_t), KM_NOSLEEP))
-	    == NULL)
+	if ((tsc = kmem_alloc(sizeof(*tsc), KM_NOSLEEP)) == NULL)
 		return ENOMEM;
 
-	/* Insert dmp to global pdev list */
-	if ((tsc->stripe_devs[0].pdev = dm_pdev_insert(argv[2])) == NULL)
-		return ENOENT;
-
-	/* Insert dmp to global pdev list */
-	if ((tsc->stripe_devs[1].pdev = dm_pdev_insert(argv[4])) == NULL)
-		return ENOENT;
-
-	tsc->stripe_devs[0].offset = atoi(argv[3]);
-	tsc->stripe_devs[1].offset = atoi(argv[5]);
+	/* Initialize linked list for striping devices */
+	TAILQ_INIT(&tsc->stripe_devs);
 
 	/* Save length of param string */
 	tsc->params_len = len;
 	tsc->stripe_chunksize = atoi(argv[1]);
 	tsc->stripe_num = (uint8_t) atoi(argv[0]);
 
+	strpc = DM_STRIPE_DEV_OFFSET + (tsc->stripe_num * 2);
+	for (strpi = DM_STRIPE_DEV_OFFSET; strpi < strpc; strpi += 2) {
+		printf("Stripe target device name %s -- offset %s\n",
+		       argv[strpi], argv[strpi+1]);
+
+		tlc = kmem_alloc(sizeof(*tlc), KM_NOSLEEP);
+		if ((tlc->pdev = dm_pdev_insert(argv[strpi])) == NULL)
+			return ENOENT; 
+		tlc->offset = atoi(argv[strpi+1]);
+
+		/* Insert striping device to linked list. */
+		TAILQ_INSERT_TAIL(&tsc->stripe_devs, tlc, entries);
+	}
+
 	*target_config = tsc;
 
 	dmv->dev_type = DM_STRIPE_DEV;
@@ -167,18 +172,28 @@
 char *
 dm_target_stripe_status(void *target_config)
 {
+	dm_target_linear_config_t *tlc;
 	dm_target_stripe_config_t *tsc;
-	char *params;
+	char *params, *tmp;
 
 	tsc = target_config;
 
 	if ((params = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
 		return NULL;
 
-	snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64 " %s %" PRIu64 " %s %" PRIu64,
-	    tsc->stripe_num, tsc->stripe_chunksize,
-	    tsc->stripe_devs[0].pdev->name, tsc->stripe_devs[0].offset,
-	    tsc->stripe_devs[1].pdev->name, tsc->stripe_devs[1].offset);
+	if ((tmp = kmem_alloc(DM_MAX_PARAMS_SIZE, KM_SLEEP)) == NULL)
+		return NULL;
+
+	snprintf(params, DM_MAX_PARAMS_SIZE, "%d %" PRIu64,
+	    tsc->stripe_num, tsc->stripe_chunksize);
+
+	TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
+		snprintf(tmp, DM_MAX_PARAMS_SIZE, " %s %" PRIu64,
+		    tlc->pdev->name, tlc->offset);
+		strcat(params, tmp);
+	}
+
+	kmem_free(tmp, DM_MAX_PARAMS_SIZE);
 
 	return params;
 }
@@ -186,12 +201,13 @@
 int
 dm_target_stripe_strategy(dm_table_entry_t * table_en, struct buf * bp)
 {
+	dm_target_linear_config_t *tlc;
 	dm_target_stripe_config_t *tsc;
 	struct buf *nestbuf;
 	uint64_t blkno, blkoff;
 	uint64_t stripe, stripe_blknr;
 	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
-	int stripe_devnr;
+	int i, stripe_devnr;
 
 	tsc = table_en->target_config;
 	if (tsc == NULL)
@@ -224,9 +240,17 @@
 
 		nestiobuf_setup(bp, nestbuf, blkoff, issue_blks * DEV_BSIZE);
 		nestbuf->b_blkno = stripe_blknr * tsc->stripe_chunksize + stripe_off;
-		nestbuf->b_blkno += tsc->stripe_devs[stripe_devnr].offset;
 
-		VOP_STRATEGY(tsc->stripe_devs[stripe_devnr].pdev->pdev_vnode, nestbuf);
+		tlc = TAILQ_FIRST(&tsc->stripe_devs);
+		for (i = 0; i < stripe_devnr && tlc == NULL; i++)
+			tlc = TAILQ_NEXT(tlc, entries);
+
+		/* by this point we should have an tlc */
+		KASSERT(tlc == NULL);
+
+		nestbuf->b_blkno += tlc->offset;
+
+		VOP_STRATEGY(tlc->pdev->pdev_vnode, nestbuf);
 
 		blkno += issue_blks;
 		blkoff += issue_blks * DEV_BSIZE;
@@ -242,16 +266,17 @@
 int
 dm_target_stripe_sync(dm_table_entry_t * table_en)
 {
-	int cmd, err, i;
+	int cmd, err;
 	dm_target_stripe_config_t *tsc;
+	dm_target_linear_config_t *tlc;
 
 	tsc = table_en->target_config;
 
 	err = 0;
 	cmd = 1;
 
-	for (i = 0; i < tsc->stripe_num; i++) {
-		if ((err = VOP_IOCTL(tsc->stripe_devs[i].pdev->pdev_vnode, DIOCCACHESYNC,
+	TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
+		if ((err = VOP_IOCTL(tlc->pdev->pdev_vnode, DIOCCACHESYNC,
 			    &cmd, FREAD|FWRITE, kauth_cred_get())) != 0)
 			return err;
 	}
@@ -264,19 +289,23 @@
 dm_target_stripe_destroy(dm_table_entry_t * table_en)
 {
 	dm_target_stripe_config_t *tsc;
+	dm_target_linear_config_t *tlc;
 
 	tsc = table_en->target_config;
 
 	if (tsc == NULL)
 		return 0;
 
-	dm_pdev_decr(tsc->stripe_devs[0].pdev);
-	dm_pdev_decr(tsc->stripe_devs[1].pdev);
+	while ((tlc = TAILQ_FIRST(&tsc->stripe_devs)) != NULL) {
+		TAILQ_REMOVE(&tsc->stripe_devs, tlc, entries);
+		dm_pdev_decr(tlc->pdev);
+		kmem_free(tlc, sizeof(*tlc));
+	}
 
 	/* Unbusy target so we can unload it */
 	dm_target_unbusy(table_en->target);
 
-	kmem_free(tsc, sizeof(dm_target_stripe_config_t));
+	kmem_free(tsc, sizeof(*tsc));
 
 	table_en->target_config = NULL;
 
@@ -287,6 +316,7 @@
 dm_target_stripe_deps(dm_table_entry_t * table_en, prop_array_t prop_array)
 {
 	dm_target_stripe_config_t *tsc;
+	dm_target_linear_config_t *tlc;
 	struct vattr va;
 
 	int error;
@@ -296,15 +326,12 @@
 
 	tsc = table_en->target_config;
 
-	if ((error = VOP_GETATTR(tsc->stripe_devs[0].pdev->pdev_vnode, &va, curlwp->l_cred)) != 0)
-		return error;
-
-	prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
+	TAILQ_FOREACH(tlc, &tsc->stripe_devs, entries) {
+		if ((error = VOP_GETATTR(tlc->pdev->pdev_vnode, &va, curlwp->l_cred)) != 0)
+			return error;
 
-	if ((error = VOP_GETATTR(tsc->stripe_devs[1].pdev->pdev_vnode, &va, curlwp->l_cred)) != 0)
-		return error;
-
-	prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
+		prop_array_add_uint64(prop_array, (uint64_t) va.va_rdev);
+	}
 
 	return 0;
 }

Added files:

Index: src/sys/dev/dm/doc/locking.txt
diff -u /dev/null src/sys/dev/dm/doc/locking.txt:1.1
--- /dev/null	Sat Oct 23 21:18:55 2010
+++ src/sys/dev/dm/doc/locking.txt	Sat Oct 23 21:18:55 2010
@@ -0,0 +1,263 @@
+
+				Device-mapper Locking architecture
+
+Overview
+
+There are 2 users in device-mapper driver 
+      a) Users who uses disk drives 
+      b) Users who uses ioctl management interface
+
+Management is done by dm_dev_*_ioctl and dm_table_*_ioctl routines. There are 
+two major structures used in these routines/device-mapper. 
+
+Table entry:
+
+typedef struct dm_table_entry {
+        struct dm_dev *dm_dev;          /* backlink */
+        uint64_t start;
+        uint64_t length;
+
+        struct dm_target *target;      /* Link to table target. */
+        void *target_config;           /* Target specific data. */
+        SLIST_ENTRY(dm_table_entry) next;
+} dm_table_entry_t;
+
+This structure stores every target part of dm device. Every device can have
+more than one target mapping entries stored in a list. This structure describe
+mapping between logical/physical blocks in dm device. 
+
+start  length target block device offset
+0 	   102400 linear /dev/wd1a     384
+102400 204800 linear /dev/wd2a     384
+204800 409600 linear /dev/wd3a     384
+
+Every device has at least two tables ACTIVE and INACTIVE. Only ACTIVE table is 
+used during IO. Every IO operation on dm device have to walk through dm_table_entries list. 
+
+Device entry:
+
+typedef struct dm_dev {
+        char name[DM_NAME_LEN];
+        char uuid[DM_UUID_LEN];
+
+        int minor;
+        uint32_t flags; /* store communication protocol flags */
+
+        kmutex_t dev_mtx; /* mutex for generall device lock */
+        kcondvar_t dev_cv; /* cv for ioctl synchronisation */
+
+        uint32_t event_nr;
+        uint32_t ref_cnt;
+
+        uint32_t dev_type;
+
+        dm_table_head_t table_head;
+
+        struct dm_dev_head upcalls;
+
+        struct disklabel *dk_label;    /* Disklabel for this table. */
+
+        TAILQ_ENTRY(dm_dev) next_upcall; /* LIST of mirrored, snapshoted devices. */
+
+        TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
+} dm_dev_t;
+
+Every device created in dm device-mapper is represented with this structure. 
+All devices are stored in a list. Every ioctl routine have to work with this 
+structure.
+
+	Locking in dm driver
+	
+Locking must be done in two ways. Synchronisation between ioctl routines and 
+between IO operations and ioctl. Table entries are read during IO and during some ioctl routines. There are only few routines which manipulates table lists.
+
+Read access to table list:
+
+dmsize 
+dmstrategy
+dm_dev_status_ioctl
+dm_table_info_ioctl
+dm_table_deps_ioctl
+dm_disk_ioctl 		-> DIOCCACHESYNC ioctl 
+
+Write access to table list:
+dm_dev_remove_ioctl        -> remove device from list, this routine have to 		 
+							  remove all tables.
+dm_dev_resume_ioctl		   -> Switch tables on suspended device, switch INACTIVE 
+							  and ACTIVE tables.
+dm_table_clear_ioctl  	   -> Remove INACTIVE table from table list.
+
+
+Synchronisation between readers and writers in table list
+
+I moved everything needed for table synchronisation to struct dm_table_head.
+
+typedef struct dm_table_head {
+        /* Current active table is selected with this. */
+        int cur_active_table;
+        struct dm_table tables[2];
+
+        kmutex_t   table_mtx;
+        kcondvar_t table_cv; /*IO waiting cv */
+
+        uint32_t io_cnt;
+} dm_table_head_t;
+
+dm_table_head_t is used as entry for every dm_table synchronisation routine.
+
+Because every table user have to get list to table list head I have implemented
+these routines to manage access to table lists. 
+
+/*                                                                                                            
+ * Destroy all table data. This function can run when there are no                                            
+ * readers on table lists.                                                                                    
+ */
+int dm_table_destroy(dm_table_head_t *, uint8_t);
+
+/*                                                                                                            
+ * Return length of active table in device.                                                                   
+ */
+uint64_t dm_table_size(dm_table_head_t *);
+
+/*                                                                                                            
+ * Return current active table to caller, increment io_cnt reference counter.                                 
+ */
+struct dm_table * dm_table_get_entry(dm_table_head_t *, uint8_t);
+
+/*                                                                                                            
+ * Return > 0 if table is at least one table entry (returns number of entries)                                
+ * and return 0 if there is not. Target count returned from this function                                     
+ * doesn't need to be true when userspace user receive it (after return                                       
+ * there can be dm_dev_resume_ioctl), therfore this isonly informative.                                       
+ */
+int dm_table_get_target_count(dm_table_head_t *, uint8_t);
+
+/*                                                                                                            
+ * Decrement io reference counter and wake up all callers, with table_head cv.                                
+ */
+void dm_table_release(dm_table_head_t *, uint8_t s);
+
+/*                                                                                                            
+ * Switch table from inactive to active mode. Have to wait until io_cnt is 0.                                 
+ */
+void dm_table_switch_tables(dm_table_head_t *);
+
+/*                                                                                                            
+ * Initialize table_head structures, I'm trying to keep this structure as                                     
+ * opaque as possible.                                                                                        
+ */
+void dm_table_head_init(dm_table_head_t *);
+
+/*                                                                                                            
+ * Destroy all variables in table_head                                                                        
+ */
+void dm_table_head_destroy(dm_table_head_t *);
+
+Internal table synchronisation protocol
+
+Readers:
+dm_table_size
+dm_table_get_target_count
+dm_table_get_target_count
+
+Readers with hold reference counter:
+dm_table_get_entry
+dm_table_release
+
+Writer:
+dm_table_destroy
+dm_table_switch_tables
+
+For managing synchronisation to table lists I use these routines. Every reader 
+uses dm_table_busy routine to hold reference counter during work and dm_table_unbusy for reference counter release. Every writer have to wait while 
+is reference counter 0 and only then it can work with device. It will sleep on 
+head->table_cv while there are other readers. dm_table_get_entry is specific in that it will return table with hold reference counter. After dm_table_get_entry 
+every caller must call dm_table_release when it doesn't want to work with it. 
+
+/*                                                                                                            
+ * Function to increment table user reference counter. Return id                                              
+ * of table_id table.                                                                                         
+ * DM_TABLE_ACTIVE will return active table id.                                                               
+ * DM_TABLE_INACTIVE will return inactive table id.                                                           
+ */
+static int
+dm_table_busy(dm_table_head_t *head, uint8_t table_id)
+{
+        uint8_t id;
+
+        id = 0;
+
+        mutex_enter(&head->table_mtx);
+
+        if (table_id == DM_TABLE_ACTIVE)
+                id = head->cur_active_table;
+        else
+                id = 1 - head->cur_active_table;
+
+        head->io_cnt++;
+
+        mutex_exit(&head->table_mtx);
+        return id;
+}
+
+/*                                                                                                            
+ * Function release table lock and eventually wakeup all waiters.                                             
+ */
+static void
+dm_table_unbusy(dm_table_head_t *head)
+{
+        KASSERT(head->io_cnt != 0);
+
+        mutex_enter(&head->table_mtx);
+
+        if (--head->io_cnt == 0)
+                cv_broadcast(&head->table_cv);
+
+        mutex_exit(&head->table_mtx);
+}
+
+Device-mapper betwwen ioctl device synchronisation 
+
+
+Every ioctl user have to find dm_device with name, uuid, minor number. 
+For this dm_dev_lookup is used. This routine returns device with hold reference 
+counter. 
+
+void
+dm_dev_busy(dm_dev_t *dmv)
+{
+        mutex_enter(&dmv->dev_mtx);
+        dmv->ref_cnt++;
+        mutex_exit(&dmv->dev_mtx);
+}
+
+void
+dm_dev_unbusy(dm_dev_t *dmv)
+{
+        KASSERT(dmv->ref_cnt != 0);
+
+        mutex_enter(&dmv->dev_mtx);
+        if (--dmv->ref_cnt == 0)
+                cv_broadcast(&dmv->dev_cv);
+        mutex_exit(&dmv->dev_mtx);
+}
+
+Before returning from ioctl routine must release reference counter with 
+dm_dev_unbusy.
+
+dm_dev_remove_ioctl routine have to remove dm_dev from global device list,
+and wait until all ioctl users from dm_dev are gone. 
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Reply via email to