I got an email saying unified diff is preferred, so here's a resend
in that format.

Index: rthread_tls.c
===================================================================
RCS file: /cvs/src/lib/librthread/rthread_tls.c,v
retrieving revision 1.13
diff -u -r1.13 rthread_tls.c
--- rthread_tls.c	6 Nov 2011 11:48:59 -0000	1.13
+++ rthread_tls.c	30 Sep 2013 14:48:18 -0000
@@ -96,7 +96,12 @@
 	struct rthread_storage *rs;
 	pthread_t self;
 
-	_spinlock(&rkeyslock);
+	/* No lock is needed if
+	   (1) the key is not valid (undefined behavior)
+	   (2) storage is already allocated for the key (the linked
+	       list is only ever modified by adding to the head)
+	*/
+
 	if (!rkeys[key].used) {
 		rs = NULL;
 		goto out;
@@ -109,17 +114,25 @@
 			break;
 	}
 	if (!rs) {
-		rs = calloc(1, sizeof(*rs));
-		if (!rs)
-			goto out;
-		rs->keyid = key;
-		rs->data = NULL;
-		rs->next = self->local_storage;
-		self->local_storage = rs;
+		_spinlock(&rkeyslock);
+		/* Repeat search with lock held */
+		for (rs = self->local_storage; rs; rs = rs->next) {
+			if (rs->keyid == key)
+				break;
+		}
+		if (!rs) {
+			rs = calloc(1, sizeof(*rs));
+			if (rs) {
+				rs->keyid = key;
+				rs->data = NULL;
+				rs->next = self->local_storage;
+				self->local_storage = rs;
+			}
+		}
+		_spinunlock(&rkeyslock);
 	}
 
 out:
-	_spinunlock(&rkeyslock);
 	return (rs);
 }
 
> Here is a diff relative of OpenBSD 5.3 to avoid taking a lock in
> pthread_getspecific in the common case where storage for the key is
> already allocated.
> 
> I have only tested this patch for my use case where a single key is
> created once and used many times.  In that case, the bottleneck I
> observed is gone.  My parallel program sees linear speedup.
> 
> This patch would be unsafe if elements were ever removed from the
> per-thread list of allocated storage.  In the 5.3 implementation the
> list can only grow.  Its size is bounded by PTHREAD_KEYS_MAX so I
> do not consider this a leak.  Possibly it should be an array instead
> of a list.
> 

Reply via email to