2013/10/26 Ted Unangst <[email protected]>:
> This may or may not be a working implementation of sem_open.
>
> Using tricks similar to those used in shm_open, we create a semaphore
> in a tmp file shared by mmap. This lets other processes share the
> spinlock and counter. Our thrsleep and thrwakeup syscalls don't really
> work between processes, so we have to synchronize all processes on the
> same in kernel address. This will cause spurious wakeups, but it
> shouldn't be the end of the world for light use.
>
> Index: sys/kern/kern_synch.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.108
> diff -u -p -r1.108 kern_synch.c
> --- sys/kern/kern_synch.c 14 Sep 2013 01:35:01 -0000 1.108
> +++ sys/kern/kern_synch.c 25 Oct 2013 20:20:47 -0000
> @@ -419,6 +419,7 @@ thrsleep_unlock(void *lock, int lockflag
> return (error);
> }
>
> +static int magicnumber;
>
> int
> thrsleep(struct proc *p, struct sys___thrsleep_args *v)
> @@ -482,9 +483,13 @@ thrsleep(struct proc *p, struct sys___th
>
> if (p->p_thrslpid == 0)
> error = 0;
> - else
> - error = tsleep(&p->p_thrslpid, PUSER | PCATCH, "thrsleep",
> + else {
> + void *sleepaddr = &p->p_thrslpid;
> + if (ident == -1)
> + sleepaddr = &magicnumber;
> + error = tsleep(sleepaddr, PUSER | PCATCH, "thrsleep",
> (int)to_ticks);
> + }
>
> out:
> p->p_thrslpid = 0;
> @@ -535,6 +540,8 @@ sys___thrwakeup(struct proc *p, void *v,
>
> if (ident == 0)
> *retval = EINVAL;
> + else if (ident == -1)
> + wakeup(&magicnumber);
> else {
> TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) {
> if (q->p_thrslpid == ident) {
> Index: lib/librthread/rthread.h
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread.h,v
> retrieving revision 1.45
> diff -u -p -r1.45 rthread.h
> --- lib/librthread/rthread.h 21 Jun 2013 06:08:50 -0000 1.45
> +++ lib/librthread/rthread.h 25 Oct 2013 20:20:47 -0000
> @@ -64,7 +64,7 @@ struct __sem {
> struct _spinlock lock;
> volatile int waitcount;
> volatile int value;
> - int __pad;
> + struct __sem *shared;
> };
>
> TAILQ_HEAD(pthread_queue, pthread);
> Index: lib/librthread/rthread_sem.c
> ===================================================================
> RCS file: /cvs/src/lib/librthread/rthread_sem.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 rthread_sem.c
> --- lib/librthread/rthread_sem.c 1 Jun 2013 23:06:26 -0000 1.9
> +++ lib/librthread/rthread_sem.c 25 Oct 2013 20:20:47 -0000
> @@ -1,6 +1,6 @@
> /* $OpenBSD: rthread_sem.c,v 1.9 2013/06/01 23:06:26 tedu Exp $ */
> /*
> - * Copyright (c) 2004,2005 Ted Unangst <[email protected]>
> + * Copyright (c) 2004,2005,2013 Ted Unangst <[email protected]>
> * All Rights Reserved.
> *
> * Permission to use, copy, modify, and distribute this software for any
> @@ -15,15 +15,25 @@
> * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> */
> +#include <sys/param.h>
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#include <sys/stat.h>
>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <sha2.h>
> #include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> #include <unistd.h>
> -#include <errno.h>
>
> #include <pthread.h>
>
> #include "rthread.h"
>
> +#define SHARED_IDENT ((void *)-1)
> +
> /*
> * Internal implementation of semaphores
> */
> @@ -31,8 +41,14 @@ int
> _sem_wait(sem_t sem, int tryonly, const struct timespec *abstime,
> int *delayed_cancel)
> {
> + void *ident = (void *)&sem->waitcount;
> int r;
>
> + if (sem->shared) {
> + sem = sem->shared;
> + ident = SHARED_IDENT;
> + }
> +
> _spinlock(&sem->lock);
> if (sem->value) {
> sem->value--;
> @@ -42,7 +58,7 @@ _sem_wait(sem_t sem, int tryonly, const
> } else {
> sem->waitcount++;
> do {
> - r = __thrsleep(&sem->waitcount, CLOCK_REALTIME |
> + r = __thrsleep(ident, CLOCK_REALTIME |
> _USING_TICKETS, abstime, &sem->lock.ticket,
> delayed_cancel);
> _spinlock(&sem->lock);
> @@ -63,12 +79,18 @@ _sem_wait(sem_t sem, int tryonly, const
> int
> _sem_post(sem_t sem)
> {
> + void *ident = (void *)&sem->waitcount;
> int rv = 0;
>
> + if (sem->shared) {
> + sem = sem->shared;
> + ident = SHARED_IDENT;
> + }
> +
> _spinlock(&sem->lock);
> sem->value++;
> if (sem->waitcount) {
> - __thrwakeup(&sem->waitcount, 1);
> + __thrwakeup(ident, 1);
> rv = 1;
> }
> _spinunlock(&sem->lock);
> @@ -136,6 +158,9 @@ sem_getvalue(sem_t *semp, int *sval)
> return (-1);
> }
>
> + if (sem->shared)
> + sem = sem->shared;
> +
> _spinlock(&sem->lock);
> *sval = sem->value;
> _spinunlock(&sem->lock);
> @@ -227,27 +252,88 @@ sem_trywait(sem_t *semp)
> return (0);
> }
>
> -/* ARGSUSED */
> +
> +/* SHA256_DIGEST_STRING_LENGTH includes nul */
> +/* "/tmp/" + sha256 + ".sem" */
> +#define SEM_PATH_SIZE (5 + SHA256_DIGEST_STRING_LENGTH + 4)
> +
> +static void
> +makesempath(const char *origpath, char *sempath, size_t len)
> +{
> + char buf[SHA256_DIGEST_STRING_LENGTH];
> +
> + SHA256Data(origpath, strlen(origpath), buf);
> + snprintf(sempath, len, "/tmp/%s.sem", buf);
> +}
> +
> sem_t *
> -sem_open(const char *name __unused, int oflag __unused, ...)
> +sem_open(const char *name, int oflag, ...)
> {
> - errno = ENOSYS;
> - return (SEM_FAILED);
> + char sempath[SEM_PATH_SIZE];
> + struct stat sb;
> + int fd;
> + sem_t sem, shared;
> + sem_t *semp = SEM_FAILED;
> + int len = PAGE_SIZE;
> +
> + if (oflag & ~(O_CREAT | O_EXCL)) {
> + errno = EINVAL;
> + return semp;
> + }
> +
> + makesempath(name, sempath, sizeof(sempath));
> + fd = open(sempath, O_RDWR | O_NOFOLLOW | oflag, 0600);
> + if (fd == -1)
> + return semp;
> + if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) {
> + close(fd);
> + errno = EINVAL;
> + return semp;
> + }
> + if (sb.st_uid != getuid()) {
> + close(fd);
> + errno = EPERM;
> + return semp;
> + }
> + if (sb.st_size < len)
> + ftruncate(fd, len);
> + shared = mmap(NULL, len, PROT_READ | PROT_WRITE,
> + MAP_FILE | MAP_SHARED | MAP_HASSEMAPHORE, fd, 0);
> + close(fd);
> + if (shared == MAP_FAILED)
> + return semp;
This could return wrong errno if close(fd) above fails.
> + semp = malloc(sizeof(*semp));
> + sem = calloc(1, sizeof(*sem));
> + if (!semp || !sem) {
> + munmap(shared, len);
Missing free(semp) and free(sem)?
> + return SEM_FAILED;
> + }
> + *semp = sem;
> + sem->shared = shared;
> +
> + return semp;
> +
> }
>
> -/* ARGSUSED */
> int
> -sem_close(sem_t *sem __unused)
> +sem_close(sem_t *semp)
> {
> - errno = ENOSYS;
> - return (-1);
> + sem_t sem = *semp;
> + int len = PAGE_SIZE;
> +
> + munmap(sem->shared, len);
Maybe it's worth to abort(3) immediately if munmap() call fails?
Because this will mean someone messed up with our internals (or we
messed up things ourselves, which is not that better).
> + free(sem);
> + free(semp);
> +
> + return (0);
> }
>
> -/* ARGSUSED */
> int
> -sem_unlink(const char *name __unused)
> +sem_unlink(const char *name)
> {
> - errno = ENOSYS;
> - return (-1);
> + char sempath[SEM_PATH_SIZE];
> +
> + makesempath(name, sempath, sizeof(sempath));
> + return unlink(sempath);
> }
Aftwerword: looks nice, thank you for your work! I'll add glue to
kdelibs to make it use sem_open in KSharedDataCache, this will be a
good test. Hope to get results at this weekend.
--
WBR,
Vadim Zhukov