Ok, here we are.  The lock instruction overhead is significant relative
    to the execution time of the instruction, though on a 450 MHz P-III it is
    still not a huge problem.  Basically 12 vs 89 ns for the non-competitive
    case, and 26 vs 161 ns for the competitive case.

empty loop        9.33 ns/loop  mode 0
1proc no locks   12.86 ns/loop  mode 9
2proc no locks   26.16 ns/loop  mode 10
1proc w/ locks   89.87 ns/loop  mode 1
2proc w/ locks  161.09 ns/loop  mode 2

    If you modify the source code and add three nop() calls inside the loop
    for cases 9 and 1 (which also covers 10 and 2), which avoids some of the
    synchronization overhead, you get numbers closer to what I expect in the
    real world.  (Recompile the code with -DREAL_WORLD.)

empty loop        9.33 ns/loop  mode 0
1proc no locks   55.89 ns/loop
2proc no locks  129.96 ns/loop
1proc w/ locks   98.35 ns/loop
2proc w/ locks  160.96 ns/loop


/*
 * Compile -O2
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>

#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define LOOPS   10000000

/*
 * ATOMIC_ASM(type, op): emit the x86 instruction `op' with a "lock" prefix,
 * operating on the `type'-sized object that `p' points to.
 *
 * The macro is not self-contained: it expands to a statement that uses the
 * identifiers `p' (pointer to the target) and `v' (the operand value) from
 * the enclosing scope.  Inside `op', %0 is the memory operand and %1 is `v'.
 *
 * The target is described as one read-write "+m" operand rather than an
 * "=m" output tied to a matching "0" input, which is the form current
 * compilers expect for memory operands.  The "cc" clobber tells the
 * compiler that the instruction may change the condition flags.
 */
#define ATOMIC_ASM(type,op)     \
    __asm __volatile ("lock; " op : "+m" (*(type *)(p)) : "ir" (v) : "cc")

/*
 * ATOMIC_ASM_NOLOCK(type, op): emit the x86 instruction `op' WITHOUT a
 * "lock" prefix, operating on the `type'-sized object that `p' points to.
 *
 * The macro is not self-contained: it expands to a statement that uses the
 * identifiers `p' (pointer to the target) and `v' (the operand value) from
 * the enclosing scope.  Inside `op', %0 is the memory operand and %1 is `v'.
 *
 * The target is described as one read-write "+m" operand rather than an
 * "=m" output tied to a matching "0" input, which is the form current
 * compilers expect for memory operands.  The "cc" clobber tells the
 * compiler that the instruction may change the condition flags.
 */
#define ATOMIC_ASM_NOLOCK(type,op)     \
    __asm __volatile (op : "+m" (*(type *)(p)) : "ir" (v) : "cc")

/*
 * Add v to the int that p points to, using an "addl" instruction issued
 * with the x86 "lock" prefix (supplied by ATOMIC_ASM).  Returns nothing.
 */
static __inline void
atomic_add_int(void *p, u_int v)
{
        /* ATOMIC_ASM picks up the parameters `p' and `v' by name. */
        ATOMIC_ASM(int, "addl %1,%0");
}

/*
 * Add v to the int that p points to, using the same "addl" instruction as
 * atomic_add_int() but without the "lock" prefix (ATOMIC_ASM_NOLOCK).
 * This is the baseline the locked variant is measured against.
 */
static __inline void
atomic_add_int_nolock(void *p, u_int v)
{
        /* ATOMIC_ASM_NOLOCK picks up the parameters `p' and `v' by name. */
        ATOMIC_ASM_NOLOCK(int, "addl %1,%0");
}

/*
 * Do-nothing function.  With -DREAL_WORLD the benchmark loops call it
 * three times per iteration to put some unrelated work between the
 * memory updates.
 *
 * The calls are only useful if they are really executed, so the function
 * is marked noinline and contains an empty volatile asm statement; without
 * these an optimizing compiler is free to inline the empty body and delete
 * the calls altogether.
 */
__attribute__((__noinline__)) void
nop(void)
{
    __asm __volatile ("");
}

/*
 * Benchmark driver.
 *
 * av[1] selects the mode:
 *    0, 8  - empty loop
 *    1     - one process, locked add
 *    2     - two processes (parent + forked child), locked add
 *    9     - one process, unlocked add
 *   10     - two processes, unlocked add
 *
 * The adds target the first int of a page of "test.dat" mapped MAP_SHARED,
 * so in the two-process modes both processes update the same word.  The
 * wall-clock time for LOOPS iterations is reported in ns per loop; in the
 * two-process modes the figure is divided by two.
 *
 * Returns 0 on success; exits with status 1 on a usage error, an unknown
 * mode, or a failed system call.
 */
int
main(int ac, char **av)
{
    int fd;
    char *ptr;
    const char *wlocks;
    int pgsize = getpagesize();
    volatile int i;     /* volatile keeps the empty loops from being removed */
    int m;
    double usec;
    pid_t pid = -1;
    struct timeval tv1;
    struct timeval tv2;

    if (ac == 1) {
        printf("%s {0,1,2,8,9,10}\n", av[0]);
        printf(" 0 - empty loop\n");
        printf(" 1 - one process\n");
        printf(" 2 - two processes\n");
        printf(" 8 - empty loop - no lock\n");
        printf(" 9 - one process - no lock\n");
        printf(" 10- two processes - no lock\n");
        exit(1);
    }
    m = (int)strtol(av[1], NULL, 0);

    /* Back the shared word with a one-page file mapping. */
    fd = open("test.dat", O_RDWR|O_CREAT|O_TRUNC, 0666);
    if (fd < 0) {
        perror("open test.dat");
        exit(1);
    }
    if (ftruncate(fd, pgsize) < 0) {
        perror("ftruncate");
        exit(1);
    }
    ptr = mmap(NULL, pgsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }

    /* Modes with bit 8 set are the "no lock" variants. */
    if (m & 8)
        wlocks = "";
    else
        wlocks = " w/locks";

    gettimeofday(&tv1, NULL);
    switch(m) {
    case 10:
        pid = fork();
        if (pid < 0) {
            /* Without a child the "2proc" figure would be meaningless. */
            perror("fork");
            exit(1);
        }
        /* fall through */
    case 9:
        for (i = 0; i < LOOPS; ++i) {
            atomic_add_int_nolock(ptr, 1);
#ifdef REAL_WORLD
            nop();
            nop();
            nop();
#endif
        }
        break;
    case 8:
        for (i = 0; i < LOOPS; ++i) {
                ;
        }
        break;
    case 2:
        pid = fork();
        if (pid < 0) {
            /* Without a child the "2proc" figure would be meaningless. */
            perror("fork");
            exit(1);
        }
        /* fall through */
    case 1:
        for (i = 0; i < LOOPS; ++i) {
            atomic_add_int(ptr, 1);
#ifdef REAL_WORLD
            nop();
            nop();
            nop();
#endif
        }
        break;
    case 0:
        for (i = 0; i < LOOPS; ++i) {
                ;
        }
        break;
    default:
        printf("huh?\n");
        exit(1);
    }

    /* The child only contributes load; the parent does the reporting. */
    if (pid == 0)
        _exit(0);

    /* Stop the clock only after the child (if any) has finished too. */
    while (wait(NULL) > 0)
        ;
    gettimeofday(&tv2, NULL);

    /* Elapsed microseconds, kept in a double so long runs cannot overflow. */
    usec = (double)(tv2.tv_sec - tv1.tv_sec) * 1000000.0 +
        (double)(tv2.tv_usec - tv1.tv_usec);

    switch(m) {
    case 0:
    case 8:
        printf("empty%s %4.2f ns/loop\n", wlocks,
            usec * 1000.0 / (double)LOOPS);
        break;
    case 1:
    case 9:
        printf("1proc%s %4.2f ns/loop\n", wlocks,
            usec * 1000.0 / (double)LOOPS);
        break;
    case 2:
    case 10:
        printf("2proc%s %4.2f ns/loop\n", wlocks,
            usec * 1000.0 / (double)LOOPS / 2.0);
        break;
    default:
        break;
    }

    munmap(ptr, pgsize);
    close(fd);
    return(0);
}



To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to