Here we are:

    Empty loop
        mode 0    9.21 ns/loop nproc=1 lcks=EMPTY

    Tight loop, 1 and 2 processes, with and without lock prefix

        mode 1   16.48 ns/loop nproc=1 lcks=no
        mode 2   23.65 ns/loop nproc=2 lcks=no
        mode 3   93.02 ns/loop nproc=1 lcks=yes
        mode 4  160.82 ns/loop nproc=2 lcks=yes

    Spread loop, 1 and 2 processes, with and without lock prefix.  Other
    memory operations included in loop to mimic more typical situations.

        mode 5   37.64 ns/loop nproc=1 lcks=no
        mode 6   89.28 ns/loop nproc=2 lcks=no
        mode 7   88.32 ns/loop nproc=1 lcks=yes
        mode 8  161.08 ns/loop nproc=2 lcks=yes


    As you can see, the lock prefix creates a stall condition on the locked
    memory, but does *NOT* stall other memory.    The overhead is the same
    with and without the other assembly ops when the lock prefix is used.

    Therefore I believe the impact will be unnoticeable.  On a dual
    450MHz P-III we are talking 37 ns vs 88 ns - an overhead of 50 ns
    for the one processor case, and an overhead of 72 ns for the two processor
    case.  Still, I suppose if we wanted to optimize the UP case we could
    do away with the lock prefix on non-SMP machines.  I don't know if the 
    SMP variable is accessible from within the i386/include/atomic.h header
    file, though.

                                        -Matt


/*
 * Compile with -O2.  Appropriate variables have been volatilized to avoid
 * inappropriate optimizations.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Number of iterations each process runs in every timed loop. */
#define LOOPS   10000000

/*
 * Expand to one inline-asm statement consisting of "lock; " followed by
 * the instruction text `op`.
 *
 * The macro is not self-contained: it uses variables named `p` and `v`
 * from the scope where it is expanded.  Operand %0 is the memory at
 * *(type *)p (output constraint "=m"); operand %1 is v, as an immediate
 * or a register ("ir").  The extra "0" input refers to the same memory
 * as operand 0, so the location is described as both read and written.
 */
#define ATOMIC_ASM(type,op)     \
    __asm __volatile ("lock; " op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p))

/*
 * Same statement and operands as ATOMIC_ASM, but without the "lock; "
 * prefix in front of `op`.
 */
#define ATOMIC_ASM_NOLOCK(type,op)     \
    __asm __volatile (op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p))

/*
 * Add v to the int that p points at, using a lock-prefixed "addl".
 *
 * The parameter names matter: ATOMIC_ASM refers to `p` and `v` by name,
 * so renaming either parameter breaks the expansion.
 */
static __inline void
atomic_add_int(void *p, u_int v)
{
        ATOMIC_ASM(int, "addl %1,%0");
}

/*
 * Add v to the int that p points at, using "addl" with no lock prefix.
 * This is the comparison case for atomic_add_int().
 *
 * The parameter names matter: ATOMIC_ASM_NOLOCK refers to `p` and `v`
 * by name, so renaming either parameter breaks the expansion.
 */
static __inline void
atomic_add_int_nolock(void *p, u_int v)
{
        ATOMIC_ASM_NOLOCK(int, "addl %1,%0");
}

/*
 * Scratch array that the "spread" loops (modes 5-8) store to on every
 * iteration, next to the add on the mapped int.  It is declared volatile
 * so that the stores are actually emitted.
 */
volatile int GX[8];     /* note: not shared between processes */

/*
 * fork() wrapper for the two-process modes: on failure, report the error
 * and terminate instead of letting the caller time a run that has only
 * one process while dividing the result by two.
 */
static pid_t
fork_or_die(void)
{
    pid_t pid = fork();

    if (pid < 0) {
        perror("fork");
        exit(1);
    }
    return (pid);
}

/*
 * Run every benchmark mode (0 through 8) once and print its cost per loop
 * iteration, in nanoseconds, divided by the number of processes.
 *
 *   mode 0     empty loop
 *   mode 1/2   add without lock prefix, 1 / 2 processes
 *   mode 3/4   add with lock prefix,    1 / 2 processes
 *   mode 5/6   add without lock prefix plus eight stores to GX[], 1 / 2 processes
 *   mode 7/8   add with lock prefix plus eight stores to GX[],    1 / 2 processes
 *
 * The add always targets the first int of a MAP_SHARED mapping of
 * "test.dat", so in the two-process modes parent and child update the
 * same memory.  The command-line arguments are not used.
 *
 * Returns 0 when all modes have run; exits with status 1 if the file
 * cannot be created, sized or mapped, or if fork() fails.
 */
int
main(int ac, char **av)
{
    int fd;
    int *ptr;
    int pgsize = getpagesize();
    volatile int i;     /* volatile so that -O2 keeps the loops */
    int m;
    double usec;
    struct timeval tv1;
    struct timeval tv2;

    (void)ac;
    (void)av;

    fd = open("test.dat", O_RDWR|O_CREAT|O_TRUNC, 0666);
    if (fd < 0) {
        perror("open test.dat");
        exit(1);
    }
    if (ftruncate(fd, pgsize) < 0) {
        perror("ftruncate test.dat");
        exit(1);
    }
    ptr = mmap(NULL, pgsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        perror("mmap test.dat");
        exit(1);
    }
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    for (m = 0; m <= 8; ++m) {
        pid_t pid = -1;         /* -1: this mode did not fork */
        int nproc = 1;
        const char *lcks = "EMPTY";

        /* The timed region includes fork() and waiting for the child. */
        gettimeofday(&tv1, NULL);
        switch(m) {
        case 8:
            pid = fork_or_die();
            nproc = 2;
            /* fall through */
        case 7:
            for (i = 0; i < LOOPS; ++i) {
                atomic_add_int(ptr, 1);
                GX[0] = 1;
                GX[1] = 1;
                GX[2] = 1;
                GX[3] = 1;
                GX[4] = 1;
                GX[5] = 1;
                GX[6] = 1;
                GX[7] = 1;
            }
            lcks = "yes";
            break;
        case 6:
            pid = fork_or_die();
            nproc = 2;
            /* fall through */
        case 5:
            for (i = 0; i < LOOPS; ++i) {
                atomic_add_int_nolock(ptr, 1);
                GX[0] = 1;
                GX[1] = 1;
                GX[2] = 1;
                GX[3] = 1;
                GX[4] = 1;
                GX[5] = 1;
                GX[6] = 1;
                GX[7] = 1;
            }
            lcks = "no";
            break;
        case 4:
            pid = fork_or_die();
            nproc = 2;
            /* fall through */
        case 3:
            for (i = 0; i < LOOPS; ++i) {
                atomic_add_int(ptr, 1);
            }
            lcks = "yes";
            break;
        case 2:
            pid = fork_or_die();
            nproc = 2;
            /* fall through */
        case 1:
            for (i = 0; i < LOOPS; ++i) {
                atomic_add_int_nolock(ptr, 1);
            }
            lcks = "no";
            break;
        case 0:
            for (i = 0; i < LOOPS; ++i) {
                    ;
            }
            break;
        default:
            printf("huh?\n");
            exit(1);
        }

        /*
         * The child is done once its loop finishes.  _exit() is used so
         * that it does not flush a copy of the parent's stdio buffer.
         */
        if (pid == 0)
            _exit(0);

        /* Parent: the measurement ends when every child has finished. */
        while (wait(NULL) > 0)
            ;
        gettimeofday(&tv2, NULL);

        /* Elapsed microseconds, kept in a double so long runs cannot overflow. */
        usec = (double)(tv2.tv_sec - tv1.tv_sec) * 1000000.0 +
            (double)(tv2.tv_usec - tv1.tv_usec);

        printf("mode %d\t%6.2f ns/loop nproc=%d lcks=%s\n", m,
            usec * 1000.0 / (double)LOOPS / (double)nproc, nproc, lcks);
    }
    return(0);
}



To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message

Reply via email to