Here we are:
Empty loop
mode 0 9.21 ns/loop nproc=1 lcks=EMPTY
Tight loop, 1 and 2 processes, with and without lock prefix
mode 1 16.48 ns/loop nproc=1 lcks=no
mode 2 23.65 ns/loop nproc=2 lcks=no
mode 3 93.02 ns/loop nproc=1 lcks=yes
mode 4 160.82 ns/loop nproc=2 lcks=yes
Spread loop, 1 and 2 processes, with and without lock prefix. Other
memory operations included in loop to mimic more typical situations.
mode 5 37.64 ns/loop nproc=1 lcks=no
mode 6 89.28 ns/loop nproc=2 lcks=no
mode 7 88.32 ns/loop nproc=1 lcks=yes
mode 8 161.08 ns/loop nproc=2 lcks=yes
As you can see, the lock prefix creates a stall condition on the locked
memory, but does *NOT* stall other memory. The overhead is the same
with and without the other assembly ops when the lock prefix is used.
Therefore I believe the impact will be unnoticeable. On a dual
450MHz P-III we are talking 37 ns vs 88 ns - an overhead of 50 ns
for the one processor case, and an overhead of 72 ns for the two processor
case. Still, I suppose if we wanted to optimize the UP case we could
do away with the lock prefix on non-SMP machines. I don't know if the
SMP variable is accessible from within the i386/include/atomic.h header
file, though.
-Matt
/*
* Compile -O2. Appropriate variables have been volatilized to avoid
* inappropriate optimizations.
*/
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Number of iterations each timed loop in main() executes. */
#define LOOPS 10000000
/*
 * Emit one read-modify-write instruction `op` against the `type`-sized
 * object that `p` points to, with `v` as the source operand.
 *
 * These macros are not hygienic: they expand in terms of the identifiers
 * `p` and `v`, which must be in scope wherever the macro is used.
 *
 * In the instruction template, %0 is the memory destination and %1 is the
 * source (register or immediate).  The destination is declared "+m"
 * (read-write memory) instead of "=m" paired with a matching "0" input;
 * a matching constraint on a memory operand is not reliably supported by
 * GCC, whereas "+m" states the same thing directly.  The "cc" clobber
 * records that arithmetic ops such as addl modify the flags register.
 *
 * ATOMIC_ASM prefixes the instruction with "lock"; ATOMIC_ASM_NOLOCK
 * issues the bare instruction so the cost of the prefix can be measured.
 */
#define ATOMIC_ASM(type,op) \
__asm __volatile ("lock; " op : "+m" (*(type *)p) : "ir" (v) : "cc")
#define ATOMIC_ASM_NOLOCK(type,op) \
__asm __volatile (op : "+m" (*(type *)p) : "ir" (v) : "cc")
/*
 * Add v to the int that p points to using a single x86 addl instruction
 * issued through ATOMIC_ASM, i.e. with the "lock" prefix.  main() uses
 * this for the "lcks=yes" measurements.
 *
 * The parameter names p and v are significant: ATOMIC_ASM refers to them
 * by name.
 */
static __inline void
atomic_add_int(void *p, u_int v)
{
ATOMIC_ASM(int, "addl %1,%0");
}
/*
 * Add v to the int that p points to using a single x86 addl instruction
 * issued through ATOMIC_ASM_NOLOCK, i.e. without the "lock" prefix.
 * main() uses this for the "lcks=no" measurements.
 *
 * The parameter names p and v are significant: ATOMIC_ASM_NOLOCK refers
 * to them by name.
 */
static __inline void
atomic_add_int_nolock(void *p, u_int v)
{
ATOMIC_ASM_NOLOCK(int, "addl %1,%0");
}
/*
 * Scratch array that the "spread" loops in main() (modes 5-8) store to on
 * every iteration, alongside the add on the shared word.  It is volatile
 * so that the stores survive -O2.
 */
volatile int GX[8]; /* note: not shared between processes */
/*
 * Start the second benchmark process.  Returns 0 in the child and the
 * child's pid in the parent.  A failed fork terminates the program so that
 * a one-process run is never reported as a two-process result.
 */
static pid_t
fork_or_die(void)
{
	pid_t pid = fork();

	if (pid < 0) {
		perror("fork");
		exit(1);
	}
	return (pid);
}

/*
 * Benchmark driver.  Maps one page of "test.dat" MAP_SHARED and, for each
 * mode 0..8, times LOOPS iterations of a loop and prints the cost per
 * iteration in nanoseconds:
 *
 *   mode 0     empty loop
 *   mode 1, 2  tight loop, add without lock prefix, 1 and 2 processes
 *   mode 3, 4  tight loop, add with lock prefix,    1 and 2 processes
 *   mode 5, 6  add without lock prefix plus eight stores to GX, 1 and 2
 *              processes
 *   mode 7, 8  add with lock prefix plus eight stores to GX, 1 and 2
 *              processes
 *
 * In the two-process modes the parent forks before its own loop, both
 * processes run the loop against the same mapped word, the child exits,
 * and the parent's elapsed time (including the wait for the child) is
 * divided by the number of processes.
 *
 * Returns 0 on success; exits with status 1 if the scratch file cannot be
 * set up or a fork fails.
 */
int
main(int ac, char **av)
{
	int fd;
	int *ptr;
	int pgsize = getpagesize();
	volatile int i;		/* volatile: keeps the timed loops alive at -O2 */
	int m;
	double usec;
	struct timeval tv1;
	struct timeval tv2;

	(void)ac;
	(void)av;

	/* Back the shared counter with a one-page file mapping. */
	fd = open("test.dat", O_RDWR|O_CREAT|O_TRUNC, 0666);
	if (fd < 0) {
		perror("open test.dat");
		exit(1);
	}
	if (ftruncate(fd, pgsize) < 0) {
		perror("ftruncate");
		exit(1);
	}
	ptr = mmap(NULL, pgsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (ptr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	for (m = 0; m <= 8; ++m) {
		pid_t pid = -1;		/* stays -1 in single-process modes */
		int nproc = 1;
		const char *lcks = "EMPTY";

		gettimeofday(&tv1, NULL);
		switch(m) {
		case 8:
			pid = fork_or_die();
			nproc = 2;
			/* fall through */
		case 7:
			for (i = 0; i < LOOPS; ++i) {
				atomic_add_int(ptr, 1);
				GX[0] = 1;
				GX[1] = 1;
				GX[2] = 1;
				GX[3] = 1;
				GX[4] = 1;
				GX[5] = 1;
				GX[6] = 1;
				GX[7] = 1;
			}
			lcks = "yes";
			break;
		case 6:
			pid = fork_or_die();
			nproc = 2;
			/* fall through */
		case 5:
			for (i = 0; i < LOOPS; ++i) {
				atomic_add_int_nolock(ptr, 1);
				GX[0] = 1;
				GX[1] = 1;
				GX[2] = 1;
				GX[3] = 1;
				GX[4] = 1;
				GX[5] = 1;
				GX[6] = 1;
				GX[7] = 1;
			}
			lcks = "no";
			break;
		case 4:
			pid = fork_or_die();
			nproc = 2;
			/* fall through */
		case 3:
			for (i = 0; i < LOOPS; ++i) {
				atomic_add_int(ptr, 1);
			}
			lcks = "yes";
			break;
		case 2:
			pid = fork_or_die();
			nproc = 2;
			/* fall through */
		case 1:
			for (i = 0; i < LOOPS; ++i) {
				atomic_add_int_nolock(ptr, 1);
			}
			lcks = "no";
			break;
		case 0:
			for (i = 0; i < LOOPS; ++i) {
				;
			}
			break;
		default:
			printf("huh?\n");
			exit(1);
		}

		/*
		 * The child only contributes load.  _exit() skips stdio
		 * flushing so output buffered before the fork is not
		 * written a second time.
		 */
		if (pid == 0)
			_exit(0);

		/* Include the child's run in the measured interval. */
		while (wait(NULL) > 0)
			;
		gettimeofday(&tv2, NULL);

		usec = (double)(tv2.tv_sec - tv1.tv_sec) * 1000000.0 +
		    (double)(tv2.tv_usec - tv1.tv_usec);
		printf("mode %d\t%6.2f ns/loop nproc=%d lcks=%s\n", m,
		    usec * 1000.0 / (double)LOOPS / (double)nproc, nproc, lcks);
	}

	munmap(ptr, pgsize);
	close(fd);
	return(0);
}
To Unsubscribe: send mail to [EMAIL PROTECTED]
with "unsubscribe freebsd-current" in the body of the message