Hi,

I've done some additional, very simple-minded measurements to estimate the effect of inlining the reference counters and the potential overhead of checking whether we are on an SMP system. I got the following numbers:

I:      inlining
NOI:    no-inlining
SMPC:   SMP check
NOSMPC: no SMP check

Times are in seconds.

                      NOI/NOSMPC   I/NOSMPC   NOI/SMPC   I/SMPC
P-IV 1800 (single)       7.634       6.892      1.796     0.784
Xeon 3.06GHz (multi)     6.50        4.07       6.67      4.11

Conclusions: Checking for SMP costs about 1% extra (4.11 s vs. 4.07 s) on multi-processor machines, and speeds up older non-HT/non-multiprocessor systems by roughly a factor of 8.8 (6.892 s vs. 0.784 s for the inlined variants, i.e. about 880%). Inlining is significant, too; its effect dwarfs the penalty of the SMP check on modern multi-processor systems.

The measurements were done with the simple benchmark attached; they are of course no substitute for real profiling with the office code.

Heiner

--
Jens-Heiner Rechtien
[EMAIL PROTECTED]
# Pick one CFLAGS line; the four combinations correspond to the four
# measurement columns above (inlining on/off x SMP check on/off).
CFLAGS= -I. -fPIC -O2 -Wall -DINLINE -DCHECKSMP
#CFLAGS= -I. -fPIC -O2 -Wall -DINLINE
#CFLAGS= -I. -fPIC -O2 -Wall -DCHECKSMP
#CFLAGS= -I. -fPIC -O2 -Wall

intrlock: intrlock.o libsal.so
        $(CC) $(CFLAGS) -o intrlock $< -L. -lsal

libsal.so: sal.o
        $(CC) -shared -o libsal.so $<


clean:
	rm -f *.o libsal.so intrlock
        
all: intrlock libsal.so
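With the two missing source files in place (a sketch follows the header below), the benchmark is built with "make all" and run with "LD_LIBRARY_PATH=. ./intrlock"; libsal.so is left in the build directory, so the dynamic loader has to be told where to find it.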
        
extern int is_smp;  /* non-zero when running on a multi-processor (or HT) machine */

#if defined(INLINE)
#if defined(CHECKSMP)
/* Atomically increment the counter at *p and return the new value.
 * xaddl adds %0 (preloaded with 1) to *p and leaves the old count in
 * %0, which the trailing incl turns into the new count.  The lock
 * prefix is only taken on SMP/HT machines; on a uniprocessor the
 * single xaddl instruction is already atomic with respect to
 * interrupts. */
inline int incrementInterlockedCount(int *p) {
    int n;
    if ( is_smp ) {
        __asm__ __volatile__ (
            "movl $1, %0\n\t"
            "lock\n\t"
            "xaddl %0, %2\n\t"
            "incl %0" :
            "=&r" (n), "=m" (*p) :
            "m" (*p) :
            "memory");
    }
    else {
        __asm__ __volatile__ (
            "movl $1, %0\n\t"
            "xaddl %0, %2\n\t"
            "incl %0" :
            "=&r" (n), "=m" (*p) :
            "m" (*p) :
            "memory");
    }
    return n;
}
#else /* !CHECKSMP */
/* Variant without the runtime check: the lock prefix is always used. */
inline int incrementInterlockedCount(int *p) {
    int n;
    __asm__ __volatile__ (
        "movl $1, %0\n\t"
        "lock\n\t"
        "xaddl %0, %2\n\t"
        "incl %0" :
        "=&r" (n), "=m" (*p) :
        "m" (*p) :
        "memory");
    return n;
}
#endif /* !CHECKSMP */
#else  /* !INLINE */
/* Out-of-line version, provided by libsal.so when inlining is disabled */
int incrementInterlockedCount(int *p);
#endif  /* INLINE */
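
For completeness: the Makefile above also needs sal.c and intrlock.c, which are not included in this mail. Purely as an illustration of how the pieces fit together, they could look roughly like the sketch below; the header name sal.h, the SMP detection via sysconf(), the iteration count and the gettimeofday() timing are assumptions, not the original benchmark code.

/* sal.c -- sketch only; the real file was not attached.  It defines the
 * is_smp flag and, for the non-inlined builds, the out-of-line counter. */
#include "sal.h"        /* assumed name for the header shown above */

int is_smp = 0;         /* set by the benchmark before the timed loop */

#if !defined(INLINE)
int incrementInterlockedCount(int *p) {
    int n;
#if defined(CHECKSMP)
    if ( !is_smp ) {    /* uniprocessor: the lock prefix is not needed */
        __asm__ __volatile__ (
            "movl $1, %0\n\t"
            "xaddl %0, %2\n\t"
            "incl %0" :
            "=&r" (n), "=m" (*p) :
            "m" (*p) :
            "memory");
        return n;
    }
#endif
    __asm__ __volatile__ (
        "movl $1, %0\n\t"
        "lock\n\t"
        "xaddl %0, %2\n\t"
        "incl %0" :
        "=&r" (n), "=m" (*p) :
        "m" (*p) :
        "memory");
    return n;
}
#endif

/* intrlock.c -- sketch of the benchmark driver; iteration count, SMP
 * detection and timing method are assumptions. */
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include "sal.h"

#define ITERATIONS 100000000L

int main(void) {
    int counter = 0;
    long i;
    struct timeval t0, t1;
    double elapsed;

    /* flag the machine as SMP if more than one CPU is online */
    is_smp = sysconf(_SC_NPROCESSORS_ONLN) > 1;

    gettimeofday(&t0, NULL);
    for (i = 0; i < ITERATIONS; ++i)
        incrementInterlockedCount(&counter);
    gettimeofday(&t1, NULL);

    elapsed = (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) / 1e6;
    printf("%ld increments in %.3f s\n", ITERATIONS, elapsed);
    return 0;
}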
