On Wed, 19 Feb 2003, Abramo Bagnara wrote:

> The results are amazing and I'd say Jaroslav has done some mistakes in
> his handmade asm.

I don't think so. It seems that my brain still remembers assembler ;-)
You passed the wrong values to my code, so it did unaligned accesses.

Fixes to make things the same:

--- sum.c       2003-02-19 18:55:20.000000000 +0100
+++ a.c 2003-02-19 19:31:00.000000000 +0100
@@ -11,6 +11,8 @@
 typedef short int s16;
 typedef int s32;
 
+#define CONFIG_SMP
+
 #ifdef CONFIG_SMP
 #define LOCK_PREFIX "lock ; "
 #else
@@ -54,7 +56,7 @@
 static inline void atomic_add(volatile int *dst, int v)
 {
        __asm__ __volatile__(
-               LOCK_PREFIX "addl %0,%1"
+               LOCK_PREFIX "addl %1,%0"
                :"=m" (*dst)
                :"ir" (v));
 }
@@ -62,7 +64,9 @@
 void mix_areas0(unsigned int size,
                volatile s16 *dst, s16 *src,
                volatile s32 *sum,
-               unsigned int dst_step, unsigned int src_step)
+               unsigned int dst_step,
+               unsigned int src_step,
+               unsigned int sum_step)
 {
        while (size-- > 0) {
                s32 sample = *dst + *src;
@@ -70,8 +74,8 @@
                        *dst = sample > 0 ? 0x7fff : -0x8000;
                else
                        *dst = sample;
-               dst += dst_step;
-               src += src_step;
+               ((char *)dst) += dst_step;
+               ((char *)src) += src_step;
        }
 }
 
@@ -194,7 +198,9 @@
 void mix_areas2(unsigned int size,
                volatile s16 *dst, s16 *src,
                volatile s32 *sum,
-               unsigned int dst_step, unsigned int src_step)
+               unsigned int dst_step,
+               unsigned int src_step,
+               unsigned int sum_step)
 {
        while (size-- > 0) {
                s32 sample = *src;
@@ -204,15 +210,15 @@
                do {
                        sample = *sum;
                        s16 s;
-                       if (unlikely(sample & 0xffff0000))
+                       if (unlikely(sample & 0x7fff0000))
                                s = sample > 0 ? 0x7fff : -0x8000;
                        else
                                s = sample;
                        *dst = s;
                } while (unlikely(sample != *sum));
-               sum++;
-               dst += dst_step;
-               src += src_step;
+               ((char *)sum) += sum_step;
+               ((char *)dst) += dst_step;
+               ((char *)src) += src_step;
        }
 }
 
@@ -236,19 +242,19 @@
        }
        rdtscll(begin);
        for (i = 0; i < n; i++) {
-               mix_areas0(size, dst, srcs[i], sum, 1, 1);
+               mix_areas0(size, dst, srcs[i], sum, 2, 2, 4);
        }
        rdtscll(end);
        printf("mix_areas0: %lld\n", end - begin);
        rdtscll(begin);
        for (i = 0; i < n; i++) {
-               mix_areas1(size, dst, srcs[i], sum, 1, 1, 1);
+               mix_areas1(size, dst, srcs[i], sum, 2, 2, 4);
        }
        rdtscll(end);
        printf("mix_areas1: %lld\n", end - begin);
        rdtscll(begin);
        for (i = 0; i < n; i++) {
-               mix_areas2(size, dst, srcs[i], sum, 1, 1);
+               mix_areas2(size, dst, srcs[i], sum, 2, 2, 4);
        }
        rdtscll(end);
        printf("mix_areas2: %lld\n", end - begin);

perex@pnote:~> cat /proc/cpuinfo
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 6
model           : 8
model name      : Pentium III (Coppermine)
stepping        : 6
cpu MHz         : 847.473
cache size      : 256 KB
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 sep mtrr pge mca cmov pat pse36 mmx fxsr sse
bogomips        : 1679.36

perex@pnote:~> ./a.out 2048 4 32267
mix_areas0: 170691
mix_areas1: 675795
mix_areas2: 708995


                                        Have fun,
                                                Jaroslav

-----
Jaroslav Kysela <[EMAIL PROTECTED]>
Linux Kernel Sound Maintainer
ALSA Project, SuSE Labs



-------------------------------------------------------
This SF.net email is sponsored by: SlickEdit Inc. Develop an edge.
The most comprehensive and flexible code editor you can use.
Code faster. C/C++, C#, Java, HTML, XML, many more. FREE 30-Day Trial.
www.slickedit.com/sourceforge
_______________________________________________
Alsa-devel mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/alsa-devel

Reply via email to