Tracy R Reed wrote:
On Wed, Feb 09, 2005 at 05:16:49PM -0800, Lew Wolfgang spake thusly:
BTW, the Opteron is about 10 times faster on a single job than
the SPARC, which is vintage 1996.
Only 10 times? That isn't even in keeping with Moore's law.
Hi Tracy,
Yes, but you have to consider that we're comparing two different
classes of machines. What was a desktop X86 box doing when
the Sun E4000 was state of the art? If memory serves, the Sun
was close to an order of magnitude faster than contemporary
desktops when new.
Also, I lied. The 246 Opteron is only 6.7 times faster than
the Sun as measured by the included benchmark, running one
instance. If the Sun's eight processors are considered against
the Opteron's two, the Opteron still comes out 2.6 times
faster on a throughput basis when all the processors are
loaded. (I just measured it.) Sure, this is a CPU-bound benchmark,
but it is typical of the loads we give it.
Regards,
Lew
Here's Dave Edelblut's double-precision FFT benchmark (as posted, DATA_TYPE is set to float):
#include <math.h>
#include <stdio.h>
#include <time.h>
/*
 * M_PI is a POSIX/XSI extension, not part of ISO C, so <math.h> may not
 * define it when compiling in a strict standard mode. Provide a fallback
 * so the benchmark builds either way.
 */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

/*
 * NP_MAX: number of complex elements in the benchmark buffer, and the
 * largest transform length that gets timed. The smaller alternative is
 * kept here for machines with less memory.
 */
/* #define NP_MAX 8388608 */
#define NP_MAX 16777216

/*
 * DATA_TYPE: floating-point type used for the sample data and for the
 * FFT's internal arithmetic. Switch to the commented-out definition to
 * run the benchmark in double precision.
 */
#define DATA_TYPE float
/*#define DATA_TYPE double */

/* One complex sample: real part r, imaginary part i. */
typedef struct {
    DATA_TYPE r;
    DATA_TYPE i;
} complex;
/*
 * cfft - in-place complex FFT.
 *
 * A Duhamel-Hollmann split-radix DIF FFT.
 * Ref: Electronics Letters, Jan. 5, 1984
 *
 * Parameters:
 *   x   complex input data; overwritten with the transform (in place).
 *   np  number of valid input samples in x.
 *
 * The transform length n is the smallest power of two that is >= np,
 * with a minimum of 2. When n != np the elements x[np] .. x[n-1] are
 * set to zero before transforming, so the caller's buffer must have room
 * for n elements, not just np.
 *
 * Index arithmetic is done in int and intermediate strides grow to about
 * 8*n, so n must be small enough for that to fit in an int.
 *
 * Returns the transform length n actually used, or 0 (leaving x
 * untouched) if np is negative.
 */
int cfft( complex *x, int np )
{
    int i, j, k, m, n, i0, i1, i2, i3, is, id, n1, n2, n4;
    DATA_TYPE a, e, a3, cc1, ss1, cc3, ss3, r1, r2, s1, s2, s3, xt;

    /*
     * The algorithm below is written with 1-based indices. Map them onto
     * the 0-based array through this accessor instead of decrementing the
     * pointer: forming a pointer to "one before" the array is undefined
     * behaviour in C even if it is never dereferenced.
     */
#define CFFT_X(idx) (x[(idx) - 1])

    /* A negative length would make the zero-padding loop write before x. */
    if (np < 0) {
        return 0;
    }

    /* n = smallest power of two >= np (at least 2); m = log2(n). */
    i = 2;
    m = 1;
    while (i < np) {
        i = i + i;
        m = m + 1;
    }
    n = i;

    /* Silently zero-pad the input up to the transform length. */
    if (n != np) {
        for (i = np + 1; i <= n; i++) {
            CFFT_X(i).r = 0.0;
            CFFT_X(i).i = 0.0;
        }
    }

    /*
     * Split-radix stages. Stage k works on sub-blocks of length n2
     * (n, n/2, n/4, ...), each split into four quarters of length n4.
     */
    n2 = n + n;
    for (k = 1; k <= m - 1; k++) {
        n2 = n2 / 2;
        n4 = n2 / 4;
        e = 2.0 * M_PI / n2;    /* twiddle angle increment for this stage */
        a = 0.0;
        for (j = 1; j <= n4; j++) {
            /* Twiddle factors for angle a and 3a; a is advanced afterwards. */
            a3 = 3.0 * a;
            cc1 = cos(a);
            ss1 = sin(a);
            cc3 = cos(a3);
            ss3 = sin(a3);
            a = j * e;

            /* Visit every sub-block that needs this butterfly. */
            is = j;
            id = 2 * n2;
            while (is < n) {
                for (i0 = is; i0 <= n - 1; i0 = i0 + id) {
                    i1 = i0 + n4;
                    i2 = i1 + n4;
                    i3 = i2 + n4;

                    r1 = CFFT_X(i0).r - CFFT_X(i2).r;
                    CFFT_X(i0).r = CFFT_X(i0).r + CFFT_X(i2).r;
                    r2 = CFFT_X(i1).r - CFFT_X(i3).r;
                    CFFT_X(i1).r = CFFT_X(i1).r + CFFT_X(i3).r;
                    s1 = CFFT_X(i0).i - CFFT_X(i2).i;
                    CFFT_X(i0).i = CFFT_X(i0).i + CFFT_X(i2).i;
                    s2 = CFFT_X(i1).i - CFFT_X(i3).i;
                    CFFT_X(i1).i = CFFT_X(i1).i + CFFT_X(i3).i;

                    s3 = r1 - s2;
                    r1 = r1 + s2;
                    s2 = r2 - s1;
                    r2 = r2 + s1;

                    CFFT_X(i2).r = r1 * cc1 - s2 * ss1;
                    CFFT_X(i2).i = -s2 * cc1 - r1 * ss1;
                    CFFT_X(i3).r = s3 * cc3 + r2 * ss3;
                    CFFT_X(i3).i = r2 * cc3 - s3 * ss3;
                }
                is = 2 * id - n2 + j;
                id = 4 * id;
            }
        }
    }

    /*
     * ---------------------Last stage, length=2 butterfly---------------------
     */
    is = 1;
    id = 4;
    while (is < n) {
        for (i0 = is; i0 <= n; i0 = i0 + id) {
            i1 = i0 + 1;
            r1 = CFFT_X(i0).r;
            CFFT_X(i0).r = r1 + CFFT_X(i1).r;
            CFFT_X(i1).r = r1 - CFFT_X(i1).r;
            r1 = CFFT_X(i0).i;
            CFFT_X(i0).i = r1 + CFFT_X(i1).i;
            CFFT_X(i1).i = r1 - CFFT_X(i1).i;
        }
        is = 2 * id - 1;
        id = 4 * id;
    }

    /*
     * --------------------------Bit reverse counter---------------------------
     * j tracks the bit-reversed counterpart of i; each pair is swapped once
     * (only when i < j).
     */
    j = 1;
    n1 = n - 1;
    for (i = 1; i <= n1; i++) {
        if (i < j) {
            xt = CFFT_X(j).r;
            CFFT_X(j).r = CFFT_X(i).r;
            CFFT_X(i).r = xt;
            xt = CFFT_X(j).i;
            CFFT_X(j).i = CFFT_X(i).i;
            CFFT_X(i).i = xt;
        }
        k = n / 2;
        while (k < j) {
            j = j - k;
            k = k / 2;
        }
        j = j + k;
    }

    return n;

#undef CFFT_X
}
/*
 * Program to test and time the fast Fourier transform.
 *
 * For each length np = 1024, 2048, ... up to NP_MAX it:
 *   1. fills x with a test signal: x[0] = (np-1)/2, every other real part
 *      -0.5, and imaginary parts -cot(pi*i/np)/2 for i < np/2, mirrored
 *      with opposite sign in the upper half;
 *   2. times a single cfft() call with clock();
 *   3. checks the result against the ramp x[i] = i + 0j and prints the
 *      index and size of the largest real and imaginary deviation.
 * Finally it prints the total CPU time consumed.
 *
 * Returns 0.
 */
int main(void)
{
    int i, j, np, npm, n2, kr, ki;
    double a, enp, t, y, zr, zi, pi, el_t;
    clock_t ct0, ct1, ct2, ctd;
    /* NP_MAX elements; static storage keeps this large buffer off the stack. */
    static complex x[NP_MAX];

    pi = M_PI;
    np = 1024;
    ct0 = clock();
    /* NOTE(review): the banner always says "double precision"; the precision
       actually used is whatever DATA_TYPE is configured to. */
    printf("\n fft benchmark - double precision - GNU C\n");

    while (np <= NP_MAX) {
        printf("np =%7d",np);
        enp = np;
        npm = np / 2 - 1;
        t = pi / enp;

        /* Build the test signal for this length. */
        x[0].r = (enp - 1.0) / 2.0;
        x[0].i = 0;
        n2 = np / 2;
        x[n2].r = -0.5;
        x[n2].i = 0.0;
        for (i = 1; i <= npm; i++) {
            j = np - i;
            x[i].r = -0.5;
            x[j].r = -0.5;
            y = t * i;
            y = -cos(y) / sin(y) / 2.0;
            x[i].i = y;
            x[j].i = -y;
        }

        /* Time only the transform itself. np is a power of two here, so the
           length cfft() returns equals np and is not needed. */
        ct1 = clock();
        (void) cfft(x, np);
        ct2 = clock();
        ctd = ct2 - ct1;
        el_t = (double) ctd;
        el_t = el_t / CLOCKS_PER_SEC;
        printf("%6.1f sec ",el_t);

        /* Largest deviation from the expected output (real part i, imag 0). */
        zr = 0.0;
        zi = 0.0;
        kr = 0;
        ki = 0;
        npm = np - 1;
        for (i = 0; i <= npm; i++) {
            a = fabs(x[i].r - i);
            if (zr < a) {
                zr = a;
                kr = i;
            }
            a = fabs(x[i].i);
            if (zi < a) {
                zi = a;
                ki = i;
            }
        }
        printf("re %7d %10.2g im %7d %10.2g\n",kr,zr,ki,zi);

        np = 2 * np;
    }

    ct2 = clock();
    el_t = (double) (ct2 - ct0);
    el_t = el_t / CLOCKS_PER_SEC;
    printf("The total run time was %6.1f sec.\n",el_t);

    return 0;
}
--
[email protected]
http://www.kernel-panic.org/cgi-bin/mailman/listinfo/kplug-list