Author: Hakan Ardo <[email protected]>
Branch: extradoc
Changeset: r4511:565da378b80b
Date: 2012-08-10 22:21 +0200
http://bitbucket.org/pypy/extradoc/changeset/565da378b80b/

Log:    import scimark

diff --git a/talk/iwtc11/benchmarks/scimark/FFT.c 
b/talk/iwtc11/benchmarks/scimark/FFT.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/FFT.c
@@ -0,0 +1,165 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "FFT.h"
+
+#define PI  3.1415926535897932
+
+/*-----------------------------------------------------------------------*/
+
+static int int_log2(int n);
+
+double FFT_num_flops(int N)
+{
+
+     double Nd = (double) N;
+     double logN = (double) int_log2(N);
+
+     return (5.0*Nd-2)*logN + 2*(Nd+1);
+}
+
+static int int_log2 (int n)
+{
+    int k = 1;
+    int log = 0;
+    for(/*k=1*/; k < n; k *= 2, log++);
+    if (n != (1 << log))
+    {
+      printf("FFT: Data length is not a power of 2!: %d ",n);
+      exit(1);
+    }
+    return log; 
+}
+
+static void FFT_transform_internal (int N, double *data, int direction) {
+    int n = N/2;
+    int bit = 0;
+    int logn;
+    int dual = 1;
+
+    if (n == 1) return;         /* Identity operation! */
+    logn = int_log2(n);
+
+
+    if (N == 0) return;    
+
+    /* bit reverse the input data for decimation in time algorithm */
+    FFT_bitreverse(N, data) ;
+
+    /* apply fft recursion */
+    /* this loop executed int_log2(N) times */
+    for (bit = 0; bit < logn; bit++, dual *= 2) {
+      double w_real = 1.0;
+      double w_imag = 0.0;
+      int a;
+      int b;
+
+      double theta = 2.0 * direction * PI / (2.0 * (double) dual);
+      double s = sin(theta);
+      double t = sin(theta / 2.0);
+      double s2 = 2.0 * t * t;
+
+      for (a=0, b = 0; b < n; b += 2 * dual) {
+        int i = 2*b ;
+        int j = 2*(b + dual);
+
+        double wd_real = data[j] ;
+        double wd_imag = data[j+1] ;
+          
+        data[j]   = data[i]   - wd_real;
+        data[j+1] = data[i+1] - wd_imag;
+        data[i]  += wd_real;
+        data[i+1]+= wd_imag;
+      }
+      
+      /* a = 1 .. (dual-1) */
+      for (a = 1; a < dual; a++) {
+        /* trignometric recurrence for w-> exp(i theta) w */
+        {
+          double tmp_real = w_real - s * w_imag - s2 * w_real;
+          double tmp_imag = w_imag + s * w_real - s2 * w_imag;
+          w_real = tmp_real;
+          w_imag = tmp_imag;
+        }
+        for (b = 0; b < n; b += 2 * dual) {
+          int i = 2*(b + a);
+          int j = 2*(b + a + dual);
+
+          double z1_real = data[j];
+          double z1_imag = data[j+1];
+              
+          double wd_real = w_real * z1_real - w_imag * z1_imag;
+          double wd_imag = w_real * z1_imag + w_imag * z1_real;
+
+          data[j]   = data[i]   - wd_real;
+          data[j+1] = data[i+1] - wd_imag;
+          data[i]  += wd_real;
+          data[i+1]+= wd_imag;
+        }
+      }
+    }
+  }
+
+
+void FFT_bitreverse(int N, double *data) {
+    /* This is the Goldrader bit-reversal algorithm */
+    int n=N/2;
+    int nm1 = n-1;
+    int i=0; 
+    int j=0;
+    for (; i < nm1; i++) {
+
+      /*int ii = 2*i; */
+      int ii = i << 1;
+
+      /*int jj = 2*j; */
+      int jj = j << 1;
+
+      /* int k = n / 2 ; */
+      int k = n >> 1;
+
+      if (i < j) {
+        double tmp_real    = data[ii];
+        double tmp_imag    = data[ii+1];
+        data[ii]   = data[jj];
+        data[ii+1] = data[jj+1];
+        data[jj]   = tmp_real;
+        data[jj+1] = tmp_imag; }
+
+      while (k <= j) 
+      {
+        /*j = j - k ; */
+        j -= k;
+
+        /*k = k / 2 ;  */
+        k >>= 1 ; 
+      }
+      j += k ;
+    }
+  }
+
+
+void FFT_transform(int N, double *data)
+{
+    FFT_transform_internal(N, data, -1);
+}
+
+
+void FFT_inverse(int N, double *data)
+{
+    int n = N/2;
+    double norm = 0.0;
+    int i=0;
+    FFT_transform_internal(N, data, +1);
+
+    /* Normalize */
+
+
+    norm=1/((double) n);
+    for(i=0; i<N; i++)
+      data[i] *= norm;
+  
+}
+
+
diff --git a/talk/iwtc11/benchmarks/scimark/FFT.h 
b/talk/iwtc11/benchmarks/scimark/FFT.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/FFT.h
@@ -0,0 +1,5 @@
+
+void FFT_transform(int N, double *data);
+void FFT_inverse(int N, double *data);
+void FFT_bitreverse(int N, double *data);
+double FFT_num_flops(int N);
diff --git a/talk/iwtc11/benchmarks/scimark/LU.c 
b/talk/iwtc11/benchmarks/scimark/LU.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/LU.c
@@ -0,0 +1,102 @@
+#include <math.h>
+#include "LU.h"
+
+double LU_num_flops(int N)
+{
+        /* rougly 2/3*N^3 */
+
+    double Nd = (double) N;
+
+    return (2.0 * Nd *Nd *Nd/ 3.0);
+}
+
+
+void LU_copy_matrix(int M, int N, double **lu, double **A)
+{
+    int i;
+    int j;
+
+    for (i=0; i<M; i++)
+        for (j=0; j<N; j++)
+            lu[i][j] = A[i][j];
+}
+
+
+int LU_factor(int M, int N, double **A,  int *pivot)
+{
+ 
+
+    int minMN =  M < N ? M : N;
+    int j=0;
+
+    for (j=0; j<minMN; j++)
+    {
+        /* find pivot in column j and  test for singularity. */
+
+        int jp=j;
+        int i;
+        
+        double t = fabs(A[j][j]);
+        for (i=j+1; i<M; i++)
+        {
+            double ab = fabs(A[i][j]);
+            if ( ab > t)
+            {
+                jp = i;
+                t = ab;
+            }
+        }
+        
+        pivot[j] = jp;
+
+        /* jp now has the index of maximum element  */
+        /* of column j, below the diagonal          */
+
+        if ( A[jp][j] == 0 )                 
+            return 1;       /* factorization failed because of zero pivot */
+
+
+        if (jp != j)
+        {
+            /* swap rows j and jp */
+            double *tA = A[j];
+            A[j] = A[jp];
+            A[jp] = tA;
+        }
+
+        if (j<M-1)                /* compute elements j+1:M of jth column  */
+        {
+            /* note A(j,j), was A(jp,p) previously which was */
+            /* guarranteed not to be zero (Label #1)         */
+
+            double recp =  1.0 / A[j][j];
+            int k;
+            for (k=j+1; k<M; k++)
+                A[k][j] *= recp;
+        }
+
+
+        if (j < minMN-1)
+        {
+            /* rank-1 update to trailing submatrix:   E = E - x*y; */
+            /* E is the region A(j+1:M, j+1:N) */
+            /* x is the column vector A(j+1:M,j) */
+            /* y is row vector A(j,j+1:N)        */
+
+            int ii;
+            for (ii=j+1; ii<M; ii++)
+            {
+                double *Aii = A[ii];
+                double *Aj = A[j];
+                double AiiJ = Aii[j];
+                int jj;
+                for (jj=j+1; jj<N; jj++)
+                  Aii[jj] -= AiiJ * Aj[jj];
+
+            }
+        }
+    }
+
+    return 0;
+}
+
diff --git a/talk/iwtc11/benchmarks/scimark/LU.h 
b/talk/iwtc11/benchmarks/scimark/LU.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/LU.h
@@ -0,0 +1,9 @@
+#ifndef LU_H
+#define LU_H
+
+double LU_num_flops(int N);
+void LU_copy_matrix(int M, int N, double **lu, double **A);
+int LU_factor(int M, int N, double **A, int *pivot);
+
+
+#endif
diff --git a/talk/iwtc11/benchmarks/scimark/Makefile 
b/talk/iwtc11/benchmarks/scimark/Makefile
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/Makefile
@@ -0,0 +1,18 @@
+.SUFFIXES: .c .o
+
+.c.o:
+       $(CC) $(CFLAGS) -c $<
+
+all: scimark2 
+
+CC = cc
+LDFLAGS = 
+
+OBJS = FFT.o kernel.o Stopwatch.o Random.o SOR.o SparseCompRow.o \
+       array.o MonteCarlo.o LU.o 
+
+scimark2 : scimark2.o $(OBJS)
+       $(CC) $(CFLAGS) -o scimark2 scimark2.o $(OBJS) $(LDFLAGS) -lm
+
+clean:
+       rm $(OBJS) scimark2
diff --git a/talk/iwtc11/benchmarks/scimark/MonteCarlo.c 
b/talk/iwtc11/benchmarks/scimark/MonteCarlo.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/MonteCarlo.c
@@ -0,0 +1,70 @@
+#include "Random.h"
+
+/**
+ Estimate Pi by approximating the area of a circle.
+
+ How: generate N random numbers in the unit square, (0,0) to (1,1)
+ and see how are within a radius of 1 or less, i.e.
+ <pre>  
+
+ sqrt(x^2 + y^2) < r
+
+ </pre>
+  since the radius is 1.0, we can square both sides
+  and avoid a sqrt() computation:
+  <pre>
+
+    x^2 + y^2 <= 1.0
+
+  </pre>
+  this area under the curve is (Pi * r^2)/ 4.0,
+  and the area of the unit of square is 1.0,
+  so Pi can be approximated by 
+  <pre>
+                # points with x^2+y^2 < 1
+     Pi =~      --------------------------  * 4.0
+                     total # points
+
+  </pre>
+
+*/
+
+static const int SEED = 113;
+
+
+    double MonteCarlo_num_flops(int Num_samples)
+    {
+        /* 3 flops in x^2+y^2 and 1 flop in random routine */
+
+        return ((double) Num_samples)* 4.0;
+
+    }
+
+    
+
+    double MonteCarlo_integrate(int Num_samples)
+    {
+
+
+        Random R = new_Random_seed(SEED);
+
+
+        int under_curve = 0;
+        int count;
+
+        for (count=0; count<Num_samples; count++)
+        {
+            double x= Random_nextDouble(R);
+            double y= Random_nextDouble(R);
+
+            if ( x*x + y*y <= 1.0)
+                 under_curve ++;
+            
+        }
+
+        Random_delete(R);
+
+        return ((double) under_curve / Num_samples) * 4.0;
+    }
+
+
diff --git a/talk/iwtc11/benchmarks/scimark/MonteCarlo.h 
b/talk/iwtc11/benchmarks/scimark/MonteCarlo.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/MonteCarlo.h
@@ -0,0 +1,2 @@
+double MonteCarlo_integrate(int Num_samples);
+double MonteCarlo_num_flops(int Num_samples);
diff --git a/talk/iwtc11/benchmarks/scimark/README 
b/talk/iwtc11/benchmarks/scimark/README
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/README
@@ -0,0 +1,51 @@
+               SciMark2 (C version)
+
+This is an ANSI C version of the SciMark2 benchmark,
+translated from the original Java sources.  The intent
+in making this benchmark available in C is for mainly
+for performance comparisons.   For more information
+about SciMark, see http://math.nist.gov/scimark.
+
+Results of this benchmark can be sent to [email protected].
+
+The program is split up into the main driver (scimark2.c) and
+kernel routines.  A sample makefile is included; 
+however, one could simply write 
+
+
+> cc -o scimark2  -O *.c
+
+and then run
+
+> scimark2
+
+This produces an output similar to
+
+
+**                                                              **
+** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark **
+** for details. (Results can be submitted to [email protected])     **
+**                                                              **
+Using       2.00 seconds min time per kenel.
+Composite Score:           65.56
+FFT             Mflops:    63.38    (N=1024)
+SOR             Mflops:   124.80    (100 x 100)
+MonteCarlo:     Mflops:    16.05
+Sparse matmult  Mflops:    59.15    (N=1000, nz=5000)
+LU              Mflops:    64.40    (M=100, N=100)
+0:29.62 Elapsed, 29.620 user sec, 0.010 sys sec, 100.0% utilization.
+
+
+
+The first SciMark number reported is the composite score, followed
+by the an approximate Mflop rate for each kernel.
+
+
+To run the "large" version of this benchmark (with data structures
+that typically do not fit in cache) use
+
+>scimark2 -large
+
+
+------------------------------------------------------------------
+
diff --git a/talk/iwtc11/benchmarks/scimark/Random.c 
b/talk/iwtc11/benchmarks/scimark/Random.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/Random.c
@@ -0,0 +1,173 @@
+
+
+#include <stdlib.h>
+
+#include "Random.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+
+  /* static const int mdig = 32; */
+#define MDIG 32
+
+  /* static const int one = 1; */
+#define ONE 1
+
+  static const int m1 = (ONE << (MDIG-2)) + ((ONE << (MDIG-2) )-ONE);
+  static const int m2 = ONE << MDIG/2;
+
+  /* For mdig = 32 : m1 =          2147483647, m2 =      65536
+     For mdig = 64 : m1 = 9223372036854775807, m2 = 4294967296 
+  */
+
+                                /* move to initialize() because  */
+                                /* compiler could not resolve as */
+                                /*   a constant.                 */
+
+  static /*const*/ double dm1;  /*  = 1.0 / (double) m1; */
+
+
+/* private methods (defined below, but not in Random.h */
+
+static void initialize(Random R, int seed);
+
+Random new_Random_seed(int seed)
+{
+    Random R = (Random) malloc(sizeof(Random_struct));
+
+    initialize(R, seed);
+    R->left = 0.0;
+    R->right = 1.0;
+    R->width = 1.0;
+    R->haveRange = 0 /*false*/;
+
+    return R;
+}
+
+Random new_Random(int seed, double left, double right) 
+{
+    Random R = (Random) malloc(sizeof(Random_struct));
+
+    initialize(R, seed);
+    R->left = left;
+    R->right = right;
+    R->width = right - left;
+    R->haveRange = 1;          /* true */
+
+    return R;
+}
+
+void Random_delete(Random R)
+{
+    free(R);
+}
+
+
+
+/* Returns the next random number in the sequence.  */
+
+double Random_nextDouble(Random R) 
+{
+    int k;
+
+    int I = R->i;
+    int J = R->j;
+    int *m = R->m;
+
+    k = m[I] - m[J];
+    if (k < 0) k += m1;
+    R->m[J] = k;
+
+    if (I == 0) 
+        I = 16;
+    else I--;
+    R->i = I;
+
+    if (J == 0) 
+        J = 16 ;
+    else J--;
+    R->j = J;
+
+    if (R->haveRange) 
+        return  R->left +  dm1 * (double) k * R->width;
+    else
+        return dm1 * (double) k;
+
+} 
+
+
+
+
+/*--------------------------------------------------------------------
+                           PRIVATE METHODS
+  ----------------------------------------------------------------- */
+
+static void initialize(Random R, int seed) 
+{
+
+    int jseed, k0, k1, j0, j1, iloop;
+
+    dm1  = 1.0 / (double) m1; 
+
+    R->seed = seed;
+
+    if (seed < 0 ) seed = -seed;            /* seed = abs(seed) */  
+    jseed = (seed < m1 ? seed : m1);        /* jseed = min(seed, m1) */
+    if (jseed % 2 == 0) --jseed;
+    k0 = 9069 % m2;
+    k1 = 9069 / m2;
+    j0 = jseed % m2;
+    j1 = jseed / m2;
+    for (iloop = 0; iloop < 17; ++iloop) 
+    {
+        jseed = j0 * k0;
+        j1 = (jseed / m2 + j0 * k1 + j1 * k0) % (m2 / 2);
+        j0 = jseed % m2;
+        R->m[iloop] = j0 + m2 * j1;
+    }
+    R->i = 4;
+    R->j = 16;
+
+}
+
+double *RandomVector(int N, Random R)
+{
+    int i;
+    double *x = (double *) malloc(sizeof(double)*N);
+
+    for (i=0; i<N; i++)
+        x[i] = Random_nextDouble(R);
+
+    return x;
+}
+
+
+double **RandomMatrix(int M, int N, Random R)
+{
+    int i;
+    int j;
+
+    /* allocate matrix */
+
+    double **A = (double **) malloc(sizeof(double*)*M);
+
+    if (A == NULL) return NULL;
+
+    for (i=0; i<M; i++)
+    {
+        A[i] = (double *) malloc(sizeof(double)*N);
+        if (A[i] == NULL) 
+        {
+            free(A);
+            return NULL;
+        }
+        for (j=0; j<N; j++)
+            A[i][j] = Random_nextDouble(R);
+    }
+    return A;
+}
+
+
+
diff --git a/talk/iwtc11/benchmarks/scimark/Random.h 
b/talk/iwtc11/benchmarks/scimark/Random.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/Random.h
@@ -0,0 +1,18 @@
+typedef struct
+{
+  int m[17];                        
+  int seed;                             
+  int i;                                /* originally = 4 */
+  int j;                                /* originally =  16 */
+  int /*boolean*/ haveRange;            /* = false; */
+  double left;                          /*= 0.0; */
+  double right;                         /* = 1.0; */
+  double width;                         /* = 1.0; */
+}
+Random_struct, *Random;
+
+Random new_Random_seed(int seed);
+double Random_nextDouble(Random R);
+void Random_delete(Random R);
+double *RandomVector(int N, Random R);
+double **RandomMatrix(int M, int N, Random R);
diff --git a/talk/iwtc11/benchmarks/scimark/SOR.c 
b/talk/iwtc11/benchmarks/scimark/SOR.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/SOR.c
@@ -0,0 +1,43 @@
+#include "SOR.h"
+
+    double SOR_num_flops(int M, int N, int num_iterations)
+    {
+        double Md = (double) M;
+        double Nd = (double) N;
+        double num_iterD = (double) num_iterations;
+
+        return (Md-1)*(Nd-1)*num_iterD*6.0;
+    }
+
+    void SOR_execute(int M, int N, double omega, double **G, int 
+            num_iterations)
+    {
+
+        double omega_over_four = omega * 0.25;
+        double one_minus_omega = 1.0 - omega;
+
+        /* update interior points */
+
+        int Mm1 = M-1;
+        int Nm1 = N-1; 
+        int p;
+        int i;
+        int j;
+        double *Gi;
+        double *Gim1;
+        double *Gip1;
+
+        for (p=0; p<num_iterations; p++)
+        {
+            for (i=1; i<Mm1; i++)
+            {
+                Gi = G[i];
+                Gim1 = G[i-1];
+                Gip1 = G[i+1];
+                for (j=1; j<Nm1; j++)
+                    Gi[j] = omega_over_four * (Gim1[j] + Gip1[j] + Gi[j-1] 
+                                + Gi[j+1]) + one_minus_omega * Gi[j];
+            }
+        }
+    }
+            
diff --git a/talk/iwtc11/benchmarks/scimark/SOR.h 
b/talk/iwtc11/benchmarks/scimark/SOR.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/SOR.h
@@ -0,0 +1,4 @@
+
+double SOR_num_flops(int M, int N, int num_iterations);
+void SOR_execute(int M, int N,double omega, double **G, int num_iterations);
+
diff --git a/talk/iwtc11/benchmarks/scimark/SparseCompRow.c 
b/talk/iwtc11/benchmarks/scimark/SparseCompRow.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/SparseCompRow.c
@@ -0,0 +1,43 @@
+    /* multiple iterations used to make kernel have roughly
+        same granulairty as other Scimark kernels. */
+
+    double SparseCompRow_num_flops(int N, int nz, int num_iterations)
+    {
+        /* Note that if nz does not divide N evenly, then the
+           actual number of nonzeros used is adjusted slightly.
+        */
+        int actual_nz = (nz/N) * N;
+        return ((double)actual_nz) * 2.0 * ((double) num_iterations);
+    }
+
+
+    /* computes  a matrix-vector multiply with a sparse matrix
+        held in compress-row format.  If the size of the matrix
+        in MxN with nz nonzeros, then the val[] is the nz nonzeros,
+        with its ith entry in column col[i].  The integer vector row[]
+        is of size M+1 and row[i] points to the begining of the
+        ith row in col[].  
+    */
+
+    void SparseCompRow_matmult( int M, double *y, double *val, int *row,
+        int *col, double *x, int NUM_ITERATIONS)
+    {
+        int reps;
+        int r;
+        int i;
+
+        for (reps=0; reps<NUM_ITERATIONS; reps++)
+        {
+        
+            for (r=0; r<M; r++)
+            {
+                double sum = 0.0; 
+                int rowR = row[r];
+                int rowRp1 = row[r+1];
+                for (i=rowR; i<rowRp1; i++)
+                    sum += x[ col[i] ] * val[i];
+                y[r] = sum;
+            }
+        }
+    }
+
diff --git a/talk/iwtc11/benchmarks/scimark/SparseCompRow.h 
b/talk/iwtc11/benchmarks/scimark/SparseCompRow.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/SparseCompRow.h
@@ -0,0 +1,10 @@
+
+#ifndef SPARSE_COMPROW_H
+#define SPARSE_COMPROW_H
+
+double SparseCompRow_num_flops(int N, int nz, int num_iterations);
+
+void SparseCompRow_matmult( int M, double *y, double *val, int *row,
+        int *col, double *x, int NUM_ITERATIONS);
+
+#endif
diff --git a/talk/iwtc11/benchmarks/scimark/Stopwatch.c 
b/talk/iwtc11/benchmarks/scimark/Stopwatch.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/Stopwatch.c
@@ -0,0 +1,82 @@
+#include <stdlib.h>
+#include "Stopwatch.h"
+
+double seconds()
+{
+        return ((double) clock()) / (double) CLOCKS_PER_SEC; 
+}
+
+void Stopwtach_reset(Stopwatch Q)
+{
+    Q->running = 0;         /* false */
+    Q->last_time = 0.0;
+    Q->total= 0.0;
+}
+
+
+Stopwatch new_Stopwatch(void)
+{
+    Stopwatch S = (Stopwatch) malloc(sizeof(Stopwatch_struct));
+    if (S == NULL)
+        return NULL;
+
+    Stopwtach_reset(S);
+    return S;
+}
+
+void Stopwatch_delete(Stopwatch S)
+{
+    if (S != NULL)
+        free(S);
+}
+
+
+/* Start resets the timer to 0.0; use resume for continued total */
+
+void Stopwatch_start(Stopwatch Q)
+{
+    if (! (Q->running)  )
+    {
+        Q->running = 1;  /* true */
+        Q->total = 0.0;
+        Q->last_time = seconds();
+    }
+}
+   
+/** 
+    Resume timing, after stopping.  (Does not wipe out
+        accumulated times.)
+
+*/
+
+void Stopwatch_resume(Stopwatch Q)
+{
+    if (!(Q->running))
+    {
+        Q-> last_time = seconds(); 
+        Q->running = 1;  /*true*/
+    }
+}
+   
+void Stopwatch_stop(Stopwatch Q)  
+{ 
+    if (Q->running) 
+    { 
+        Q->total += seconds() - Q->last_time; 
+        Q->running = 0;  /* false */
+    }
+}
+  
+ 
+double Stopwatch_read(Stopwatch Q)
+{  
+    
+    if (Q->running) 
+    {
+        double t = seconds();
+        Q->total += t - Q->last_time;
+        Q->last_time = t;
+    }
+    return Q->total;
+}
+        
diff --git a/talk/iwtc11/benchmarks/scimark/Stopwatch.h 
b/talk/iwtc11/benchmarks/scimark/Stopwatch.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/Stopwatch.h
@@ -0,0 +1,23 @@
+
+#include <time.h>
+
+typedef struct{
+    int running;        /* boolean */
+    double last_time;
+    double total;
+
+} *Stopwatch, Stopwatch_struct;
+
+
+
+double seconds();
+
+void Stopwtach_reset(Stopwatch Q);
+
+Stopwatch new_Stopwatch(void);
+void Stopwatch_delete(Stopwatch S);
+void Stopwatch_start(Stopwatch Q);
+void Stopwatch_resume(Stopwatch Q);
+void Stopwatch_stop(Stopwatch Q);
+double Stopwatch_read(Stopwatch Q);
+        
diff --git a/talk/iwtc11/benchmarks/scimark/array.c 
b/talk/iwtc11/benchmarks/scimark/array.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/array.c
@@ -0,0 +1,77 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "array.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+
+double** new_Array2D_double(int M, int N)
+{
+    int i=0;
+    int failed = 0;
+
+    double **A = (double**) malloc(sizeof(double*)*M);
+    if (A == NULL)
+        return NULL;
+
+    for (i=0; i<M; i++)
+    {
+        A[i] = (double*) malloc(N * sizeof(double));
+        if (A[i] == NULL)
+        {
+            failed = 1;
+            break;
+        }
+    }
+
+    /* if we didn't successfully allocate all rows of A      */
+    /* clean up any allocated memory (i.e. go back and free  */
+    /* previous rows) and return NULL                        */
+
+    if (failed)
+    {
+        i--;
+        for (; i<=0; i--)
+            free(A[i]);
+        free(A);
+        return NULL;
+    }
+    else
+        return A;
+}
+void Array2D_double_delete(int M, int N, double **A)
+{
+    int i;
+    if (A == NULL) return;
+
+    for (i=0; i<M; i++)
+        free(A[i]);
+
+    free(A);
+}
+
+
+  void Array2D_double_copy(int M, int N, double **B, double **A)
+  {
+
+        int remainder = N & 3;       /* N mod 4; */
+        int i=0;
+        int j=0;
+
+        for (i=0; i<M; i++)
+        {
+            double *Bi = B[i];
+            double *Ai = A[i];
+            for (j=0; j<remainder; j++)
+                Bi[j] = Ai[j];
+            for (j=remainder; j<N; j+=4)
+            {
+                Bi[j] = Ai[j];
+                Bi[j+1] = Ai[j+1];
+                Bi[j+2] = Ai[j+2];
+                Bi[j+3] = Ai[j+3];
+            }
+        }
+  }
diff --git a/talk/iwtc11/benchmarks/scimark/array.h 
b/talk/iwtc11/benchmarks/scimark/array.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/array.h
@@ -0,0 +1,9 @@
+
+#ifndef ARRAY_H
+#define ARRAY_H
+
+double **new_Array2D_double(int M, int N);
+void Array2D_double_delete(int M, int N, double **A);
+void Array2D_double_copy(int M, int N, double **B, double **A);
+
+#endif
diff --git a/talk/iwtc11/benchmarks/scimark/constants.h 
b/talk/iwtc11/benchmarks/scimark/constants.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/constants.h
@@ -0,0 +1,35 @@
+#ifndef CONSTANTS_H_
+#define CONSTANTS_H_
+
+     const  double RESOLUTION_DEFAULT = 2.0;  /* secs (normally 2.0) */
+     const  int RANDOM_SEED = 101010;
+
+    /* default: small (cache-contained) problem sizes */
+
+     const  int FFT_SIZE = 1024;  /* must be a power of two */
+     const  int SOR_SIZE =100; /* NxN grid */
+     const  int SPARSE_SIZE_M = 1000;
+     const  int SPARSE_SIZE_nz = 5000;
+     const  int LU_SIZE = 100;
+
+    /* large (out-of-cache) problem sizes */
+
+     const  int LG_FFT_SIZE = 1048576;  /* must be a power of two */
+     const  int LG_SOR_SIZE =1000;  /*  NxN grid  */
+     const  int LG_SPARSE_SIZE_M = 100000;
+     const  int LG_SPARSE_SIZE_nz =1000000;
+     const  int LG_LU_SIZE = 1000;
+
+    /* tiny problem sizes (used to mainly to preload network classes     */
+    /*                     for applet, so that network download times    */
+    /*                     are factored out of benchmark.)               */
+    /*                                                                   */
+     const  int TINY_FFT_SIZE = 16;  /* must be a power of two */
+     const  int TINY_SOR_SIZE =10; /* NxN grid */
+     const  int TINY_SPARSE_SIZE_M = 10;
+     const  int TINY_SPARSE_SIZE_N = 10;
+     const  int TINY_SPARSE_SIZE_nz = 50;
+     const  int TINY_LU_SIZE = 10;
+
+#endif
+
diff --git a/talk/iwtc11/benchmarks/scimark/kernel.c 
b/talk/iwtc11/benchmarks/scimark/kernel.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/kernel.c
@@ -0,0 +1,231 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "LU.h"
+#include "FFT.h"
+#include "SOR.h"
+#include "MonteCarlo.h"
+#include "LU.h"
+#include "Random.h" 
+#include "Stopwatch.h"  
+#include "SparseCompRow.h"
+#include "array.h"
+
+
+    double kernel_measureFFT(int N, double mintime, Random R)
+    {
+        /* initialize FFT data as complex (N real/img pairs) */
+
+        int twoN = 2*N;
+        double *x = RandomVector(twoN, R);
+        long cycles = 1;
+        Stopwatch Q = new_Stopwatch();
+        int i=0;
+        double result = 0.0;
+
+        while(1)
+        {
+            Stopwatch_start(Q);
+            for (i=0; i<cycles; i++)
+            {
+                FFT_transform(twoN, x);     /* forward transform */
+                FFT_inverse(twoN, x);       /* backward transform */
+            }
+            Stopwatch_stop(Q);
+            if (Stopwatch_read(Q) >= mintime)
+                break;
+
+            cycles *= 2;
+
+        }
+        /* approx Mflops */
+
+        result = FFT_num_flops(N)*cycles/ Stopwatch_read(Q) * 1.0e-6;
+        Stopwatch_delete(Q);
+        free(x);
+        return result;
+    }
+
+    double kernel_measureSOR(int N, double min_time, Random R)
+    {
+        double **G = RandomMatrix(N, N, R);
+        double result = 0.0;
+
+        Stopwatch Q = new_Stopwatch();
+        int cycles=1;
+        while(1)
+        {
+            Stopwatch_start(Q);
+            SOR_execute(N, N, 1.25, G, cycles);
+            Stopwatch_stop(Q);
+
+            if (Stopwatch_read(Q) >= min_time) break;
+
+            cycles *= 2;
+        }
+        /* approx Mflops */
+
+        printf("SOR cycles: %d, runtime: %f\n", cycles, Stopwatch_read(Q));
+        result = SOR_num_flops(N, N, cycles) / Stopwatch_read(Q) * 1.0e-6;
+        Stopwatch_delete(Q);
+        Array2D_double_delete(N, N, G);
+        return result;
+
+    }
+
+
+
+    double kernel_measureMonteCarlo(double min_time, Random R)
+    {
+        double result = 0.0;
+        Stopwatch Q = new_Stopwatch();
+
+        int cycles=1;
+        while(1)
+        {
+            Stopwatch_start(Q);
+            MonteCarlo_integrate(cycles);
+            Stopwatch_stop(Q);
+            if (Stopwatch_read(Q) >= min_time) break;
+
+            cycles *= 2;
+        }
+        /* approx Mflops */
+        result = MonteCarlo_num_flops(cycles) / Stopwatch_read(Q) * 1.0e-6;
+        Stopwatch_delete(Q);
+        return result;
+    }
+
+
+    double kernel_measureSparseMatMult(int N, int nz, 
+            double min_time, Random R)
+    {
+        /* initialize vector multipliers and storage for result */
+        /* y = A*y;  */
+
+        double *x = RandomVector(N, R);
+        double *y = (double*) malloc(sizeof(double)*N);
+
+        double result = 0.0;
+
+#if 0
+        // initialize square sparse matrix
+        //
+        // for this test, we create a sparse matrix with M/nz nonzeros
+        // per row, with spaced-out evenly between the begining of the
+        // row to the main diagonal.  Thus, the resulting pattern looks
+        // like
+        //             +-----------------+
+        //             +*                +
+        //             +***              +
+        //             +* * *            +
+        //             +** *  *          +
+        //             +**  *   *        +
+        //             +* *   *   *      +
+        //             +*  *   *    *    +
+        //             +*   *    *    *  + 
+        //             +-----------------+
+        //
+        // (as best reproducible with integer artihmetic)
+        // Note that the first nr rows will have elements past
+        // the diagonal.
+#endif
+
+        int nr = nz/N;      /* average number of nonzeros per row  */
+        int anz = nr *N;    /* _actual_ number of nonzeros         */
+
+            
+        double *val = RandomVector(anz, R);
+        int *col = (int*) malloc(sizeof(int)*nz);
+        int *row = (int*) malloc(sizeof(int)*(N+1));
+        int r=0;
+        int cycles=1;
+
+        Stopwatch Q = new_Stopwatch();
+
+        row[0] = 0; 
+        for (r=0; r<N; r++)
+        {
+            /* initialize elements for row r */
+
+            int rowr = row[r];
+            int step = r/ nr;
+            int i=0;
+
+            row[r+1] = rowr + nr;
+            if (step < 1) step = 1;   /* take at least unit steps */
+
+
+            for (i=0; i<nr; i++)
+                col[rowr+i] = i*step;
+                
+        }
+
+
+        while(1)
+        {
+            Stopwatch_start(Q);
+            SparseCompRow_matmult(N, y, val, row, col, x, cycles);
+            Stopwatch_stop(Q);
+            if (Stopwatch_read(Q) >= min_time) break;
+
+            cycles *= 2;
+        }
+        /* approx Mflops */
+        result = SparseCompRow_num_flops(N, nz, cycles) / 
+                        Stopwatch_read(Q) * 1.0e-6;
+
+        Stopwatch_delete(Q);
+        free(row);
+        free(col);
+        free(val);
+        free(y);
+        free(x);
+
+        return result;
+    }
+
+
+    double kernel_measureLU(int N, double min_time, Random R)
+    {
+
+        double **A = NULL;
+        double **lu = NULL; 
+        int *pivot = NULL;
+
+    
+
+        Stopwatch Q = new_Stopwatch();
+        double result = 0.0;
+        int i=0;
+        int cycles=1;
+
+        if ((A = RandomMatrix(N, N,  R)) == NULL) exit(1);
+        if ((lu = new_Array2D_double(N, N)) == NULL) exit(1);
+        if ((pivot = (int *) malloc(N * sizeof(int))) == NULL) exit(1);
+
+
+        while(1)
+        {
+            Stopwatch_start(Q);
+            for (i=0; i<cycles; i++)
+            {
+                Array2D_double_copy(N, N, lu, A);
+                LU_factor(N, N, lu, pivot);
+            }
+            Stopwatch_stop(Q);
+            if (Stopwatch_read(Q) >= min_time) break;
+
+            cycles *= 2;
+        }
+        /* approx Mflops */
+        result = LU_num_flops(N) * cycles / Stopwatch_read(Q) * 1.0e-6;
+
+        Stopwatch_delete(Q);
+        free(pivot); 
+        Array2D_double_delete(N, N, lu); 
+        Array2D_double_delete(N, N, A);
+
+        return result;
+
+    }
+
diff --git a/talk/iwtc11/benchmarks/scimark/kernel.h 
b/talk/iwtc11/benchmarks/scimark/kernel.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/kernel.h
@@ -0,0 +1,11 @@
+#ifndef KERNEL_H
+#define KERNEL_H
+
+double kernel_measureFFT( int FFT_size, double min_time, Random R);
+double kernel_measureSOR( int SOR_size, double min_time, Random R);
+double kernel_measureMonteCarlo( double min_time, Random R);
+double kernel_measureSparseMatMult(int Sparse_size_N,
+    int Sparse_size_nz, double min_time, Random R);
+double kernel_measureLU( int LU_size, double min_time, Random R);
+
+#endif
diff --git a/talk/iwtc11/benchmarks/scimark/scimark2.c 
b/talk/iwtc11/benchmarks/scimark/scimark2.c
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/scimark2.c
@@ -0,0 +1,98 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "Random.h"
+#include "kernel.h"
+#include "constants.h"
+
+void print_banner(void);
+
+int main(int argc, char *argv[])
+{
+        /* default to the (small) cache-contained version */
+
+        double min_time = RESOLUTION_DEFAULT;
+
+        int FFT_size = FFT_SIZE;
+        int SOR_size =  SOR_SIZE;
+        int Sparse_size_M = SPARSE_SIZE_M;
+        int Sparse_size_nz = SPARSE_SIZE_nz;
+        int LU_size = LU_SIZE;
+
+
+        /* run the benchmark */
+
+        double res[6] = {0.0};
+        Random R = new_Random_seed(RANDOM_SEED);
+
+
+        if (argc > 1)
+        {
+                       int current_arg = 1;
+
+                       if (strcmp(argv[1], "-help")==0  ||
+                                       strcmp(argv[1], "-h") == 0)
+                       {
+                               fprintf(stderr, "Usage: [-large] 
[minimum_time]\n");
+                               exit(0);
+                       }
+
+                       if (strcmp(argv[1], "-large")==0)
+                       {
+                               FFT_size = LG_FFT_SIZE;
+                               SOR_size = LG_SOR_SIZE;
+                               Sparse_size_M = LG_SPARSE_SIZE_M;
+                               Sparse_size_nz = LG_SPARSE_SIZE_nz;
+                               LU_size = LG_LU_SIZE;
+
+                               current_arg++;
+                       }
+
+                       if (current_arg < argc)
+                       {
+                               min_time = atof(argv[current_arg]);
+                       }
+                       
+        }
+
+       
+               print_banner();
+               printf("Using %10.2f seconds min time per kenel.\n", min_time);
+
+        res[1] = kernel_measureFFT( FFT_size, min_time, R);   
+        res[2] = kernel_measureSOR( SOR_size, min_time, R);   
+        res[3] = kernel_measureMonteCarlo(min_time, R); 
+        res[4] = kernel_measureSparseMatMult( Sparse_size_M, 
+                Sparse_size_nz, min_time, R);           
+        res[5] = kernel_measureLU( LU_size, min_time, R);  
+
+
+
+        res[0] = (res[1] + res[2] + res[3] + res[4] + res[5]) / 5;
+
+        /* print out results  */
+        printf("Composite Score:        %8.2f\n" ,res[0]);
+        printf("FFT             Mflops: %8.2f    (N=%d)\n", res[1], FFT_size);
+        printf("SOR             Mflops: %8.2f    (%d x %d)\n",                 
+                               res[2], SOR_size, SOR_size);
+        printf("MonteCarlo:     Mflops: %8.2f\n", res[3]);
+        printf("Sparse matmult  Mflops: %8.2f    (N=%d, nz=%d)\n", res[4], 
+                                       Sparse_size_M, Sparse_size_nz);
+        printf("LU              Mflops: %8.2f    (M=%d, N=%d)\n", res[5],
+                               LU_size, LU_size);
+
+
+        Random_delete(R);
+
+        return 0;
+  
+}
+
+void print_banner()
+{
+ printf("**                                                              
**\n");
+ printf("** SciMark2 Numeric Benchmark, see http://math.nist.gov/scimark 
**\n");
+ printf("** for details. (Results can be submitted to [email protected])     
**\n");
+ printf("**                                                              
**\n");
+}
diff --git a/talk/iwtc11/benchmarks/scimark/scimark2.h 
b/talk/iwtc11/benchmarks/scimark/scimark2.h
new file mode 100644
--- /dev/null
+++ b/talk/iwtc11/benchmarks/scimark/scimark2.h
@@ -0,0 +1,22 @@
+
+#ifndef SCIMARK2_H
+#define SCIMARK2_H
+
+#define VERSION 2.0
+
+#ifndef NULL 
+#define NULL 0
+#endif
+
+#ifndef TRUE
+#define TRUE 1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+
+
+#endif
+
_______________________________________________
pypy-commit mailing list
[email protected]
http://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to