------- Comment #2 from jb at gcc dot gnu dot org 2006-11-04 20:34 -------
I did some experimenting, and it seems the C version of a trivial matrix
multiply program is much slower than the same program written in Fortran?
To get the float version, swap in the commented-out declaration and the
commented-out c[i][j] = 0.0 line in the loop.
#include <stdio.h>
#include <complex.h>
#include <sys/time.h>
#include <time.h>
/*
 * Benchmark: time a naive n x n matrix multiply (c = a * b) using a
 * triple loop, then print the elapsed wall-clock time and the number of
 * inner-loop iterations executed.
 *
 * To build the float variant, swap in the commented-out declaration and
 * the commented-out "c[i][j] = 0.0;" line below.
 */
int main(void)
{
  const int n = 300;
  complex float a[n][n], b[n][n], c[n][n];
  //float a[n][n], b[n][n], c[n][n];
  int i, j, k, tc;
  struct timeval tv, tv2;
  float res;

  /*
   * Fill the operands before the clock starts.  Automatic arrays are not
   * initialized, so without this the multiply would read indeterminate
   * values (possibly NaNs or denormals, which can distort the timing).
   * The values are small, deterministic and index-derived so no extra
   * headers are needed.  In the float variant the imaginary part is simply
   * discarded by the complex-to-real conversion on assignment.
   */
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
        {
          a[i][j] = (i % 7) / 7.0f + ((j % 5) / 5.0f) * I;
          b[i][j] = (j % 3) / 3.0f + ((i % 11) / 11.0f) * I;
        }
    }

  tc = 0;
  gettimeofday (&tv, NULL);
  for (i = 0; i < n; i++)
    {
      for (j = 0; j < n; j++)
        {
          c[i][j] = 0.0 + 0.0*I;
          //c[i][j] = 0.0;
          for (k = 0; k < n; k++)
            {
              // printf("i %i, j %i, k %i\n", i, j, k);
              c[i][j] = c[i][j] + a[i][k] * b[k][j];
              tc++;
            }
        }
    }
  gettimeofday (&tv2, NULL);

  /* Elapsed seconds: whole-second difference plus microsecond difference. */
  res = tv2.tv_sec - tv.tv_sec + (tv2.tv_usec - tv.tv_usec) / 1000000.0;
  printf ("gemm time: %f\n", res);
  printf ("trip count: %i\n", tc);
  /* NOTE(review): c is never read after the loop; only tc is printed. */
  return 0;
}
Fortran version:
! Times a naive triple-loop product of two n x n default-complex matrices
! filled with random values, prints the CPU time spent in the loops, and
! dumps the result matrix to the unformatted file "cmatrix".
program mymatmul
  implicit none
  integer, parameter :: n = 300
  complex :: a(n,n), b(n,n), c(n,n)
  real :: re_part(n,n), im_part(n,n)
  real :: t_start, t_stop
  integer :: row, col, inner

  ! Build each operand from two freshly drawn real arrays
  ! (real part first, then imaginary part).
  call random_number (re_part)
  call random_number (im_part)
  a = cmplx (re_part, im_part)

  call random_number (re_part)
  call random_number (im_part)
  b = cmplx (re_part, im_part)

  ! Only the multiply itself is timed.
  call cpu_time (t_start)
  columns: do col = 1, n
    rows: do row = 1, n
      c(row,col) = (0., 0.)
      dot: do inner = 1, n
        c(row,col) = c(row,col) + a(row,inner) * b(inner,col)
      end do dot
    end do rows
  end do columns
  call cpu_time (t_stop)

  write (*,'(F8.4)') t_stop - t_start

  ! Write the product out so the computed values are actually used.
  open (10, file="cmatrix", form='unformatted')
  write (10) c
  close (10)
end program mymatmul
Fortran version with real instead of complex:
! Times a naive triple-loop product of two n x n default-real matrices
! filled with random values, prints the CPU time and the number of
! inner-loop iterations, and dumps the result matrix to the unformatted
! file "rmatrix".
program mymatmul
  implicit none
  integer, parameter :: n = 300
  real :: a(n,n), b(n,n), c(n,n)
  real :: t_start, t_stop
  integer :: row, col, inner
  integer :: tc

  call random_number (a)
  call random_number (b)

  ! The clock is started before the trip counter is reset, as in the
  ! measured region: counter reset, loops, then stop.
  call cpu_time (t_start)
  tc = 0
  columns: do col = 1, n
    rows: do row = 1, n
      c(row,col) = 0.
      dot: do inner = 1, n
        c(row,col) = c(row,col) + a(row,inner) * b(inner,col)
        tc = tc + 1
      end do dot
    end do rows
  end do columns
  call cpu_time (t_stop)

  write (*,'(F8.4)') t_stop - t_start
  write (*, *) 'Trip count: ', tc

  ! Write the product out so the computed values are actually used.
  open (10, file="rmatrix", form='unformatted')
  write (10) c
  close (10)
end program mymatmul
And my results:
C version, complex:
-O2
2.0 s
-ffast-math
0.9
gfortran -O2:
0.32
float:
-O2 0.6 s
fast math makes no difference!
gfortran -O2 -g
0.07
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29549