Hi,
I have noticed a big performance decrease in one of my numerical codes
when switching from gcc 4.4 to gcc 4.5. A small test case is attached.
When compiling this test case with "gcc -O3 perf.c -lm -std=c99"
and executing the resulting binary, the CPU time with the head of
the 4.4 branch is about 1.1s; with the head of the trunk it is 2.1s.
This is on a Pentium D CPU. I have verified that both binaries produce
identical results.
If I can do anything to help locate the reason for this slowdown, I'd be
glad to help, but I must admit that I'm no good at reading assembly output.
Any insight would be greatly appreciated.
Thanks,
Martin
#include <math.h>
#include <stdlib.h>
/* Return a when a >= b holds, otherwise return b. */
static inline double max (double a, double b)
{
  if (a >= b)
    return a;
  return b;
}
/*
 * Round arg to an int by adding 0.5 and truncating.
 * When the shifted value is negative, one is subtracted from the truncated
 * result (truncation goes toward zero), so e.g. -1.2 -> -1 and -1.5 -> -2.
 */
static inline int nearest_int (double arg)
{
  const double shifted = arg + 0.5;
  const int truncated = (int)shifted;

  if (shifted >= 0)
    return truncated;
  return truncated - 1;
}
/*
 * Fill res[] with successive terms of a recurrence in l1, starting at
 * l1min = max(|l2-l3|, |m1|) with m1 = -m2-m3 and stepping l1 by one.
 * NOTE(review): the name and the coefficient formulas suggest the forward
 * recursion for Wigner 3j symbols -- confirm against the original algorithm.
 *
 * l2, l3, m2, m3  inputs from which all recurrence coefficients are derived
 * res             output array; res[0] is seeded with 2^-250 and every
 *                 further entry is computed from the previous one or two
 * sz              number of doubles available in res; nothing is written
 *                 at or beyond res[sz], and nothing at all if sz < 1
 *
 * The recurrence stops as soon as one of the following holds:
 *   - all ncoef = nearest_int(l1max-l1min)+1 terms have been produced,
 *   - sz entries have been written,
 *   - |c1| did not decrease compared with the previous step.
 * Entries of res beyond the last one written are left untouched.
 */
void wrec3jj (double l2, double l3, double m2, double m3, double *res, int sz)
{
  /* Without room for at least the seed value there is nothing to do. */
  if (sz < 1)
    return;

  const int expo = 250;
  const double srtiny = ldexp(1., -expo);
  const double m1 = -m2 - m3;
  const double l1min = max(fabs(l2-l3), fabs(m1)),
               l1max = l2 + l3;
  const int ncoef = nearest_int(l1max-l1min) + 1;
  /* Never step past the caller's buffer, even if ncoef exceeds sz.  This
     also bounds the loop when the inputs are inconsistent (ncoef <= 0). */
  const int limit = (ncoef < sz) ? ncoef : sz;

  /* Loop-invariant pieces of the recurrence coefficients. */
  const double l2ml3sq = (l2-l3)*(l2-l3),
               pre1 = (l2+l3+1.)*(l2+l3+1.),
               m1sq = m1*m1,
               pre2 = m1*(l2*(l2+1.)-l3*(l3+1.)),
               m3mm2 = m3-m2;

  res[0] = srtiny;

  /* Start from a huge value so that the "|c1| stopped decreasing" test
     below does not trigger on the first step for ordinary coefficients. */
  double c1 = 1e300;
  double oldfac = 0.;

  for (int i = 1; i < limit; ++i)
  {
    const double l1 = l1min+i,
                 l1sq = l1*l1;
    const double c1old = fabs(c1);
    const double newfac = sqrt((l1sq-l2ml3sq)*(pre1-l1sq)*(l1sq-m1sq));

    if (i>1)
    {
      /* General step: three-term relation using the two previous entries. */
      const double tmp1 = 1./((l1-1.)*newfac);
      c1 = (2.*l1-1.)*(pre2-(l1sq-l1)*m3mm2) * tmp1;
      res[i] = res[i-1]*c1 - res[i-2]*l1*oldfac*tmp1;
    }
    else
    {
      /* First step: only one previous entry exists.  The alternative
         formula avoids the division by (l1-1.), which is zero when
         l1min == 0 (l1 == 1 here). */
      c1 = (l1>1.000001) ? (2.*l1-1.)*(pre2-(l1sq-l1)*m3mm2)/((l1-1.)*newfac)
                         : (2.*l1-1.)*l1*(m3mm2)/newfac;
      res[i] = res[i-1]*c1;
    }

    oldfac = newfac;
    if (c1old<=fabs(c1))
      break;
  }
}
/*
 * Benchmark driver: calls wrec3jj one million times with the same fixed
 * arguments, reusing a single heap-allocated buffer of 1000 doubles.
 * Returns EXIT_FAILURE if the buffer cannot be allocated, 0 otherwise.
 */
int main(void)
{
  enum { RES_LEN = 1000, NUM_CALLS = 1000000 };

  double *res = malloc(RES_LEN * sizeof *res);
  if (res == NULL)
    return EXIT_FAILURE;

  for (int m = 0; m < NUM_CALLS; ++m)
    wrec3jj (100, 60, 60, -50, res, RES_LEN);

  free(res);
  return 0;
}