The code below runs significantly slower when compiled as a 64-bit binary with
gcc 3.4.3 than with gcc 3.3.4, and both are significantly slower than a 32-bit build.
Can anyone tell what's going on:
1) between 32 and 64 bits
2) between 3.3.4 and 3.4.3
Thanks.
amd64 3200, 1024k cache
with gcc 3.4.3
-O3 -march=k8 -m32 (runtime: 0.62)
-O3 -march=k8 -m64 (runtime: 3.01)
with gcc 3.3.4
-O3 -march=k8 -m32 (runtime: 0.65)
-O3 -march=k8 -m64 (runtime: 2.06)
------------------------------------------------------------
// run time is anywhere from 33 to 50 % longer when compiled with gcc 3.4.3
// compared to 3.3.4
// compiled with g++ -O3 -Wall -march=k8 (same performance lag observed
// with -O2)
//
// Objects are created in a hierarchy of classes. When referenced,
// it seems that the pointer lookups
// must cause more cache misses in gcc 3.4.3 binaries.
#include <stdio.h>
#include <time.h>
#include <vector>
// Innermost object of the benchmark's pointer chain (D -> C -> B -> A).
// Holds a single integer that the timing loop reads.
struct mytype_A {
    int id;  // set to 0 by the constructor and never changed in this file

    mytype_A() : id(0) {}
};
// Second-to-last link of the pointer chain: stores a pointer to a mytype_A.
// The pointer is kept as given; this class neither copies nor deletes it.
struct mytype_B {
    mytype_A* A;  // target handed to the constructor

    mytype_B(mytype_A* p) : A(p) {}
};
// Middle link of the pointer chain: stores a pointer to a mytype_B.
// The pointer is kept as given; this class neither copies nor deletes it.
struct mytype_C {
    mytype_B* B;  // target handed to the constructor

    mytype_C(mytype_B* p) : B(p) {}
};
class mytype_D {
public:
// mytype_C* C[2]; // less performance difference if we use simple
arrays
std::vector<mytype_C*> C;
int junk[3]; // affects performance (must cause cache misses)
public:
mytype_D(mytype_A* a0, mytype_A* a1) {
// C[0] = new mytype_C(new mytype_B(a0));
// C[1] = new mytype_C(new mytype_B(a0));
C.push_back(new mytype_C(new mytype_B(a0)));
C.push_back(new mytype_C(new mytype_B(a0)));
}
};
// Benchmark driver.
// Allocates k+1 mytype_A objects and k mytype_D objects, then walks every
// D's two pointer chains k times, counting non-zero ids. Prints the count
// from the final pass and the CPU time spent in the walk, as measured by
// clock(). Allocations are not released; the program returns right after
// printing.
int main() {
    const int k = 5000; // run-time not linear in k; const so the arrays below
                        // are ordinary fixed-size arrays, not VLAs
    // k + 1 slots: the fill loop runs through index k inclusive, and the
    // first mytype_D below is handed A[k - 0]. With only k slots both
    // accesses were out of bounds.
    mytype_A* A[k + 1];
    mytype_D* D[k];
    for (int i = 0; i <= k; i++)
        A[i] = new mytype_A();
    for (int i = 0; i < k; i++)
        D[i] = new mytype_D(A[i], A[k - i]); // intentionally make some pointers farther apart
    clock_t before = clock();
    int k0 = 0;
    for (int i = 0; i < k; i++) {
        k0 = 0; // restart the count each pass; only the last pass is printed
        for (int j = 0; j < k; j++) { // run through list of D's, and reference pointers
            mytype_D* d = D[j];
            if (d->C[0]->B->A->id) k0++;
            if (d->C[1]->B->A->id) k0++;
        }
    }
    printf("%d\n",k0); // don't allow compiler to optimize away k0
    printf("time: %f\n",(double)(clock()-before)/CLOCKS_PER_SEC);
    return 0;
}