Hi,
Interesting question, so I took your examples and made them do
the same thing with regards to allocation (using malloc instead
of new in both languages).
I removed the stopwatch to use "time" instead.
Now the programs should do the very same thing. Will they be as
fast too?
D code:
------------------------ bench.d
import std.stdio, std.math;
import core.stdc.stdlib;
import core.stdc.stdio;
int main() {
    // Price a European call on a 252-step binomial tree, repeated
    // 10000x so the runtime is long enough to measure with `time`.
    double C = 0.0;
    for (int k = 0; k < 10000; ++k) { // iterate 10000x
        double S0 = 100.0;     // spot price
        double r = 0.03;       // risk-free rate
        double alpha = 0.07;   // drift
        double sigma = 0.2;    // volatility
        double T = 1.0;        // maturity in years
        double strike = 100.0;
        const int n = 252;     // tree steps (trading days per year)
        double dt = T / n;
        double R = exp(r*dt);
        double u = exp(alpha*dt + sigma*sqrt(dt));
        double d = exp(alpha*dt - sigma*sqrt(dt));
        double qU = (R - d) / (R*(u - d));
        double qD = (1 - R*qU) / R;
        double* call = cast(double*)malloc(double.sizeof * (n+1));
        if (call is null)
            return 1; // allocation failure — bail out rather than deref null
        // Terminal payoffs at the leaves...
        for (int i = 0; i <= n; ++i)
            call[i] = fmax(S0*pow(u, n-i)*pow(d, i) - strike, 0.0);
        // ...then backward induction to the root.
        for (int i = n-1; i >= 0; --i) {
            for (int j = 0; j <= i; ++j) {
                call[j] = qU * call[j] + qD * call[j+1];
            }
        }
        C = call[0];
        free(call); // was leaked every iteration (~20 MB over the full run)
    }
    printf("%f\n", C);
    return 0;
}
------------------------
C++ code
------------------------ bench.cpp
#include <cmath>
#include <cstdlib>
#include <cstdio>
int main() {
    // Price a European call on a 252-step binomial tree, repeated
    // 10000x so the runtime is long enough to measure with `time`.
    double C = 0.0;
    for (int k = 0; k < 10000; ++k) { // iterate 10000x
        double S0 = 100.0;     // spot price
        double r = 0.03;       // risk-free rate
        double alpha = 0.07;   // drift
        double sigma = 0.2;    // volatility
        double T = 1.0;        // maturity in years
        double strike = 100.0;
        const int n = 252;     // tree steps (trading days per year)
        double dt = T / n;
        double R = exp(r*dt);
        double u = exp(alpha*dt + sigma*sqrt(dt));
        double d = exp(alpha*dt - sigma*sqrt(dt));
        double qU = (R - d) / (R*(u - d));
        double qD = (1 - R*qU) / R;
        double* call = (double*)malloc(sizeof(double) * (n+1));
        if (call == nullptr)
            return 1; // allocation failure — bail out rather than deref null
        // Terminal payoffs at the leaves...
        for (int i = 0; i <= n; ++i)
            call[i] = fmax(S0*pow(u, n-i)*pow(d, i) - strike, 0.0);
        // ...then backward induction to the root.
        for (int i = n-1; i >= 0; --i) {
            for (int j = 0; j <= i; ++j) {
                call[j] = qU * call[j] + qD * call[j+1];
            }
        }
        C = call[0];
        free(call); // was leaked every iteration (~20 MB over the full run)
    }
    printf("%f\n", C);
    return 0;
}
------------------------
Here is the bench script:
------------------------ bench.sh
#!/bin/sh
# Build both benchmarks at -O2, then time each binary four times,
# interleaved so warm-up and machine noise affect both equally.
ldc2 -O2 bench.d
clang++ -O2 bench.cpp -o bench-cpp
for run in 1 2 3 4; do
    time ./bench
    time ./bench-cpp
done
------------------------
Note that I use clang-703.0.31, which ships with Xcode 7.3 and is
based on LLVM 3.8.0, from what I can gather.
I'm using ldc 1.0.0-b2, which is on LLVM 3.8.0 too — so the backend
should be out of the equation, leaving the frontends to explain any
difference.
The results at -O2 (minimum of 4 samples):
// C++
real 0m0.484s
user 0m0.466s
sys 0m0.011s
// D
real 0m0.390s
user 0m0.373s
sys 0m0.012s
Why is the D code 1.25x as fast as the C++ code if they do the
same thing?
Well, I don't know — I haven't analyzed it further.