Hi,
Actually it's a *lot* more - and probably nothing to do with memory
considerations - and, afaik, possibly even on topic.
Before I forget - I'm running perl 5.6.1 on Win 2k, msvc++ 6.0,
Inline-0.43.
You may recall I posted about an inline function speeding up inexplicably
when I made inconsequential changes to #defined values. I also later found that
making inconsequential changes to various declarations had similar effect.
To illustrate, I present below 'aargh.pl' which some of you might (or might
not) like to look at. I've pasted it in - so if it *has* wrapped, it will
hopefully unwrap nicely upon copying.
It contains 2 inline functions ('modc1' and 'modc2') which are identical
(right down to the white space) save that 'modc1' declares an integer ('int
rubbish') which is not used anywhere in the program. In 'modc2' the
declaration of 'int rubbish' is commented out.
When I run 'aargh.pl' it reports that 100 iterations of 'modc1' take 2.46
seconds, whereas 100 iterations of 'modc2' take 2.95 seconds. That is, the
function that contains the extra bogus declaration runs nearly 20% faster.
When run separately, the 2 functions produce dll's of exactly the same size,
yet the same running-time discrepancy exists.
MD5 reports that the respective dll's are different but I suspect that is
just a timestamp discrepancy. (If I delete the dll for a script, and re-run
that script having made no changes to it, then the newly created dll has a
different md5 to the deleted one - which I presume means that each dll has
some timestamp info built into it.)
But then, if the respective dll's for 'modc1' and 'modc2' are in fact
identical, how can we account for the time discrepancy ? What does the
Inline module do that could account for it ? What is there in the code that
could possibly account for it ?
Just a brief word about what 'aargh.pl' does - its output is simply 2
statements announcing whether the 2 functions have performed as they should,
followed by the Benchmark report. The functions perform a '%' operation on
big integers represented by arrays (@num1 % @num2). '@check' contains the
correct answer. The functions are little more than a copy'n'paste of
Math::BigInt's bmod function (with the '$' signs removed).
There's nothing special about the values chosen - other values exhibit the
same anomaly - even for different array sizes, the same proportionate
discrepancy exists. I just chose those (hard coded) values for ease of
coding and demonstration.
eg, for some larger values I tried, 100 iterations of modc1 took 12.84
seconds and 100 iterations of modc2 took 14.92 seconds - not quite the same
proportion, but it shows that I'm not just losing a flat 5ms per iteration.
Particularly curious to know whether the damn thing even compiles on another
platform .... and if it does compile, does the same running time anomaly
exist ?
I've so far been unable to replicate the problem with a simpler inline
function.
Anyway .... 'nuff said ... I'm happy to respond to any comments/questions
(but I promise not to *raise* this issue again).
Cheers,
Rob
############ Start aargh.pl #################
use Benchmark;
use Inline C => <<'END_OF_C_CODE';
/* Radix of the digit arrays: every array element holds one base-BASE digit
   (67108864 == 2^26). */
#define BASE 67108864
/* Capacity, in digits, of the fixed-size work arrays d[], a[] and y[]. */
#define ARRAY 1000
/*
 * modc1 - remainder of a multi-digit integer division (dividend % divisor).
 *
 * Perl-side call: modc1(lenx, dividend digits..., divisor digits...).
 * Stack item 0 is the dividend's digit count, items 1..lenx are the dividend
 * digits and every remaining item is a divisor digit.  Each digit is a
 * base-BASE value; the last digit of each list is treated as the most
 * significant one (y[leny - 1] is the leading divisor digit).
 *
 * The remainder's digits are pushed back onto the Perl stack in the same
 * order, with high-order zero digits dropped while more than one remains.
 *
 * NOTE(review): nothing checks the digit counts against ARRAY.  The dividend
 * needs one spare slot (a[lenx] is written), so more than ARRAY - 1 dividend
 * digits or more than ARRAY divisor digits overruns the local arrays.
 * NOTE(review): an empty divisor list makes leny 0 and reads y[-1]; a zero
 * v1 makes the quotient estimate divide by zero.  Callers are presumably
 * expected to pass a non-zero divisor without leading zero digits - confirm.
 */
void modc1 ( SV* x, ...)
{
/* Inline::C stack macro; must precede any other Inline_Stack_ macro. */
Inline_Stack_Vars;
/* lenx and leny are the digit counts of a[] and y[]; lend counts d[]. */
int i, i2, t, iy, ix, lend = 0, lenx,leny, p, q;
/* tl and tr only hold the two sides of the quotient test; tt is unused. */
double qq, prd = 0, tl, tr, tt;
/* u0, u1, u2: top three digits of the current dividend window (u0 highest);
   uu0 is u0 as a double so the products below are done in floating point. */
int u0;
double uu0;
int u1;
int u2;
/* v1, v2: top two digits of the scaled divisor, with double copies. */
int v1;
double vv1;
int v2;
double vv2;
/* d: unscaled remainder, a: dividend (reduced in place), y: divisor. */
int d[ARRAY] = {0};
int a[ARRAY] = {0};
int y[ARRAY] = {0};
/* dd is the scale factor; ddn is its integer copy used for the != 1 tests. */
double dd;
int ddn;
/* bar: borrow flag during subtraction; car: carry between digits. */
int bar = 0;
int car = 0;
/* cc is never referenced. */
double cc = 0;
double xx;
double yy;
/* Deliberately unused: this declaration is the only difference from modc2
   and exists solely for the timing comparison between the two functions. */
int rubbish;
/* Inline::C macro, documented as resetting the stack pointer ahead of
   pushing return values; the arguments are still read by index below. */
Inline_Stack_Reset;
/* Unpack the arguments: the count, then lenx dividend digits, then the
   divisor digits. */
lenx = SvIV(Inline_Stack_Item(0));
for (i = 1; i <= lenx; ++i) {
a[i - 1] = SvIV(Inline_Stack_Item(i));
}
for (i = lenx + 1; i < Inline_Stack_Items; ++i) {
y[i - lenx - 1] = SvIV(Inline_Stack_Item(i));
}
leny = Inline_Stack_Items - lenx - 1;
/* Scale factor derived from the leading divisor digit.  Both operands are
   ints, so this is an integer division whose result is stored as a double. */
dd = BASE / (y[leny - 1] + 1);
ddn = dd;
if (ddn != 1) {
/* Multiply the dividend by dd digit by digit, carrying into the next digit;
   the final carry becomes a new top digit. */
for (i = 0; i < lenx; ++i) {
xx = a[i] * dd + car;
car = xx / BASE;
a[i] = xx - ((double) car * BASE);
}
a[i] = car;
++lenx;
car = 0;
/* Multiply the divisor by dd the same way; its final carry is not stored. */
for (i = 0; i < leny; ++i) {
yy = y[i] * dd + car;
car = yy / BASE;
y[i] = yy - ((double) car * BASE);
}
}
else {
/* No scaling needed, but the dividend still gains a top digit; a[] was
   zero-initialised, so that digit is 0. */
++lenx;
}
if (leny > 1) {
v2 = y[leny - 2];
}
else {
v2 = 0;
}
v1 = y[leny - 1];
/* Each pass works out one quotient digit q, subtracts q times the divisor
   from the top of the dividend and then drops the dividend's top digit.
   Only the remainder left in a[] is kept; q itself is discarded. */
while (lenx > leny) { /* open w */
if (lenx > 2) {
u2 = a[lenx -3];
}
else {
u2 = 0;
}
u1 = a[lenx - 2];
u0 = a[lenx - 1];
uu0 = u0;
/* First estimate of q from the top two dividend digits and v1, pinned to
   BASE - 1 when u0 equals v1; the conversion to int truncates. */
qq = ((u0 == v1) ? BASE - 1 : (uu0 * BASE + u1) / v1);
q = qq;
vv2 = v2;
vv1 = v1;
/* Lower q while the check against the second divisor digit and the third
   dividend digit shows the estimate is too large. */
while ((tl = q * vv2) >
(tr = (uu0 * BASE + u1 - q * vv1) * BASE + u2)) {
--q;
}
qq = q;
if (qq > 0) {
bar = 0;
car = 0;
/* Subtract q times the divisor from the dividend digits starting at index
   lenx - leny - 1.  car carries the high part of each product; bar is set
   to 1 (assignment inside the condition is intended) when a digit goes
   negative, in which case BASE is added back to that digit. */
for (iy = 0, ix = lenx - leny - 1;
iy <= leny - 1;
++iy, ++ix) {
prd = qq * y[iy] + car;
car = prd / BASE;
p = prd - ((double) car * BASE);
if (bar = ((a[ix] -= p +bar) < 0)) {
a[ix] += BASE;
}
} /* close for loop */
/* The top digit cannot absorb the outstanding carry and borrow, so q was one
   too large: add the divisor back once. */
if (a[lenx - 1] < car + bar) {
car = 0;
--q;
for (iy = 0, ix = lenx - leny - 1;
iy <= leny - 1;
++iy, ++ix) {
/* NOTE(review): a sum exactly equal to BASE is left unreduced by this
   test; ">=" may have been intended - confirm against Math::BigInt. */
if (car = ((a[ix] += y[iy] + car) > BASE)) {
a[ix] -= BASE;
}
}
}
}
/* Discard the top dividend digit before the next pass. */
a[lenx - 1] = 0;
--lenx;
} /* close w */
if (ddn != 1) {
/* The remainder in a[] is still multiplied by dd.  Divide it by dd from the
   highest digit down; each result digit is inserted at d[0] after shifting
   the earlier ones up, so d[] ends with the lowest digit at index 0. */
car = 0;
for (i = lenx - 1; i >= 0; --i) {
prd = (double) car * BASE + a[i];
t = prd / dd;
car = prd - t * dd;
for (i2 = lend - 1; i2 >= 0; --i2) {
d[i2 + 1] = d[i2];
}
d[0] = t;
++lend;
}
/* Drop high-order zero digits while more than one digit remains. */
while (lend > 1) {
if (d[lend - 1] == 0) --lend;
else break;
}
/* Return the digits of d[] to Perl as mortal integer SVs. */
for (i = 0; i < lend; ++i) {
Inline_Stack_Push(sv_2mortal(newSViv(d[i])));
}
Inline_Stack_Done;
Inline_Stack_Return(lend);
}
else {
/* No scaling was applied, so a[] already holds the remainder. */
while (lenx > 1) {
if (a[lenx - 1] == 0) --lenx;
else break;
}
for (i = 0; i < lenx; ++i) {
Inline_Stack_Push(sv_2mortal(newSViv(a[i])));
}
Inline_Stack_Done;
Inline_Stack_Return(lenx);
}
}
/*
 * modc2 - remainder of a multi-digit integer division (dividend % divisor).
 *
 * Same statements as modc1, except that the unused 'int rubbish' declaration
 * is commented out; the pair exists to compare running times.
 *
 * Perl-side call: modc2(lenx, dividend digits..., divisor digits...).
 * Stack item 0 is the dividend's digit count, items 1..lenx are the dividend
 * digits and every remaining item is a divisor digit.  Each digit is a
 * base-BASE value; the last digit of each list is treated as the most
 * significant one (y[leny - 1] is the leading divisor digit).
 *
 * The remainder's digits are pushed back onto the Perl stack in the same
 * order, with high-order zero digits dropped while more than one remains.
 *
 * NOTE(review): nothing checks the digit counts against ARRAY.  The dividend
 * needs one spare slot (a[lenx] is written), so more than ARRAY - 1 dividend
 * digits or more than ARRAY divisor digits overruns the local arrays.
 * NOTE(review): an empty divisor list makes leny 0 and reads y[-1]; a zero
 * v1 makes the quotient estimate divide by zero.  Callers are presumably
 * expected to pass a non-zero divisor without leading zero digits - confirm.
 */
void modc2 ( SV* x, ...)
{
/* Inline::C stack macro; must precede any other Inline_Stack_ macro. */
Inline_Stack_Vars;
/* lenx and leny are the digit counts of a[] and y[]; lend counts d[]. */
int i, i2, t, iy, ix, lend = 0, lenx,leny, p, q;
/* tl and tr only hold the two sides of the quotient test; tt is unused. */
double qq, prd = 0, tl, tr, tt;
/* u0, u1, u2: top three digits of the current dividend window (u0 highest);
   uu0 is u0 as a double so the products below are done in floating point. */
int u0;
double uu0;
int u1;
int u2;
/* v1, v2: top two digits of the scaled divisor, with double copies. */
int v1;
double vv1;
int v2;
double vv2;
/* d: unscaled remainder, a: dividend (reduced in place), y: divisor. */
int d[ARRAY] = {0};
int a[ARRAY] = {0};
int y[ARRAY] = {0};
/* dd is the scale factor; ddn is its integer copy used for the != 1 tests. */
double dd;
int ddn;
/* bar: borrow flag during subtraction; car: carry between digits. */
int bar = 0;
int car = 0;
/* cc is never referenced. */
double cc = 0;
double xx;
double yy;
/* int rubbish; */
/* The declaration above is intentionally left commented out: its absence is
   the only difference from modc1. */
/* Inline::C macro, documented as resetting the stack pointer ahead of
   pushing return values; the arguments are still read by index below. */
Inline_Stack_Reset;
/* Unpack the arguments: the count, then lenx dividend digits, then the
   divisor digits. */
lenx = SvIV(Inline_Stack_Item(0));
for (i = 1; i <= lenx; ++i) {
a[i - 1] = SvIV(Inline_Stack_Item(i));
}
for (i = lenx + 1; i < Inline_Stack_Items; ++i) {
y[i - lenx - 1] = SvIV(Inline_Stack_Item(i));
}
leny = Inline_Stack_Items - lenx - 1;
/* Scale factor derived from the leading divisor digit.  Both operands are
   ints, so this is an integer division whose result is stored as a double. */
dd = BASE / (y[leny - 1] + 1);
ddn = dd;
if (ddn != 1) {
/* Multiply the dividend by dd digit by digit, carrying into the next digit;
   the final carry becomes a new top digit. */
for (i = 0; i < lenx; ++i) {
xx = a[i] * dd + car;
car = xx / BASE;
a[i] = xx - ((double) car * BASE);
}
a[i] = car;
++lenx;
car = 0;
/* Multiply the divisor by dd the same way; its final carry is not stored. */
for (i = 0; i < leny; ++i) {
yy = y[i] * dd + car;
car = yy / BASE;
y[i] = yy - ((double) car * BASE);
}
}
else {
/* No scaling needed, but the dividend still gains a top digit; a[] was
   zero-initialised, so that digit is 0. */
++lenx;
}
if (leny > 1) {
v2 = y[leny - 2];
}
else {
v2 = 0;
}
v1 = y[leny - 1];
/* Each pass works out one quotient digit q, subtracts q times the divisor
   from the top of the dividend and then drops the dividend's top digit.
   Only the remainder left in a[] is kept; q itself is discarded. */
while (lenx > leny) { /* open w */
if (lenx > 2) {
u2 = a[lenx -3];
}
else {
u2 = 0;
}
u1 = a[lenx - 2];
u0 = a[lenx - 1];
uu0 = u0;
/* First estimate of q from the top two dividend digits and v1, pinned to
   BASE - 1 when u0 equals v1; the conversion to int truncates. */
qq = ((u0 == v1) ? BASE - 1 : (uu0 * BASE + u1) / v1);
q = qq;
vv2 = v2;
vv1 = v1;
/* Lower q while the check against the second divisor digit and the third
   dividend digit shows the estimate is too large. */
while ((tl = q * vv2) >
(tr = (uu0 * BASE + u1 - q * vv1) * BASE + u2)) {
--q;
}
qq = q;
if (qq > 0) {
bar = 0;
car = 0;
/* Subtract q times the divisor from the dividend digits starting at index
   lenx - leny - 1.  car carries the high part of each product; bar is set
   to 1 (assignment inside the condition is intended) when a digit goes
   negative, in which case BASE is added back to that digit. */
for (iy = 0, ix = lenx - leny - 1;
iy <= leny - 1;
++iy, ++ix) {
prd = qq * y[iy] + car;
car = prd / BASE;
p = prd - ((double) car * BASE);
if (bar = ((a[ix] -= p +bar) < 0)) {
a[ix] += BASE;
}
} /* close for loop */
/* The top digit cannot absorb the outstanding carry and borrow, so q was one
   too large: add the divisor back once. */
if (a[lenx - 1] < car + bar) {
car = 0;
--q;
for (iy = 0, ix = lenx - leny - 1;
iy <= leny - 1;
++iy, ++ix) {
/* NOTE(review): a sum exactly equal to BASE is left unreduced by this
   test; ">=" may have been intended - confirm against Math::BigInt. */
if (car = ((a[ix] += y[iy] + car) > BASE)) {
a[ix] -= BASE;
}
}
}
}
/* Discard the top dividend digit before the next pass. */
a[lenx - 1] = 0;
--lenx;
} /* close w */
if (ddn != 1) {
/* The remainder in a[] is still multiplied by dd.  Divide it by dd from the
   highest digit down; each result digit is inserted at d[0] after shifting
   the earlier ones up, so d[] ends with the lowest digit at index 0. */
car = 0;
for (i = lenx - 1; i >= 0; --i) {
prd = (double) car * BASE + a[i];
t = prd / dd;
car = prd - t * dd;
for (i2 = lend - 1; i2 >= 0; --i2) {
d[i2 + 1] = d[i2];
}
d[0] = t;
++lend;
}
/* Drop high-order zero digits while more than one digit remains. */
while (lend > 1) {
if (d[lend - 1] == 0) --lend;
else break;
}
/* Return the digits of d[] to Perl as mortal integer SVs. */
for (i = 0; i < lend; ++i) {
Inline_Stack_Push(sv_2mortal(newSViv(d[i])));
}
Inline_Stack_Done;
Inline_Stack_Return(lend);
}
else {
/* No scaling was applied, so a[] already holds the remainder. */
while (lenx > 1) {
if (a[lenx - 1] == 0) --lenx;
else break;
}
for (i = 0; i < lenx; ++i) {
Inline_Stack_Push(sv_2mortal(newSViv(a[i])));
}
Inline_Stack_Done;
Inline_Stack_Return(lenx);
}
}
END_OF_C_CODE
use strict;
use warnings;

# Compare the digit list returned by one of the Inline C functions with the
# expected digits and print the verdict line for that function.
#   $name     - function name used in the message ('modc1' or 'modc2')
#   $got      - array ref holding the digits the function returned
#   $expected - array ref holding the correct digits
# Returns true when both lists have the same length and the same digits.
sub report_result {
    my ($name, $got, $expected) = @_;

    # Only compare digit by digit when the lengths agree; a length mismatch
    # is already a failure and would otherwise compare against undef.
    my $ok = @$got == @$expected;
    if ($ok) {
        for my $i (0 .. $#$expected) {
            if ($expected->[$i] != $got->[$i]) {
                $ok = 0;
                last;
            }
        }
    }

    if ($ok) {
        print "Calculation performed correctly by $name\n";
    }
    else {
        print "Error in $name\n";
    }
    return $ok;
}

print "*\n";

# Hard-coded operands, one array element per digit: @num1 is the dividend,
# @num2 the divisor and @check the expected result of @num1 % @num2.
my @num1  = (20012) x 693;
my @num2  = (101) x 385;
my @check = ((20061) x 308, (49) x 77);

# Verify each function once before timing it.
report_result('modc1', [ modc1(scalar(@num1), @num1, @num2) ], \@check);
report_result('modc2', [ modc2(scalar(@num1), @num1, @num2) ], \@check);

# Code refs instead of strings: the benchmarked code is compiled together
# with the script (and checked by strict) and can see the lexical operands.
# Assigning to an array keeps each call in list context, as before.
my (@ret1, @ret2);
timethese(100, {
    'modc1' => sub { @ret1 = modc1(scalar(@num1), @num1, @num2) },
    'modc2' => sub { @ret2 = modc2(scalar(@num1), @num1, @num2) },
});
############ Finish aargh.pl ################