I've observed large timing differences when filling arrays using different methods (`for` vs `foreach` vs `arr[] = x`) and I don't know why. I've looked at array.d (https://github.com/dlang/dmd/blob/9792735c82ac997d11d7fe6c3d6c604389b3f5bd/src/dmd/root/array.d) but I'm still none the wiser.

Here is an example:

```
/* fill.d */

import std.stdio: writeln;
import std.typecons: Tuple, tuple;
import std.algorithm.iteration: mean;
import std.algorithm.iteration: sum;
import std.datetime.stopwatch: AutoStart, StopWatch;


/* Benchmarking Function */
/**
 * Times repeated calls of `fun` and summarises the timings.
 *
 * Template parameters:
 *   fun     = callable invoked with no arguments once per repetition
 *   units   = unit the timings are reported in; any unit name accepted by
 *             Duration.total ("seconds", "msecs", "usecs", "hnsecs", ...)
 *   minN    = smallest repetition count for which a standard deviation is
 *             reported
 *   doPrint = when true, a one-line summary is written with writeln
 *
 * Params:
 *   n   = number of timed repetitions
 *   msg = prefix prepended to the printed summary line
 *
 * Returns: a tuple with the named fields `mean` and `sd`, both expressed in
 *          `units`. `sd` is the sample standard deviation (divisor n - 1) and
 *          is NaN when n < minN or when fewer than two samples were taken.
 */
auto bench(alias fun, string units = "msecs",
           ulong minN = 10, bool doPrint = false)(ulong n, string msg = "")
{
  import core.time: convert;

  /* Duration.total!units yields a whole number of units, so a run shorter
     than one unit would be recorded as 0. Read the elapsed time in
     nanoseconds instead and scale it, keeping the fractional part. */
  enum double nsecsPerUnit = convert!(units, "nsecs")(1);

  auto times = new double[n];
  auto sw = StopWatch(AutoStart.no);
  for(ulong i = 0; i < n; ++i)
  {
    sw.start();
    fun();
    sw.stop();
    times[i] = sw.peek.total!"nsecs" / nsecsPerUnit;
    sw.reset();
  }
  double ave = mean(times);
  double sd = 0;

  /* The sample standard deviation needs at least two observations; without
     the extra check, minN <= 1 would lead to a division by n - 1 == 0 (or
     to an unsigned wrap-around of n - 1 when n == 0). */
  if(n >= minN && n > 1)
  {
    for(ulong i = 0; i < n; ++i)
      sd += (times[i] - ave)^^2;
    sd /= (n - 1);
    sd ^^= 0.5;
  }else{
    sd = double.nan;
  }

  static if(doPrint)
    writeln(msg ~ "Mean time("~ units ~ "): ", ave, ", Standard Deviation: ", sd);

  return tuple!("mean", "sd")(ave, sd);
}

/* Fill Functions */
/*
 * Fill variant 1: index-based `for` loop.
 *
 * Allocates a new array of `n` elements whose element type is typeof(x),
 * assigns `x` to every element through arr[i], and returns the array.
 * `x` is an alias parameter and `n` a compile-time value, so the function
 * itself takes no run-time arguments.
 */
auto fill_for(alias x, ulong n)()
{
  alias T = typeof(x);
  auto arr = new T[n];

  /* The explicit indexing is the construct under test; keep it as is. */
  for(ulong i = 0; i < n; ++i)
    arr[i] = x;

  return arr;
}

/*
 * Fill variant 2: `foreach` over the elements by reference.
 *
 * Allocates a new array of `n` elements whose element type is typeof(x),
 * assigns `x` to every element through a `ref` loop variable, and returns
 * the array.
 */
auto fill_foreach(alias x, ulong n)()
{
  alias T = typeof(x);
  auto arr = new T[n];

  /* `ref` is required so that the assignment writes into the array rather
     than into a per-iteration copy of the element. */
  foreach(ref el; arr)
    el = x;

  return arr;
}

/*
 * Fill variant 3: whole-array slice assignment.
 *
 * Allocates a new array of `n` elements whose element type is typeof(x),
 * sets every element to `x` with a single `arr[] = x` statement, and
 * returns the array.
 */
auto fill_slice(alias x, ulong n)()
{
  alias T = typeof(x);
  auto arr = new T[n];

  /* Slice assignment: one statement that sets all elements of arr to x. */
  arr[] = x;

  return arr;
}


/*
 * Runs the three fill variants through bench and prints one summary line
 * per variant: slice assignment first, then foreach, then the indexed for
 * loop. Every variant fills a 100_000-element array, is timed 100 times,
 * and is reported in microseconds.
 */
void main()
{
  enum ulong length = 100_000;  /* elements per filled array */
  enum ulong minRuns = 10;      /* minN handed to bench */
  enum ulong runs = 100;        /* timed repetitions per variant */

  double value = 42;

  bench!(fill_slice!(value, length), "usecs", minRuns, true)(runs, "Slice: ");
  bench!(fill_foreach!(value, length), "usecs", minRuns, true)(runs, "Foreach: ");
  bench!(fill_for!(value, length), "usecs", minRuns, true)(runs, "For: ");
}

/*
$ dmd fill.d && ./fill
Slice: Mean time(usecs): 87.38, Standard Deviation: 54.1542
Foreach: Mean time(usecs): 179.9, Standard Deviation: 41.4109
For: Mean time(usecs): 245.81, Standard Deviation: 53.0798

$ dmd --version
DMD64 D Compiler v2.090.1
...
*/
```

I'd like to understand why these approaches differ so much in performance, and to know where `opSliceAssign` (or the equivalent operation) for D's built-in arrays is implemented. Changing the `-boundscheck` setting made no difference to the relative timings. Thanks.



Reply via email to