In `testSOA`, only the content of seq `m` needs to be written to main memory. I 
think that in `testAOS`, not only field `m` of object `S` but also fields `x`, 
`y` and `z` need to be written to main memory, because they are likely to be 
in the same cache line as field `m`.

According to this wikipedia entry:

<https://en.wikipedia.org/wiki/CPU_cache#Cache_entries>

> Data is transferred between memory and cache in blocks of fixed size, called 
> cache lines or cache blocks.

I moved field `m` of object `S` in your code to a separate seq `m2`. Then 
`testAOS` becomes slightly faster than `testSOA`.
    
    
    import random, std/monotimes
    
    # Number of elements in every benchmark seq.
    const MAX = 9000000
    
    # Element type shared by the standalone seqs and the fields of S.
    type Elem = int
    
    # Structure-of-arrays layout: one seq per field.
    var x = newSeq[Elem](MAX)
    var y = newSeq[Elem](MAX)
    var z = newSeq[Elem](MAX)
    # Result seq written by testSOA.
    var m = newSeq[Elem](MAX)
    # Result seq written by testAOS; kept outside S.
    var m2 = newSeq[Elem](MAX)
    
    # Array-of-structures element: the three input fields stored together.
    type S = object
      x, y, z: Elem
    
    # Array-of-structures layout: one seq of S records.
    var d = newSeq[S](MAX)
    
    template bench(name: string, code: untyped) =
      ## Runs `code` once and echoes the number of monotonic clock ticks it
      ## took, labelled with `name`.
      let startTicks = getMonoTime().ticks
      code
      let elapsedTicks = getMonoTime().ticks - startTicks
      echo "test ", name, ": ", elapsedTicks
    
    # Seed the default random number generator before filling the test data.
    randomize()
    
    proc testSOA(i: int) =
      ## Structure-of-arrays kernel: adds element `i` of the separate global
      ## seqs `x`, `y` and `z` and stores the sum in `m[i]`.
      m[i] = x[i] + y[i] + z[i]
    
    proc testAOS(i: int) =
      ## Array-of-structures kernel: adds fields `x`, `y` and `z` of record
      ## `d[i]` and stores the sum in the separate global seq `m2` at index `i`.
      m2[i] = d[i].x + d[i].y + d[i].z
    
    proc test() =
      ## Fills all input seqs with random data, then times the SOA kernel
      ## (label "A1") and the AOS kernel (label "S1") over all `MAX` elements.
      ##
      ## `d[i]` receives the same values as `x[i]`, `y[i]`, `z[i]`, so both
      ## kernels sum identical numbers.

      # Cap each operand at a third of Elem.high so x + y + z can never exceed
      # Elem.high. With the full range the sum overflows for most triples and
      # raises OverflowDefect whenever overflow checks are enabled.
      const maxOperand = Elem.high div 3

      for i in 0 ..< MAX:
        x[i] = rand(maxOperand)
        y[i] = rand(maxOperand)
        z[i] = rand(maxOperand)
        # The output seqs are also written up front, before timing starts.
        m[i] = rand(Elem.high)
        m2[i] = rand(Elem.high)
        d[i] = S(x: x[i], y: y[i], z: z[i])
      
      bench "A1":
        for i in 0 ..< MAX:
          testSOA(i)
      
      bench "S1":
        for i in 0 ..< MAX:
          testAOS(i)
    
    # Run the benchmark once.
    test()
    
    Run

Output:
    
    
    test A1: 27826736
    test S1: 24209680
    
    Run

Reply via email to