I would also like to have RDTSC-based tests to catch performance regressions; see: [https://github.com/mratsim/Arraymancer/issues/135](https://github.com/mratsim/Arraymancer/issues/135)
# @Stefan_Salewski, note that it's easy to wrap RDTSC, see
# https://gist.github.com/edubart/f6c92b1fdfca1c1e15ec34bb45f88595
#
# Micro-benchmark: average cost, in TSC ticks per iteration, of allocating and
# freeing randomly sized buffers with several allocators (C malloc,
# C aligned_alloc, Nim allocShared/alloc, Nim seq, Intel MKL mkl_malloc).
#
# NOTE(review): `csize` and `random.random` are kept as in the original post;
# they are deprecated in newer Nim releases (`csize_t` / `rand`) - confirm the
# target compiler version before modernizing.

import sequtils, random

# --- C allocator bindings ---------------------------------------------------

proc c_malloc(size: csize): pointer {.importc: "malloc", header: "<stdlib.h>".}
proc c_aligned_alloc(alignment, size: csize): pointer {.importc: "aligned_alloc", header: "<stdlib.h>".}
proc c_free(p: pointer) {.importc: "free", header: "<stdlib.h>".}

# --- Intel MKL allocator bindings -------------------------------------------

# The C prototype takes a C `int` for the alignment, hence `cint` here rather
# than Nim's pointer-sized `int`.
proc mkl_malloc(size: csize, align: cint): pointer {.importc: "mkl_malloc", header: "<mkl.h>".}
proc mkl_free(p: pointer) {.importc: "mkl_free", header: "<mkl.h>".}
{.passL:"-lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lm".}

# --- Cycle counting ---------------------------------------------------------

proc rdtsc(): int64 =
  ## Reads the x86 time-stamp counter and returns it as a single 64-bit value
  ## (EDX holds the high 32 bits, EAX the low 32 bits).
  var hi, lo: uint32
  # `volatile` is required: an asm statement with outputs and no inputs is
  # otherwise treated by GCC as a pure computation, so two reads could be
  # merged or moved, which would corrupt every measurement below.
  {.emit: """asm volatile ("rdtsc" : "=a"(`lo`), "=d"(`hi`));""".}
  result = int64(lo) or (int64(hi) shl 32)

type
  CycleCounter = object
    ## Remembers the counter value taken at the last `restart`.
    start_cycles: int64

proc restart(self: var CycleCounter) {.inline.} =
  ## Starts (or restarts) the measurement from the current counter value.
  self.start_cycles = rdtsc()

proc elapsedCycles(self: CycleCounter): int64 {.inline.} =
  ## Counter ticks elapsed since the last `restart`.
  rdtsc() - self.start_cycles

proc iterationCycles(self: CycleCounter, iters: int64): float {.inline.} =
  ## Average number of counter ticks per iteration over `iters` iterations.
  self.elapsedCycles().float / iters.float

# --- Benchmark parameters ---------------------------------------------------

const ALIGNMENT = 64
const MAX_SIZE = 128*128*128
const MAX_ITERS = 1000000

# One pre-generated size per iteration, rounded down to a multiple of
# ALIGNMENT and never smaller than ALIGNMENT. Sizes are element counts: every
# use below multiplies by sizeof(float32) or uses them as a seq length.
var alloc_sizes = newSeqWith(MAX_ITERS, max((random(MAX_SIZE) div ALIGNMENT) * ALIGNMENT, ALIGNMENT))

# --- Benchmark --------------------------------------------------------------

proc main() =
  ## Runs every allocator over the same sequence of sizes and prints the
  ## average tick count per allocate/read/free iteration.
  var cycler: CycleCounter

  # The first element of each buffer is accumulated into `c`, and `c` is
  # printed at the end, so the compiler cannot discard the allocations.
  cycler.restart()
  var c = 0.0f
  for size in alloc_sizes:
    let data = c_malloc(size*sizeof(float32))
    c += cast[ptr UncheckedArray[float32]](data)[0]
    c_free(data)
  echo "C malloc ", cycler.iterationCycles(MAX_ITERS), " cycles"

  cycler.restart()
  for size in alloc_sizes:
    let data = c_aligned_alloc(ALIGNMENT, size*sizeof(float32))
    c += cast[ptr UncheckedArray[float32]](data)[0]
    c_free(data)
  echo "C aligned_alloc ", cycler.iterationCycles(MAX_ITERS), " cycles"

  cycler.restart()
  for size in alloc_sizes:
    let data = allocShared(size*sizeof(float32))
    c += cast[ptr UncheckedArray[float32]](data)[0]
    deallocShared(data)
  echo "nim allocShared ", cycler.iterationCycles(MAX_ITERS), " cycles"

  cycler.restart()
  for size in alloc_sizes:
    let data = alloc(size*sizeof(float32))
    c += cast[ptr UncheckedArray[float32]](data)[0]
    dealloc(data)
  echo "nim alloc ", cycler.iterationCycles(MAX_ITERS), " cycles"

  cycler.restart()
  for size in alloc_sizes:
    var data = newSeqOfCap[float32](size)
    data.setLen(size)
    c += data[0]
  echo "nim seq ", cycler.iterationCycles(MAX_ITERS), " cycles"

  cycler.restart()
  for size in alloc_sizes:
    let data = mkl_malloc(size*sizeof(float32), cint(ALIGNMENT))
    c += cast[ptr UncheckedArray[float32]](data)[0]
    mkl_free(data)
  echo "mkl_alloc ", cycler.iterationCycles(MAX_ITERS), " cycles"

  echo "just printed to foll gcc: ", c

# Run twice so the second pass reports timings after the first pass has
# already exercised every allocator.
echo "--- initial ---"
main()
echo "--- after warmup ---"
main()
