Are you writing an interpreter? Your benchmark might be a bit too simple and compilers might optimize some stuff away.
Here are mine like 4 years ago, at the bottom: <https://github.com/status-im/nimbus-eth1/wiki/Interpreter-optimization-resources> import random, sequtils, times type Op = enum Halt # = 0x0000 Inc # = 0x0100 Dec # = 0x0110 Mul2 # = 0x0230 Div2 # = 0x0240 Add7 # = 0x0307 Neg # = 0x0400 func interp_switch(code: seq[Op], initVal: int): int = var pc = 0 result = initVal while true: case code[pc]: of Halt: return of Inc: inc pc inc result of Dec: inc pc dec result of Mul2: inc pc result *= 2 of Div2: inc pc result = result div 2 of Add7: inc pc inc result, 7 of Neg: inc pc result = -result ################################################################################################################# func interp_cgoto(code: seq[Op], initVal: int): int = # Requires a dense enum var pc = 0 result = initVal while true: {.computedGoto.} let instr = code[pc] case instr: of Halt: return of Inc: inc pc inc result of Dec: inc pc dec result of Mul2: inc pc result *= 2 of Div2: inc pc result = result div 2 of Add7: inc pc inc result, 7 of Neg: inc pc result = -result ################################################################################################################# func halt(result: var int, stop: var bool) {.inline, nimcall.}= stop = true func inc(result: var int, stop: var bool) {.inline, nimcall.}= inc result func dec(result: var int, stop: var bool) {.inline, nimcall.}= dec result func mul2(result: var int, stop: var bool) {.inline, nimcall.}= result *= 2 func div2(result: var int, stop: var bool) {.inline, nimcall.}= result = result div 2 func add7(result: var int, stop: var bool) {.inline, nimcall.}= inc result, 7 func neg(result: var int, stop: var bool) {.inline, nimcall.}= result = -result # Requires dense enum type InstrF = proc (result: var int, stop: var bool){.inline, nimcall, noSideEffect, gcsafe, locks: 0.} type FuncTable = array[Op, InstrF] const funcTable: FuncTable = [ Halt: halt, Inc: inc, Dec: dec, Mul2: mul2, Div2: div2, Add7: add7, Neg: neg ] proc interp_ftable(code: seq[Op], initVal: int): int = # Requires dense enum var pc = 0 stop = false result = initVal while not stop: funcTable[code[pc]](result, stop) inc pc ################################################################################################################# type InstrNext = proc (val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} OpH = ref object handler: InstrNext FuncTableNext = array[Op, OpH] proc halt(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc inc(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc dec(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc mul2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc div2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc add7(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} proc neg(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.} let funcTableNext: FuncTableNext = [ Halt: OpH(handler: halt), Inc: OpH(handler: inc), Dec: OpH(handler: dec), Mul2: OpH(handler: mul2), Div2: OpH(handler: div2), Add7: OpH(handler: add7), Neg: OpH(handler: neg) ] proc halt(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= stop = true proc inc(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= inc val inc pc result = funcTableNext[code[pc]] proc dec(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= dec val inc pc result = funcTableNext[code[pc]] proc mul2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= val *= 2 inc pc result = funcTableNext[code[pc]] proc div2(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= val = val div 2 inc pc result = funcTableNext[code[pc]] proc add7(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= inc val, 7 inc pc result = funcTableNext[code[pc]] proc neg(val: var int, code: seq[Op], pc: var int, stop: var bool): OpH {.inline, nimcall.}= val = -val inc pc result = funcTableNext[code[pc]] proc interp_handlers(code: seq[Op], initVal: int): int = # Requires dense enum var pc = 0 stop = false oph = funcTableNext[code[pc]] result = initVal while not stop: oph = oph.handler(result, code, pc, stop) ################################################################################################################# type OpD = ref object {.inheritable.} Ohalt {.final.}= ref object of OpD Oinc {.final.}= ref object of OpD Odec {.final.}= ref object of OpD Omul2 {.final.}= ref object of OpD Odiv2 {.final.}= ref object of OpD Oadd7 {.final.}= ref object of OpD Oneg {.final.}= ref object of OpD FuncTableToken = array[Op, OpD] method execute(op: OpD, result: var int, stop: var bool) {.base, inline, noSideEffect.} = raise newException(ValueError, "To override") method execute(op: Ohalt, result: var int, stop: var bool) {.inline, noSideEffect.}= stop = true method execute(op: Oinc, result: var int, stop: var bool) {.inline, noSideEffect.}= inc result method execute(op: Odec, result: var int, stop: var bool) {.inline, noSideEffect.}= dec result method execute(op: Omul2, result: var int, stop: var bool) {.inline, noSideEffect.}= result *= 2 method execute(op: Odiv2, result: var int, stop: var bool) {.inline, noSideEffect.}= result = result div 2 method execute(op: Oadd7, result: var int, stop: var bool) {.inline, noSideEffect.}= inc result, 7 method execute(op: Oneg, result: var int, stop: var bool) {.inline, noSideEffect.}= result = -result let funcTableToken: FuncTableToken = [ Halt: Ohalt(), Inc: Oinc(), Dec: Odec(), Mul2: Omul2(), Div2: Odiv2(), Add7: Oadd7(), Neg: Oneg() ] proc interp_methods(code: seq[Op], initVal: int): int = # Requires dense enum var pc = 0 stop = false opt: OpD result = initVal while not stop: opt = funcTableToken[code[pc]] opt.execute(result, stop) inc pc ################################################################################################################# import random, sequtils, times, os, strutils, strformat const Nb_Instructions = 1_000_000_000 template bench(impl: untyped) = let start = cpuTime() let r = impl(instructions, n) let stop = cpuTIme() let elapsed = stop - start echo "result: " & $r let procname = impl.astToStr let mips = (Nb_Instructions.float / (1_000_000.0 * elapsed)) echo procname & " took " & $elapsed & "s for " & $Nb_Instructions & " instructions: " & $mips & " Mips (M instructions/s)" proc main(n: int)= randomize(42) let ops = [Inc, Dec, Mul2, Div2, Add7, Neg] let instructions = newSeqWith(Nb_Instructions, rand(ops)) & Halt bench(interp_switch) bench(interp_cgoto) # requires dense enum (no holes) bench(interp_ftable) # requires dense enum (no holes) or tables (instead of arrays) bench(interp_handlers) # requires dense enum (no holes) or tables (instead of arrays) bench(interp_methods) # requires dense enum (no holes) or tables (instead of arrays) # Warmup var start = cpuTime() block: var foo = 123 for i in 0 ..< 1_000_000_000: foo += i*i mod 456 foo = foo mod 789 # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects var stop = cpuTime() echo "Warmup: " & $(stop - start) & "s" # Main loop let arguments = commandLineParams() let initial = if arguments.len > 0: parseInt($arguments[0]) else: 1 main(initial) ## Results on i5-5257U (Broadwell mobile dual core 2.7 turbo 3.1Ghz) # Note that since Haswell, Intel CPU are significantly improed on Switch prediction # This probably won't carry to ARM devices # Warmup: 4.081501s # result: -14604293096444 # interp_switch took 8.604712000000003s for 1000000000 instructions: 116.2153945419672 Mips (M instructions/s) # result: -14604293096444 # interp_cgoto took 7.367597000000004s for 1000000000 instructions: 135.7294651159665 Mips (M instructions/s) # result: -201628509198920 <--- some bug here to fix # interp_ftable took 8.957571000000002s for 1000000000 instructions: 111.6374070604631 Mips (M instructions/s) # result: -14604293096444 # interp_handlers took 11.039072s for 1000000000 instructions: 90.58732473164413 Mips (M instructions/s) # result: -14604293096444 # interp_methods took 23.359635s for 1000000000 instructions: 42.80888806695823 Mips (M instructions/s) Run
