All the snippets are still working for me.
import std/macros
import std/monotimes
from times import inMilliseconds
const cLOOPS {.intdefine.} = 1225
# avoids some "bit-twiddling" for better speed...
const cBITMSK = [ 1'u8, 2, 4, 8, 16, 32, 64, 128 ]
macro unrollLoops(ca, sz, strtndx, bp: untyped) =
let cmpstsalmtid = "cmpstsalmt".newIdentNode
let szbitsid = "szbits".newIdentNode
let strtndx0id = "strtndx0".newIdentNode
let strt0id = "strt0".newIdentNode
let strt7id = "strt7".newIdentNode
let endalmtid = "endalmt".newIdentNode
let bpintid = "bpint".newIdentNode
let cullaid = "culla".newIdentNode
result = quote do:
let `szbitsid` = `sz` shl 3
let `cmpstsalmtid` = `ca` + `sz`
let `bpintid` = `bp`.int
let `strtndx0id` = `strtndx`
let `strt0id` = `strtndx0id` shr 3
for i in 1 .. 7:
let strtndxido = newIdentNode("strtndx" & $(i - 1))
let strtndxidn = newIdentNode("strtndx" & $i)
let strtid = newIdentNode("strt" & $i)
result.add quote do:
let `strtndxidn` = `strtndxido` + `bp`
let `strtid` = (`strtndxidn` shr 3) - `strt0id`
let csstmnt = quote do:
case (((`bpintid` and 0x6) shl 2) + (`strtndx0id` and 7)).uint8
of 0'u8: break
csstmnt.del 1 # delete last dummy "of"
for n in 0'u8 .. 0x3F'u8: # actually used cases...
let pn = (n shr 2) or 1'u8
let cn = n and 7'u8
let mod0id = newLit(cn)
let cptr0id = "cptr0".newIdentNode
let loopstmnts = nnkStmtList.newTree()
for i in 0'u8 .. 7'u8:
let mskid = newLit(1'u8 shl ((cn + pn * i.uint8) and 7).int)
let cptrid = ("cptr" & $i).newIdentNode
let strtid = ("strt" & $i).newIdentNode
if i == 0'u8:
loopstmnts.add quote do:
let `cptrid` = cast[ptr uint8](`cullaid`)
else:
loopstmnts.add quote do:
let `cptrid` = cast[ptr uint8](`cullaid` + `strtid`)
loopstmnts.add quote do:
`cptrid`[] = `cptrid`[] or `mskid`
loopstmnts.add quote do:
`cullaid` += `bpintid`
let ofbrstmnts = quote do:
while `cullaid` < `endalmtid`:
`loopstmnts`
`cullaid` = ((`cullaid` - `ca`) shl 3) or `mod0id`.int
while `cullaid` < `szbitsid`:
let `cptr0id` = cast[ptr uint8](`ca` + (`cullaid` shr 3))
`cptr0id`[] = `cptr0id`[] or cBITMSK[`cullaid` and 7]
`cullaid` += `bpintid`
csstmnt.add nnkOfBranch.newTree(
newLit(n),
ofbrstmnts
)
for n in 0x40'u8 .. 255'u8: # fill in defaults for remaining possibilities
csstmnt.add nnkOfBranch.newTree(
newLit(n),
nnkStmtList.newTree(
nnkBreakStmt.newTree(
newEmptyNode()
)
)
)
result.add quote do:
let `endalmtid` = `cmpstsalmtid` - `strt7id`
var `cullaid` = `ca` + `strt0id`
`csstmnt`
# echo csstmnt[9].astGenRepr # see AST for a given case
# echo csstmnt[9].toStrLit # see code for a given case
# echo result.toStrLit # see entire produced code at compile time
proc benchSoE(): iterator(): int {.closure.} =
var cmpsts = newSeq[byte](16384)
let cmpstsa = cast[int](cmpsts[0].addr)
for _ in 0 ..< cLOOPS:
for i in 0 .. 254: # cull to square root of limit
if (cmpsts[i shr 3] and cBITMSK[i and 7]) == 0'u8: # if prime -> cull
its composites
let bp = i +% i +% 3
let swi = (i +% i) *% (i +% 3) +% 3
unrollLoops(cmpstsa, 16384, swi, bp)
return iterator(): int {.closure.} =
yield 2 # the only even prime
for i in 0 .. 131071: # separate iteration over results
if (cmpsts[i shr 3] and cBITMSK[i and 7]) == 0'u8:
yield i +% i +% 3
let strt = getMonotime()
let answr = benchSoE()
let elpsd = (getMonotime() - strt).inMilliseconds
var cnt = 0; for _ in answr(): cnt += 1
echo "Found ", cnt, " primes to 262146 for ", cLOOPS,
" loops in ", elpsd, " milliseconds."
Run
$nim --version
Nim Compiler Version 2.0.0 [Linux: amd64]
Compiled at 2023-08-01
Copyright (c) 2006-2023 by Andreas Rumpf
git hash: a488067a4130f029000be4550a0fb1b39e0e9e7c
active boot switches: -d:release
$nim r --d:release eratosthenes2.nim
Found 23000 primes to 262146 for 1225 loops in 78 milliseconds.
Run
My CPU: CPU: 11th Gen Intel i5-1135G7 (8) @ 4.200GHz (Laptop CPU, we can do
much better on an i9 desktop CPU I guess ^^).