As @Stefan mentioned the string stuff can be slow. My `MemSlice` techniques might be more broadly interesting. The tokenizer below only works for "strict" one-character delimiting, not, e.g., repeated whitespace. Also, I agree completely with @arnetheduck that algorithm choices matter more than languages for Nim vs C comparisons.
That said, the below Nim is faster than the C version on my machine (215 ms for C, 176 ms for Nim). @enthus1ast can try that on his data and report back if he likes. There are several little `defined` switches to play with. FWIW, the C version referenced is a rather lame one (fixed 64 KiEntry hash table with external chaining resolution). I didn't try profile-guided-optimization which could improve the Nim results some. Since the C version "cheats" a bit by pre-sizing its hash table, my version below takes any estimate on the number of unique words that will be found as input and pre-sizes the Nim table to fit that estimate. Also, **beware** ` CountTable.sort()`. It uses Shell's sort, not a fast algorithm. They're always integers and typically small. A radix sort would be the best choice, but regular old merge sort from `algorithm` is a lot better. On my test input with 291,000 unique words, the version using `count.sort` took 120 seconds instead of 176 milliseconds, almost 3 full orders of magnitude slower. Times seemed insensitive to hash functions (order 1% variations), though both the sorting and hash-sensitivity will be data set dependent, obviously. For very high entropy distributions you are going to want to avoid the current `CountTable.sort()`. Also, `CountTable` itself was a little slower than regular `Table` using the same sorting procedure. So, you might want to just avoid `CountTable` in general in peformance-sensitive cases. import memfiles, hashes, tables, os, algorithm, strutils proc hash*(ms: MemSlice): Hash {.inline.} = when defined(useNimHash): result = hashData(ms.data, ms.size) elif defined(useCB2): proc c_memhash(h0: int, a: pointer, size: int): int {. nodecl, importc: "memhash_cb2", header: getHomeDir() & "s/cb/hash_cb2.c".} result = c_memhash(0, ms.data, ms.size) elif defined(useCB4): proc c_memhash(h0: int, a: pointer, size: int): int {. nodecl, importc: "memhash_cb4", header: getHomeDir() & "s/cb/hash_cb4.c".} result = c_memhash(0, ms.data, ms.size) else: result = 0 for i in 0 ..< ms.size: result = (result + (result shl 5)) xor int(cast[cstring](ms.data)[i]) proc split*(ms: MemSlice, fields: var seq[MemSlice], nA: int, sep=' ', size = -1): int = proc c_memchr(s: cstring, c: char, n: csize): cstring {. importc: "memchr", header: "<string.h>".} proc `+!`(p: cstring, i: int): cstring {.inline.} = cast[cstring](cast[int](p) +% i) proc `-!`(p, q: cstring): int {.inline.} = cast[int](p) -% cast[int](q) var b = cast[cstring](ms.data) var eob = cast[cstring](ms.data) +! (if size != -1: size else: ms.size) var e = c_memchr(b, sep, eob -! b) while e != nil: fields[result].data = cast[pointer](b) fields[result].size = e -! b result += 1 if result == nA - 2: #max ix nA-1; Need one more slot for final field break b = e +! 1 e = c_memchr(b, sep, eob -! b) fields[result].data = cast[pointer](b) fields[result].size = eob -! b result += 1 proc main(input: string, sizeGuess: int) = const mxCols = 1024 #file format-dependent max when defined(useCountTab): var count = initCountTable[MemSlice](rightSize(sizeGuess)) else: var count = initTable[MemSlice, int](rightSize(sizeGuess)) var fields = newSeq[MemSlice](mxCols) for line in memSlices(memfiles.open(input)): fields.setLen(split(line, fields, mxCols, ' ')) for word in fields: when defined(useCountTab): count.inc(word) else: inc(count.mgetOrPut(word, 0)) stderr.write count.len, " unique words\n" when defined(useCountTab) and defined(useCountSort): count.sort() #uses Shell's sort internally for key, cnt in count: stdout.write cnt, " " discard stdout.writeBuffer(cast[cstring](key.data), key.size) stdout.write "\n" else: var answer = newSeq[tuple[cnt: int, key: MemSlice]](count.len) var i = 0 for key, cnt in count.pairs(): answer[i].cnt = cnt answer[i].key = key inc(i) proc myCmp(x, y: tuple[cnt: int, key: MemSlice]): int = - cmp(x.cnt, y.cnt) answer.sort(myCmp) for ans in answer: stdout.write ans.cnt, " " discard stdout.writeBuffer(cast[cstring](ans.key.data), ans.key.size) stdout.write "\n" #Must call with (seekable/mmapable) input file name and a size guess main(paramStr(1), parseInt(paramStr(2))) Run