Levenshtein distance, based on my old Pascal source. In some cases, faster than
a implementation in strutils module by 10%.
(Enj|Destr)oy!
import strutils, os
import nimbench
proc editDistance2*(a, b: string): int = #{.noSideEffect.} =
## Returns the edit distance between `a` and `b`.
##
## This uses the `Levenshtein`:idx: distance algorithm with only a linear
## memory overhead. This implementation is highly optimized!
var len1 = a.len
var len2 = b.len
if len1 > len2:
# make `b` the longer string
return editDistance(b, a)
# strip common prefix:
var s = 0
while a[s] == b[s] and a[s] != '\0':
inc(s)
dec(len1)
dec(len2)
# strip common suffix:
while len1 > 0 and len2 > 0 and a[s+len1-1] == b[s+len2-1]:
dec(len1)
dec(len2)
# trivial cases:
if len1 == 0: return len2
if len2 == 0: return len1
# another special case:
if len1 == 1:
for j in s..s+len2-1:
if a[s] == b[j]: return len2 - 1
return len2
inc(len1)
inc(len2)
var row: seq[int]
newSeq(row, len2)
for i in 0..len2 - 1: row[i] = i
for i in 1 .. len1- 1:
var char1 = a[s + i - 1]
var prevCost = i - 1;
var newCost = i;
for j in 1 .. len2 - 1:
var char2 = b[s + j - 1]
if char1 == char2:
newCost = prevCost
else:
newCost = min(newCost, min(prevCost, row[j])) + 1
prevCost = row[j]
row[j] = newCost
result = row[len2 - 1]
var s1: string = "0123456789"
var s2: string = "0123455779"
if paramCount() > 1:
if fileExists(paramStr(1)):
s1 = readFile(paramStr(1))
else:
s1 = paramStr(1)
if fileExists(paramStr(2)):
s2 = readFile(paramStr(2))
else:
s2 = paramStr(2)
echo "editDistance: ", editDistance(s1, s2)
echo "editDistance2: ", editDistance2(s1, s2)
bench(editDistance, m):
var d = 0
for i in 1..m:
d = editDistance(s1, s2)
doNotOptimizeAway(d)
benchRelative(editDistance2, m):
var d = 0
for i in 1..m:
d = editDistance2(s1, s2)
doNotOptimizeAway(d)
runBenchmarks()