I wonder if sequtils.deduplicate() should be renamed to deduplicated(), since it
returns a new seq, like sorted()? And maybe it should be noted that it is O(n^2),
which is fine for small collections, but not so nice for large ones.
And maybe we should add an O(n) deduplicate to the stdlib -- the sets module is
really fast, as shown below.
import sets
import random, times
import sequtils # deduplicate
# O(n) expected time (hashing).
proc uniqCopy[T](s: openArray[T]): seq[T] =
  ## Returns a copy of `s` with duplicates removed, preserving the order of
  ## first occurrences. Handles empty input (returns an empty seq).
  let h = s.toOrderedSet
  # newSeqOfCap allocates capacity without the zero-initialization that
  # newSeq(result, h.len) performed, and `add` removes the manual index.
  result = newSeqOfCap[T](h.len)
  for el in h:
    result.add el
# O(n) expected time (hashing).
proc uniq[T](s: var seq[T]) =
  ## Removes duplicates from `s` in place, keeping the first occurrence of
  ## each value in its original relative order.
  let unique = s.toOrderedSet
  var pos = 0
  # Overwrite the front of `s` with the deduplicated values, then drop
  # the leftover tail.
  for value in unique:
    s[pos] = value
    inc pos
  s.setLen(pos)
# O(n^2)
proc xdeduplicate[T](s: var seq[T]) =
  ## Removes duplicates from `s` in place, keeping the first occurrence of
  ## each value. Quadratic: each element is compared against every element
  ## already kept. No extra allocation, unlike the set-based variants.
  var i, j: int            # i scans the original seq; j is the write cursor
  while i < s.len:
    s[j] = s[i]            # tentatively keep s[i] at the write position
    var k = j
    while k > 0:           # scan the kept prefix s[0..j-1] for a duplicate
      dec k
      if s[k] == s[j]:
        dec(j)             # duplicate found: cancel the tentative keep
        break              # (the inc(j) below restores j to its old value)
    inc(j)
    inc(i)
  s.setlen(j)              # truncate to the deduplicated prefix
# --- Demo: all three variants agree on a small input -------------------
var a = @[1, 3, 1, 2, 2, 5, 2]
var b = a                      # seqs have value semantics: b is a copy
echo a.uniqCopy
a.xdeduplicate
b.uniq
echo a
echo b

# --- Benchmark: 1000 random ints in 0..999 -----------------------------
# NOTE(review): unseeded random() gives a fixed sequence, which keeps the
# benchmark input reproducible across runs.
var t: float                   # start timestamp, set before each run
var x: array[1000, int]
for el in mitems(x): el = random(1000)

var z = @x
t = cpuTime()
z = z.deduplicate
# Fixed typo in the output labels: "dedublicate" -> "deduplicate".
echo "sequtils.deduplicate: ", cpuTime() - t

z = @x
t = cpuTime()
z.xdeduplicate
echo "inplace deduplicate: ", cpuTime() - t

z = @x
t = cpuTime()
z = z.uniqCopy
echo "uniqCopy: ", cpuTime() - t

z = @x
t = cpuTime()
z.uniq
echo "uniq: ", cpuTime() - t
# nim c -d:release t.nim
# Output
# @[1, 3, 2, 5]
# @[1, 3, 2, 5]
# @[1, 3, 2, 5]
# sequtils.deduplicate: 0.0001410000000000001
# inplace deduplicate: 0.0001329999999999999
# uniqCopy: 3.099999999999999e-05
# uniq: 2.700000000000011e-05