I was planning to add a Julia entry for LZW compression at Rosetta Code,
see http://rosettacode.org/wiki/LZW_compression , and did a first Julia
implementation below. It allocates quite some mem though so ideas for
speeding it up and/or clarifying the code before I submit it would be
appreciated. Thanks!
julia> function makedict(size)
dict = Dict{AbstractString,UInt32}()
[dict[string(Char(i))] = i for i in 0:(size-1)]
dict
end
makedict (generic function with 1 method)
julia> function lzw_compress(uncompressed)
dict, w, result = makedict(256), "", Int64[]
for i in 1:length(uncompressed)
wc = w * uncompressed[i:i]
if haskey(dict, wc)
w = wc
else
push!(result, dict[w])
dict[wc] = length(dict)
w = uncompressed[i:i]
end
end
isempty(w) || push!(result, dict[w])
result
end
lzw_compress (generic function with 1 method)
julia> t = "TOBEORNOTTOBEORTOBEORNOT"
"TOBEORNOTTOBEORTOBEORNOT"
julia> compressed = lzw_compress(t)
16-element Array{Int64,1}:
84
79
66
69
79
82
78
79
84
256
258
260
265
259
261
263
julia> tl = t ^ 10000;
julia> @time lzw_compress(tl);
0.101714 seconds (971.11 k allocations: 54.181 MB, 28.92% gc time)