Note that the running time does not change when the loop is partially
unrolled (here, 20 table-lookup updates per loop iteration), like this:
~~~
# Partially unrolled, table-driven update of the remainder `r` over `data`.
#
# Arguments:
#   ::Type{D} - unsigned type that `r` is converted to before it is xored
#               with an element of `data`.
#   r::A      - initial remainder (unsigned); updated once per consumed
#               element and returned at the end.
#   data      - indexable sequence of input elements, read in order from
#               index 1 (presumably of type D - confirm against the caller).
#   table     - lookup table with entries of type A, indexed by the xor of a
#               data element and the converted remainder.
#
# Returns the final value of `r`.
#
# NOTE: the loop runs div(length(data),20) times and consumes 20 elements per
# iteration, so any trailing length(data) % 20 elements are never read.
#
# (`$` is the bitwise xor operator in the Julia version this snippet targets.)
function signed_loop{D<:Unsigned, A<:Unsigned}(::Type{D}, r::A, data,
table::Vector{A})
# j is the index of the most recently consumed element of data; each
# `j+=1` below advances it and reads the next element.
local j = 0
# The loop variable i is unused; it only counts groups of 20 elements.
for i = 1 : div(length(data),20)
# Each of the 20 identical statements below performs one update step:
# shift r right by 8 bits and xor in the table entry selected by
# (next data element xor r converted to D). The `1 +` maps the zero-based
# xor result onto Julia's one-based array indexing.
# NOTE(review): the result depends on how convert(D, r) treats values of r
# that do not fit in D - confirm for the Julia version in use.
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
r = (r >>> 8) $ table[1 + (data[j+=1]$convert(D,r))]
end
return r
end
~~~