Here it is copy/pasted from Base, in case it's useful for you for now:
using Base.Cartesian, Base.Prehashed
@ngenerate N typeof(A) function indunique{T,N}(A::AbstractArray{T,N}, dim::
Int)
1 <= dim <= N || return copy(A)
hashes = zeros(Uint, size(A, dim))
# Compute hash for each row
k = 0
@nloops N i A d->(if d == dim; k = i_d; end) begin
@inbounds hashes[k] = hash(hashes[k], hash((@nref N A i)))
end
# Collect index of first row for each hash
uniquerow = Array(Int, size(A, dim))
firstrow = Dict{Prehashed,Int}()
for k = 1:size(A, dim)
uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k)
end
uniquerows = collect(values(firstrow))
# Check for collisions
collided = falses(size(A, dim))
@inbounds begin
@nloops N i A d->(if d == dim
k = i_d
j_d = uniquerow[k]
else
j_d = i_d
end) begin
if (@nref N A j) != (@nref N A i)
collided[k] = true
end
end
end
if any(collided)
nowcollided = BitArray(size(A, dim))
while any(collided)
# Collect index of first row for each collided hash
empty!(firstrow)
for j = 1:size(A, dim)
collided[j] || continue
uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j)
end
for v in values(firstrow)
push!(uniquerows, v)
end
# Check for collisions
fill!(nowcollided, false)
@nloops N i A d->begin
if d == dim
k = i_d
j_d = uniquerow[k]
(!collided[k] || j_d == k) && continue
else
j_d = i_d
end
end begin
if (@nref N A j) != (@nref N A i)
nowcollided[k] = true
end
end
(collided, nowcollided) = (nowcollided, collided)
end
end
sort!(uniquerows)
end
On Monday, August 11, 2014 2:43:24 PM UTC-4, Simon Kornblith wrote:
>
> unique with a dim argument actually computes this as byproduct but does
> not return it. All we need is an API.
>
> On Monday, August 11, 2014 1:27:57 PM UTC-4, Jacob Quinn wrote:
>>
>> There's an open issue about it here:
>> https://github.com/JuliaLang/julia/issues/1845
>>
>> I've also played around with a simple version based on a Set:
>>
>> function uniqueind(C)
>> out = Int[]
>> seen = Set{eltype(C)}()
>> for i = 1:length(C)
>> @inbounds x = C[i]
>> if !in(x, seen)
>> push!(seen, x)
>> push!(out, i)
>> end
>> end
>> out
>> end
>>
>>
>> On Mon, Aug 11, 2014 at 1:22 PM, Peter Simon <[email protected]> wrote:
>>
>>> Greetings.
>>>
>>> I didn't find any built-in function to return the indices for the unique
>>> rows in a matrix, so I tried rolling my own. I looked at, but didn't
>>> understand, the highly macro-ized, arbitrary dimensional code in the
>>> built-in unique() function. I did discover hashing, though, from looking
>>> at that code. Here is what I have so far:
>>>
>>> function uniquerowindices2{T}(a::Matrix{T})
>>> rowhash = Uint64[hash(a[k,:]) for k = 1:size(a,1)]
>>> hu = unique(rowhash)
>>> ind = zeros(Int,length(hu))
>>> k1 = 1
>>> for m = 1:length(ind)
>>> for k = k1:length(rowhash)
>>> if rowhash[k] == hu[m]
>>> ind[m] = k
>>> k1 = k
>>> break
>>> end
>>> end
>>> end
>>> return ind
>>> end
>>>
>>> This is quite a bit faster than my first effort, where I was comparing
>>> rows, rather than row hashes, but I'm betting someone out there has a much
>>> better way. Thanks in advance,
>>>
>>> --Peter
>>>
>>
>>