Here it is copy/pasted from Base, in case it's useful for you for now:

using Base.Cartesian, Base.Prehashed
@ngenerate N typeof(A) function indunique{T,N}(A::AbstractArray{T,N}, dim::
Int)
    1 <= dim <= N || return copy(A)
    hashes = zeros(Uint, size(A, dim))

    # Compute hash for each row
    k = 0
    @nloops N i A d->(if d == dim; k = i_d; end) begin
       @inbounds hashes[k] = hash(hashes[k], hash((@nref N A i)))
    end

    # Collect index of first row for each hash
    uniquerow = Array(Int, size(A, dim))
    firstrow = Dict{Prehashed,Int}()
    for k = 1:size(A, dim)
        uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k)
    end
    uniquerows = collect(values(firstrow))

    # Check for collisions
    collided = falses(size(A, dim))
    @inbounds begin
        @nloops N i A d->(if d == dim
                              k = i_d
                              j_d = uniquerow[k]
                          else
                              j_d = i_d
                          end) begin
            if (@nref N A j) != (@nref N A i)
                collided[k] = true
            end
        end
    end

    if any(collided)
        nowcollided = BitArray(size(A, dim))
        while any(collided)
            # Collect index of first row for each collided hash
            empty!(firstrow)
            for j = 1:size(A, dim)
                collided[j] || continue
                uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j)
            end
            for v in values(firstrow)
                push!(uniquerows, v)
            end

            # Check for collisions
            fill!(nowcollided, false)
            @nloops N i A d->begin
                                 if d == dim
                                     k = i_d
                                     j_d = uniquerow[k]
                                     (!collided[k] || j_d == k) && continue
                                 else
                                     j_d = i_d
                                 end
                             end begin
                if (@nref N A j) != (@nref N A i)
                    nowcollided[k] = true
                end
            end
            (collided, nowcollided) = (nowcollided, collided)
        end
    end

    sort!(uniquerows)
end

On Monday, August 11, 2014 2:43:24 PM UTC-4, Simon Kornblith wrote:
>
> unique with a dim argument actually computes this as byproduct but does 
> not return it. All we need is an API.
>
> On Monday, August 11, 2014 1:27:57 PM UTC-4, Jacob Quinn wrote:
>>
>> There's an open issue about it here: 
>> https://github.com/JuliaLang/julia/issues/1845
>>
>> I've also played around with a simple version based on a Set:
>>
>> function uniqueind(C)
>>     out = Int[]
>>     seen = Set{eltype(C)}()
>>     for i = 1:length(C)
>>         @inbounds x = C[i]
>>         if !in(x, seen)
>>             push!(seen, x)
>>             push!(out, i)
>>         end
>>     end
>>     out
>> end
>>
>>
>> On Mon, Aug 11, 2014 at 1:22 PM, Peter Simon <[email protected]> wrote:
>>
>>> Greetings.
>>>
>>> I didn't find any built-in function to return the indices for the unique 
>>> rows in a matrix, so I tried rolling my own.  I looked at, but didn't 
>>> understand, the highly macro-ized, arbitrary dimensional code in the 
>>> built-in unique() function.  I did discover hashing, though, from looking 
>>> at that code.  Here is what I have so far:
>>>
>>>  function uniquerowindices2{T}(a::Matrix{T})
>>>      rowhash = Uint64[hash(a[k,:]) for k = 1:size(a,1)]
>>>      hu = unique(rowhash)
>>>      ind = zeros(Int,length(hu))
>>>      k1 = 1
>>>      for m = 1:length(ind)
>>>          for k = k1:length(rowhash)
>>>              if rowhash[k] == hu[m]
>>>                  ind[m] = k
>>>                  k1 = k
>>>                  break
>>>              end
>>>          end
>>>      end
>>>      return ind
>>>  end
>>>
>>> This is quite a bit faster than my first effort, where I was comparing 
>>> rows, rather than row hashes, but I'm betting someone out there has a much 
>>> better way.  Thanks in advance,
>>>
>>> --Peter    
>>>
>>
>>

Reply via email to