Hi guys,

I'm trying to implement the K-Means Clustering Algorithm, but I'm having 
some problems. The function I wrote:

"""
    kcluster(data; distance = pearson, k = 4, maxiter = 100)

Partition the rows of `data` into `k` clusters with the k-means algorithm and
return a `Vector{Vector{Int}}` whose `i`-th entry lists the row indices of
`data` assigned to cluster `i` (an entry may be empty).

# Arguments
- `data`: two-dimensional array; each row is one individual and each column one
  coordinate.

# Keywords
- `distance`: function `distance(centroid, row)` returning a value comparable
  with `<`; smaller means closer. The default `pearson` is not defined in this
  function and must be available in the calling scope.
- `k`: number of clusters (at least 1).
- `maxiter`: maximum number of assignment/update iterations.

# Throws
- `ArgumentError` if `k < 1` or `data` has no rows.

Centroids are initialised randomly, so results may differ between calls.
"""
function kcluster(data; distance = pearson, k = 4, maxiter = 100)
    k >= 1 || throw(ArgumentError("k must be at least 1, got $k"))
    numrows, numcols = size(data, 1), size(data, 2)
    numrows > 0 || throw(ArgumentError("data must contain at least one row"))

    # (min, max) of each column of `data`
    ranges = [extrema(view(data, :, j)) for j in 1:numcols]

    # Create k randomly placed centroids, one per row of `centroids`. Each
    # coordinate is drawn uniformly from [min, max] of its column; the
    # parentheses around (max - min) are essential, otherwise the expression
    # collapses to rand() * max.
    centroids = [rand() * (ranges[j][2] - ranges[j][1]) + ranges[j][1]
                 for i in 1:k, j in 1:numcols]

    lastmatches = Vector{Vector{Int}}()
    for t in 1:maxiter
        println("Iteration $t")
        bestmatches = [Int[] for _ in 1:k]

        # Assign every row to its closest centroid (ties go to the lowest index)
        for j in 1:numrows
            row = data[j, :]
            bestmatch = 1
            bestd = distance(centroids[1, :], row)

            for i in 2:k
                d = distance(centroids[i, :], row)
                if d < bestd
                    bestd = d
                    bestmatch = i
                end
            end

            push!(bestmatches[bestmatch], j)
        end

        # Converged: the assignment did not change since the last iteration
        if lastmatches == bestmatches
            return lastmatches
        end

        lastmatches = bestmatches

        # Move each centroid to the average of its members; a centroid without
        # members stays where it is.
        for i in 1:k
            members = bestmatches[i]
            isempty(members) && continue
            # `vec` turns the 1×numcols column sums into a vector matching the
            # shape of the `centroids[i, :]` slice.
            centroids[i, :] = vec(sum(data[members, :]; dims = 1)) ./ length(members)
        end
    end

    return lastmatches
end

The "data" argument is a two-dimensional Array, with each row representing an 
individual and each column one coordinate of its position in space.

The problem is the following: the same algorithm in Python (with the same 
"data" input) usually stops around iteration #5, whereas in Julia it always 
runs to iteration #100. The non-empty clusters in Python are also smaller, 
and therefore there are fewer empty clusters. Can somebody find out why it 
never enters the "if lastmatches == bestmatches" block?

Sorry about my poor English

Reply via email to