Yann Moisan created MAHOUT-1090:
-----------------------------------

             Summary: Add a similarity implementation that computes cosine over 
all entries
                 Key: MAHOUT-1090
                 URL: https://issues.apache.org/jira/browse/MAHOUT-1090
             Project: Mahout
          Issue Type: New Feature
          Components: Collaborative Filtering
    Affects Versions: 0.7
            Reporter: Yann Moisan
            Assignee: Sean Owen
            Priority: Minor


The aim of this feature is to use a recommender to compute the same similarities 
as the Hadoop RowSimilarityJob. It will be faster for small datasets because it 
runs in memory. So we need an in-memory implementation of cosine similarity that 
computes the cosine over all entries (UncenteredCosineSimilarity uses only the 
entries that are present in both vectors).

Here is my implementation (doesn't support refresh for the moment):

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.similarity.AbstractItemSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;

/**
 * In-memory item-item cosine similarity computed over <em>all</em> preference
 * entries of both items.
 *
 * <p>The dot product only receives contributions from users who rated both
 * items, but each item's norm is taken over every preference recorded for
 * that item, including those from users who rated only one of the two.
 *
 * <p>Refreshing is not supported: {@link #refresh(Collection)} always throws.
 */
public class CosineSimilarity extends AbstractItemSimilarity {

    /**
     * Creates a similarity backed by the given data model.
     *
     * @param dataModel model from which per-item preferences are read
     */
    public CosineSimilarity(DataModel dataModel) {
        super(dataModel);
    }

    /**
     * Not supported by this implementation.
     *
     * @param alreadyRefreshed ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    public void refresh(Collection<Refreshable> alreadyRefreshed) {
        throw new UnsupportedOperationException("refresh is not supported by CosineSimilarity");
    }

    /**
     * Computes the cosine of the angle between the preference vectors of two items.
     *
     * @param itemID1 first item
     * @param itemID2 second item
     * @return the cosine similarity, limited to the range [-1, 1], or
     *         {@link Double#NaN} when either item's preference vector has zero
     *         norm (for example, when it has no preferences)
     * @throws TasteException if the data model fails to supply the preferences
     */
    @Override
    public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
        DataModel model = getDataModel();
        PreferenceArray xPrefs = model.getPreferencesForItem(itemID1);
        PreferenceArray yPrefs = model.getPreferencesForItem(itemID2);

        double sumXY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;

        // Index the first item's values by user ID so that users who rated both
        // items can be matched while scanning the second item.
        int xLength = xPrefs.length();
        Map<Long, Float> xValueByUser = new HashMap<Long, Float>();
        for (int i = 0; i < xLength; i++) {
            float x = xPrefs.getValue(i);
            xValueByUser.put(xPrefs.getUserID(i), x);
            // Multiply in double precision: a float product loses precision and
            // can overflow before it is widened.
            sumX2 += (double) x * x;
        }

        int yLength = yPrefs.length();
        for (int i = 0; i < yLength; i++) {
            float y = yPrefs.getValue(i);
            Float x = xValueByUser.get(yPrefs.getUserID(i));
            if (x != null) {
                sumXY += x.doubleValue() * y;
            }
            sumY2 += (double) y * y;
        }

        double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2);
        if (denominator == 0.0) {
            // A zero-norm vector has no direction, so the cosine is undefined.
            // This is the same value the bare 0/0 division would produce.
            return Double.NaN;
        }

        double similarity = sumXY / denominator;
        // Floating-point rounding can push the ratio marginally outside [-1, 1].
        if (similarity > 1.0) {
            return 1.0;
        }
        if (similarity < -1.0) {
            return -1.0;
        }
        return similarity;
    }

    /**
     * Computes the similarity between one item and each of several other items.
     *
     * @param itemID1  item compared against every entry of {@code itemID2s}
     * @param itemID2s items to compare with {@code itemID1}
     * @return similarities in the same order as {@code itemID2s}
     * @throws TasteException if the data model fails to supply the preferences
     */
    @Override
    public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
        int length = itemID2s.length;
        double[] result = new double[length];
        for (int i = 0; i < length; i++) {
            result[i] = itemSimilarity(itemID1, itemID2s[i]);
        }
        return result;
    }

}


--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to