/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.cf.taste.hadoop.item;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.MutableRecommendedItem;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
import org.apache.mahout.cf.taste.impl.TasteTestCase;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
import org.apache.mahout.math.VarLongWritable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.MathHelper;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CooccurrenceCountSimilarity;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;
import org.apache.mahout.math.map.OpenIntLongHashMap;
import org.easymock.IArgumentMatcher;
import org.easymock.EasyMock;
import org.junit.Test;

/**
 * Tests the mappers and reducers used by {@link RecommenderJob} in isolation against EasyMock contexts,
 * and runs a few small end-to-end jobs on a four-user, four-item data set.
 *
 * The custom argument matchers in this class follow the EasyMock convention of registering themselves via
 * {@link EasyMock#reportMatcher(IArgumentMatcher)} and returning {@code null} as a placeholder argument.
 */
public class RecommenderJobTest extends TasteTestCase {

  /**
   * tests {@link ItemIDIndexMapper}
   */
  @Test
  public void testItemIDIndexMapper() throws Exception {
    Mapper<LongWritable,Text, VarIntWritable, VarLongWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(new VarIntWritable(TasteHadoopUtils.idToIndex(789L)), new VarLongWritable(789L));
    EasyMock.replay(context);

    new ItemIDIndexMapper().map(new LongWritable(123L), new Text("456,789,5.0"), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link ItemIDIndexReducer}; of the three item IDs sharing one index, only 45 is expected to be written
   */
  @Test
  public void testItemIDIndexReducer() throws Exception {
    Reducer<VarIntWritable, VarLongWritable, VarIntWritable,VarLongWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);

    context.write(new VarIntWritable(123), new VarLongWritable(45L));
    EasyMock.replay(context);

    new ItemIDIndexReducer().reduce(new VarIntWritable(123), Arrays.asList(new VarLongWritable(67L),
        new VarLongWritable(89L), new VarLongWritable(45L)), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link ToItemPrefsMapper}
   */
  @Test
  public void testToItemPrefsMapper() throws Exception {
    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(new VarLongWritable(12L), new EntityPrefWritable(34L, 1.0f));
    context.write(new VarLongWritable(56L), new EntityPrefWritable(78L, 2.0f));
    EasyMock.replay(context);

    ToItemPrefsMapper mapper = new ToItemPrefsMapper();
    mapper.map(new LongWritable(123L), new Text("12,34,1"), context);
    mapper.map(new LongWritable(456L), new Text("56,78,2"), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link ToItemPrefsMapper} using boolean data
   */
  @Test
  public void testToItemPrefsMapperBooleanData() throws Exception {
    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(new VarLongWritable(12L), new VarLongWritable(34L));
    context.write(new VarLongWritable(56L), new VarLongWritable(78L));
    EasyMock.replay(context);

    ToItemPrefsMapper mapper = new ToItemPrefsMapper();
    setField(mapper, "booleanData", true);
    mapper.map(new LongWritable(123L), new Text("12,34"), context);
    mapper.map(new LongWritable(456L), new Text("56,78"), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link ToUserVectorsReducer}
   */
  @Test
  public void testToUserVectorReducer() throws Exception {
    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);
    Counter userCounters = EasyMock.createMock(Counter.class);

    EasyMock.expect(context.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(userCounters);
    userCounters.increment(1);
    context.write(EasyMock.eq(new VarLongWritable(12L)), MathHelper.vectorMatches(
        MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 2.0)));

    EasyMock.replay(context, userCounters);

    Collection<VarLongWritable> varLongWritables = Lists.newLinkedList();
    varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
    varLongWritables.add(new EntityPrefWritable(56L, 2.0f));

    new ToUserVectorsReducer().reduce(new VarLongWritable(12L), varLongWritables, context);

    EasyMock.verify(context, userCounters);
  }

  /**
   * tests {@link ToUserVectorsReducer} using boolean data
   */
  @Test
  public void testToUserVectorReducerWithBooleanData() throws Exception {
    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);
    Counter userCounters = EasyMock.createMock(Counter.class);

    EasyMock.expect(context.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(userCounters);
    userCounters.increment(1);
    context.write(EasyMock.eq(new VarLongWritable(12L)), MathHelper.vectorMatches(
        MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 1.0)));

    EasyMock.replay(context, userCounters);

    new ToUserVectorsReducer().reduce(new VarLongWritable(12L), Arrays.asList(new VarLongWritable(34L),
        new VarLongWritable(56L)), context);

    EasyMock.verify(context, userCounters);
  }

  /**
   * tests {@link SimilarityMatrixRowWrapperMapper}; the entry at the row's own index (12) is not part of the
   * expected output vector
   */
  @Test
  public void testSimilarityMatrixRowWrapperMapper() throws Exception {
    Mapper<IntWritable,VectorWritable,VarIntWritable,VectorOrPrefWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(EasyMock.eq(new VarIntWritable(12)), vectorOfVectorOrPrefWritableMatches(MathHelper.elem(34, 0.5),
        MathHelper.elem(56, 0.7)));

    EasyMock.replay(context);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(12, 1.0);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    new SimilarityMatrixRowWrapperMapper().map(new IntWritable(12), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }

  /**
   * renders expected vector elements as {@code index:value} pairs, used in the failure messages of the matchers
   */
  private static String describeElements(Vector.Element... elements) {
    StringBuilder description = new StringBuilder("[");
    for (int n = 0; n < elements.length; n++) {
      if (n > 0) {
        description.append(", ");
      }
      description.append(elements[n].index()).append(':').append(elements[n].get());
    }
    return description.append(']').toString();
  }

  /**
   * verifies the {@link Vector} included in a {@link VectorOrPrefWritable}
   *
   * @param elements the elements the wrapped vector must consist of
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  private static VectorOrPrefWritable vectorOfVectorOrPrefWritableMatches(final Vector.Element... elements) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof VectorOrPrefWritable) {
          Vector v = ((VectorOrPrefWritable) argument).getVector();
          return MathHelper.consistsOf(v, elements);
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("VectorOrPrefWritable(vector=").append(describeElements(elements)).append(')');
      }
    });
    return null;
  }

  /**
   * tests {@link UserVectorSplitterMapper}
   */
  @Test
  public void testUserVectorSplitterMapper() throws Exception {
    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
    context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));

    EasyMock.replay(context);

    UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
    setField(mapper, "maxPrefsPerUserConsidered", 10);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }

  /**
   * verifies a preference in a {@link VectorOrPrefWritable}
   *
   * @param userID the expected user
   * @param prefValue the expected preference value, compared exactly
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatches(final long userID, final float prefValue) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof VectorOrPrefWritable) {
          VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
          return pref.getUserID() == userID && pref.getValue() == prefValue;
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("VectorOrPrefWritable(userID=").append(userID).append(", value=").append(prefValue).append(')');
      }
    });
    return null;
  }

  /**
   * tests {@link UserVectorSplitterMapper} in the special case that some userIDs shall be excluded
   */
  @Test
  public void testUserVectorSplitterMapperUserExclusion() throws Exception {
    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
    context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));

    EasyMock.replay(context);

    FastIDSet usersToRecommendFor = new FastIDSet();
    usersToRecommendFor.add(123L);

    UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
    setField(mapper, "maxPrefsPerUserConsidered", 10);
    setField(mapper, "usersToRecommendFor", usersToRecommendFor);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
    // user 456 is not in usersToRecommendFor, the strict mock fails the test if anything is written for it
    mapper.map(new VarLongWritable(456L), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link UserVectorSplitterMapper} in the special case that the number of preferences to be considered
   * is less than the number of available preferences
   */
  @Test
  public void testUserVectorSplitterMapperOnlySomePrefsConsidered() throws Exception {
    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
        EasyMock.createMock(Mapper.Context.class);

    context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatchesNaN(123L));
    context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));

    EasyMock.replay(context);

    UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
    setField(mapper, "maxPrefsPerUserConsidered", 1);

    RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    vector.set(34, 0.5);
    vector.set(56, 0.7);

    mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);

    EasyMock.verify(context);
  }

  /**
   * verifies that a preference value is NaN in a {@link VectorOrPrefWritable}
   *
   * @param userID the expected user
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatchesNaN(final long userID) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof VectorOrPrefWritable) {
          VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
          return pref.getUserID() == userID && Float.isNaN(pref.getValue());
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("VectorOrPrefWritable(userID=").append(userID).append(", value=NaN)");
      }
    });
    return null;
  }

  /**
   * tests {@link ToVectorAndPrefReducer}
   */
  @Test
  public void testToVectorAndPrefReducer() throws Exception {
    Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);

    context.write(EasyMock.eq(new VarIntWritable(1)), vectorAndPrefsWritableMatches(Arrays.asList(123L, 456L),
        Arrays.asList(1.0f, 2.0f), MathHelper.elem(3, 0.5), MathHelper.elem(7, 0.8)));

    EasyMock.replay(context);

    Vector similarityColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumn.set(3, 0.5);
    similarityColumn.set(7, 0.8);

    VectorOrPrefWritable itemPref1 = new VectorOrPrefWritable(123L, 1.0f);
    VectorOrPrefWritable itemPref2 = new VectorOrPrefWritable(456L, 2.0f);
    VectorOrPrefWritable similarities = new VectorOrPrefWritable(similarityColumn);

    new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), Arrays.asList(itemPref1, itemPref2, similarities),
        context);

    EasyMock.verify(context);
  }

  /**
   * verifies a {@link VectorAndPrefsWritable}
   *
   * @param userIDs the expected user IDs, in order
   * @param prefValues the expected preference values, in order
   * @param elements the elements the similarity vector must consist of
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  private static VectorAndPrefsWritable vectorAndPrefsWritableMatches(final List<Long> userIDs,
      final List<Float> prefValues, final Vector.Element... elements) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof VectorAndPrefsWritable) {
          VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;

          if (!vectorAndPrefs.getUserIDs().equals(userIDs)) {
            return false;
          }
          if (!vectorAndPrefs.getValues().equals(prefValues)) {
            return false;
          }
          return MathHelper.consistsOf(vectorAndPrefs.getVector(), elements);
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("VectorAndPrefsWritable(userIDs=").append(userIDs)
            .append(", values=").append(prefValues)
            .append(", vector=").append(describeElements(elements)).append(')');
      }
    });
    return null;
  }

  /**
   * tests {@link ToVectorAndPrefReducer} in the error case that two similarity column vectors are supplied for the
   * same item (which should never happen)
   */
  @Test
  public void testToVectorAndPrefReducerExceptionOn2Vectors() throws Exception {
    Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);

    EasyMock.replay(context);

    Vector similarityColumn1 = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    Vector similarityColumn2 = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);

    VectorOrPrefWritable similarities1 = new VectorOrPrefWritable(similarityColumn1);
    VectorOrPrefWritable similarities2 = new VectorOrPrefWritable(similarityColumn2);

    try {
      new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), Arrays.asList(similarities1, similarities2), context);
      fail();
    } catch (IllegalStateException e) {
      // good
    }

    EasyMock.verify(context);
  }

  /**
   * tests {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterMapper}; each "user,item" line is expected to be
   * emitted as (item, user)
   */
  @Test
  public void testItemFilterMapper() throws Exception {

    Mapper<LongWritable,Text,VarLongWritable,VarLongWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    context.write(new VarLongWritable(34L), new VarLongWritable(12L));
    context.write(new VarLongWritable(78L), new VarLongWritable(56L));

    EasyMock.replay(context);

    ItemFilterMapper mapper = new ItemFilterMapper();
    mapper.map(null, new Text("12,34"), context);
    mapper.map(null, new Text("56,78"), context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer}
   */
  @Test
  public void testItemFilterAsVectorAndPrefsReducer() throws Exception {
    Reducer<VarLongWritable,VarLongWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
        EasyMock.createMock(Reducer.Context.class);

    int itemIDIndex = TasteHadoopUtils.idToIndex(123L);
    context.write(EasyMock.eq(new VarIntWritable(itemIDIndex)), vectorAndPrefsForFilteringMatches(123L, 456L, 789L));

    EasyMock.replay(context);

    new ItemFilterAsVectorAndPrefsReducer().reduce(new VarLongWritable(123L), Arrays.asList(new VarLongWritable(456L),
        new VarLongWritable(789L)), context);

    EasyMock.verify(context);
  }

  /**
   * verifies a {@link VectorAndPrefsWritable} as produced for filtering: the vector must hold exactly one
   * non-default entry, a NaN at the index of the item, and the user IDs must be the given ones in any order
   *
   * @param itemID the item to be filtered
   * @param userIDs the users the item is filtered for
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  static VectorAndPrefsWritable vectorAndPrefsForFilteringMatches(final long itemID, final long... userIDs) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof VectorAndPrefsWritable) {
          VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;
          Vector vector = vectorAndPrefs.getVector();
          if (vector.getNumNondefaultElements() != 1) {
            return false;
          }
          if (!Double.isNaN(vector.get(TasteHadoopUtils.idToIndex(itemID)))) {
            return false;
          }
          if (userIDs.length != vectorAndPrefs.getUserIDs().size()) {
            return false;
          }
          for (long userID : userIDs) {
            if (!vectorAndPrefs.getUserIDs().contains(userID)) {
              return false;
            }
          }
          return true;
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("VectorAndPrefsWritable(filter itemID=").append(itemID)
            .append(", userIDs=").append(Arrays.toString(userIDs)).append(')');
      }
    });
    return null;
  }

  /**
   * tests {@link PartialMultiplyMapper}
   */
  @Test
  public void testPartialMultiplyMapper() throws Exception {

    Vector similarityColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumn.set(3, 0.5);
    similarityColumn.set(7, 0.8);

    Mapper<VarIntWritable,VectorAndPrefsWritable,VarLongWritable,PrefAndSimilarityColumnWritable>.Context context =
      EasyMock.createMock(Mapper.Context.class);

    PrefAndSimilarityColumnWritable one = new PrefAndSimilarityColumnWritable();
    PrefAndSimilarityColumnWritable two = new PrefAndSimilarityColumnWritable();
    one.set(1.0f, similarityColumn);
    two.set(3.0f, similarityColumn);

    context.write(EasyMock.eq(new VarLongWritable(123L)), EasyMock.eq(one));
    context.write(EasyMock.eq(new VarLongWritable(456L)), EasyMock.eq(two));

    EasyMock.replay(context);

    VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable(similarityColumn, Arrays.asList(123L, 456L),
        Arrays.asList(1.0f, 3.0f));

    new PartialMultiplyMapper().map(new VarIntWritable(1), vectorAndPrefs, context);

    EasyMock.verify(context);
  }

  /**
   * creates an {@link AggregateAndRecommendReducer} that knows the items 1 and 2 (index equals item ID) and
   * emits at most the given number of recommendations per user
   */
  private static AggregateAndRecommendReducer newAggregateAndRecommendReducer(int recommendationsPerUser)
    throws Exception {
    OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
    indexItemIDMap.put(1, 1L);
    indexItemIDMap.put(2, 2L);

    AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
    setField(reducer, "indexItemIDMap", indexItemIDMap);
    setField(reducer, "recommendationsPerUser", recommendationsPerUser);
    return reducer;
  }

  /**
   * tests {@link AggregateAndRecommendReducer}
   *
   * The expected values are consistent with a similarity-weighted average of the preferences:
   * item 1: (1.0 * 0.1 + 3.0 * 0.9) / (0.1 + 0.9) = 2.8, item 2: (1.0 * 0.5 + 3.0 * 0.5) / (0.5 + 0.5) = 2.0
   */
  @Test
  public void testAggregateAndRecommendReducer() throws Exception {
    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
        EasyMock.createMock(Reducer.Context.class);

    context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f),
        new MutableRecommendedItem(2L, 2.0f)));

    EasyMock.replay(context);

    RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnOne.set(1, 0.1);
    similarityColumnOne.set(2, 0.5);

    RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnTwo.set(1, 0.9);
    similarityColumnTwo.set(2, 0.5);

    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
        new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
        new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));

    newAggregateAndRecommendReducer(3).reduce(new VarLongWritable(123L), values, context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link AggregateAndRecommendReducer} in the case that an item (here item 2) occurs in only one of the
   * similarity columns; it is expected to be missing from the recommendations
   */
  @Test
  public void testAggregateAndRecommendReducerExcludeRecommendationsBasedOnOneItem() throws Exception {
    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
        EasyMock.createMock(Reducer.Context.class);

    context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));

    EasyMock.replay(context);

    RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnOne.set(1, 0.1);

    RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnTwo.set(1, 0.9);
    similarityColumnTwo.set(2, 0.5);

    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
        new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
        new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));

    newAggregateAndRecommendReducer(3).reduce(new VarLongWritable(123L), values, context);

    EasyMock.verify(context);
  }

  /**
   * tests {@link AggregateAndRecommendReducer} with a limit on the recommendations per user
   */
  @Test
  public void testAggregateAndRecommendReducerLimitNumberOfRecommendations() throws Exception {
    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
      EasyMock.createMock(Reducer.Context.class);

    context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));

    EasyMock.replay(context);

    RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnOne.set(1, 0.1);
    similarityColumnOne.set(2, 0.5);

    RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    similarityColumnTwo.set(1, 0.9);
    similarityColumnTwo.set(2, 0.5);

    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
        new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
        new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));

    newAggregateAndRecommendReducer(1).reduce(new VarLongWritable(123L), values, context);

    EasyMock.verify(context);
  }

  /**
   * verifies a {@link RecommendedItemsWritable}
   *
   * @param items the expected recommendations, in order
   * @return always {@code null}, the matcher itself is registered with EasyMock
   */
  static RecommendedItemsWritable recommendationsMatch(final RecommendedItem... items) {
    EasyMock.reportMatcher(new IArgumentMatcher() {
      @Override
      public boolean matches(Object argument) {
        if (argument instanceof RecommendedItemsWritable) {
          RecommendedItemsWritable recommendedItemsWritable = (RecommendedItemsWritable) argument;
          List<RecommendedItem> expectedItems = Arrays.asList(items);
          return expectedItems.equals(recommendedItemsWritable.getRecommendedItems());
        }
        return false;
      }

      @Override
      public void appendTo(StringBuffer buffer) {
        // shown by EasyMock when the expectation is not met
        buffer.append("RecommendedItemsWritable").append(Arrays.toString(items));
      }
    });
    return null;
  }

  /**
   * writes the user-item-matrix with explicit ratings that is documented at {@link #testCompleteJob()},
   * one "userID,itemID,rating" triple per line
   */
  private static void writeRatings(File inputFile) throws Exception {
    writeLines(inputFile,
        "1,1,5",
        "1,2,5",
        "1,3,2",
        "2,1,2",
        "2,3,3",
        "2,4,5",
        "3,2,5",
        "3,4,3",
        "4,1,3",
        "4,4,5");
  }

  /**
   * creates a {@link RecommenderJob} that reads from the given input file and writes uncompressed output
   * to the given directory
   */
  private RecommenderJob newRecommenderJob(File inputFile, File outputDir) throws Exception {
    Configuration conf = getConfiguration();
    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
    conf.setBoolean("mapred.output.compress", false);

    RecommenderJob recommenderJob = new RecommenderJob();
    recommenderJob.setConf(conf);
    return recommenderJob;
  }

  /**
   * small integration test that runs the full job
   *
   * As a tribute to http://www.slideshare.net/srowen/collaborative-filtering-at-scale,
   * we recommend people food to animals in this test :)
   *
   * <pre>
   *
   *  user-item-matrix
   *
   *          burger  hotdog  berries  icecream
   *  dog       5       5        2        -
   *  rabbit    2       -        3        5
   *  cow       -       5        -        3
   *  donkey    3       -        -        5
   *
   *
   *  item-item-similarity-matrix (tanimoto-coefficient of the item-vectors of the user-item-matrix)
   *
   *          burger  hotdog  berries icecream
   *  burger    -      0.25    0.66    0.5
   *  hotdog   0.25     -      0.33    0.25
   *  berries  0.66    0.33     -      0.25
   *  icecream 0.5     0.25    0.25     -
   *
   *
   *  Prediction(dog, icecream)   = (0.5 * 5 + 0.25 * 5 + 0.25 * 2 ) / (0.5 + 0.25 + 0.25)  ~ 4.3
   *  Prediction(rabbit, hotdog)  = (0.25 * 2 + 0.33 * 3 + 0.25 * 5) / (0.25 + 0.33 + 0.25) ~ 3.3
   *  Prediction(cow, burger)     = (0.25 * 5 + 0.5 * 3) / (0.25 + 0.5)                     ~ 3.7
   *  Prediction(cow, berries)    = (0.33 * 5 + 0.25 * 3) / (0.33 + 0.25)                   ~ 4.1
   *  Prediction(donkey, hotdog)  = (0.25 * 3 + 0.25 * 5) / (0.25 + 0.25)                   ~ 4
   *  Prediction(donkey, berries) = (0.66 * 3 + 0.25 * 5) / (0.66 + 0.25)                   ~ 3.5
   *
   * </pre>
   */
  @Test
  public void testCompleteJob() throws Exception {

    File inputFile = getTestTempFile("prefs.txt");
    File outputDir = getTestTempDir("output");
    outputDir.delete();
    File similaritiesOutputDir = getTestTempDir("outputSimilarities");
    similaritiesOutputDir.delete();
    File tmpDir = getTestTempDir("tmp");

    writeRatings(inputFile);

    RecommenderJob recommenderJob = newRecommenderJob(inputFile, outputDir);

    recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
       TanimotoCoefficientSimilarity.class.getName(), "--numRecommendations", "4",
        "--outputPathForSimilarityMatrix", similaritiesOutputDir.getAbsolutePath() });

    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
    assertEquals(4, recommendations.size());

    for (Entry<Long,List<RecommendedItem>> entry : recommendations.entrySet()) {
      long userID = entry.getKey();
      List<RecommendedItem> items = entry.getValue();
      assertNotNull(items);

      // the size is asserted before any element is accessed, so a wrong count fails with a readable message
      if (userID == 1L) {
        assertEquals(1, items.size());
        RecommendedItem item1 = items.get(0);
        assertEquals(4L, item1.getItemID());
        assertEquals(4.3, item1.getValue(), 0.05);
      } else if (userID == 2L) {
        assertEquals(1, items.size());
        RecommendedItem item1 = items.get(0);
        assertEquals(2L, item1.getItemID());
        assertEquals(3.3, item1.getValue(), 0.05);
      } else if (userID == 3L) {
        assertEquals(2, items.size());
        RecommendedItem item1 = items.get(0);
        assertEquals(3L, item1.getItemID());
        assertEquals(4.1, item1.getValue(), 0.05);
        RecommendedItem item2 = items.get(1);
        assertEquals(1L, item2.getItemID());
        assertEquals(3.7, item2.getValue(), 0.05);
      } else if (userID == 4L) {
        assertEquals(2, items.size());
        RecommendedItem item1 = items.get(0);
        assertEquals(2L, item1.getItemID());
        assertEquals(4.0, item1.getValue(), 0.05);
        RecommendedItem item2 = items.get(1);
        assertEquals(3L, item2.getItemID());
        assertEquals(3.5, item2.getValue(), 0.05);
      } else {
        // the input only contains the users 1 to 4
        fail("recommendations for unexpected user " + userID);
      }
    }

    Map<Pair<Long, Long>, Double> similarities = readSimilarities(new File(similaritiesOutputDir, "part-r-00000"));
    assertEquals(6, similarities.size());

    assertEquals(0.25, similarities.get(new Pair<>(1L, 2L)), EPSILON);
    assertEquals(0.6666666666666666, similarities.get(new Pair<>(1L, 3L)), EPSILON);
    assertEquals(0.5, similarities.get(new Pair<>(1L, 4L)), EPSILON);
    assertEquals(0.3333333333333333, similarities.get(new Pair<>(2L, 3L)), EPSILON);
    assertEquals(0.25, similarities.get(new Pair<>(2L, 4L)), EPSILON);
    assertEquals(0.25, similarities.get(new Pair<>(3L, 4L)), EPSILON);
  }

  /**
   * small integration test for boolean data
   */
  @Test
  public void testCompleteJobBoolean() throws Exception {

    File inputFile = getTestTempFile("prefs.txt");
    File outputDir = getTestTempDir("output");
    outputDir.delete();
    File tmpDir = getTestTempDir("tmp");
    File usersFile = getTestTempFile("users.txt");
    writeLines(usersFile, "3");

    writeLines(inputFile,
        "1,1",
        "1,2",
        "1,3",
        "2,1",
        "2,3",
        "2,4",
        "3,2",
        "3,4",
        "4,1",
        "4,4");

    RecommenderJob recommenderJob = newRecommenderJob(inputFile, outputDir);

    recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
        CooccurrenceCountSimilarity.class.getName(), "--booleanData", "true",
        "--usersFile", usersFile.getAbsolutePath() });

    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));

    List<RecommendedItem> recommendedToCow = recommendations.get(3L);
    assertEquals(2, recommendedToCow.size());

    RecommendedItem item1 = recommendedToCow.get(0);
    RecommendedItem item2 = recommendedToCow.get(1);

    assertEquals(1L, item1.getItemID());
    assertEquals(3L, item2.getItemID());

    /* predicted pref must be the sum of similarities:
    *    item1: coocc(burger, hotdog) + coocc(burger, icecream) = 3
    *    item2: coocc(berries, hotdog) + coocc(berries, icecream) = 2 */
    assertEquals(3, item1.getValue(), 0.05);
    assertEquals(2, item2.getValue(), 0.05);
  }

  /**
   * check whether the explicit user/item filter works
   */
  @Test
  public void testCompleteJobWithFiltering() throws Exception {

    File inputFile = getTestTempFile("prefs.txt");
    File userFile = getTestTempFile("users.txt");
    File filterFile = getTestTempFile("filter.txt");
    File outputDir = getTestTempDir("output");
    outputDir.delete();
    File tmpDir = getTestTempDir("tmp");

    writeRatings(inputFile);

    /* only compute recommendations for the donkey */
    writeLines(userFile, "4");
    /* do not recommend the hotdog for the donkey */
    writeLines(filterFile, "4,2");

    RecommenderJob recommenderJob = newRecommenderJob(inputFile, outputDir);

    recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
       TanimotoCoefficientSimilarity.class.getName(), "--numRecommendations", "1",
       "--usersFile", userFile.getAbsolutePath(), "--filterFile", filterFile.getAbsolutePath() });

    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));

    assertEquals(1, recommendations.size());
    assertTrue(recommendations.containsKey(4L));
    assertEquals(1, recommendations.get(4L).size());

    /* berries should have been recommended to the donkey */
    RecommendedItem recommendedItem = recommendations.get(4L).get(0);
    assertEquals(3L, recommendedItem.getItemID());
    assertEquals(3.5, recommendedItem.getValue(), 0.05);
  }

  /**
   * reads item similarities from a text file with one tab-separated "itemA itemB similarity" triple per line
   *
   * @param file the output file to parse
   * @return the similarity values keyed by the pair of item IDs as found in the file
   * @throws IOException if the file cannot be read
   */
  static Map<Pair<Long,Long>, Double> readSimilarities(File file) throws IOException {
    Map<Pair<Long,Long>, Double> similarities = Maps.newHashMap();
    for (String line : new FileLineIterable(file)) {
      String[] parts = line.split("\t");
      similarities.put(new Pair<>(Long.parseLong(parts[0]), Long.parseLong(parts[1])),
          Double.parseDouble(parts[2]));
    }
    return similarities;
  }

  /**
   * reads recommendations from a text file whose lines have the form
   * {@code userID<TAB>[itemID:value,itemID:value,...]}
   *
   * @param file the output file to parse
   * @return the recommended items per user, in the order in which they appear on the line
   * @throws IOException if the file cannot be read
   */
  static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
    Map<Long,List<RecommendedItem>> recommendations = Maps.newHashMap();
    for (String line : new FileLineIterable(file)) {

      String[] keyValue = line.split("\t");
      long userID = Long.parseLong(keyValue[0]);
      // the brackets are literals, no regular expression is needed to strip them
      String[] tokens = keyValue[1].replace("[", "").replace("]", "").split(",");

      // callers access the items by position, so an array-backed list is used
      List<RecommendedItem> items = Lists.newArrayList();
      for (String token : tokens) {
        String[] itemTokens = token.split(":");
        long itemID = Long.parseLong(itemTokens[0]);
        float value = Float.parseFloat(itemTokens[1]);
        items.add(new GenericRecommendedItem(itemID, value));
      }
      recommendations.put(userID, items);
    }
    return recommendations;
  }

}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java new file mode 100644 index 0000000..bb22b71 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.hadoop.item; + +import org.apache.hadoop.mapreduce.Counter; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.math.VarLongWritable; +import org.apache.mahout.math.VectorWritable; +import org.apache.mahout.math.hadoop.MathHelper; +import org.easymock.EasyMock; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; + +/** + * tests {@link ToUserVectorsReducer} + */ +public class ToUserVectorsReducerTest extends TasteTestCase { + + @Test + public void testToUsersReducerMinPreferencesUserIgnored() throws Exception { + Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context = + EasyMock.createMock(Reducer.Context.class); + + ToUserVectorsReducer reducer = new ToUserVectorsReducer(); + setField(reducer, "minPreferences", 2); + + EasyMock.replay(context); + + reducer.reduce(new VarLongWritable(123), Collections.singletonList(new VarLongWritable(456)), context); + + EasyMock.verify(context); + } + + @Test + public void testToUsersReducerMinPreferencesUserPasses() throws Exception { + Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context = + EasyMock.createMock(Reducer.Context.class); + Counter userCounters = EasyMock.createMock(Counter.class); + + ToUserVectorsReducer reducer = new ToUserVectorsReducer(); + setField(reducer, "minPreferences", 2); + + EasyMock.expect(context.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(userCounters); + userCounters.increment(1); + context.write(EasyMock.eq(new VarLongWritable(123)), MathHelper.vectorMatches( + MathHelper.elem(TasteHadoopUtils.idToIndex(456L), 1.0), MathHelper.elem(TasteHadoopUtils.idToIndex(789L), 1.0))); + + EasyMock.replay(context, userCounters); + + reducer.reduce(new VarLongWritable(123), Arrays.asList(new VarLongWritable(456), new 
VarLongWritable(789)), context); + + EasyMock.verify(context, userCounters); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java new file mode 100644 index 0000000..f61b5e6 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java @@ -0,0 +1,269 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.hadoop.similarity.item; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FilenameFilter; +import java.util.Arrays; +import java.util.regex.Pattern; + +import com.google.common.base.Charsets; +import com.google.common.io.Files; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.math.RandomAccessSparseVector; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.VectorWritable; +import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity; +import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity; +import org.apache.mahout.math.map.OpenIntLongHashMap; +import org.easymock.EasyMock; +import org.junit.Test; + +/** + * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity.item + * some integration tests with tiny data sets at the end + */ +public final class ItemSimilarityJobTest extends TasteTestCase { + + private static final Pattern TAB = Pattern.compile("\t"); + + /** + * Tests {@link ItemSimilarityJob.MostSimilarItemPairsMapper} + */ + @Test + public void testMostSimilarItemsPairsMapper() throws Exception { + + OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap(); + indexItemIDMap.put(12, 12L); + indexItemIDMap.put(34, 34L); + indexItemIDMap.put(56, 56L); + + Mapper<IntWritable,VectorWritable,EntityEntityWritable,DoubleWritable>.Context context = + EasyMock.createMock(Mapper.Context.class); + + context.write(new EntityEntityWritable(34L, 56L), new DoubleWritable(0.9)); + + EasyMock.replay(context); + + Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE); 
+ vector.set(12, 0.2); + vector.set(56, 0.9); + + ItemSimilarityJob.MostSimilarItemPairsMapper mapper = new ItemSimilarityJob.MostSimilarItemPairsMapper(); + setField(mapper, "indexItemIDMap", indexItemIDMap); + setField(mapper, "maxSimilarItemsPerItem", 1); + + mapper.map(new IntWritable(34), new VectorWritable(vector), context); + + EasyMock.verify(context); + } + + /** + * Tests {@link ItemSimilarityJob.MostSimilarItemPairsReducer} + */ + @Test + public void testMostSimilarItemPairsReducer() throws Exception { + Reducer<EntityEntityWritable,DoubleWritable,EntityEntityWritable,DoubleWritable>.Context context = + EasyMock.createMock(Reducer.Context.class); + + context.write(new EntityEntityWritable(123L, 456L), new DoubleWritable(0.5)); + + EasyMock.replay(context); + + new ItemSimilarityJob.MostSimilarItemPairsReducer().reduce(new EntityEntityWritable(123L, 456L), + Arrays.asList(new DoubleWritable(0.5), new DoubleWritable(0.5)), context); + + EasyMock.verify(context); + } + + /** + * Integration test with a tiny data set + * + * <pre> + * user-item-matrix + * + * Game Mouse PC Disk + * Jane - 1 2 - + * Paul 1 - 1 - + * Fred - - - 1 + * </pre> + */ + @Test + public void testCompleteJob() throws Exception { + + File inputFile = getTestTempFile("prefs.txt"); + File outputDir = getTestTempDir("output"); + outputDir.delete(); + File tmpDir = getTestTempDir("tmp"); + + writeLines(inputFile, + "2,1,1", + "1,2,1", + "3,4,1", + "1,3,2", + "2,3,1"); + + ItemSimilarityJob similarityJob = new ItemSimilarityJob(); + + Configuration conf = getConfiguration(); + conf.set("mapred.input.dir", inputFile.getAbsolutePath()); + conf.set("mapred.output.dir", outputDir.getAbsolutePath()); + conf.setBoolean("mapred.output.compress", false); + + similarityJob.setConf(conf); + similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname", + CosineSimilarity.class.getName() }); + File outPart = outputDir.listFiles(new FilenameFilter() { + @Override + 
public boolean accept(File dir, String name) { + return name.startsWith("part-"); + } + })[0]; + BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8); + + String line; + int currentLine = 1; + while ( (line = reader.readLine()) != null) { + + String[] tokens = TAB.split(line); + + long itemAID = Long.parseLong(tokens[0]); + long itemBID = Long.parseLong(tokens[1]); + double similarity = Double.parseDouble(tokens[2]); + + if (currentLine == 1) { + assertEquals(1L, itemAID); + assertEquals(3L, itemBID); + assertEquals(0.45, similarity, 0.01); + } + + if (currentLine == 2) { + assertEquals(2L, itemAID); + assertEquals(3L, itemBID); + assertEquals(0.89, similarity, 0.01); + } + + currentLine++; + } + + int linesWritten = currentLine-1; + assertEquals(2, linesWritten); + } + + /** + * integration test for the limitation of the number of computed similarities + * + * <pre> + * user-item-matrix + * + * i1 i2 i3 + * u1 1 0 1 + * u2 0 1 1 + * u3 1 1 0 + * u4 1 1 1 + * u5 0 1 0 + * u6 1 1 0 + * + * tanimoto(i1,i2) = 0.5 + * tanimoto(i2,i3) = 0.333 + * tanimoto(i3,i1) = 0.4 + * + * When we set maxSimilaritiesPerItem to 1 the following pairs should be found: + * + * i1 --> i2 + * i2 --> i1 + * i3 --> i1 + * </pre> + */ + @Test + public void testMaxSimilaritiesPerItem() throws Exception { + + File inputFile = getTestTempFile("prefsForMaxSimilarities.txt"); + File outputDir = getTestTempDir("output"); + outputDir.delete(); + File tmpDir = getTestTempDir("tmp"); + + writeLines(inputFile, + "1,1,1", + "1,3,1", + "2,2,1", + "2,3,1", + "3,1,1", + "3,2,1", + "4,1,1", + "4,2,1", + "4,3,1", + "5,2,1", + "6,1,1", + "6,2,1"); + + ItemSimilarityJob similarityJob = new ItemSimilarityJob(); + + Configuration conf = getConfiguration(); + conf.set("mapred.input.dir", inputFile.getAbsolutePath()); + conf.set("mapred.output.dir", outputDir.getAbsolutePath()); + conf.setBoolean("mapred.output.compress", false); + + similarityJob.setConf(conf); + similarityJob.run(new String[] { 
"--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname", + TanimotoCoefficientSimilarity.class.getName(), "--maxSimilaritiesPerItem", "1" }); + File outPart = outputDir.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.startsWith("part-"); + } + })[0]; + BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8); + + String line; + int currentLine = 1; + while ((line = reader.readLine()) != null) { + + String[] tokens = TAB.split(line); + + long itemAID = Long.parseLong(tokens[0]); + long itemBID = Long.parseLong(tokens[1]); + double similarity = Double.parseDouble(tokens[2]); + + if (currentLine == 1) { + assertEquals(1L, itemAID); + assertEquals(2L, itemBID); + assertEquals(0.5, similarity, 0.0001); + } + + if (currentLine == 2) { + assertEquals(1L, itemAID); + assertEquals(3L, itemBID); + assertEquals(0.4, similarity, 0.0001); + } + + currentLine++; + } + + int linesWritten = currentLine - 1; + assertEquals(2, linesWritten); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java new file mode 100644 index 0000000..c1891c0 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl; + +import com.google.common.collect.Lists; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel; +import org.apache.mahout.common.MahoutTestCase; +import org.apache.mahout.cf.taste.impl.model.GenericDataModel; +import org.apache.mahout.cf.taste.impl.model.GenericPreference; +import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; + +import java.util.List; + +public abstract class TasteTestCase extends MahoutTestCase { + + public static DataModel getDataModel(long[] userIDs, Double[][] prefValues) { + FastByIDMap<PreferenceArray> result = new FastByIDMap<>(); + for (int i = 0; i < userIDs.length; i++) { + List<Preference> prefsList = Lists.newArrayList(); + for (int j = 0; j < prefValues[i].length; j++) { + if (prefValues[i][j] != null) { + prefsList.add(new GenericPreference(userIDs[i], j, prefValues[i][j].floatValue())); + } + } + if (!prefsList.isEmpty()) { + result.put(userIDs[i], new GenericUserPreferenceArray(prefsList)); + } + } + return new GenericDataModel(result); + } + + public static DataModel getBooleanDataModel(long[] userIDs, boolean[][] prefs) { + FastByIDMap<FastIDSet> result = new FastByIDMap<>(); + for (int i = 0; i < userIDs.length; i++) { + FastIDSet prefsSet = new 
FastIDSet(); + for (int j = 0; j < prefs[i].length; j++) { + if (prefs[i][j]) { + prefsSet.add(j); + } + } + if (!prefsSet.isEmpty()) { + result.put(userIDs[i], prefsSet); + } + } + return new GenericBooleanPrefDataModel(result); + } + + protected static DataModel getDataModel() { + return getDataModel( + new long[] {1, 2, 3, 4}, + new Double[][] { + {0.1, 0.3}, + {0.2, 0.3, 0.3}, + {0.4, 0.3, 0.5}, + {0.7, 0.3, 0.8}, + }); + } + + protected static DataModel getBooleanDataModel() { + return getBooleanDataModel(new long[] {1, 2, 3, 4}, + new boolean[][] { + {false, true, false}, + {false, true, true, false}, + {true, false, false, true}, + {true, false, true, true}, + }); + } + + protected static boolean arrayContains(long[] array, long value) { + for (long l : array) { + if (l == value) { + return true; + } + } + return false; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java new file mode 100644 index 0000000..1f7c76b --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.junit.Test; + +public final class BitSetTest extends TasteTestCase { + + private static final int NUM_BITS = 100; + + @Test + public void testGetSet() { + BitSet bitSet = new BitSet(NUM_BITS); + for (int i = 0; i < NUM_BITS; i++) { + assertFalse(bitSet.get(i)); + } + bitSet.set(0); + bitSet.set(NUM_BITS-1); + assertTrue(bitSet.get(0)); + assertTrue(bitSet.get(NUM_BITS-1)); + } + + @Test(expected = ArrayIndexOutOfBoundsException.class) + public void testBounds1() { + BitSet bitSet = new BitSet(NUM_BITS); + bitSet.set(1000); + } + + @Test(expected = ArrayIndexOutOfBoundsException.class) + public void testBounds2() { + BitSet bitSet = new BitSet(NUM_BITS); + bitSet.set(-1); + } + + @Test + public void testClear() { + BitSet bitSet = new BitSet(NUM_BITS); + for (int i = 0; i < NUM_BITS; i++) { + bitSet.set(i); + } + for (int i = 0; i < NUM_BITS; i++) { + assertTrue(bitSet.get(i)); + } + bitSet.clear(); + for (int i = 0; i < NUM_BITS; i++) { + assertFalse(bitSet.get(i)); + } + } + + @Test + public void testClone() { + BitSet bitSet = new BitSet(NUM_BITS); + bitSet.set(NUM_BITS-1); + bitSet = bitSet.clone(); + assertTrue(bitSet.get(NUM_BITS-1)); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java ---------------------------------------------------------------------- diff --git 
a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java new file mode 100644 index 0000000..e6298bc --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.common.RandomUtils; +import org.junit.Test; + +import java.util.Random; + +public final class CacheTest extends TasteTestCase { + + @Test + public void testLotsOfGets() throws TasteException { + Retriever<Object,Object> retriever = new IdentityRetriever(); + Cache<Object,Object> cache = new Cache<>(retriever, 1000); + for (int i = 0; i < 1000000; i++) { + assertEquals(i, cache.get(i)); + } + } + + @Test + public void testMixedUsage() throws TasteException { + Random random = RandomUtils.getRandom(); + Retriever<Object,Object> retriever = new IdentityRetriever(); + Cache<Object,Object> cache = new Cache<>(retriever, 1000); + for (int i = 0; i < 1000000; i++) { + double r = random.nextDouble(); + if (r < 0.01) { + cache.clear(); + } else if (r < 0.1) { + cache.remove(r - 100); + } else { + assertEquals(i, cache.get(i)); + } + } + } + + private static class IdentityRetriever implements Retriever<Object,Object> { + @Override + public Object get(Object key) throws TasteException { + return key; + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java new file mode 100644 index 0000000..8195a90 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.common.RandomUtils; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +/** <p>Tests {@link FastByIDMap}.</p> */ +public final class FastByIDMapTest extends TasteTestCase { + + @Test + public void testPutAndGet() { + FastByIDMap<Long> map = new FastByIDMap<>(); + assertNull(map.get(500000L)); + map.put(500000L, 2L); + assertEquals(2L, (long) map.get(500000L)); + } + + @Test + public void testRemove() { + FastByIDMap<Long> map = new FastByIDMap<>(); + map.put(500000L, 2L); + map.remove(500000L); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + assertNull(map.get(500000L)); + } + + @Test + public void testClear() { + FastByIDMap<Long> map = new FastByIDMap<>(); + map.put(500000L, 2L); + map.clear(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + assertNull(map.get(500000L)); + } + + @Test + public void testSizeEmpty() { + FastByIDMap<Long> map = new FastByIDMap<>(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + map.put(500000L, 2L); + assertEquals(1, map.size()); + assertFalse(map.isEmpty()); + map.remove(500000L); + assertEquals(0, map.size()); + 
assertTrue(map.isEmpty()); + } + + @Test + public void testContains() { + FastByIDMap<String> map = buildTestFastMap(); + assertTrue(map.containsKey(500000L)); + assertTrue(map.containsKey(47L)); + assertTrue(map.containsKey(2L)); + assertTrue(map.containsValue("alpha")); + assertTrue(map.containsValue("bang")); + assertTrue(map.containsValue("beta")); + assertFalse(map.containsKey(999)); + assertFalse(map.containsValue("something")); + } + + @Test + public void testRehash() { + FastByIDMap<String> map = buildTestFastMap(); + map.remove(500000L); + map.rehash(); + assertNull(map.get(500000L)); + assertEquals("bang", map.get(47L)); + } + + @Test + public void testGrow() { + FastByIDMap<String> map = new FastByIDMap<>(1,1); + map.put(500000L, "alpha"); + map.put(47L, "bang"); + assertNull(map.get(500000L)); + assertEquals("bang", map.get(47L)); + } + + @Test + public void testVersusHashMap() { + FastByIDMap<String> actual = new FastByIDMap<>(); + Map<Long, String> expected = new HashMap<>(1000000); + Random r = RandomUtils.getRandom(); + for (int i = 0; i < 1000000; i++) { + double d = r.nextDouble(); + Long key = (long) r.nextInt(100); + if (d < 0.4) { + assertEquals(expected.get(key), actual.get(key)); + } else { + if (d < 0.7) { + assertEquals(expected.put(key, "bang"), actual.put(key, "bang")); + } else { + assertEquals(expected.remove(key), actual.remove(key)); + } + assertEquals(expected.size(), actual.size()); + assertEquals(expected.isEmpty(), actual.isEmpty()); + } + } + } + + @Test + public void testMaxSize() { + FastByIDMap<String> map = new FastByIDMap<>(); + map.put(4, "bang"); + assertEquals(1, map.size()); + map.put(47L, "bang"); + assertEquals(2, map.size()); + assertNull(map.get(500000L)); + map.put(47L, "buzz"); + assertEquals(2, map.size()); + assertEquals("buzz", map.get(47L)); + } + + + private static FastByIDMap<String> buildTestFastMap() { + FastByIDMap<String> map = new FastByIDMap<>(); + map.put(500000L, "alpha"); + map.put(47L, "bang"); + 
map.put(2L, "beta"); + return map; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java new file mode 100644 index 0000000..aec1738 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.common; + +import com.google.common.collect.Sets; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.common.RandomUtils; +import org.junit.Test; + +import java.util.Collection; +import java.util.Random; + +/** <p>Tests {@link FastIDSet}.</p> */ +public final class FastIDSetTest extends TasteTestCase { + + @Test + public void testContainsAndAdd() { + FastIDSet set = new FastIDSet(); + assertFalse(set.contains(1)); + set.add(1); + assertTrue(set.contains(1)); + } + + @Test + public void testRemove() { + FastIDSet set = new FastIDSet(); + set.add(1); + set.remove(1); + assertEquals(0, set.size()); + assertTrue(set.isEmpty()); + assertFalse(set.contains(1)); + } + + @Test + public void testClear() { + FastIDSet set = new FastIDSet(); + set.add(1); + set.clear(); + assertEquals(0, set.size()); + assertTrue(set.isEmpty()); + assertFalse(set.contains(1)); + } + + @Test + public void testSizeEmpty() { + FastIDSet set = new FastIDSet(); + assertEquals(0, set.size()); + assertTrue(set.isEmpty()); + set.add(1); + assertEquals(1, set.size()); + assertFalse(set.isEmpty()); + set.remove(1); + assertEquals(0, set.size()); + assertTrue(set.isEmpty()); + } + + @Test + public void testContains() { + FastIDSet set = buildTestFastSet(); + assertTrue(set.contains(1)); + assertTrue(set.contains(2)); + assertTrue(set.contains(3)); + assertFalse(set.contains(4)); + } + + @Test + public void testReservedValues() { + FastIDSet set = new FastIDSet(); + try { + set.add(Long.MIN_VALUE); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + // good + } + assertFalse(set.contains(Long.MIN_VALUE)); + try { + set.add(Long.MAX_VALUE); + fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException iae) { + // good + } + assertFalse(set.contains(Long.MAX_VALUE)); + } + + @Test + public void testRehash() { + FastIDSet set = buildTestFastSet(); + 
set.remove(1); + set.rehash(); + assertFalse(set.contains(1)); + } + + @Test + public void testGrow() { + FastIDSet set = new FastIDSet(1); + set.add(1); + set.add(2); + assertTrue(set.contains(1)); + assertTrue(set.contains(2)); + } + + @Test + public void testIterator() { + FastIDSet set = buildTestFastSet(); + Collection<Long> expected = Sets.newHashSetWithExpectedSize(3); + expected.add(1L); + expected.add(2L); + expected.add(3L); + LongPrimitiveIterator it = set.iterator(); + while (it.hasNext()) { + expected.remove(it.nextLong()); + } + assertTrue(expected.isEmpty()); + } + + @Test + public void testVersusHashSet() { + FastIDSet actual = new FastIDSet(1); + Collection<Integer> expected = Sets.newHashSetWithExpectedSize(1000000); + Random r = RandomUtils.getRandom(); + for (int i = 0; i < 1000000; i++) { + double d = r.nextDouble(); + Integer key = r.nextInt(100); + if (d < 0.4) { + assertEquals(expected.contains(key), actual.contains(key)); + } else { + if (d < 0.7) { + assertEquals(expected.add(key), actual.add(key)); + } else { + assertEquals(expected.remove(key), actual.remove(key)); + } + assertEquals(expected.size(), actual.size()); + assertEquals(expected.isEmpty(), actual.isEmpty()); + } + } + } + + private static FastIDSet buildTestFastSet() { + FastIDSet set = new FastIDSet(); + set.add(1); + set.add(2); + set.add(3); + return set; + } + + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java new file mode 100644 index 0000000..c27151a --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java @@ -0,0 +1,228 @@ 
+/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.apache.mahout.common.RandomUtils; +import org.junit.Test; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +/** <p>Tests {@link FastMap}.</p> */ +public final class FastMapTest extends TasteTestCase { + + @Test + public void testPutAndGet() { + Map<String, String> map = new FastMap<>(); + assertNull(map.get("foo")); + map.put("foo", "bar"); + assertEquals("bar", map.get("foo")); + } + + @Test + public void testRemove() { + Map<String, String> map = new FastMap<>(); + map.put("foo", "bar"); + map.remove("foo"); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + assertNull(map.get("foo")); + } + + @Test + public void testClear() { + Map<String, String> map = new FastMap<>(); + map.put("foo", "bar"); + map.clear(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + assertNull(map.get("foo")); + } + + @Test + public void testSizeEmpty() { + Map<String, String> map = new 
FastMap<>(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + map.put("foo", "bar"); + assertEquals(1, map.size()); + assertFalse(map.isEmpty()); + map.remove("foo"); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + } + + @Test + public void testContains() { + FastMap<String, String> map = buildTestFastMap(); + assertTrue(map.containsKey("foo")); + assertTrue(map.containsKey("baz")); + assertTrue(map.containsKey("alpha")); + assertTrue(map.containsValue("bar")); + assertTrue(map.containsValue("bang")); + assertTrue(map.containsValue("beta")); + assertFalse(map.containsKey("something")); + assertFalse(map.containsValue("something")); + } + + @Test(expected = NullPointerException.class) + public void testNull1() { + Map<String, String> map = new FastMap<>(); + assertNull(map.get(null)); + map.put(null, "bar"); + } + + @Test(expected = NullPointerException.class) + public void testNull2() { + Map<String, String> map = new FastMap<>(); + map.put("foo", null); + } + + @Test + public void testRehash() { + FastMap<String, String> map = buildTestFastMap(); + map.remove("foo"); + map.rehash(); + assertNull(map.get("foo")); + assertEquals("bang", map.get("baz")); + } + + @Test + public void testGrow() { + Map<String, String> map = new FastMap<>(1, FastMap.NO_MAX_SIZE); + map.put("foo", "bar"); + map.put("baz", "bang"); + assertEquals("bar", map.get("foo")); + assertEquals("bang", map.get("baz")); + } + + @Test + public void testKeySet() { + FastMap<String, String> map = buildTestFastMap(); + Collection<String> expected = Sets.newHashSetWithExpectedSize(3); + expected.add("foo"); + expected.add("baz"); + expected.add("alpha"); + Set<String> actual = map.keySet(); + assertTrue(expected.containsAll(actual)); + assertTrue(actual.containsAll(expected)); + Iterator<String> it = actual.iterator(); + while (it.hasNext()) { + String value = it.next(); + if (!"baz".equals(value)) { + it.remove(); + } + } + assertTrue(map.containsKey("baz")); + 
assertFalse(map.containsKey("foo")); + assertFalse(map.containsKey("alpha")); + } + + @Test + public void testValues() { + FastMap<String, String> map = buildTestFastMap(); + Collection<String> expected = Sets.newHashSetWithExpectedSize(3); + expected.add("bar"); + expected.add("bang"); + expected.add("beta"); + Collection<String> actual = map.values(); + assertTrue(expected.containsAll(actual)); + assertTrue(actual.containsAll(expected)); + Iterator<String> it = actual.iterator(); + while (it.hasNext()) { + String value = it.next(); + if (!"bang".equals(value)) { + it.remove(); + } + } + assertTrue(map.containsValue("bang")); + assertFalse(map.containsValue("bar")); + assertFalse(map.containsValue("beta")); + } + + @Test + public void testEntrySet() { + FastMap<String, String> map = buildTestFastMap(); + Set<Map.Entry<String, String>> actual = map.entrySet(); + Collection<String> expectedKeys = Sets.newHashSetWithExpectedSize(3); + expectedKeys.add("foo"); + expectedKeys.add("baz"); + expectedKeys.add("alpha"); + Collection<String> expectedValues = Sets.newHashSetWithExpectedSize(3); + expectedValues.add("bar"); + expectedValues.add("bang"); + expectedValues.add("beta"); + assertEquals(3, actual.size()); + for (Map.Entry<String, String> entry : actual) { + expectedKeys.remove(entry.getKey()); + expectedValues.remove(entry.getValue()); + } + assertEquals(0, expectedKeys.size()); + assertEquals(0, expectedValues.size()); + } + + @Test + public void testVersusHashMap() { + Map<Integer, String> actual = new FastMap<>(1, 1000000); + Map<Integer, String> expected = Maps.newHashMapWithExpectedSize(1000000); + Random r = RandomUtils.getRandom(); + for (int i = 0; i < 1000000; i++) { + double d = r.nextDouble(); + Integer key = r.nextInt(100); + if (d < 0.4) { + assertEquals(expected.get(key), actual.get(key)); + } else { + if (d < 0.7) { + assertEquals(expected.put(key, "foo"), actual.put(key, "foo")); + } else { + assertEquals(expected.remove(key), actual.remove(key)); + 
} + assertEquals(expected.size(), actual.size()); + assertEquals(expected.isEmpty(), actual.isEmpty()); + } + } + } + + @Test + public void testMaxSize() { + Map<String, String> map = new FastMap<>(1, 1); + map.put("foo", "bar"); + assertEquals(1, map.size()); + map.put("baz", "bang"); + assertEquals(1, map.size()); + assertNull(map.get("foo")); + map.put("baz", "buzz"); + assertEquals(1, map.size()); + assertEquals("buzz", map.get("baz")); + } + + private static FastMap<String, String> buildTestFastMap() { + FastMap<String, String> map = new FastMap<>(); + map.put("foo", "bar"); + map.put("baz", "bang"); + map.put("alpha", "beta"); + return map; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java new file mode 100644 index 0000000..1fcc800 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.junit.Test; + +public final class InvertedRunningAverageTest extends TasteTestCase { + + @Test + public void testAverage() { + RunningAverage avg = new FullRunningAverage(); + RunningAverage inverted = new InvertedRunningAverage(avg); + assertEquals(0, inverted.getCount()); + avg.addDatum(1.0); + assertEquals(1, inverted.getCount()); + assertEquals(-1.0, inverted.getAverage(), EPSILON); + avg.addDatum(2.0); + assertEquals(2, inverted.getCount()); + assertEquals(-1.5, inverted.getAverage(), EPSILON); + } + + @Test(expected = UnsupportedOperationException.class) + public void testUnsupported1() { + RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage()); + inverted.addDatum(1.0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testUnsupported2() { + RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage()); + inverted.changeDatum(1.0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testUnsupported3() { + RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage()); + inverted.removeDatum(1.0); + } + + @Test + public void testAverageAndStdDev() { + RunningAverageAndStdDev avg = new FullRunningAverageAndStdDev(); + RunningAverageAndStdDev inverted = new InvertedRunningAverageAndStdDev(avg); + assertEquals(0, inverted.getCount()); + avg.addDatum(1.0); + assertEquals(1, inverted.getCount()); + assertEquals(-1.0, 
inverted.getAverage(), EPSILON); + avg.addDatum(2.0); + assertEquals(2, inverted.getCount()); + assertEquals(-1.5, inverted.getAverage(), EPSILON); + assertEquals(Math.sqrt(2.0)/2.0, inverted.getStandardDeviation(), EPSILON); + } + + @Test(expected = UnsupportedOperationException.class) + public void testAndStdDevUnsupported1() { + RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev()); + inverted.addDatum(1.0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testAndStdDevUnsupported2() { + RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev()); + inverted.changeDatum(1.0); + } + + @Test(expected = UnsupportedOperationException.class) + public void testAndStdDevUnsupported3() { + RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev()); + inverted.removeDatum(1.0); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java new file mode 100644 index 0000000..7458df3 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.junit.Test; + +import java.util.NoSuchElementException; + +public final class LongPrimitiveArrayIteratorTest extends TasteTestCase { + + @Test(expected = NoSuchElementException.class) + public void testEmpty() { + LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[0]); + assertFalse(it.hasNext()); + it.next(); + } + + @Test(expected = NoSuchElementException.class) + public void testNext() { + LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[] {3,2,1}); + assertTrue(it.hasNext()); + assertEquals(3, (long) it.next()); + assertTrue(it.hasNext()); + assertEquals(2, it.nextLong()); + assertTrue(it.hasNext()); + assertEquals(1, (long) it.next()); + assertFalse(it.hasNext()); + it.nextLong(); + } + + @Test + public void testPeekSkip() { + LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[] {3,2,1}); + assertEquals(3, it.peek()); + it.skip(2); + assertEquals(1, it.nextLong()); + assertFalse(it.hasNext()); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java ---------------------------------------------------------------------- diff --git 
a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java new file mode 100644 index 0000000..20233a7 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.common; + +import org.apache.mahout.cf.taste.common.Refreshable; + +import java.util.Collection; +import java.util.concurrent.Callable; + +/** A mock {@link Refreshable} which counts the number of times it has been refreshed, for use in tests. 
*/ +final class MockRefreshable implements Refreshable, Callable<Object> { + + private int callCount; + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + call(); + } + + @Override + public Object call() { + callCount++; + return null; + } + + int getCallCount() { + return callCount; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java new file mode 100644 index 0000000..54c97e3 --- /dev/null +++ b/community/mahout-mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.common; + +import com.google.common.collect.Sets; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.impl.TasteTestCase; +import org.junit.Test; + +import java.util.Collection; + +/** Tests {@link RefreshHelper} */ +public final class RefreshHelperTest extends TasteTestCase { + + @Test + public void testCallable() { + MockRefreshable mock = new MockRefreshable(); + Refreshable helper = new RefreshHelper(mock); + helper.refresh(null); + assertEquals(1, mock.getCallCount()); + } + + @Test + public void testNoCallable() { + Refreshable helper = new RefreshHelper(null); + helper.refresh(null); + } + + @Test + public void testDependencies() { + RefreshHelper helper = new RefreshHelper(null); + MockRefreshable mock1 = new MockRefreshable(); + MockRefreshable mock2 = new MockRefreshable(); + helper.addDependency(mock1); + helper.addDependency(mock2); + helper.refresh(null); + assertEquals(1, mock1.getCallCount()); + assertEquals(1, mock2.getCallCount()); + } + + @Test + public void testAlreadyRefreshed() { + RefreshHelper helper = new RefreshHelper(null); + MockRefreshable mock1 = new MockRefreshable(); + MockRefreshable mock2 = new MockRefreshable(); + helper.addDependency(mock1); + helper.addDependency(mock2); + Collection<Refreshable> alreadyRefreshed = Sets.newHashSetWithExpectedSize(1); + alreadyRefreshed.add(mock1); + helper.refresh(alreadyRefreshed); + assertEquals(0, mock1.getCallCount()); + assertEquals(1, mock2.getCallCount()); + } + +}
