http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
new file mode 100644
index 0000000..1ac5b72
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchItemException.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+public final class NoSuchItemException extends TasteException {
+  
+  public NoSuchItemException() { }
+
+  public NoSuchItemException(long itemID) {
+    this(String.valueOf(itemID));
+  }
+  
+  public NoSuchItemException(String message) {
+    super(message);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
new file mode 100644
index 0000000..cbb60fa
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/NoSuchUserException.java
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+public final class NoSuchUserException extends TasteException {
+  
+  public NoSuchUserException() { }
+
+  public NoSuchUserException(long userID) {
+    this(String.valueOf(userID));
+  }
+  
+  public NoSuchUserException(String message) {
+    super(message);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
new file mode 100644
index 0000000..9b26bee
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/common/Refreshable.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+import java.util.Collection;
+
+/**
+ * <p>
+ * Implementations of this interface have state that can be periodically 
refreshed. For example, an
+ * implementation instance might contain some pre-computed information that 
should be periodically refreshed.
+ * The {@link #refresh(Collection)} method triggers such a refresh.
+ * </p>
+ * 
+ * <p>
+ * All Taste components implement this. In particular,
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}s do. Callers may 
want to call
+ * {@link #refresh(Collection)} periodically to re-compute information 
throughout the system and bring it up
+ * to date, though this operation may be expensive.
+ * </p>
+ */
+public interface Refreshable {
+  
+  /**
+   * <p>
+   * Triggers "refresh" -- whatever that means -- of the implementation. The 
general contract is that any
+   * {@link Refreshable} should always leave itself in a consistent, 
operational state, and that the refresh
+   * atomically updates internal state from old to new.
+   * </p>
+   * 
+   * @param alreadyRefreshed
+   *          {@link org.apache.mahout.cf.taste.common.Refreshable}s that are 
known to have already been
+   *          refreshed as a result of an initial call to a 
{#refresh(Collection)} method on some
+   *          object. This ensure that objects in a refresh dependency graph 
aren't refreshed twice
+   *          needlessly.
+   */
+  void refresh(Collection<Refreshable> alreadyRefreshed);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
new file mode 100644
index 0000000..1792eff
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/common/TasteException.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+/**
+ * <p>
+ * An exception thrown when an error occurs inside the Taste engine.
+ * </p>
+ */
+public class TasteException extends Exception {
+  
+  public TasteException() { }
+  
+  public TasteException(String message) {
+    super(message);
+  }
+  
+  public TasteException(Throwable cause) {
+    super(cause);
+  }
+  
+  public TasteException(String message, Throwable cause) {
+    super(message, cause);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
new file mode 100644
index 0000000..4e39617
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/common/Weighting.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+/**
+ * <p>
+ * A simple enum which gives symbolic names to the ideas of "weighted" and 
"unweighted", to make various API
+ * calls which take a weighting parameter more readable.
+ * </p>
+ */
+public enum Weighting {
+  
+  WEIGHTED,
+  UNWEIGHTED
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
new file mode 100644
index 0000000..875c65e
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/eval/DataModelBuilder.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * <p>
+ * Implementations of this inner interface are simple helper classes which 
create a {@link DataModel} to be
+ * used while evaluating a {@link 
org.apache.mahout.cf.taste.recommender.Recommender}.
+ * 
+ * @see RecommenderBuilder
+ * @see RecommenderEvaluator
+ */
+public interface DataModelBuilder {
+  
+  /**
+   * <p>
+   * Builds a {@link DataModel} implementation to be used in an evaluation, 
given training data.
+   * </p>
+   * 
+   * @param trainingData
+   *          data to be used in the {@link DataModel}
+   * @return {@link DataModel} based upon the given data
+   */
+  DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
new file mode 100644
index 0000000..9c442ff
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+/**
+ * <p>
+ * Implementations encapsulate information retrieval-related statistics about a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s 
recommendations.
+ * </p>
+ * 
+ * <p>
+ * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval";>Information 
retrieval</a>.
+ * </p>
+ */
+public interface IRStatistics {
+  
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval#Precision";>Precision</a>.
+   * </p>
+   */
+  double getPrecision();
+  
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval#Recall";>Recall</a>.
+   * </p>
+   */
+  double getRecall();
+  
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval#Fall-Out";>Fall-Out</a>.
+   * </p>
+   */
+  double getFallOut();
+  
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure";>F-measure</a>.
+   * </p>
+   */
+  double getF1Measure();
+  
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure";>F-measure</a>.
+   * </p>
+   */
+  double getFNMeasure(double n);
+
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG";>
+   * Normalized Discounted Cumulative Gain</a>.
+   * </p>
+   */
+  double getNormalizedDiscountedCumulativeGain();
+  
+  /**
+   * @return the fraction of all users for whom recommendations could be 
produced
+   */
+  double getReach();
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
new file mode 100644
index 0000000..1805092
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+/**
+ * <p>
+ * Implementations of this inner interface are simple helper classes which 
create a {@link Recommender} to be
+ * evaluated based on the given {@link DataModel}.
+ * </p>
+ */
+public interface RecommenderBuilder {
+  
+  /**
+   * <p>
+   * Builds a {@link Recommender} implementation to be evaluated, using the 
given {@link DataModel}.
+   * </p>
+   * 
+   * @param dataModel
+   *          {@link DataModel} to build the {@link Recommender} on
+   * @return {@link Recommender} based upon the given {@link DataModel}
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  Recommender buildRecommender(DataModel dataModel) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
new file mode 100644
index 0000000..dcbbcf8
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>
+ * Implementations of this interface evaluate the quality of a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s 
recommendations.
+ * </p>
+ */
+public interface RecommenderEvaluator {
+  
+  /**
+   * <p>
+   * Evaluates the quality of a {@link 
org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+   * The range of values that may be returned depends on the implementation, 
but <em>lower</em> values must
+   * mean better recommendations, with 0 being the lowest / best possible 
evaluation, meaning a perfect match.
+   * This method does not accept a {@link 
org.apache.mahout.cf.taste.recommender.Recommender} directly, but
+   * rather a {@link RecommenderBuilder} which can build the
+   * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test on top 
of a given {@link DataModel}.
+   * </p>
+   *
+   * <p>
+   * Implementations will take a certain percentage of the preferences 
supplied by the given {@link DataModel}
+   * as "training data". This is typically most of the data, like 90%. This 
data is used to produce
+   * recommendations, and the rest of the data is compared against estimated 
preference values to see how much
+   * the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s 
predicted preferences match the user's
+   * real preferences. Specifically, for each user, this percentage of the 
user's ratings are used to produce
+   * recommendations, and for each user, the remaining preferences are 
compared against the user's real
+   * preferences.
+   * </p>
+   *
+   * <p>
+   * For large datasets, it may be desirable to only evaluate based on a small 
percentage of the data.
+   * {@code evaluationPercentage} controls how many of the {@link DataModel}'s 
users are used in
+   * evaluation.
+   * </p>
+   *
+   * <p>
+   * To be clear, {@code trainingPercentage} and {@code evaluationPercentage} 
are not related. They
+   * do not need to add up to 1.0, for example.
+   * </p>
+   *
+   * @param recommenderBuilder
+   *          object that can build a {@link 
org.apache.mahout.cf.taste.recommender.Recommender} to test
+   * @param dataModelBuilder
+   *          {@link DataModelBuilder} to use, or if null, a default {@link 
DataModel}
+   *          implementation will be used
+   * @param dataModel
+   *          dataset to test on
+   * @param trainingPercentage
+   *          percentage of each user's preferences to use to produce 
recommendations; the rest are compared
+   *          to estimated preference values to evaluate
+   *          {@link org.apache.mahout.cf.taste.recommender.Recommender} 
performance
+   * @param evaluationPercentage
+   *          percentage of users to use in evaluation
+   * @return a "score" representing how well the {@link 
org.apache.mahout.cf.taste.recommender.Recommender}'s
+   *         estimated preferences match real values; <em>lower</em> scores 
mean a better match and 0 is a
+   *         perfect match
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  double evaluate(RecommenderBuilder recommenderBuilder,
+                  DataModelBuilder dataModelBuilder,
+                  DataModel dataModel,
+                  double trainingPercentage,
+                  double evaluationPercentage) throws TasteException;
+
+  /**
+   * @deprecated see {@link DataModel#getMaxPreference()}
+   */
+  @Deprecated
+  float getMaxPreference();
+
+  @Deprecated
+  void setMaxPreference(float maxPreference);
+
+  /**
+   * @deprecated see {@link DataModel#getMinPreference()}
+   */
+  @Deprecated
+  float getMinPreference();
+
+  @Deprecated
+  void setMinPreference(float minPreference);
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
new file mode 100644
index 0000000..6e4e9c7
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.IDRescorer;
+
+/**
+ * <p>
+ * Implementations collect information retrieval-related statistics on a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s performance, 
including precision, recall and
+ * f-measure.
+ * </p>
+ * 
+ * <p>
+ * See <a 
href="http://en.wikipedia.org/wiki/Information_retrieval";>Information 
retrieval</a>.
+ */
+public interface RecommenderIRStatsEvaluator {
+  
+  /**
+   * @param recommenderBuilder
+   *          object that can build a {@link 
org.apache.mahout.cf.taste.recommender.Recommender} to test
+   * @param dataModelBuilder
+   *          {@link DataModelBuilder} to use, or if null, a default {@link 
DataModel} implementation will be
+   *          used
+   * @param dataModel
+   *          dataset to test on
+   * @param rescorer
+   *          if any, to use when computing recommendations
+   * @param at
+   *          as in, "precision at 5". The number of recommendations to 
consider when evaluating precision,
+   *          etc.
+   * @param relevanceThreshold
+   *          items whose preference value is at least this value are 
considered "relevant" for the purposes
+   *          of computations
+   * @return {@link IRStatistics} with resulting precision, recall, etc.
+   * @throws TasteException
+   *           if an error occurs while accessing the {@link DataModel}
+   */
+  IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
+                        DataModelBuilder dataModelBuilder,
+                        DataModel dataModel,
+                        IDRescorer rescorer,
+                        int at,
+                        double relevanceThreshold,
+                        double evaluationPercentage) throws TasteException;
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
new file mode 100644
index 0000000..da318d5
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/eval/RelevantItemsDataSplitter.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+/**
+ * Implementations of this interface determine the items that are considered 
relevant,
+ * and splits data into a training and test subset, for purposes of 
precision/recall
+ * tests as implemented by implementations of {@link 
RecommenderIRStatsEvaluator}.
+ */
+public interface RelevantItemsDataSplitter {
+
+  /**
+   * During testing, relevant items are removed from a particular users' 
preferences,
+   * and a model is build using this user's other preferences and all other 
users.
+   *
+   * @param at                 Maximum number of items to be removed
+   * @param relevanceThreshold Minimum strength of preference for an item to 
be considered
+   *                           relevant
+   * @return IDs of relevant items
+   */
+  FastIDSet getRelevantItemsIDs(long userID,
+                                int at,
+                                double relevanceThreshold,
+                                DataModel dataModel) throws TasteException;
+
+  /**
+   * Adds a single user and all their preferences to the training model.
+   *
+   * @param userID          ID of user whose preferences we are trying to 
predict
+   * @param relevantItemIDs IDs of items considered relevant to that user
+   * @param trainingUsers   the database of training preferences to which we 
will
+   *                        append the ones for otherUserID.
+   * @param otherUserID     for whom we are adding preferences to the training 
model
+   */
+  void processOtherUser(long userID,
+                        FastIDSet relevantItemIDs,
+                        FastByIDMap<PreferenceArray> trainingUsers,
+                        long otherUserID,
+                        DataModel dataModel) throws TasteException;
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
new file mode 100644
index 0000000..e70a675
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityEntityWritable.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import com.google.common.primitives.Longs;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.math.Varint;
+
+/** A {@link WritableComparable} encapsulating two items. */
+public final class EntityEntityWritable implements 
WritableComparable<EntityEntityWritable>, Cloneable {
+  
+  private long aID;
+  private long bID;
+  
+  public EntityEntityWritable() {
+  // do nothing
+  }
+  
+  public EntityEntityWritable(long aID, long bID) {
+    this.aID = aID;
+    this.bID = bID;
+  }
+  
+  long getAID() {
+    return aID;
+  }
+  
+  long getBID() {
+    return bID;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Varint.writeSignedVarLong(aID, out);
+    Varint.writeSignedVarLong(bID, out);
+  }
+  
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    aID = Varint.readSignedVarLong(in);
+    bID = Varint.readSignedVarLong(in);
+  }
+  
+  @Override
+  public int compareTo(EntityEntityWritable that) {
+    int aCompare = compare(aID, that.getAID());
+    return aCompare == 0 ? compare(bID, that.getBID()) : aCompare;
+  }
+  
+  private static int compare(long a, long b) {
+    return a < b ? -1 : a > b ? 1 : 0;
+  }
+  
+  @Override
+  public int hashCode() {
+    return Longs.hashCode(aID) + 31 * Longs.hashCode(bID);
+  }
+  
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof EntityEntityWritable) {
+      EntityEntityWritable that = (EntityEntityWritable) o;
+      return aID == that.getAID() && bID == that.getBID();
+    }
+    return false;
+  }
+  
+  @Override
+  public String toString() {
+    return aID + "\t" + bID;
+  }
+
+  @Override
+  public EntityEntityWritable clone() {
+    return new EntityEntityWritable(aID, bID);
+  }
+  
+}
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
new file mode 100644
index 0000000..2aab63c
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.VarLongWritable;
+
+/** A {@link org.apache.hadoop.io.Writable} encapsulating an item ID and a 
preference value. */
+public final class EntityPrefWritable extends VarLongWritable implements 
Cloneable {
+  
+  private float prefValue;
+  
+  public EntityPrefWritable() {
+    // do nothing
+  }
+  
+  public EntityPrefWritable(long itemID, float prefValue) {
+    super(itemID);
+    this.prefValue = prefValue;
+  }
+  
+  public EntityPrefWritable(EntityPrefWritable other) {
+    this(other.get(), other.getPrefValue());
+  }
+
+  public long getID() {
+    return get();
+  }
+
+  public float getPrefValue() {
+    return prefValue;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    out.writeFloat(prefValue);
+  }
+  
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    prefValue = in.readFloat();
+  }
+
+  @Override
+  public int hashCode() {
+    return super.hashCode() ^ RandomUtils.hashFloat(prefValue);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof EntityPrefWritable)) {
+      return false;
+    }
+    EntityPrefWritable other = (EntityPrefWritable) o;
+    return get() == other.get() && prefValue == other.getPrefValue();
+  }
+
+  @Override
+  public String toString() {
+    return get() + "\t" + prefValue;
+  }
+
+  @Override
+  public EntityPrefWritable clone() {
+    return new EntityPrefWritable(get(), prefValue);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
new file mode 100644
index 0000000..3de272d
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/MutableRecommendedItem.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.RandomUtils;
+
+/**
+ * Mutable variant of {@link RecommendedItem}
+ */
+public class MutableRecommendedItem implements RecommendedItem {
+
+  private long itemID;
+  private float value;
+
+  public MutableRecommendedItem() {}
+
+  public MutableRecommendedItem(long itemID, float value) {
+    this.itemID = itemID;
+    this.value = value;
+  }
+
+  @Override
+  public long getItemID() {
+    return itemID;
+  }
+
+  @Override
+  public float getValue() {
+    return value;
+  }
+
+  public void setItemID(long itemID) {
+    this.itemID = itemID;
+  }
+
+  public void set(long itemID, float value) {
+    this.itemID = itemID;
+    this.value = value;
+  }
+
+  public void capToMaxValue(float maxValue) {
+    if (value > maxValue) {
+      value = maxValue;
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "MutableRecommendedItem[item:" + itemID + ", value:" + value + ']';
+  }
+
+  @Override
+  public int hashCode() {
+    return (int) itemID ^ RandomUtils.hashFloat(value);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof MutableRecommendedItem)) {
+      return false;
+    }
+    RecommendedItem other = (RecommendedItem) o;
+    return itemID == other.getItemID() && value == other.getValue();
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
new file mode 100644
index 0000000..947204d
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/RecommendedItemsWritable.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import com.google.common.collect.Lists;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.math.Varint;
+
+/**
+ * A {@link Writable} which encapsulates a list of {@link RecommendedItem}s. 
This is the mapper (and reducer)
+ * output, and represents items recommended to a user. The first item is the 
one whose estimated preference is
+ * highest.
+ */
+public final class RecommendedItemsWritable implements Writable {
+
+  private List<RecommendedItem> recommended;
+  
+  public RecommendedItemsWritable() {
+  // do nothing
+  }
+  
+  public RecommendedItemsWritable(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+  
+  public List<RecommendedItem> getRecommendedItems() {
+    return recommended;
+  }
+
+  public void set(List<RecommendedItem> recommended) {
+    this.recommended = recommended;
+  }
+  
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(recommended.size());
+    for (RecommendedItem item : recommended) {
+      Varint.writeSignedVarLong(item.getItemID(), out);
+      out.writeFloat(item.getValue());
+    }
+  }
+  
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    int size = in.readInt();
+    recommended = Lists.newArrayListWithCapacity(size);
+    for (int i = 0; i < size; i++) {
+      long itemID = Varint.readSignedVarLong(in);
+      float value = in.readFloat();
+      RecommendedItem recommendedItem = new GenericRecommendedItem(itemID, 
value);
+      recommended.add(recommendedItem);
+    }
+  }
+  
+  @Override
+  public String toString() {
+    StringBuilder result = new StringBuilder(200);
+    result.append('[');
+    boolean first = true;
+    for (RecommendedItem item : recommended) {
+      if (first) {
+        first = false;
+      } else {
+        result.append(',');
+      }
+      result.append(String.valueOf(item.getItemID()));
+      result.append(':');
+      result.append(String.valueOf(item.getValue()));
+    }
+    result.append(']');
+    return result.toString();
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
new file mode 100644
index 0000000..e3fab29
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import com.google.common.primitives.Longs;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+
+import java.util.regex.Pattern;
+
+/**
+ * Some helper methods for the hadoop-related stuff in 
org.apache.mahout.cf.taste
+ */
+public final class TasteHadoopUtils {
+
+  public static final int USER_ID_POS = 0;
+  public static final int ITEM_ID_POS = 1;
+
+  /** Standard delimiter of textual preference data */
+  private static final Pattern PREFERENCE_TOKEN_DELIMITER = 
Pattern.compile("[\t,]");
+
+  private TasteHadoopUtils() {}
+
+  /**
+   * Splits a preference data line into string tokens
+   */
+  public static String[] splitPrefTokens(CharSequence line) {
+    return PREFERENCE_TOKEN_DELIMITER.split(line);
+  }
+
+  /**
+   * Maps a long to an int with range of 0 to Integer.MAX_VALUE-1
+   */
+  public static int idToIndex(long id) {
+    return 0x7FFFFFFF & Longs.hashCode(id) % 0x7FFFFFFE;
+  }
+
+  public static int readID(String token, boolean usesLongIDs) {
+    return usesLongIDs ? idToIndex(Long.parseLong(token)) : 
Integer.parseInt(token);
+  }
+
+  /**
+   * Reads a binary mapping file
+   */
+  public static OpenIntLongHashMap readIDIndexMap(String idIndexPathStr, 
Configuration conf) {
+    OpenIntLongHashMap indexIDMap = new OpenIntLongHashMap();
+    Path itemIDIndexPath = new Path(idIndexPathStr);
+    for (Pair<VarIntWritable,VarLongWritable> record
+         : new 
SequenceFileDirIterable<VarIntWritable,VarLongWritable>(itemIDIndexPath,
+                                                                       
PathType.LIST,
+                                                                       
PathFilters.partFilter(),
+                                                                       null,
+                                                                       true,
+                                                                       conf)) {
+      indexIDMap.put(record.getFirst().get(), record.getSecond().get());
+    }
+    return indexIDMap;
+  }
+
+
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
new file mode 100644
index 0000000..fdb552e
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;
+import org.apache.mahout.math.VarLongWritable;
+
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+public abstract class ToEntityPrefsMapper extends
+    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable> {
+
+  public static final String TRANSPOSE_USER_ITEM = ToEntityPrefsMapper.class + 
"transposeUserItem";
+  public static final String RATING_SHIFT = ToEntityPrefsMapper.class + 
"shiftRatings";
+
+  private static final Pattern DELIMITER = Pattern.compile("[\t,]");
+
+  private boolean booleanData;
+  private boolean transpose;
+  private final boolean itemKey;
+  private float ratingShift;
+
+  ToEntityPrefsMapper(boolean itemKey) {
+    this.itemKey = itemKey;
+  }
+
+  @Override
+  protected void setup(Context context) {
+    Configuration jobConf = context.getConfiguration();
+    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
+    transpose = jobConf.getBoolean(TRANSPOSE_USER_ITEM, false);
+    ratingShift = Float.parseFloat(jobConf.get(RATING_SHIFT, "0.0"));
+  }
+
+  @Override
+  public void map(LongWritable key,
+                  Text value,
+                  Context context) throws IOException, InterruptedException {
+    String[] tokens = DELIMITER.split(value.toString());
+    long userID = Long.parseLong(tokens[0]);
+    long itemID = Long.parseLong(tokens[1]);
+    if (itemKey ^ transpose) {
+      // If using items as keys, and not transposing items and users, then 
users are items!
+      // Or if not using items as keys (users are, as usual), but transposing 
items and users,
+      // then users are items! Confused?
+      long temp = userID;
+      userID = itemID;
+      itemID = temp;
+    }
+    if (booleanData) {
+      context.write(new VarLongWritable(userID), new VarLongWritable(itemID));
+    } else {
+      float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) + 
ratingShift : 1.0f;
+      context.write(new VarLongWritable(userID), new 
EntityPrefWritable(itemID, prefValue));
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
new file mode 100644
index 0000000..f5f9574
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+/**
+ * <h1>Input</h1>
+ *
+ * <p>
+ * Intended for use with {@link 
org.apache.hadoop.mapreduce.lib.input.TextInputFormat};
+ * accepts line number / line pairs as
+ * {@link org.apache.hadoop.io.LongWritable}/{@link org.apache.hadoop.io.Text} 
pairs.
+ * </p>
+ *
+ * <p>
+ * Each line is assumed to be of the form {@code userID,itemID,preference}, or 
{@code userID,itemID}.
+ * </p>
+ *
+ * <h1>Output</h1>
+ *
+ * <p>
+ * Outputs the user ID as a {@link org.apache.mahout.math.VarLongWritable} 
mapped to the item ID and preference as a
+ * {@link EntityPrefWritable}.
+ * </p>
+ */
+public final class ToItemPrefsMapper extends ToEntityPrefsMapper {
+
+  public ToItemPrefsMapper() {
+    super(false);
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
new file mode 100644
index 0000000..0f9ea75
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/TopItemsQueue.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop;
+
+import com.google.common.collect.Lists;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.util.Collections;
+import java.util.List;
+
+public class TopItemsQueue extends PriorityQueue<MutableRecommendedItem> {
+
+  private static final long SENTINEL_ID = Long.MIN_VALUE;
+
+  private final int maxSize;
+
+  public TopItemsQueue(int maxSize) {
+    super(maxSize);
+    this.maxSize = maxSize;
+  }
+
+  public List<RecommendedItem> getTopItems() {
+    List<RecommendedItem> recommendedItems = 
Lists.newArrayListWithCapacity(maxSize);
+    while (size() > 0) {
+      MutableRecommendedItem topItem = pop();
+      // filter out "sentinel" objects necessary for maintaining an efficient 
priority queue
+      if (topItem.getItemID() != SENTINEL_ID) {
+        recommendedItems.add(topItem);
+      }
+    }
+    Collections.reverse(recommendedItems);
+    return recommendedItems;
+  }
+
+  @Override
+  protected boolean lessThan(MutableRecommendedItem one, 
MutableRecommendedItem two) {
+    return one.getValue() < two.getValue();
+  }
+
+  @Override
+  protected MutableRecommendedItem getSentinelObject() {
+    return new MutableRecommendedItem(SENTINEL_ID, Float.MIN_VALUE);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
new file mode 100644
index 0000000..c5ccf38
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import 
org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.als.AlternatingLeastSquaresSolver;
+import org.apache.mahout.math.map.OpenIntObjectHashMap;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+final class ALS {
+
+  private ALS() {}
+
+  static Vector readFirstRow(Path dir, Configuration conf) throws IOException {
+    Iterator<VectorWritable> iterator = new 
SequenceFileDirValueIterator<>(dir, PathType.LIST,
+        PathFilters.partFilter(), null, true, conf);
+    return iterator.hasNext() ? iterator.next().get() : null;
+  }
+
+  public static OpenIntObjectHashMap<Vector> 
readMatrixByRowsFromDistributedCache(int numEntities,
+      Configuration conf) throws IOException {
+
+    IntWritable rowIndex = new IntWritable();
+    VectorWritable row = new VectorWritable();
+
+
+    OpenIntObjectHashMap<Vector> featureMatrix = numEntities > 0
+        ? new OpenIntObjectHashMap<Vector>(numEntities) : new 
OpenIntObjectHashMap<Vector>();
+
+    Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
+    LocalFileSystem localFs = FileSystem.getLocal(conf);
+
+    for (Path cachedFile : cachedFiles) {
+
+      SequenceFile.Reader reader = null;
+      try {
+        reader = new SequenceFile.Reader(localFs, cachedFile, conf);
+        while (reader.next(rowIndex, row)) {
+          featureMatrix.put(rowIndex.get(), row.get());
+        }
+      } finally {
+        Closeables.close(reader, true);
+      }
+    }
+
+    Preconditions.checkState(!featureMatrix.isEmpty(), "Feature matrix is 
empty");
+    return featureMatrix;
+  }
+
+  public static OpenIntObjectHashMap<Vector> readMatrixByRows(Path dir, 
Configuration conf) {
+    OpenIntObjectHashMap<Vector> matrix = new OpenIntObjectHashMap<>();
+    for (Pair<IntWritable,VectorWritable> pair
+        : new SequenceFileDirIterable<IntWritable,VectorWritable>(dir, 
PathType.LIST, PathFilters.partFilter(), conf)) {
+      int rowIndex = pair.getFirst().get();
+      Vector row = pair.getSecond().get();
+      matrix.put(rowIndex, row);
+    }
+    return matrix;
+  }
+
+  public static Vector solveExplicit(VectorWritable ratingsWritable, 
OpenIntObjectHashMap<Vector> uOrM,
+    double lambda, int numFeatures) {
+    Vector ratings = ratingsWritable.get();
+
+    List<Vector> featureVectors = 
Lists.newArrayListWithCapacity(ratings.getNumNondefaultElements());
+    for (Vector.Element e : ratings.nonZeroes()) {
+      int index = e.index();
+      featureVectors.add(uOrM.get(index));
+    }
+
+    return AlternatingLeastSquaresSolver.solve(featureVectors, ratings, 
lambda, numFeatures);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
new file mode 100644
index 0000000..b061a63
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
@@ -0,0 +1,158 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.common.RandomUtils;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+/**
+ * <p>Split a recommendation dataset into a training and a test set</p>
+ *
+  * <p>Command line arguments specific to this class are:</p>
+ *
+ * <ol>
+ * <li>--input (path): Directory containing one or more text files with the 
dataset</li>
+ * <li>--output (path): path where output should go</li>
+ * <li>--trainingPercentage (double): percentage of the data to use as 
training set (optional, default 0.9)</li>
+ * <li>--probePercentage (double): percentage of the data to use as probe set 
(optional, default 0.1)</li>
+ * </ol>
+ */
+public class DatasetSplitter extends AbstractJob {
+
+  private static final String TRAINING_PERCENTAGE = 
DatasetSplitter.class.getName() + ".trainingPercentage";
+  private static final String PROBE_PERCENTAGE = 
DatasetSplitter.class.getName() + ".probePercentage";
+  private static final String PART_TO_USE = DatasetSplitter.class.getName() + 
".partToUse";
+
+  private static final Text INTO_TRAINING_SET = new Text("T");
+  private static final Text INTO_PROBE_SET = new Text("P");
+
+  private static final double DEFAULT_TRAINING_PERCENTAGE = 0.9;
+  private static final double DEFAULT_PROBE_PERCENTAGE = 0.1;
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new DatasetSplitter(), args);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+    addInputOption();
+    addOutputOption();
+    addOption("trainingPercentage", "t", "percentage of the data to use as 
training set (default: " 
+        + DEFAULT_TRAINING_PERCENTAGE + ')', 
String.valueOf(DEFAULT_TRAINING_PERCENTAGE));
+    addOption("probePercentage", "p", "percentage of the data to use as probe 
set (default: " 
+        + DEFAULT_PROBE_PERCENTAGE + ')', 
String.valueOf(DEFAULT_PROBE_PERCENTAGE));
+
+    Map<String,List<String>> parsedArgs = parseArguments(args);
+    if (parsedArgs == null) {
+      return -1;
+    }
+
+    double trainingPercentage = 
Double.parseDouble(getOption("trainingPercentage"));
+    double probePercentage = Double.parseDouble(getOption("probePercentage"));
+    String tempDir = getOption("tempDir");
+
+    Path markedPrefs = new Path(tempDir, "markedPreferences");
+    Path trainingSetPath = new Path(getOutputPath(), "trainingSet");
+    Path probeSetPath = new Path(getOutputPath(), "probeSet");
+
+    Job markPreferences = prepareJob(getInputPath(), markedPrefs, 
TextInputFormat.class, MarkPreferencesMapper.class,
+        Text.class, Text.class, SequenceFileOutputFormat.class);
+    markPreferences.getConfiguration().set(TRAINING_PERCENTAGE, 
String.valueOf(trainingPercentage));
+    markPreferences.getConfiguration().set(PROBE_PERCENTAGE, 
String.valueOf(probePercentage));
+    boolean succeeded = markPreferences.waitForCompletion(true);
+    if (!succeeded) {
+      return -1;
+    }
+
+    Job createTrainingSet = prepareJob(markedPrefs, trainingSetPath, 
SequenceFileInputFormat.class,
+        WritePrefsMapper.class, NullWritable.class, Text.class, 
TextOutputFormat.class);
+    createTrainingSet.getConfiguration().set(PART_TO_USE, 
INTO_TRAINING_SET.toString());
+    succeeded = createTrainingSet.waitForCompletion(true);
+    if (!succeeded) {
+      return -1;
+    }
+
+    Job createProbeSet = prepareJob(markedPrefs, probeSetPath, 
SequenceFileInputFormat.class,
+        WritePrefsMapper.class, NullWritable.class, Text.class, 
TextOutputFormat.class);
+    createProbeSet.getConfiguration().set(PART_TO_USE, 
INTO_PROBE_SET.toString());
+    succeeded = createProbeSet.waitForCompletion(true);
+    if (!succeeded) {
+      return -1;
+    }
+
+    return 0;
+  }
+
+  static class MarkPreferencesMapper extends 
Mapper<LongWritable,Text,Text,Text> {
+
+    private Random random;
+    private double trainingBound;
+    private double probeBound;
+
+    @Override
+    protected void setup(Context ctx) throws IOException, InterruptedException 
{
+      random = RandomUtils.getRandom();
+      trainingBound = 
Double.parseDouble(ctx.getConfiguration().get(TRAINING_PERCENTAGE));
+      probeBound = trainingBound + 
Double.parseDouble(ctx.getConfiguration().get(PROBE_PERCENTAGE));
+    }
+
+    @Override
+    protected void map(LongWritable key, Text text, Context ctx) throws 
IOException, InterruptedException {
+      double randomValue = random.nextDouble();
+      if (randomValue <= trainingBound) {
+        ctx.write(INTO_TRAINING_SET, text);
+      } else if (randomValue <= probeBound) {
+        ctx.write(INTO_PROBE_SET, text);
+      }
+    }
+  }
+
+  static class WritePrefsMapper extends Mapper<Text,Text,NullWritable,Text> {
+
+    private String partToUse;
+
+    @Override
+    protected void setup(Context ctx) throws IOException, InterruptedException 
{
+      partToUse = ctx.getConfiguration().get(PART_TO_USE);
+    }
+
+    @Override
+    protected void map(Text key, Text text, Context ctx) throws IOException, 
InterruptedException {
+      if (partToUse.equals(key.toString())) {
+        ctx.write(NullWritable.get(), text);
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
new file mode 100644
index 0000000..3048b77
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.util.List;
+import java.util.Map;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Closeables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.common.iterator.sequencefile.PathType;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.map.OpenIntObjectHashMap;
+
+/**
+ * <p>Measures the root-mean-squared error of a rating matrix factorization 
against a test set.</p>
+ *
+ * <p>Command line arguments specific to this class are:</p>
+ *
+ * <ol>
+ * <li>--output (path): path where output should go</li>
+ * <li>--pairs (path): path containing the test ratings, each line must be 
userID,itemID,rating</li>
+ * <li>--userFeatures (path): path to the user feature matrix</li>
+ * <li>--itemFeatures (path): path to the item feature matrix</li>
+ * </ol>
+ */
+public class FactorizationEvaluator extends AbstractJob {
+
+  private static final String USER_FEATURES_PATH = 
RecommenderJob.class.getName() + ".userFeatures";
+  private static final String ITEM_FEATURES_PATH = 
RecommenderJob.class.getName() + ".itemFeatures";
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new FactorizationEvaluator(), args);
+  }
+
+  @Override
+  public int run(String[] args) throws Exception {
+
+    addInputOption();
+    addOption("userFeatures", null, "path to the user feature matrix", true);
+    addOption("itemFeatures", null, "path to the item feature matrix", true);
+    addOption("usesLongIDs", null, "input contains long IDs that need to be 
translated");
+    addOutputOption();
+
+    Map<String,List<String>> parsedArgs = parseArguments(args);
+    if (parsedArgs == null) {
+      return -1;
+    }
+
+    Path errors = getTempPath("errors");
+
+    Job predictRatings = prepareJob(getInputPath(), errors, 
TextInputFormat.class, PredictRatingsMapper.class,
+        DoubleWritable.class, NullWritable.class, 
SequenceFileOutputFormat.class);
+
+    Configuration conf = predictRatings.getConfiguration();
+    conf.set(USER_FEATURES_PATH, getOption("userFeatures"));
+    conf.set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
+
+    boolean usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs"));
+    if (usesLongIDs) {
+      conf.set(ParallelALSFactorizationJob.USES_LONG_IDS, 
String.valueOf(true));
+    }
+
+
+    boolean succeeded = predictRatings.waitForCompletion(true);
+    if (!succeeded) {
+      return -1;
+    }
+
+    BufferedWriter writer  = null;
+    try {
+      FileSystem fs = FileSystem.get(getOutputPath().toUri(), getConf());
+      FSDataOutputStream outputStream = fs.create(getOutputPath("rmse.txt"));
+      double rmse = computeRmse(errors);
+      writer = new BufferedWriter(new OutputStreamWriter(outputStream, 
Charsets.UTF_8));
+      writer.write(String.valueOf(rmse));
+    } finally {
+      Closeables.close(writer, false);
+    }
+
+    return 0;
+  }
+
+  double computeRmse(Path errors) {
+    RunningAverage average = new FullRunningAverage();
+    for (Pair<DoubleWritable,NullWritable> entry
+        : new SequenceFileDirIterable<DoubleWritable, NullWritable>(errors, 
PathType.LIST, PathFilters.logsCRCFilter(),
+          getConf())) {
+      DoubleWritable error = entry.getFirst();
+      average.addDatum(error.get() * error.get());
+    }
+
+    return Math.sqrt(average.getAverage());
+  }
+
+  public static class PredictRatingsMapper extends 
Mapper<LongWritable,Text,DoubleWritable,NullWritable> {
+
+    private OpenIntObjectHashMap<Vector> U;
+    private OpenIntObjectHashMap<Vector> M;
+
+    private boolean usesLongIDs;
+
+    private final DoubleWritable error = new DoubleWritable();
+
+    @Override
+    protected void setup(Context ctx) throws IOException, InterruptedException 
{
+      Configuration conf = ctx.getConfiguration();
+
+      Path pathToU = new Path(conf.get(USER_FEATURES_PATH));
+      Path pathToM = new Path(conf.get(ITEM_FEATURES_PATH));
+
+      U = ALS.readMatrixByRows(pathToU, conf);
+      M = ALS.readMatrixByRows(pathToM, conf);
+
+      usesLongIDs = conf.getBoolean(ParallelALSFactorizationJob.USES_LONG_IDS, 
false);
+    }
+
+    @Override
+    protected void map(LongWritable key, Text value, Context ctx) throws 
IOException, InterruptedException {
+
+      String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
+
+      int userID = 
TasteHadoopUtils.readID(tokens[TasteHadoopUtils.USER_ID_POS], usesLongIDs);
+      int itemID = 
TasteHadoopUtils.readID(tokens[TasteHadoopUtils.ITEM_ID_POS], usesLongIDs);
+      double rating = Double.parseDouble(tokens[2]);
+
+      if (U.containsKey(userID) && M.containsKey(itemID)) {
+        double estimate = U.get(userID).dot(M.get(itemID));
+        error.set(rating - estimate);
+        ctx.write(error, NullWritable.get());
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MultithreadedSharingMapper.java
----------------------------------------------------------------------
diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MultithreadedSharingMapper.java
 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MultithreadedSharingMapper.java
new file mode 100644
index 0000000..d93e3a4
--- /dev/null
+++ 
b/mr/src/main/java/org/apache/mahout/cf/taste/hadoop/als/MultithreadedSharingMapper.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.als;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import java.io.IOException;
+
+/**
+ * Multithreaded Mapper for {@link SharingMapper}s. Will call 
setupSharedInstance() once in the controlling thread
+ * before executing the mappers using a thread pool.
+ *
+ * @param <K1>
+ * @param <V1>
+ * @param <K2>
+ * @param <V2>
+ */
+public class MultithreadedSharingMapper<K1, V1, K2, V2> extends 
MultithreadedMapper<K1, V1, K2, V2> {
+
+  @Override
+  public void run(Context ctx) throws IOException, InterruptedException {
+    Class<Mapper<K1, V1, K2, V2>> mapperClass =
+        MultithreadedSharingMapper.getMapperClass((JobContext) ctx);
+    Preconditions.checkNotNull(mapperClass, "Could not find Multithreaded 
Mapper class.");
+
+    Configuration conf = ctx.getConfiguration();
+    // instantiate the mapper
+    Mapper<K1, V1, K2, V2> mapper1 = ReflectionUtils.newInstance(mapperClass, 
conf);
+    SharingMapper<K1, V1, K2, V2, ?> mapper = null;
+    if (mapper1 instanceof SharingMapper) {
+      mapper = (SharingMapper<K1, V1, K2, V2, ?>) mapper1;
+    }
+    Preconditions.checkNotNull(mapper, "Could not instantiate SharingMapper. 
Class was: %s",
+                               mapper1.getClass().getName());
+
+    // single threaded call to setup the sharing mapper
+    mapper.setupSharedInstance(ctx);
+
+    // multithreaded execution
+    super.run(ctx);
+  }
+}

Reply via email to