Author: ssc
Date: Sun Apr 17 10:09:42 2011
New Revision: 1094125
URL: http://svn.apache.org/viewvc?rev=1094125&view=rev
Log:
MAHOUT-667 Persistent storage of factorizations in SVDRecommender
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java?rev=1094125&r1=1094124&r2=1094125&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
Sun Apr 17 10:09:42 2011
@@ -17,6 +17,10 @@
package org.apache.mahout.cf.taste.impl.recommender.svd;
+import java.util.Arrays;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -38,8 +42,8 @@ public class Factorization {
public Factorization(FastByIDMap<Integer> userIDMapping,
FastByIDMap<Integer> itemIDMapping, double[][] userFeatures,
double[][] itemFeatures) {
- this.userIDMapping = userIDMapping;
- this.itemIDMapping = itemIDMapping;
+ this.userIDMapping = Preconditions.checkNotNull(userIDMapping);
+ this.itemIDMapping = Preconditions.checkNotNull(itemIDMapping);
this.userFeatures = userFeatures;
this.itemFeatures = itemFeatures;
}
@@ -60,4 +64,41 @@ public class Factorization {
return itemFeatures[index];
}
+ /**
+ * Exposes the entries of the user ID mapping for iteration.
+ *
+ * @return the entry set of {@code userIDMapping}; each entry pairs a user ID (key) with the
+ * Integer mapped to it (FilePersistenceStrategy uses that value as the user's row index
+ * in the user feature matrix)
+ */
+ public Iterable<Map.Entry<Long,Integer>> getUserIDMappings() {
+ return userIDMapping.entrySet();
+ }
+
+ /**
+ * Exposes the entries of the item ID mapping for iteration.
+ *
+ * @return the entry set of {@code itemIDMapping}; each entry pairs an item ID (key) with the
+ * Integer mapped to it (FilePersistenceStrategy uses that value as the item's row index
+ * in the item feature matrix)
+ */
+ public Iterable<Map.Entry<Long,Integer>> getItemIDMappings() {
+ return itemIDMapping.entrySet();
+ }
+
+  /**
+   * Returns the number of features stored per user and per item.
+   *
+   * The value is the length of the first available feature row: the first user row if there is
+   * one, otherwise the first item row. A factorization without any rows reports 0 rather than
+   * failing with an ArrayIndexOutOfBoundsException.
+   *
+   * @return the length of a feature vector, or 0 if there are neither user nor item rows
+   */
+  public int numFeatures() {
+    if (userFeatures.length > 0) {
+      return userFeatures[0].length;
+    }
+    // no users: fall back to the item matrix so item vectors are still described correctly
+    return itemFeatures.length > 0 ? itemFeatures[0].length : 0;
+  }
+
+ /**
+ * @return the number of entries in the user ID mapping
+ */
+ public int numUsers() {
+ return userIDMapping.size();
+ }
+
+ /**
+ * @return the number of entries in the item ID mapping
+ */
+ public int numItems() {
+ return itemIDMapping.size();
+ }
+
+ /**
+ * Compares this factorization with another object.
+ *
+ * @param o the object to compare with
+ * @return true only if {@code o} is a Factorization whose user and item ID mappings are equal to
+ * this one's and whose user and item feature matrices are equal according to
+ * {@link Arrays#deepEquals(Object[], Object[])}
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (o instanceof Factorization) {
+ Factorization other = (Factorization) o;
+ return userIDMapping.equals(other.userIDMapping) &&
itemIDMapping.equals(other.itemIDMapping) &&
+ Arrays.deepEquals(userFeatures, other.userFeatures) &&
Arrays.deepEquals(itemFeatures, other.itemFeatures);
+ }
+ // anything that is not a Factorization (including null) is never equal
+ return false;
+ }
+
+ /**
+ * Computes a hash code from the same four fields that {@link #equals(Object)} compares: both ID
+ * mappings and the deep hash codes of both feature matrices, combined with the multiplier 31.
+ *
+ * @return the combined hash code
+ */
+ @Override
+ public int hashCode() {
+ int hashCode = 31 * userIDMapping.hashCode() + itemIDMapping.hashCode();
+ hashCode = 31 * hashCode + Arrays.deepHashCode(userFeatures);
+ hashCode = 31 * hashCode + Arrays.deepHashCode(itemFeatures);
+ return hashCode;
+ }
}
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
Sun Apr 17 10:09:42 2011
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Provides a file-based persistent store.
+ *
+ * <p>The binary layout written by {@link #writeBinary} and read by {@link #readBinary} is:
+ * numFeatures (int), numUsers (int), numItems (int), then for every user its index (int), its
+ * ID (long) and numFeatures doubles, followed by the same triple for every item.</p>
+ */
+public class FilePersistenceStrategy implements PersistenceStrategy {
+
+  private final File file;
+
+  private static final Logger log = LoggerFactory.getLogger(FilePersistenceStrategy.class);
+
+  /**
+   * @param file the file to use for storage. If the file does not exist it will be created when required.
+   */
+  public FilePersistenceStrategy(File file) {
+    this.file = Preconditions.checkNotNull(file);
+  }
+
+  /**
+   * Reads the factorization stored in the file.
+   *
+   * @return the stored factorization, or null if the file does not exist
+   * @throws IOException if the file cannot be read or its content is not a valid factorization
+   */
+  @Override
+  public Factorization load() throws IOException {
+    if (!file.exists()) {
+      log.info("{} does not yet exist, no factorization found", file.getAbsolutePath());
+      return null;
+    }
+    DataInputStream in = null;
+    try {
+      log.info("Reading factorization from {}...", file.getAbsolutePath());
+      in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
+      return readBinary(in);
+    } finally {
+      // nothing is lost if closing an input stream fails, so this may stay quiet
+      Closeables.closeQuietly(in);
+    }
+  }
+
+  /**
+   * Writes the factorization to the file, replacing any previous content. This implementation
+   * always writes; it does not compare against what is already stored.
+   *
+   * @param factorization the factorization to store
+   * @throws IOException if writing, flushing or closing the file fails
+   */
+  @Override
+  public void maybePersist(Factorization factorization) throws IOException {
+    DataOutputStream out = null;
+    boolean threw = true;
+    try {
+      log.info("Writing factorization to {}...", file.getAbsolutePath());
+      out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
+      writeBinary(factorization, out);
+      threw = false;
+    } finally {
+      // close() flushes the buffer; a failure there means the file is incomplete and must be
+      // reported. It is only swallowed when another exception is already propagating.
+      Closeables.close(out, threw);
+    }
+  }
+
+  /**
+   * Serializes a factorization in the binary layout described on this class.
+   *
+   * @param factorization the factorization to write
+   * @param out the sink to write to
+   * @throws IOException if writing fails or a mapped ID has no feature vector
+   */
+  protected void writeBinary(Factorization factorization, DataOutput out) throws IOException {
+    int numFeatures = factorization.numFeatures();
+    out.writeInt(numFeatures);
+    out.writeInt(factorization.numUsers());
+    out.writeInt(factorization.numItems());
+
+    for (Map.Entry<Long,Integer> mappingEntry : factorization.getUserIDMappings()) {
+      long userID = mappingEntry.getKey();
+      out.writeInt(mappingEntry.getValue());
+      out.writeLong(userID);
+      try {
+        double[] userFeatures = factorization.getUserFeatures(userID);
+        for (int feature = 0; feature < numFeatures; feature++) {
+          out.writeDouble(userFeatures[feature]);
+        }
+      } catch (NoSuchUserException e) {
+        throw new IOException("Unable to persist factorization", e);
+      }
+    }
+
+    for (Map.Entry<Long,Integer> entry : factorization.getItemIDMappings()) {
+      long itemID = entry.getKey();
+      out.writeInt(entry.getValue());
+      out.writeLong(itemID);
+      try {
+        double[] itemFeatures = factorization.getItemFeatures(itemID);
+        for (int feature = 0; feature < numFeatures; feature++) {
+          out.writeDouble(itemFeatures[feature]);
+        }
+      } catch (NoSuchItemException e) {
+        throw new IOException("Unable to persist factorization", e);
+      }
+    }
+  }
+
+  /**
+   * Deserializes a factorization from the binary layout described on this class.
+   *
+   * @param in the source to read from
+   * @return the factorization that was read
+   * @throws IOException if reading fails, or if the header or a stored index is out of range
+   */
+  public Factorization readBinary(DataInput in) throws IOException {
+    int numFeatures = in.readInt();
+    int numUsers = in.readInt();
+    int numItems = in.readInt();
+    // a negative count would otherwise surface as NegativeArraySizeException below
+    if (numFeatures < 0 || numUsers < 0 || numItems < 0) {
+      throw new IOException("Corrupt factorization header: numFeatures=" + numFeatures
+          + ", numUsers=" + numUsers + ", numItems=" + numItems);
+    }
+
+    FastByIDMap<Integer> userIDMapping = new FastByIDMap<Integer>(numUsers);
+    double[][] userFeatures = new double[numUsers][numFeatures];
+
+    for (int n = 0; n < numUsers; n++) {
+      int userIndex = checkIndex(in.readInt(), numUsers, "user");
+      long userID = in.readLong();
+      userIDMapping.put(userID, userIndex);
+      for (int feature = 0; feature < numFeatures; feature++) {
+        userFeatures[userIndex][feature] = in.readDouble();
+      }
+    }
+
+    FastByIDMap<Integer> itemIDMapping = new FastByIDMap<Integer>(numItems);
+    double[][] itemFeatures = new double[numItems][numFeatures];
+
+    for (int n = 0; n < numItems; n++) {
+      int itemIndex = checkIndex(in.readInt(), numItems, "item");
+      long itemID = in.readLong();
+      itemIDMapping.put(itemID, itemIndex);
+      for (int feature = 0; feature < numFeatures; feature++) {
+        itemFeatures[itemIndex][feature] = in.readDouble();
+      }
+    }
+
+    return new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);
+  }
+
+  /** Rejects a stored row index that does not fit the matrix announced in the header. */
+  private static int checkIndex(int index, int bound, String kind) throws IOException {
+    if (index < 0 || index >= bound) {
+      throw new IOException("Corrupt factorization: " + kind + " index " + index
+          + " is outside [0," + bound + ')');
+    }
+    return index;
+  }
+
+}
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
Sun Apr 17 10:09:42 2011
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import java.io.IOException;
+
+/**
+ * A {@link PersistenceStrategy} which does nothing.
+ */
+public class NoPersistenceStrategy implements PersistenceStrategy {
+
+ /**
+ * @return always null, i.e. there is never a stored factorization
+ */
+ @Override
+ public Factorization load() throws IOException {
+ return null;
+ }
+
+ /**
+ * Ignores the given factorization; nothing is stored.
+ *
+ * @param factorization not used
+ */
+ @Override
+ public void maybePersist(Factorization factorization) throws IOException {
+ // do nothing.
+ }
+
+}
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
Sun Apr 17 10:09:42 2011
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import java.io.IOException;
+
+/**
+ * Provides storage for {@link Factorization}s.
+ */
+public interface PersistenceStrategy {
+
+ /**
+ * Load a factorization from a persistent store.
+ *
+ * @return a Factorization or null if the persistent store is empty.
+ *
+ * @throws IOException if the persistent store cannot be read
+ */
+ Factorization load() throws IOException;
+
+ /**
+ * Write a factorization to a persistent store unless it already
+ * contains an identical factorization.
+ *
+ * @param factorization the factorization to store
+ *
+ * @throws IOException if the persistent store cannot be written
+ */
+ void maybePersist(Factorization factorization) throws IOException;
+
+}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=1094125&r1=1094124&r2=1094125&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
Sun Apr 17 10:09:42 2011
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.impl.recommender.svd;
+import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Callable;
@@ -44,19 +45,65 @@ public final class SVDRecommender extend
private Factorization factorization;
private final Factorizer factorizer;
+ private final PersistenceStrategy persistenceStrategy;
private final RefreshHelper refreshHelper;
private static final Logger log =
LoggerFactory.getLogger(SVDRecommender.class);
public SVDRecommender(DataModel dataModel, Factorizer factorizer) throws
TasteException {
- this(dataModel, factorizer, getDefaultCandidateItemsStrategy());
+ this(dataModel, factorizer, getDefaultCandidateItemsStrategy(),
getDefaultPersistenceStrategy());
}
public SVDRecommender(DataModel dataModel, Factorizer factorizer,
CandidateItemsStrategy candidateItemsStrategy)
throws TasteException {
+ this(dataModel, factorizer, candidateItemsStrategy,
getDefaultPersistenceStrategy());
+ }
+
+ /**
+ * Create an SVDRecommender using a persistent store to cache factorizations and the default
+ * candidate items strategy. A factorization is loaded from the store if present, otherwise a
+ * new factorization is computed and saved in the store.
+ *
+ * The {@link #refresh(java.util.Collection) refresh} method recomputes the factorization and
+ * overwrites the store.
+ *
+ * @param dataModel the data model, passed on to the four-argument constructor
+ * @param factorizer the factorizer, passed on to the four-argument constructor
+ * @param persistenceStrategy the store to load the factorization from and save it to
+ * @throws TasteException declared by the four-argument constructor this delegates to, which
+ * wraps an IOException from loading or persisting in a TasteException; no IOException is
+ * thrown from here
+ */
+ public SVDRecommender(DataModel dataModel, Factorizer factorizer,
PersistenceStrategy persistenceStrategy)
+ throws TasteException {
+ this(dataModel, factorizer, getDefaultCandidateItemsStrategy(),
persistenceStrategy);
+ }
+
+ /**
+ * Create an SVDRecommender using a persistent store to cache
factorizations. A factorization is loaded from the
+ * store if present, otherwise a new factorization is computed and saved in
the store.
+ *
+ * The {@link #refresh(java.util.Collection) refresh} method recomputes the
factorization and overwrites the store.
+ *
+ * @param dataModel
+ * @param factorizer
+ * @param candidateItemsStrategy
+ * @param persistenceStrategy
+ *
+ * @throws TasteException
+ */
+ public SVDRecommender(DataModel dataModel, Factorizer factorizer,
CandidateItemsStrategy candidateItemsStrategy,
+ PersistenceStrategy persistenceStrategy) throws TasteException {
super(dataModel, candidateItemsStrategy);
- this.factorizer = factorizer;
- train();
+ this.factorizer = Preconditions.checkNotNull(factorizer);
+ this.persistenceStrategy = Preconditions.checkNotNull(persistenceStrategy);
+ try {
+ factorization = persistenceStrategy.load();
+ } catch (IOException e) {
+ throw new TasteException("Error loading factorization", e);
+ }
+
+ if (factorization == null) {
+ train();
+ }
+
refreshHelper = new RefreshHelper(new Callable<Object>() {
@Override
public Object call() throws TasteException {
@@ -68,8 +115,17 @@ public final class SVDRecommender extend
refreshHelper.addDependency(factorizer);
}
+ /**
+ * @return the strategy used by the constructors that take no PersistenceStrategy: a new
+ * {@link NoPersistenceStrategy}
+ */
+ protected static PersistenceStrategy getDefaultPersistenceStrategy() {
+ return new NoPersistenceStrategy();
+ }
+
+ /**
+ * Computes a new factorization with the factorizer and hands it to the persistence strategy.
+ *
+ * @throws TasteException if persisting fails with an IOException (kept as the cause)
+ */
private void train() throws TasteException {
factorization = factorizer.factorize();
+ try {
+ persistenceStrategy.maybePersist(factorization);
+ } catch (IOException e) {
+ // callers only deal with TasteException, so the I/O failure is wrapped
+ throw new TasteException("Error persisting factorization", e);
+ }
}
@Override
@@ -122,5 +178,5 @@ public final class SVDRecommender extend
public void refresh(Collection<Refreshable> alreadyRefreshed) {
refreshHelper.refresh(alreadyRefreshed);
}
-
+
}
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java?rev=1094125&view=auto
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
(added)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
Sun Apr 17 10:09:42 2011
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.junit.Test;
+
+import java.io.File;
+
+/**
+ * Round-trip test for {@link FilePersistenceStrategy}: a small factorization is written to a
+ * file and read back.
+ */
+public class FilePersistenceStrategyTest extends TasteTestCase {
+
+ /** Persists a 2-user / 2-item / 3-feature factorization and expects to load an equal one. */
+ @Test
+ public void persistAndLoad() throws Exception {
+ FastByIDMap<Integer> userIDMapping = new FastByIDMap<Integer>();
+ FastByIDMap<Integer> itemIDMapping = new FastByIDMap<Integer>();
+
+ // each ID maps to the row index of its feature vector in the arrays below
+ userIDMapping.put(123, 0);
+ userIDMapping.put(456, 1);
+
+ itemIDMapping.put(12, 0);
+ itemIDMapping.put(34, 1);
+
+ double[][] userFeatures = { { 0.1, 0.2, 0.3 }, { 0.4, 0.5, 0.6 } };
+ double[][] itemFeatures = { { 0.7, 0.8, 0.9 }, { 1.0, 1.1, 1.2 } };
+
+ Factorization original = new Factorization(userIDMapping, itemIDMapping,
userFeatures, itemFeatures);
+ File storage = getTestTempFile("storage.bin");
+ PersistenceStrategy persistenceStrategy = new
FilePersistenceStrategy(storage);
+
+ // nothing has been persisted yet, so load() is expected to report an empty store
+ assertNull(persistenceStrategy.load());
+
+ persistenceStrategy.maybePersist(original);
+ Factorization clone = persistenceStrategy.load();
+
+ // relies on Factorization.equals comparing both mappings and both feature matrices
+ assertEquals(original, clone);
+ }
+}