Author: ssc
Date: Sun Apr 17 10:09:42 2011
New Revision: 1094125

URL: http://svn.apache.org/viewvc?rev=1094125&view=rev
Log:
MAHOUT-667 Persistent storage of factorizations in SVDRecommender

Added:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java?rev=1094125&r1=1094124&r2=1094125&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/Factorization.java
 Sun Apr 17 10:09:42 2011
@@ -17,6 +17,10 @@
 
 package org.apache.mahout.cf.taste.impl.recommender.svd;
 
+import java.util.Arrays;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
 import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
@@ -38,8 +42,8 @@ public class Factorization {
 
   public Factorization(FastByIDMap<Integer> userIDMapping, 
FastByIDMap<Integer> itemIDMapping, double[][] userFeatures,
       double[][] itemFeatures) {
-    this.userIDMapping = userIDMapping;
-    this.itemIDMapping = itemIDMapping;
+    this.userIDMapping = Preconditions.checkNotNull(userIDMapping);
+    this.itemIDMapping = Preconditions.checkNotNull(itemIDMapping);
     this.userFeatures = userFeatures;
     this.itemFeatures = itemFeatures;
   }
@@ -60,4 +64,41 @@ public class Factorization {
     return itemFeatures[index];
   }
 
+  public Iterable<Map.Entry<Long,Integer>> getUserIDMappings() {
+    return userIDMapping.entrySet();
+  }
+
+  public Iterable<Map.Entry<Long,Integer>> getItemIDMappings() {
+    return itemIDMapping.entrySet();
+  }
+
+  /**
+   * Returns the number of features stored per user and per item.
+   *
+   * <p>The width is taken from the first row of the user feature matrix. If that matrix is missing or
+   * has no rows, the first row of the item feature matrix is used instead, so a factorization without
+   * users no longer fails with an {@link ArrayIndexOutOfBoundsException}.</p>
+   *
+   * @return the length of a feature vector, or 0 if neither matrix has any rows
+   */
+  public int numFeatures() {
+    if (userFeatures != null && userFeatures.length > 0) {
+      return userFeatures[0].length;
+    }
+    if (itemFeatures != null && itemFeatures.length > 0) {
+      return itemFeatures[0].length;
+    }
+    return 0;
+  }
+
+  public int numUsers() {
+    return userIDMapping.size();
+  }
+
+  public int numItems() {
+    return itemIDMapping.size();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (o instanceof Factorization) {
+      Factorization other = (Factorization) o;
+      return userIDMapping.equals(other.userIDMapping) && 
itemIDMapping.equals(other.itemIDMapping) &&
+          Arrays.deepEquals(userFeatures, other.userFeatures) && 
Arrays.deepEquals(itemFeatures, other.itemFeatures);
+    }
+    return false;
+  }
+
+  @Override
+  public int hashCode() {
+    int hashCode = 31 * userIDMapping.hashCode() + itemIDMapping.hashCode();
+    hashCode = 31 * hashCode + Arrays.deepHashCode(userFeatures);
+    hashCode = 31 * hashCode + Arrays.deepHashCode(itemFeatures);
+    return hashCode;
+  }
 }

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategy.java
 Sun Apr 17 10:09:42 2011
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import com.google.common.base.Preconditions;
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Provides a file-based persistent store.
+ *
+ * <p>The binary layout produced by {@link #writeBinary(Factorization, DataOutput)} and consumed by
+ * {@link #readBinary(DataInput)} is: three ints (number of features, users and items), followed by one
+ * record per user and then one record per item. Each record holds the int index of the row in the
+ * feature matrix, the long ID and one double per feature.</p>
+ */
+public class FilePersistenceStrategy implements PersistenceStrategy {
+
+  private final File file;
+
+  private static final Logger log = LoggerFactory.getLogger(FilePersistenceStrategy.class);
+
+  /**
+   * @param file the file to use for storage. If the file does not exist it will be created when required.
+   */
+  public FilePersistenceStrategy(File file) {
+    this.file = Preconditions.checkNotNull(file);
+  }
+
+  /**
+   * Reads the factorization stored in the file.
+   *
+   * @return the stored factorization, or null if the file does not exist
+   * @throws IOException if the file cannot be read or does not contain a valid factorization
+   */
+  @Override
+  public Factorization load() throws IOException {
+    if (!file.exists()) {
+      log.info("{} does not yet exist, no factorization found", file.getAbsolutePath());
+      return null;
+    }
+    DataInputStream in = null;
+    try {
+      log.info("Reading factorization from {}...", file.getAbsolutePath());
+      in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));
+      return readBinary(in);
+    } finally {
+      // nothing is lost when closing a stream that was only read from fails
+      Closeables.closeQuietly(in);
+    }
+  }
+
+  /**
+   * Writes the factorization to the file, replacing any previous content.
+   *
+   * @param factorization the factorization to store
+   * @throws IOException if the file cannot be opened, written or flushed completely
+   */
+  @Override
+  public void maybePersist(Factorization factorization) throws IOException {
+    log.info("Writing factorization to {}...", file.getAbsolutePath());
+    DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
+    boolean written = false;
+    try {
+      writeBinary(factorization, out);
+      written = true;
+    } finally {
+      if (written) {
+        // close() flushes the buffer; a failure here means the file is incomplete and must be reported
+        out.close();
+      } else {
+        // an exception is already propagating, do not mask it with a failure to close
+        Closeables.closeQuietly(out);
+      }
+    }
+  }
+
+  /**
+   * Serializes the factorization in the layout described in the class comment.
+   *
+   * @param factorization the factorization to serialize
+   * @param out the sink to write to; it is not closed by this method
+   * @throws IOException if writing fails or an ID listed in a mapping has no feature vector
+   */
+  protected void writeBinary(Factorization factorization, DataOutput out) throws IOException {
+    int numFeatures = factorization.numFeatures();
+    out.writeInt(numFeatures);
+    out.writeInt(factorization.numUsers());
+    out.writeInt(factorization.numItems());
+
+    for (Map.Entry<Long,Integer> mappingEntry : factorization.getUserIDMappings()) {
+      long userID = mappingEntry.getKey();
+      out.writeInt(mappingEntry.getValue());
+      out.writeLong(userID);
+      try {
+        writeFeatures(factorization.getUserFeatures(userID), numFeatures, out);
+      } catch (NoSuchUserException e) {
+        throw new IOException("Unable to persist factorization", e);
+      }
+    }
+
+    for (Map.Entry<Long,Integer> entry : factorization.getItemIDMappings()) {
+      long itemID = entry.getKey();
+      out.writeInt(entry.getValue());
+      out.writeLong(itemID);
+      try {
+        writeFeatures(factorization.getItemFeatures(itemID), numFeatures, out);
+      } catch (NoSuchItemException e) {
+        throw new IOException("Unable to persist factorization", e);
+      }
+    }
+  }
+
+  /** Writes the first {@code numFeatures} values of one feature vector. */
+  private static void writeFeatures(double[] features, int numFeatures, DataOutput out) throws IOException {
+    for (int feature = 0; feature < numFeatures; feature++) {
+      out.writeDouble(features[feature]);
+    }
+  }
+
+  /**
+   * Deserializes a factorization written by {@link #writeBinary(Factorization, DataOutput)}.
+   *
+   * @param in the source to read from; it is not closed by this method
+   * @return the factorization that was read
+   * @throws IOException if reading fails, or if a count is negative or a row index lies outside the
+   *         feature matrix, which indicates a corrupt or foreign file
+   */
+  public Factorization readBinary(DataInput in) throws IOException {
+    int numFeatures = readCount(in, "features");
+    int numUsers = readCount(in, "users");
+    int numItems = readCount(in, "items");
+
+    FastByIDMap<Integer> userIDMapping = new FastByIDMap<Integer>(numUsers);
+    double[][] userFeatures = new double[numUsers][numFeatures];
+    readRows(in, userIDMapping, userFeatures);
+
+    FastByIDMap<Integer> itemIDMapping = new FastByIDMap<Integer>(numItems);
+    double[][] itemFeatures = new double[numItems][numFeatures];
+    readRows(in, itemIDMapping, itemFeatures);
+
+    return new Factorization(userIDMapping, itemIDMapping, userFeatures, itemFeatures);
+  }
+
+  /** Reads one header count and rejects negative values, which would fail the array allocation. */
+  private static int readCount(DataInput in, String what) throws IOException {
+    int count = in.readInt();
+    if (count < 0) {
+      throw new IOException("Corrupt factorization: negative number of " + what + ": " + count);
+    }
+    return count;
+  }
+
+  /** Reads one (index, ID, features) record per row of {@code features} and fills the mapping and matrix. */
+  private static void readRows(DataInput in, FastByIDMap<Integer> idMapping, double[][] features)
+    throws IOException {
+    for (int n = 0; n < features.length; n++) {
+      int index = in.readInt();
+      if (index < 0 || index >= features.length) {
+        throw new IOException("Corrupt factorization: row index " + index + " is outside [0, "
+            + features.length + ')');
+      }
+      long id = in.readLong();
+      idMapping.put(id, index);
+      double[] row = features[index];
+      for (int feature = 0; feature < row.length; feature++) {
+        row[feature] = in.readDouble();
+      }
+    }
+  }
+
+}

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/NoPersistenceStrategy.java
 Sun Apr 17 10:09:42 2011
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import java.io.IOException;
+
+/**
+ * A {@link PersistenceStrategy} which does nothing: {@link #load()} always returns {@code null} and
+ * {@link #maybePersist(Factorization)} ignores the factorization it is given.
+ */
+public class NoPersistenceStrategy implements PersistenceStrategy {
+
+  @Override
+  public Factorization load() throws IOException {
+    return null;
+  }
+
+  @Override
+  public void maybePersist(Factorization factorization) throws IOException {
+    // Intentionally empty: this strategy never stores anything.
+  }
+
+}

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java?rev=1094125&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/PersistenceStrategy.java
 Sun Apr 17 10:09:42 2011
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import java.io.IOException;
+
+/**
+ * Provides storage for {@link Factorization}s.
+ *
+ * <p>{@link #load()} retrieves a previously stored factorization and
+ * {@link #maybePersist(Factorization)} stores one.</p>
+ */
+public interface PersistenceStrategy {
+
+  /**
+   * Load a factorization from a persistent store.
+   *
+   * @return a Factorization or null if the persistent store is empty.
+   *
+   * @throws IOException if the persistent store cannot be read
+   */
+  Factorization load() throws IOException;
+
+  /**
+   * Write a factorization to a persistent store unless it already
+   * contains an identical factorization.
+   *
+   * @param factorization the factorization to write
+   *
+   * @throws IOException if the persistent store cannot be written
+   */
+  void maybePersist(Factorization factorization) throws IOException;
+
+}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java?rev=1094125&r1=1094124&r2=1094125&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/SVDRecommender.java
 Sun Apr 17 10:09:42 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.impl.recommender.svd;
 
+import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.Callable;
@@ -44,19 +45,65 @@ public final class SVDRecommender extend
 
   private Factorization factorization;
   private final Factorizer factorizer;
+  private final PersistenceStrategy persistenceStrategy;
   private final RefreshHelper refreshHelper;
 
   private static final Logger log = 
LoggerFactory.getLogger(SVDRecommender.class);
 
   public SVDRecommender(DataModel dataModel, Factorizer factorizer) throws 
TasteException {
-    this(dataModel, factorizer, getDefaultCandidateItemsStrategy());
+    this(dataModel, factorizer, getDefaultCandidateItemsStrategy(), 
getDefaultPersistenceStrategy());
   }
 
   public SVDRecommender(DataModel dataModel, Factorizer factorizer, 
CandidateItemsStrategy candidateItemsStrategy)
     throws TasteException {
+    this(dataModel, factorizer, candidateItemsStrategy, 
getDefaultPersistenceStrategy());
+  }
+
+  /**
+   * Create an SVDRecommender using a persistent store to cache 
factorizations. A factorization is loaded from the
+   * store if present, otherwise a new factorization is computed and saved in 
the store.
+   *
+   * The {@link #refresh(java.util.Collection) refresh} method recomputes the 
factorization and overwrites the store.
+   *
+   * @param dataModel
+   * @param factorizer
+   * @param persistenceStrategy
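+   * @throws TasteException if loading the stored factorization, computing a new one or persisting it
+   *         fails; an IOException from the persistence strategy is wrapped, not thrown directly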
+   */
+  public SVDRecommender(DataModel dataModel, Factorizer factorizer, 
PersistenceStrategy persistenceStrategy) 
+    throws TasteException {
+    this(dataModel, factorizer, getDefaultCandidateItemsStrategy(), 
persistenceStrategy);
+  }
+
+  /**
+   * Create an SVDRecommender using a persistent store to cache 
factorizations. A factorization is loaded from the
+   * store if present, otherwise a new factorization is computed and saved in 
the store. 
+   *
+   * The {@link #refresh(java.util.Collection) refresh} method recomputes the 
factorization and overwrites the store.
+   *
+   * @param dataModel
+   * @param factorizer
+   * @param candidateItemsStrategy
+   * @param persistenceStrategy
+   *
+   * @throws TasteException
+   */
+  public SVDRecommender(DataModel dataModel, Factorizer factorizer, 
CandidateItemsStrategy candidateItemsStrategy,
+      PersistenceStrategy persistenceStrategy) throws TasteException {
     super(dataModel, candidateItemsStrategy);
-    this.factorizer = factorizer;
-    train();
+    this.factorizer = Preconditions.checkNotNull(factorizer);
+    this.persistenceStrategy = Preconditions.checkNotNull(persistenceStrategy);
+    try {
+      factorization = persistenceStrategy.load();
+    } catch (IOException e) {
+      throw new TasteException("Error loading factorization", e);
+    }
+    
+    if (factorization == null) {
+      train();
+    }
+    
     refreshHelper = new RefreshHelper(new Callable<Object>() {
       @Override
       public Object call() throws TasteException {
@@ -68,8 +115,17 @@ public final class SVDRecommender extend
     refreshHelper.addDependency(factorizer);
   }
 
+  protected static PersistenceStrategy getDefaultPersistenceStrategy() {
+    return new NoPersistenceStrategy();
+  }
+
   private void train() throws TasteException {
     factorization = factorizer.factorize();
+    try {
+      persistenceStrategy.maybePersist(factorization);
+    } catch (IOException e) {
+      throw new TasteException("Error persisting factorization", e);
+    }
   }
   
   @Override
@@ -122,5 +178,5 @@ public final class SVDRecommender extend
   public void refresh(Collection<Refreshable> alreadyRefreshed) {
     refreshHelper.refresh(alreadyRefreshed);
   }
-  
+
 }

Added: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java?rev=1094125&view=auto
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
 (added)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/FilePersistenceStrategyTest.java
 Sun Apr 17 10:09:42 2011
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender.svd;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.junit.Test;
+
+import java.io.File;
+
+public class FilePersistenceStrategyTest extends TasteTestCase {
+
+  @Test
+  public void persistAndLoad() throws Exception {
+    FastByIDMap<Integer> userIDMapping = new FastByIDMap<Integer>();
+    FastByIDMap<Integer> itemIDMapping = new FastByIDMap<Integer>();
+
+    userIDMapping.put(123, 0);
+    userIDMapping.put(456, 1);
+
+    itemIDMapping.put(12, 0);
+    itemIDMapping.put(34, 1);
+
+    double[][] userFeatures = { { 0.1, 0.2, 0.3 }, { 0.4, 0.5, 0.6 } };
+    double[][] itemFeatures = { { 0.7, 0.8, 0.9 }, { 1.0, 1.1, 1.2 } };
+
+    Factorization original = new Factorization(userIDMapping, itemIDMapping, 
userFeatures, itemFeatures);
+    File storage = getTestTempFile("storage.bin");
+    PersistenceStrategy persistenceStrategy = new 
FilePersistenceStrategy(storage);
+
+    assertNull(persistenceStrategy.load());
+
+    persistenceStrategy.maybePersist(original);
+    Factorization clone = persistenceStrategy.load();
+
+    assertEquals(original, clone);
+  }
+}


Reply via email to