spark git commit: [SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation

meng Thu, 30 Apr 2015 23:51:47 -0700

Repository: spark
Updated Branches:
  refs/heads/master 14b32886f -> c24aeb6a3



[SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation

Adds

rank, recommendUsers and recommendProducts to MatrixFactorizationModel in PySpark.

Author: MechCoder <[email protected]>

Closes #5807 from MechCoder/spark-6257 and squashes the following commits:

09629c6 [MechCoder] doc
953b326 [MechCoder] [SPARK-6257] MLlib API missing items in Recommendation


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c24aeb6a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c24aeb6a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c24aeb6a

Branch: refs/heads/master
Commit: c24aeb6a310b49dba8db1f4642531780a2e27253
Parents: 14b3288
Author: MechCoder <[email protected]>
Authored: Thu Apr 30 23:51:00 2015 -0700
Committer: Xiangrui Meng <[email protected]>
Committed: Thu Apr 30 23:51:00 2015 -0700

----------------------------------------------------------------------
 docs/mllib-collaborative-filtering.md  |  2 +-
 python/pyspark/mllib/recommendation.py | 39 +++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c24aeb6a/docs/mllib-collaborative-filtering.md
----------------------------------------------------------------------
diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md
index 7614028..7b397e3 100644
--- a/docs/mllib-collaborative-filtering.md
+++ b/docs/mllib-collaborative-filtering.md
@@ -216,7 +216,7 @@ model = ALS.train(ratings, rank, numIterations)
 testdata = ratings.map(lambda p: (p[0], p[1]))
 predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
 ratesAndPreds = ratings.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
-MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).reduce(lambda x, y: x + y) / ratesAndPreds.count()
+MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
 print("Mean Squared Error = " + str(MSE))
 
 # Save and load model

http://git-wip-us.apache.org/repos/asf/spark/blob/c24aeb6a/python/pyspark/mllib/recommendation.py
----------------------------------------------------------------------
diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py
index 4b7d17d..9c4647d 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -65,6 +65,13 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     >>> model.userFeatures().collect()
     [(1, array('d', [...])), (2, array('d', [...]))]
 
+    >>> model.recommendUsers(1, 2)
+    [Rating(user=2, product=1, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+    >>> model.recommendProducts(1, 2)
+    [Rating(user=1, product=2, rating=1.9...), Rating(user=1, product=1, rating=1.0...)]
+    >>> model.rank
+    4
+
     >>> first_user = model.userFeatures().take(1)[0]
     >>> latents = first_user[1]
     >>> len(latents) == 4
@@ -105,9 +112,15 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
     ...     pass
     """
     def predict(self, user, product):
+        """
+        Predicts rating for the given user and product.
+        """
         return self._java_model.predict(int(user), int(product))
 
     def predictAll(self, user_product):
+        """
+        Returns a list of predicted ratings for input user and product pairs.
+        """
         assert isinstance(user_product, RDD), "user_product should be RDD of (user, product)"
         first = user_product.first()
         assert len(first) == 2, "user_product should be RDD of (user, product)"
@@ -115,11 +128,37 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
         return self.call("predict", user_product)
 
     def userFeatures(self):
+        """
+        Returns a paired RDD, where the first element is the user and the
+        second is an array of features corresponding to that user.
+        """
         return self.call("getUserFeatures").mapValues(lambda v: array.array('d', v))
 
     def productFeatures(self):
+        """
+        Returns a paired RDD, where the first element is the product and the
+        second is an array of features corresponding to that product.
+        """
         return self.call("getProductFeatures").mapValues(lambda v: array.array('d', v))
 
+    def recommendUsers(self, product, num):
+        """
+        Recommends the top "num" number of users for a given product and returns a list
+        of Rating objects sorted by the predicted rating in descending order.
+        """
+        return list(self.call("recommendUsers", product, num))
+
+    def recommendProducts(self, user, num):
+        """
+        Recommends the top "num" number of products for a given user and returns a list
+        of Rating objects sorted by the predicted rating in descending order.
+        """
+        return list(self.call("recommendProducts", user, num))
+
+    @property
+    def rank(self):
+        return self.call("rank")
+
     @classmethod
     def load(cls, sc, path):
         model = cls._load_java(sc, path)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation

Reply via email to