Github user holdenk commented on a diff in the pull request:
https://github.com/apache/spark/pull/7963#discussion_r82472791
--- Diff: python/pyspark/mllib/linalg/distributed.py ---
@@ -303,6 +303,121 @@ def tallSkinnyQR(self, computeQ=False):
R = decomp.call("R")
return QRDecomposition(Q, R)
+ def computeSVD(self, k, computeU=False, rCond=1e-9):
+ """
+ Computes the singular value decomposition of the RowMatrix.
+
+ The given row matrix A of dimension (m X n) is decomposed into
+ U * s * V'T where
+
+ * U: (m X k) (left singular vectors) is a RowMatrix whose
+ columns are the eigenvectors of (A X A')
+ * s: DenseVector consisting of square root of the eigenvalues
+ (singular values) in descending order.
+ * v: (n X k) (right singular vectors) is a Matrix whose columns
+ are the eigenvectors of (A' X A)
+
+ For more specific details on implementation, please refer
+ the scala documentation.
+
+ :param k: Set the number of singular values to keep.
+ :param computeU: Whether or not to compute U. If set to be
+ True, then U is computed by A * V * s^-1
+ :param rCond: Reciprocal condition number. All singular values
+ smaller than rCond * s[0] are treated as zero
+ where s[0] is the largest singular value.
+ :returns: SingularValueDecomposition object
+
+ >>> data = [(3, 1, 1), (-1, 3, 1)]
+ >>> rm = RowMatrix(sc.parallelize(data))
+ >>> svd_model = rm.computeSVD(2, True)
+ >>> svd_model.U.rows.collect()
+ [DenseVector([-0.7071, 0.7071]), DenseVector([-0.7071, -0.7071])]
+ >>> svd_model.s
+ DenseVector([3.4641, 3.1623])
+ >>> svd_model.V
+ DenseMatrix(3, 2, [-0.4082, -0.8165, -0.4082, 0.8944, -0.4472, 0.0], 0)
+ """
+ j_model = self._java_matrix_wrapper.call(
+ "computeSVD", int(k), bool(computeU), float(rCond))
+ return SingularValueDecomposition(j_model)
+
+ def computePrincipalComponents(self, k):
+ """
+ Computes the k principal components of the given row matrix
+
+ :param k: Number of principal components to keep.
+ :returns: DenseMatrix
+
+ >>> data = sc.parallelize([[1, 2, 3], [2, 4, 5], [3, 6, 1]])
+ >>> rm = RowMatrix(data)
+
+ >>> # Returns the two principal components of rm
+ >>> pca = rm.computePrincipalComponents(2)
+ >>> pca
+ DenseMatrix(3, 2, [-0.349, -0.6981, 0.6252, -0.2796, -0.5592, -0.7805], 0)
+
+ >>> # Transform into new dimensions with the greatest variance.
+ >>> rm.multiply(pca).rows.collect() # doctest: +NORMALIZE_WHITESPACE
+ [DenseVector([0.1305, -3.7394]), DenseVector([-0.3642, -6.6983]), \
+ DenseVector([-4.6102, -4.9745])]
+ """
+ return self._java_matrix_wrapper.call("computePrincipalComponents", k)
+
+ def multiply(self, matrix):
+ """
+ Multiplies the given RowMatrix with another matrix.
+
+ :param matrix: Matrix to multiply with.
+ :returns: RowMatrix
+
+ >>> rm = RowMatrix(sc.parallelize([[0, 1], [2, 3]]))
+ >>> rm.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect()
+ [DenseVector([2.0, 3.0]), DenseVector([6.0, 11.0])]
+ """
+ if not isinstance(matrix, DenseMatrix):
+ raise ValueError("Only multiplication with DenseMatrix "
+ "is supported.")
+ j_model = self._java_matrix_wrapper.call("multiply", matrix)
+ return RowMatrix(j_model)
+
+
+class SingularValueDecomposition(JavaModelWrapper):
+ """Wrapper around the SingularValueDecomposition scala case class"""
--- End diff --
Probably add a versionAdded
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]