Github user dbtsai commented on a diff in the pull request:
https://github.com/apache/spark/pull/15628#discussion_r87669463
--- Diff:
mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala ---
@@ -153,6 +153,86 @@ sealed trait Matrix extends Serializable {
*/
@Since("2.0.0")
def numActives: Int
+
+ /**
+ * Converts this matrix to a sparse matrix.
+ *
+ * @param columnMajor Whether the values of the resulting sparse matrix
should be in column major
+ * or row major order. If `false`, resulting matrix
will be row major.
+ */
+ private[ml] def toSparseMatrix(columnMajor: Boolean): SparseMatrix
+
+ /**
+ * Converts this matrix to a sparse matrix in column major order.
+ */
+ @Since("2.1.0")
+ def toSparse: SparseMatrix = toSparseMatrix(columnMajor = true)
+
+ /**
+ * Converts this matrix to a dense matrix.
+ *
+ * @param columnMajor Whether the values of the resulting dense matrix
should be in column major
+ * or row major order. If `false`, resulting matrix
will be row major.
+ */
+ private [ml] def toDenseMatrix(columnMajor: Boolean): DenseMatrix
+
+ /**
+ * Converts this matrix to a dense matrix in column major order.
+ */
+ @Since("2.1.0")
+ def toDense: DenseMatrix = toDenseMatrix(columnMajor = true)
+
+ /**
+ * Returns a matrix in either dense or sparse format, whichever uses
less storage.
+ *
+ * @param columnMajor Whether the values of the resulting matrix should
be in column major
+ * or row major order. If `false`, resulting matrix
will be row major.
+ */
+ @Since("2.1.0")
+ def compressed(columnMajor: Boolean): Matrix = {
+ if (getDenseSizeInBytes < getSparseSizeInBytes(columnMajor)) {
+ toDenseMatrix(columnMajor)
+ } else {
+ toSparseMatrix(columnMajor)
+ }
+ }
+
+ /**
+ * Returns a matrix in dense column major, dense row major, sparse row
major, or sparse column
+ * major format, whichever uses less storage. When dense representation
is optimal, it maintains
+ * the current layout order.
+ */
+ @Since("2.1.0")
+ def compressed: Matrix = {
+ val cscSize = getSparseSizeInBytes(columnMajor = true)
+ val csrSize = getSparseSizeInBytes(columnMajor = false)
+ val minSparseSize = cscSize.min(csrSize)
+ if (getDenseSizeInBytes < minSparseSize) {
+ // size is the same either way, so maintain current layout
--- End diff --
``` scala
if (getDenseSizeInBytes < math.min(cscSize, csrSize))
...
...
if (cscSize < csrSize)
```
would be easier to read.
Also, could you elaborate on the comment, for example:
```
// the size of a dense matrix is the same in row major and column major order,
// so maintain the current layout
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]