Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/3319#discussion_r22120303
--- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
---
@@ -197,6 +335,167 @@ class SparseMatrix(
}
override def copy = new SparseMatrix(numRows, numCols, colPtrs,
rowIndices, values.clone())
+
+ private[mllib] def map(f: Double => Double) =
+ new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f))
+
+ private[mllib] def update(f: Double => Double): SparseMatrix = {
+ val len = values.length
+ var i = 0
+ while (i < len) {
+ values(i) = f(values(i))
+ i += 1
+ }
+ this
+ }
+
+ /** Generate a `DenseMatrix` from the given `SparseMatrix`. */
+ def toDense(): DenseMatrix = {
+ new DenseMatrix(numRows, numCols, toArray)
+ }
+}
+
+/**
+ * Factory methods for [[org.apache.spark.mllib.linalg.SparseMatrix]].
+ */
+object SparseMatrix {
+
+ /**
+ * Generate a `SparseMatrix` from Coordinate List (COO) format. Input
must be an array of
+ * (row, column, value) tuples.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @param entries Array of (row, column, value) tuples
+ * @return The corresponding `SparseMatrix`
+ */
+ def fromCOO(numRows: Int, numCols: Int, entries: Array[(Int, Int,
Double)]): SparseMatrix = {
+ val sortedEntries = entries.sortBy(v => (v._2, v._1))
+ val colPtrs = new Array[Int](numCols + 1)
+ var nnz = 0
+ var lastCol = -1
+ val values = sortedEntries.map { case (i, j, v) =>
+ while (j != lastCol) {
+ colPtrs(lastCol + 1) = nnz
+ lastCol += 1
+ }
+ nnz += 1
+ v
+ }
+ while (numCols > lastCol) {
+ colPtrs(lastCol + 1) = nnz
+ lastCol += 1
+ }
+ new SparseMatrix(numRows, numCols, colPtrs.toArray,
sortedEntries.map(_._1), values)
+ }
+
+ /**
+ * Generate an Identity Matrix in `SparseMatrix` format.
+ * @param n number of rows and columns of the matrix
+ * @return `SparseMatrix` with size `n` x `n` and values of ones on the
diagonal
+ */
+ def speye(n: Int): SparseMatrix = {
+ new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray,
Array.fill(n)(1.0))
+ }
+
+ /** Generates a `SparseMatrix` with a given random number generator and
`method`, which
+ * specifies the distribution. */
+ private def genRandMatrix(
+ numRows: Int,
+ numCols: Int,
+ density: Double,
+ rng: Random,
+ method: Random => Double): SparseMatrix = {
+ require(density >= 0.0 && density <= 1.0, "density must be a double in
the range " +
+ s"0.0 <= d <= 1.0. Currently, density: $density")
+ val length = math.ceil(numRows * numCols * density).toInt
+ val entries = MutableMap[(Int, Int), Double]()
+ var i = 0
+ if (density == 0.0) {
+ return new SparseMatrix(numRows, numCols, new Array[Int](numCols +
1),
+ Array[Int](), Array[Double]())
+ } else if (density == 1.0) {
+ return new SparseMatrix(numRows, numCols, (0 to numRows * numCols by
numRows).toArray,
+ (0 until numRows * numCols).toArray, Array.fill(numRows *
numCols)(method(rng)))
+ }
+ // Expected number of iterations is less than 1.5 * length
+ if (density < 0.34) {
+ while (i < length) {
+ var rowIndex = rng.nextInt(numRows)
+ var colIndex = rng.nextInt(numCols)
+ while (entries.contains((rowIndex, colIndex))) {
+ rowIndex = rng.nextInt(numRows)
+ colIndex = rng.nextInt(numCols)
+ }
+ entries += (rowIndex, colIndex) -> method(rng)
+ i += 1
+ }
+ } else { // selection - rejection method
+ var j = 0
+ val pool = numRows * numCols
+ // loop over columns so that the sort in fromCOO requires less
sorting
+ while (i < length && j < numCols) {
+ var passedInPool = j * numRows
+ var r = 0
+ while (i < length && r < numRows) {
+ if (rng.nextDouble() < 1.0 * (length - i) / (pool -
passedInPool)) {
+ entries += (r, j) -> method(rng)
+ i += 1
+ }
+ r += 1
+ passedInPool += 1
+ }
+ j += 1
+ }
+ }
+ SparseMatrix.fromCOO(numRows, numCols, entries.toArray.map(v =>
(v._1._1, v._1._2, v._2)))
+ }
+
+ /**
+ * Generate a `SparseMatrix` consisting of i.i.d. uniform random
numbers. The number of non-zero
+ * elements equal the ceiling of `numRows` x `numCols` x `density`
+ *
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @param density the desired density for the matrix
+ * @param rng a random number generator
+ * @return `SparseMatrix` with size `numRows` x `numCols` and values in
U(0, 1)
+ */
+ def sprand(numRows: Int, numCols: Int, density: Double, rng: Random):
SparseMatrix = {
+ def method(rand: Random): Double = rand.nextDouble()
+ genRandMatrix(numRows, numCols, density, rng, method)
+ }
+
+ /**
+ * Generate a `SparseMatrix` consisting of i.i.d. gaussian random
numbers.
+ * @param numRows number of rows of the matrix
+ * @param numCols number of columns of the matrix
+ * @param density the desired density for the matrix
+ * @param rng a random number generator
+ * @return `SparseMatrix` with size `numRows` x `numCols` and values in
N(0, 1)
+ */
+ def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random):
SparseMatrix = {
+ def method(rand: Random): Double = rand.nextGaussian()
+ genRandMatrix(numRows, numCols, density, rng, method)
+ }
+
+ /**
+ * Generate a diagonal matrix in `SparseMatrix` format from the supplied
values.
+ * @param vector a `Vector` that will form the values on the diagonal of
the matrix
+ * @return Square `SparseMatrix` with size `values.length` x
`values.length` and non-zero
+ * `values` on the diagonal
+ */
+ def diag(vector: Vector): SparseMatrix = {
+ val n = vector.size
+ vector match {
+ case sVec: SparseVector =>
+ val indices = sVec.indices
--- End diff --
This is only used once. Maybe we don't need to declare a val for it.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org