Repository: mahout Updated Branches: refs/heads/master eb70eb820 -> f4a71d084
MAHOUT-1837: fix bug in drm.blockify(): use SparseRowMatrix by default to test for density. closes apache/mahout#252 Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/f4a71d08 Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/f4a71d08 Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/f4a71d08 Branch: refs/heads/master Commit: f4a71d084958f2e1865efc8ac8115cd51e1e57d9 Parents: eb70eb8 Author: Andrew Palumbo <[email protected]> Authored: Sat Aug 27 12:21:51 2016 -0400 Committer: Andrew Palumbo <[email protected]> Committed: Sat Aug 27 12:21:51 2016 -0400 ---------------------------------------------------------------------- .../mahout/sparkbindings/drm/package.scala | 28 +++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/f4a71d08/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala ---------------------------------------------------------------------- diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala index f739a43..77262bd 100644 --- a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala +++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala @@ -60,26 +60,22 @@ package object drm { val keys = data.map(t => t._1).toArray[K] val vectors = data.map(t => t._2).toArray - // create the block by default as dense. - // would probably be better to sample a subset of these - // vectors first before creating the entire matrix. - // so that we don't have the overhead of creating a full second matrix in - // the case that the matrix is not dense. - val block = new DenseMatrix(vectors.length, blockncol) - var row = 0 - while (row < vectors.length) { - block(row, ::) := vectors(row) - row += 1 - } + // create the block by default as Sparse. + val block = new SparseRowMatrix(vectors.length, blockncol, vectors, true, false) - // Test the density of the data. If the matrix does not meet the - // requirements for density, convert the Vectors to a sparse Matrix. + // Test the density of the data. If the matrix does meets the + // requirements for density, convert the Vectors to a DenseMatrix. val resBlock = if (densityAnalysis(block)) { - block + val dBlock = new DenseMatrix(vectors.length, blockncol) + var row = 0 + while (row < vectors.length) { + dBlock(row, ::) := vectors(row) + row += 1 + } + dBlock } else { - new SparseRowMatrix(vectors.length, blockncol, vectors, true, false) + block } - Iterator(keys -> resBlock) } })
