Repository: mahout Updated Branches: refs/heads/master 907781bb8 -> 8c529ccff
MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data This closes apache/mahout#10 Squashed commit of the following: commit cc93dedcad0b6ff0365f8e15dba280221a0a64f0 Author: Dmitriy Lyubimov <[email protected]> Date: Tue Jun 10 11:27:38 2014 -0700 + tests commit 07180efbfa4472f7d13a2afe1f8f37e18edfe08e Merge: 162c5ca 907781b Author: Dmitriy Lyubimov <[email protected]> Date: Tue Jun 10 11:04:51 2014 -0700 Merge branch 'master' into MAHOUT-1572 commit 162c5ca36e00af91a9599075332c577d9b1a13c4 Author: Dmitriy Lyubimov <[email protected]> Date: Wed Jun 4 15:10:11 2014 -0700 initial fix (?) Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8c529ccf Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8c529ccf Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8c529ccf Branch: refs/heads/master Commit: 8c529ccff23d419c4cb5191b0435de40d6a9831c Parents: 907781b Author: Dmitriy Lyubimov <[email protected]> Authored: Tue Jun 10 11:30:55 2014 -0700 Committer: Dmitriy Lyubimov <[email protected]> Committed: Tue Jun 10 11:30:55 2014 -0700 ---------------------------------------------------------------------- CHANGELOG | 2 ++ .../mahout/sparkbindings/drm/package.scala | 12 +++++++- .../mahout/sparkbindings/drm/DrmLikeSuite.scala | 30 ++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 2e174c5..2f604e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ Mahout Change Log Release 1.0 - unreleased + MAHOUT-1572: blockify() to detect (naively) the data sparsity in the loaded data (dlyubimov) + MAHOUT-1571: Functional Views are not serialized as dense/sparse correctly (dlyubimov) MAHOUT-1566: (Experimental) Regular ALS factorizer with conversion tests, optimizer enhancements and bug fixes (dlyubimov) http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala ---------------------------------------------------------------------- diff --git a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala index 37a9ac2..2a2a4a9 100644 --- a/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala +++ b/spark/src/main/scala/org/apache/mahout/sparkbindings/drm/package.scala @@ -65,7 +65,17 @@ package object drm { val keys = data.map(t => t._1).toArray[K] val vectors = data.map(t => t._2).toArray - val block = new SparseRowMatrix(vectors.size, blockncol, vectors) + val block = if (vectors(0).isDense) { + val block = new DenseMatrix(vectors.size, blockncol) + var row = 0 + while (row < vectors.size) { + block(row, ::) := vectors(row) + row += 1 + } + block + } else { + new SparseRowMatrix(vectors.size, blockncol, vectors) + } Iterator(keys -> block) } http://git-wip-us.apache.org/repos/asf/mahout/blob/8c529ccf/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala ---------------------------------------------------------------------- diff --git a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala index caccb70..3c7e7f9 100644 --- a/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala +++ b/spark/src/test/scala/org/apache/mahout/sparkbindings/drm/DrmLikeSuite.scala @@ -22,6 +22,7 @@ import org.apache.mahout.math._ import scalabindings._ import drm._ import RLikeOps._ +import RLikeDrmOps._ import org.apache.mahout.sparkbindings.test.MahoutLocalContext @@ -52,6 +53,35 @@ class DrmLikeSuite extends FunSuite with MahoutLocalContext { println(inCoreB) } + + test("DRM blockify dense") { + + val inCoreA = dense((1, 2, 3), (3, 4, 5)) + val drmA = drmParallelize(inCoreA, numPartitions = 2) + + (inCoreA - drmA.mapBlock() { + case (keys, block) => + if (!block.isInstanceOf[DenseMatrix]) + throw new AssertionError("Block must be dense.") + keys -> block + }).norm should be < 1e-4 + } + + test("DRM blockify sparse -> SRM") { + + val inCoreA = sparse( + (1, 2, 3), + 0 -> 3 :: 2 -> 5 :: Nil + ) + val drmA = drmParallelize(inCoreA, numPartitions = 2) + + (inCoreA - drmA.mapBlock() { + case (keys, block) => + if (!block.isInstanceOf[SparseRowMatrix]) + throw new AssertionError("Block must be dense.") + keys -> block + }).norm should be < 1e-4 + } test("DRM parallelizeEmpty") {
