Repository: spark
Updated Branches:
  refs/heads/master 10d04213f -> 25fc31884


[SPARK-1535] ALS: Avoid the garbage-creating ctor of DoubleMatrix

`new DoubleMatrix(double[])` creates a garbage `double[]` of the same length as its argument and immediately throws it away. This pull request avoids that constructor in the ALS code.

Author: Tor Myklebust <[email protected]>

Closes #442 from tmyklebu/foo2 and squashes the following commits:

2784fc5 [Tor Myklebust] Mention that this is probably fixed as of jblas 1.2.4; repunctuate.
a09904f [Tor Myklebust] Helper function for wrapping Array[Double]'s with DoubleMatrix's.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/25fc3188
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/25fc3188
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/25fc3188

Branch: refs/heads/master
Commit: 25fc31884b0382b2d43c55e1f55e305a73dfae91
Parents: 10d0421
Author: Tor Myklebust <[email protected]>
Authored: Sat Apr 19 15:10:18 2014 -0700
Committer: Matei Zaharia <[email protected]>
Committed: Sat Apr 19 15:10:18 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/recommendation/ALS.scala | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/25fc3188/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
index 102742c..1f5c746 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala
@@ -269,7 +269,7 @@ class ALS private (
   private def computeYtY(factors: RDD[(Int, Array[Array[Double]])]) = {
     val n = rank * (rank + 1) / 2
     val LYtY = factors.values.aggregate(new DoubleMatrix(n))( seqOp = (L, Y) => {
-      Y.foreach(y => dspr(1.0, new DoubleMatrix(y), L))
+      Y.foreach(y => dspr(1.0, wrapDoubleArray(y), L))
       L
     }, combOp = (L1, L2) => {
       L1.addi(L2)
@@ -305,6 +305,15 @@ class ALS private (
   }
 
   /**
+   * Wrap a double array in a DoubleMatrix without creating garbage.
+   * This is a temporary fix for jblas 1.2.3; it should be safe to move back to the
+   * DoubleMatrix(double[]) constructor come jblas 1.2.4.
+   */
+  private def wrapDoubleArray(v: Array[Double]): DoubleMatrix = {
+    new DoubleMatrix(v.length, 1, v: _*)
+  }
+
+  /**
    * Flatten out blocked user or product factors into an RDD of (id, factor vector) pairs
    */
   private def unblockFactors(blockedFactors: RDD[(Int, Array[Array[Double]])],
@@ -457,7 +466,7 @@ class ALS private (
       // block
       for (productBlock <- 0 until numBlocks) {
         for (p <- 0 until blockFactors(productBlock).length) {
-          val x = new DoubleMatrix(blockFactors(productBlock)(p))
+          val x = wrapDoubleArray(blockFactors(productBlock)(p))
           tempXtX.fill(0.0)
           dspr(1.0, x, tempXtX)
           val (us, rs) = inLinkBlock.ratingsForBlock(productBlock)(p)
