This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-2.4 by this push: new f36d0c5 [SPARK-26228][MLLIB] OOM issue encountered when computing Gramian matrix f36d0c5 is described below commit f36d0c56c256164f229b900778f593a0d8e4c7fc Author: Sean Owen <sean.o...@databricks.com> AuthorDate: Tue Jan 22 19:22:06 2019 -0600 [SPARK-26228][MLLIB] OOM issue encountered when computing Gramian matrix Avoid memory problems in closure cleaning when handling large Gramians (>= 16K rows/cols) by using null as the zeroValue. Testing: existing tests. Note that it's hard to test the case that triggers this issue, as it would require a large amount of memory and take a while to run. I confirmed locally that a 16K x 16K Gramian failed even with tons of driver memory before this change, and didn't fail upfront after it. Closes #23600 from srowen/SPARK-26228. Authored-by: Sean Owen <sean.o...@databricks.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> (cherry picked from commit 6dcad38ba3393188084f378b7ff6dfc12b685b13) Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../spark/mllib/linalg/distributed/RowMatrix.scala | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 78a8810..5109efb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -119,11 +119,25 @@ class RowMatrix @Since("1.0.0") ( val nt = if (n % 2 == 0) ((n / 2) * (n + 1)) else (n * ((n + 1) / 2)) // Compute the upper triangular part of the gram matrix. 
- val GU = rows.treeAggregate(new BDV[Double](nt))( - seqOp = (U, v) => { + val GU = rows.treeAggregate(null.asInstanceOf[BDV[Double]])( + seqOp = (maybeU, v) => { + val U = + if (maybeU == null) { + new BDV[Double](nt) + } else { + maybeU + } BLAS.spr(1.0, v, U.data) U - }, combOp = (U1, U2) => U1 += U2) + }, combOp = (U1, U2) => + if (U1 == null) { + U2 + } else if (U2 == null) { + U1 + } else { + U1 += U2 + } + ) RowMatrix.triuToFull(n, GU.data) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org