Repository: spark Updated Branches: refs/heads/master d7ee12211 -> b4a4421b6
[SPARK-11918][ML] Better error from WLS for cases like singular input ## What changes were proposed in this pull request? Update error handling for Cholesky decomposition to provide a little more info when input is singular. ## How was this patch tested? New test case; jenkins tests. Author: Sean Owen <so...@cloudera.com> Closes #15177 from srowen/SPARK-11918. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4a4421b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4a4421b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4a4421b Branch: refs/heads/master Commit: b4a4421b610e776e5280fd5e7453f937f806cbd1 Parents: d7ee122 Author: Sean Owen <so...@cloudera.com> Authored: Wed Sep 21 18:56:16 2016 +0000 Committer: DB Tsai <d...@netflix.com> Committed: Wed Sep 21 18:56:16 2016 +0000 ---------------------------------------------------------------------- .../mllib/linalg/CholeskyDecomposition.scala | 19 +++++++++++++++---- .../ml/optim/WeightedLeastSquaresSuite.scala | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b4a4421b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala index e449479..08f8f19 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala @@ -36,8 +36,7 @@ private[spark] object CholeskyDecomposition { val k = bx.length val info = new intW(0) lapack.dppsv("U", k, 1, A, bx, k, info) - val code = info.`val` - assert(code == 0, s"lapack.dppsv returned $code.") + checkReturnValue(info, "dppsv") bx } @@ -52,8 +51,20 @@ private[spark] object CholeskyDecomposition { def inverse(UAi: Array[Double], k: Int): Array[Double] = { val info = new intW(0) lapack.dpptri("U", k, UAi, info) - val code = info.`val` - assert(code == 0, s"lapack.dpptri returned $code.") + checkReturnValue(info, "dpptri") UAi } + + private def checkReturnValue(info: intW, method: String): Unit = { + info.`val` match { + case code if code < 0 => + throw new IllegalStateException(s"LAPACK.$method returned $code; arg ${-code} is illegal") + case code if code > 0 => + throw new IllegalArgumentException( + s"LAPACK.$method returned $code because A is not positive definite. Is A derived from " + + "a singular matrix (e.g. collinear column values)?") + case _ => // do nothing + } + } + } http://git-wip-us.apache.org/repos/asf/spark/blob/b4a4421b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala index c8de796..2cb1af0 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala @@ -60,6 +60,26 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext ), 2) } + test("two collinear features result in error with no regularization") { + val singularInstances = sc.parallelize(Seq( + Instance(1.0, 1.0, Vectors.dense(1.0, 2.0)), + Instance(2.0, 1.0, Vectors.dense(2.0, 4.0)), + Instance(3.0, 1.0, Vectors.dense(3.0, 6.0)), + Instance(4.0, 1.0, Vectors.dense(4.0, 8.0)) + ), 2) + + intercept[IllegalArgumentException] { + new WeightedLeastSquares( + false, regParam = 0.0, standardizeFeatures = false, + standardizeLabel = false).fit(singularInstances) + } + + // Should not throw an exception + new WeightedLeastSquares( + false, regParam = 1.0, standardizeFeatures = false, + standardizeLabel = false).fit(singularInstances) + } + test("WLS against lm") { /* R code: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org