Updated Branches: refs/heads/master 0675ca50f -> 84595ea3e
Code clean up for mllib

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0d94d74e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0d94d74e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0d94d74e

Branch: refs/heads/master
Commit: 0d94d74edf759e19c3f4ca98eadf6b22536c6645
Parents: 01c0d72
Author: Frank Dai <soulmach...@gmail.com>
Authored: Tue Jan 14 14:37:26 2014 +0800
Committer: Frank Dai <soulmach...@gmail.com>
Committed: Tue Jan 14 14:37:26 2014 +0800

----------------------------------------------------------------------
 .../spark/mllib/api/python/PythonMLLibAPI.scala | 24 +++++++++-----------
 .../apache/spark/mllib/classification/SVM.scala |  2 --
 .../spark/mllib/clustering/KMeansModel.scala    |  5 ++--
 .../mllib/regression/LinearRegression.scala     |  2 +-
 .../mllib/regression/RidgeRegression.scala      |  8 +++----
 .../spark/mllib/util/LinearDataGenerator.scala  |  4 +---
 .../spark/mllib/util/MFDataGenerator.scala      | 17 +++++++-------
 .../org/apache/spark/mllib/util/MLUtils.scala   |  2 +-
 .../spark/mllib/util/SVMDataGenerator.scala     |  2 +-
 .../LogisticRegressionSuite.scala               |  6 ++---
 .../spark/mllib/classification/SVMSuite.scala   |  9 ++++----
 .../spark/mllib/clustering/KMeansSuite.scala    |  3 ---
 .../spark/mllib/recommendation/ALSSuite.scala   |  1 -
 .../spark/mllib/regression/LassoSuite.scala     | 10 ++++----
 .../regression/LinearRegressionSuite.scala      |  9 ++++----
 .../mllib/regression/RidgeRegressionSuite.scala |  3 ---
 16 files changed, 44 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c972a71..9ec6019 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -24,7 +24,6 @@ import org.apache.spark.mllib.recommendation._
 import org.apache.spark.rdd.RDD
 import java.nio.ByteBuffer
 import java.nio.ByteOrder
-import java.nio.DoubleBuffer
 
 /**
  * The Java stubs necessary for the Python mllib bindings.
@@ -37,11 +36,11 @@ class PythonMLLibAPI extends Serializable {
     }
     val bb = ByteBuffer.wrap(bytes)
     bb.order(ByteOrder.nativeOrder())
-    val magic = bb.getLong()
+    val magic = bb.getLong
     if (magic != 1) {
       throw new IllegalArgumentException("Magic " + magic + " is wrong.")
     }
-    val length = bb.getLong()
+    val length = bb.getLong
     if (packetLength != 16 + 8 * length) {
       throw new IllegalArgumentException("Length " + length + " is wrong.")
     }
@@ -70,18 +69,17 @@ class PythonMLLibAPI extends Serializable {
     }
     val bb = ByteBuffer.wrap(bytes)
     bb.order(ByteOrder.nativeOrder())
-    val magic = bb.getLong()
+    val magic = bb.getLong
     if (magic != 2) {
       throw new IllegalArgumentException("Magic " + magic + " is wrong.")
     }
-    val rows = bb.getLong()
-    val cols = bb.getLong()
+    val rows = bb.getLong
+    val cols = bb.getLong
     if (packetLength != 24 + 8 * rows * cols) {
       throw new IllegalArgumentException("Size " + rows + "x" + cols + " is wrong.")
     }
     val db = bb.asDoubleBuffer()
     val ans = new Array[Array[Double]](rows.toInt)
-    var i = 0
     for (i <- 0 until rows.toInt) {
       ans(i) = new Array[Double](cols.toInt)
       db.get(ans(i))
@@ -200,9 +198,9 @@ class PythonMLLibAPI extends Serializable {
   private def unpackRating(ratingBytes: Array[Byte]): Rating = {
     val bb = ByteBuffer.wrap(ratingBytes)
     bb.order(ByteOrder.nativeOrder())
-    val user = bb.getInt()
-    val product = bb.getInt()
-    val rating = bb.getDouble()
+    val user = bb.getInt
+    val product = bb.getInt
+    val rating = bb.getDouble
     new Rating(user, product, rating)
   }
@@ -210,8 +208,8 @@ class PythonMLLibAPI extends Serializable {
   private[spark] def unpackTuple(tupleBytes: Array[Byte]): (Int, Int) = {
     val bb = ByteBuffer.wrap(tupleBytes)
     bb.order(ByteOrder.nativeOrder())
-    val v1 = bb.getInt()
-    val v2 = bb.getInt()
+    val v1 = bb.getInt
+    val v2 = bb.getInt
     (v1, v2)
   }
@@ -219,7 +217,7 @@
    * Serialize a Rating object into an array of bytes.
    * It can be deserialized using RatingDeserializer().
    *
-   * @param rate
+   * @param rate the Rating object to serialize
    * @return
    */
  private[spark] def serializeRating(rate: Rating): Array[Byte] = {
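A side note on the getLong/getInt/getDouble hunks above: Scala lets callers of Java methods drop empty parentheses, and this patch adopts the paren-less accessor style throughout the stubs. A minimal, self-contained sketch of the same header check (illustration only, not the MLlib code; the magic value 1 and the 16-byte header mirror the dense-vector hunk above):

    import java.nio.{ByteBuffer, ByteOrder}

    object HeaderSketch {
      def main(args: Array[String]) {
        // Pack an 8-byte magic followed by an 8-byte length, in native byte order.
        val bytes = ByteBuffer.allocate(16).order(ByteOrder.nativeOrder())
          .putLong(1L).putLong(3L).array()

        val bb = ByteBuffer.wrap(bytes)
        bb.order(ByteOrder.nativeOrder())
        val magic = bb.getLong   // paren-less call, as in the patch
        val length = bb.getLong  // note: getLong still advances the buffer position
        if (magic != 1) {
          throw new IllegalArgumentException("Magic " + magic + " is wrong.")
        }
        println("length = " + length)
      }
    }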
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3b8f855..831aa76 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.classification
 
-import scala.math.signum
-
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index cfc81c9..f770707 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.clustering
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 
-import org.apache.spark.mllib.util.MLUtils
-
 /**
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
@@ -39,6 +37,7 @@ class KMeansModel(val clusterCenters: Array[Array[Double]]) extends Serializable
    * model on the given data.
    */
   def computeCost(data: RDD[Array[Double]]): Double = {
-    data.map(p => KMeans.pointCost(clusterCenters, p)).sum
+    data.map(p => KMeans.pointCost(clusterCenters, p)).sum()
+  }
 }
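The KMeansModel hunk goes the opposite way: .sum becomes .sum(), plausibly because summing an RDD runs a Spark job, and methods that do real work conventionally keep their parentheses (on plain Scala collections, sum is defined without them). A local sketch of the same cost computation, with a hypothetical squared-Euclidean stand-in for KMeans.pointCost:

    object CostSketch {
      // Stand-in for KMeans.pointCost: squared Euclidean distance from a
      // point to its closest center (an assumption, for illustration only).
      def pointCost(centers: Array[Array[Double]], p: Array[Double]): Double =
        centers.map { c =>
          c.zip(p).map { case (ci, pi) => (ci - pi) * (ci - pi) }.sum
        }.min

      def main(args: Array[String]) {
        val centers = Array(Array(0.0, 0.0), Array(10.0, 10.0))
        val data = Seq(Array(1.0, 1.0), Array(9.0, 11.0))
        val cost = data.map(p => pointCost(centers, p)).sum
        println("cost = " + cost)  // 2.0 + 2.0 = 4.0
      }
    }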
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 597d55e..6aa63b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.regression
 
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index b29508d..41b80cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.regression
 
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils
@@ -76,7 +76,7 @@ class RidgeRegressionWithSGD private (
   def createModel(weights: Array[Double], intercept: Double) = {
     val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*)
     val weightsScaled = weightsMat.div(xColSd)
-    val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0))
+    val interceptScaled = yMean - weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0)
     new RidgeRegressionModel(weightsScaled.data, interceptScaled)
   }
@@ -86,7 +86,7 @@ class RidgeRegressionWithSGD private (
       initialWeights: Array[Double])
     : RidgeRegressionModel =
   {
-    val nfeatures: Int = input.first.features.length
+    val nfeatures: Int = input.first().features.length
     val nexamples: Long = input.count()
 
     // To avoid penalizing the intercept, we center and scale the data.
@@ -122,7 +122,7 @@ object RidgeRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @param initialWeights Initial set of weights to be used. Array should be equal in size to 
+   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    */
   def train(
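For context on the interceptScaled line above (which only loses redundant parentheses): training runs on standardized features x' = (x - mean) / sd, so fitted weights w' map back to the original space as w = w' / sd, and with centered targets the intercept becomes b = yMean - w' . (mean / sd), which is what createModel computes. A numeric check of that identity with made-up values (not MLlib code):

    object RescaleSketch {
      def main(args: Array[String]) {
        val wScaled = Array(2.0, -1.0)   // hypothetical weights from the scaled problem
        val xColMean = Array(3.0, 5.0)
        val xColSd = Array(2.0, 4.0)
        val yMean = 7.0

        // Map back to the original feature space, as createModel does.
        val weights = wScaled.zip(xColSd).map { case (w, sd) => w / sd }
        val intercept = yMean - wScaled.zip(xColMean.zip(xColSd))
          .map { case (w, (m, sd)) => w * m / sd }.sum

        // y = 1.0 * x1 - 0.25 * x2 + 5.25 in the original space
        println(weights.mkString(", ") + " ; intercept = " + intercept)
      }
    }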
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index bc5045f..2e03684 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -25,7 +25,6 @@ import org.jblas.DoubleMatrix
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.regression.LabeledPoint
 
 /**
  * Generate sample data used for Linear Data. This class generates
@@ -73,7 +72,7 @@ object LinearDataGenerator {
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(2 * rnd.nextDouble - 1.0))
     val y = x.map { xi =>
-      (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
+      new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
     }
     y.zip(x).map(p => LabeledPoint(p._1, p._2))
   }
@@ -86,7 +85,6 @@ object LinearDataGenerator {
    * @param nexamples Number of examples that will be contained in the RDD.
    * @param nfeatures Number of features to generate for each example.
    * @param eps Epsilon factor by which examples are scaled.
-   * @param weights Weights associated with the first weights.length features.
    * @param nparts Number of partitions in the RDD. Default value is 2.
    *
    * @return RDD of LabeledPoint containing sample data.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index d5f3f6b..348aba1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.mllib.recommendation
+package org.apache.spark.mllib.util
 
 import scala.util.Random
 
@@ -23,7 +23,6 @@ import org.jblas.DoubleMatrix
 
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.mllib.util.MLUtils
 
 /**
  * Generate RDD(s) containing data for Matrix Factorization.
@@ -31,9 +30,9 @@ import org.apache.spark.mllib.util.MLUtils
 * This method samples training entries according to the oversampling factor
 * 'trainSampFact', which is a multiplicative factor of the number of
 * degrees of freedom of the matrix: rank*(m+n-rank).
-*
-* It optionally samples entries for a testing matrix using
-* 'testSampFact', the percentage of the number of training entries
+*
+* It optionally samples entries for a testing matrix using
+* 'testSampFact', the percentage of the number of training entries
 * to use for testing.
 *
 * This method takes the following inputs:
@@ -73,7 +72,7 @@ object MFDataGenerator{
 
     val A = DoubleMatrix.randn(m, rank)
     val B = DoubleMatrix.randn(rank, n)
-    val z = 1 / (scala.math.sqrt(scala.math.sqrt(rank)))
+    val z = 1 / scala.math.sqrt(scala.math.sqrt(rank))
     A.mmuli(z)
     B.mmuli(z)
     val fullData = A.mmul(B)
@@ -91,7 +90,7 @@ object MFDataGenerator{
       .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
 
     // optionally add gaussian noise
-    if (noise) { 
+    if (noise) {
       trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
     }
 
@@ -107,8 +106,8 @@ object MFDataGenerator{
       .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
     testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)
   }
-  
+
   sc.stop()
-  
+
 }
 }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index d91b74c..64c6136 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -97,7 +97,7 @@ object MLUtils {
     while (col < nfeatures) {
       xColMean.put(col, xColSumsMap(col)._1 / nexamples)
       val variance =
-        (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / (nexamples)
+        (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / nexamples
       xColSd.put(col, math.sqrt(variance))
       col += 1
     }
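The MLUtils hunk is also a pure style fix, but the expression it touches is the one-pass population variance shortcut, var = (sum of squares - (sum)^2 / n) / n, i.e. E[x^2] - E[x]^2. A tiny standalone check of the same shape (illustrative values only):

    object VarianceSketch {
      def main(args: Array[String]) {
        val xs = Array(1.0, 2.0, 3.0, 4.0)
        val n = xs.length
        val sum = xs.sum
        val sumSq = xs.map(x => x * x).sum
        // Same form as the MLUtils expression above.
        val variance = (sumSq - math.pow(sum, 2) / n) / n
        println("variance = " + variance)  // 1.25
      }
    }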
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
index 0702209..c96c94f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -56,7 +56,7 @@ object SVMDataGenerator {
       val x = Array.fill[Double](nfeatures) {
         rnd.nextDouble() * 2.0 - 1.0
       }
-      val yD = (new DoubleMatrix(1, x.length, x:_*)).dot(trueWeights) + rnd.nextGaussian() * 0.1
+      val yD = new DoubleMatrix(1, x.length, x: _*).dot(trueWeights) + rnd.nextGaussian() * 0.1
       val y = if (yD < 0) 0.0 else 1.0
       LabeledPoint(y, x)
     }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index 34c6729..f97eaf3 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -80,9 +80,9 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with Shoul
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      (prediction != expected.label)
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      prediction != expected.label
+    }
     // At least 83% of the predictions should be on.
     ((input.length - numOffPredictions).toDouble / input.length) should be > 0.83
   }
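This and the following test-suite hunks all apply the same idiom: predictions.zip(input).filter { ... }.size becomes count { ... }, which states the intent directly and skips materializing the intermediate collection. A minimal sketch of the idiom with hypothetical data:

    object CountSketch {
      def main(args: Array[String]) {
        val predictions = Seq(1.0, 0.0, 1.0, 1.0)
        val labels = Seq(1.0, 1.0, 1.0, 0.0)
        // count(p) is equivalent to filter(p).size, without building the filtered Seq.
        val numOff = predictions.zip(labels).count { case (p, l) => p != l }
        println(numOff + " of " + labels.size + " predictions are off")  // 2 of 4
      }
    }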
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 6a957e3..0f24fbb 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.mllib.classification
 
 import scala.util.Random
-import scala.math.signum
 import scala.collection.JavaConversions._
 
 import org.scalatest.BeforeAndAfterAll
@@ -50,7 +49,7 @@ object SVMSuite {
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
     val y = x.map { xi =>
-      val yD = (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) +
+      val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
         intercept + 0.01 * rnd.nextGaussian()
       if (yD < 0) 0.0 else 1.0
     }
@@ -72,9 +71,9 @@ class SVMSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      (prediction != expected.label)
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      prediction != expected.label
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index 94245f6..73657ca 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -17,15 +17,12 @@
 
 package org.apache.spark.mllib.clustering
 
-import scala.util.Random
 
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 
-import org.jblas._
 
 class KMeansSuite extends FunSuite with BeforeAndAfterAll {
   @transient private var sc: SparkContext = _

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index e683a90..4e8dbde 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -24,7 +24,6 @@ import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 
 import org.jblas._

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
index db980c7..0a6a9f7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.regression
 
-import scala.collection.JavaConversions._
-import scala.util.Random
 
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
@@ -41,10 +39,10 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      // A prediction is off if the prediction is more than 0.5 away from expected value.
-      math.abs(prediction - expected.label) > 0.5
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      // A prediction is off if the prediction is more than 0.5 away from expected value.
+      math.abs(prediction - expected.label) > 0.5
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
index ef500c7..dd5aa85 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
@@ -21,7 +21,6 @@ import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.util.LinearDataGenerator
 
 class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
@@ -37,10 +36,10 @@ class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      // A prediction is off if the prediction is more than 0.5 away from expected value.
-      math.abs(prediction - expected.label) > 0.5
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      // A prediction is off if the prediction is more than 0.5 away from expected value.
+      math.abs(prediction - expected.label) > 0.5
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index c18092d..1d6a10b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -17,15 +17,12 @@
 
 package org.apache.spark.mllib.regression
 
-import scala.collection.JavaConversions._
-import scala.util.Random
 
 import org.jblas.DoubleMatrix
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.util.LinearDataGenerator
 
 class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
