Updated Branches: refs/heads/master 0675ca50f -> 84595ea3e
Code clean up for mllib

Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/0d94d74e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/0d94d74e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/0d94d74e

Branch: refs/heads/master
Commit: 0d94d74edf759e19c3f4ca98eadf6b22536c6645
Parents: 01c0d72
Author: Frank Dai <soulmach...@gmail.com>
Authored: Tue Jan 14 14:37:26 2014 +0800
Committer: Frank Dai <soulmach...@gmail.com>
Committed: Tue Jan 14 14:37:26 2014 +0800

----------------------------------------------------------------------
 .../spark/mllib/api/python/PythonMLLibAPI.scala | 24 +++++++++-----------
 .../apache/spark/mllib/classification/SVM.scala |  2 --
 .../spark/mllib/clustering/KMeansModel.scala    |  5 ++--
 .../mllib/regression/LinearRegression.scala     |  2 +-
 .../mllib/regression/RidgeRegression.scala      |  8 +++----
 .../spark/mllib/util/LinearDataGenerator.scala  |  4 +---
 .../spark/mllib/util/MFDataGenerator.scala      | 17 +++++++-------
 .../org/apache/spark/mllib/util/MLUtils.scala   |  2 +-
 .../spark/mllib/util/SVMDataGenerator.scala     |  2 +-
 .../LogisticRegressionSuite.scala               |  6 ++---
 .../spark/mllib/classification/SVMSuite.scala   |  9 ++++----
 .../spark/mllib/clustering/KMeansSuite.scala    |  3 ---
 .../spark/mllib/recommendation/ALSSuite.scala   |  1 -
 .../spark/mllib/regression/LassoSuite.scala     | 10 ++++----
 .../regression/LinearRegressionSuite.scala      |  9 ++++----
 .../mllib/regression/RidgeRegressionSuite.scala |  3 ---
 16 files changed, 44 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index c972a71..9ec6019 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -24,7 +24,6 @@ import org.apache.spark.mllib.recommendation._
 import org.apache.spark.rdd.RDD
 import java.nio.ByteBuffer
 import java.nio.ByteOrder
-import java.nio.DoubleBuffer
 
 /**
  * The Java stubs necessary for the Python mllib bindings.
@@ -37,11 +36,11 @@ class PythonMLLibAPI extends Serializable {
     }
     val bb = ByteBuffer.wrap(bytes)
     bb.order(ByteOrder.nativeOrder())
-    val magic = bb.getLong()
+    val magic = bb.getLong
     if (magic != 1) {
       throw new IllegalArgumentException("Magic " + magic + " is wrong.")
     }
-    val length = bb.getLong()
+    val length = bb.getLong
     if (packetLength != 16 + 8 * length) {
       throw new IllegalArgumentException("Length " + length + " is wrong.")
     }
@@ -70,18 +69,17 @@ class PythonMLLibAPI extends Serializable {
     }
     val bb = ByteBuffer.wrap(bytes)
     bb.order(ByteOrder.nativeOrder())
-    val magic = bb.getLong()
+    val magic = bb.getLong
     if (magic != 2) {
       throw new IllegalArgumentException("Magic " + magic + " is wrong.")
     }
-    val rows = bb.getLong()
-    val cols = bb.getLong()
+    val rows = bb.getLong
+    val cols = bb.getLong
     if (packetLength != 24 + 8 * rows * cols) {
       throw new IllegalArgumentException("Size " + rows + "x" + cols + " is wrong.")
     }
     val db = bb.asDoubleBuffer()
     val ans = new Array[Array[Double]](rows.toInt)
-    var i = 0
     for (i <- 0 until rows.toInt) {
       ans(i) = new Array[Double](cols.toInt)
       db.get(ans(i))
@@ -200,9 +198,9 @@ class PythonMLLibAPI extends Serializable {
   private def unpackRating(ratingBytes: Array[Byte]): Rating = {
     val bb = ByteBuffer.wrap(ratingBytes)
     bb.order(ByteOrder.nativeOrder())
-    val user = bb.getInt()
-    val product = bb.getInt()
-    val rating = bb.getDouble()
+    val user = bb.getInt
+    val product = bb.getInt
+    val rating = bb.getDouble
     new Rating(user, product, rating)
   }
@@ -210,8 +208,8 @@ class PythonMLLibAPI extends Serializable {
   private[spark] def unpackTuple(tupleBytes: Array[Byte]): (Int, Int) = {
     val bb = ByteBuffer.wrap(tupleBytes)
     bb.order(ByteOrder.nativeOrder())
-    val v1 = bb.getInt()
-    val v2 = bb.getInt()
+    val v1 = bb.getInt
+    val v2 = bb.getInt
     (v1, v2)
   }
@@ -219,7 +217,7 @@
    * Serialize a Rating object into an array of bytes.
    * It can be deserialized using RatingDeserializer().
    *
-   * @param rate
+   * @param rate the Rating object to serialize
    * @return
    */
  private[spark] def serializeRating(rate: Rating): Array[Byte] = {
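A side note on the getLong/getInt/getDouble hunks above: Scala lets callers of Java methods drop empty parentheses, and this patch adopts the paren-less accessor style throughout the stubs. A minimal, self-contained sketch of the same header check (illustration only, not the MLlib code; the magic value 1 and the 16-byte header mirror the dense-vector hunk above):

    import java.nio.{ByteBuffer, ByteOrder}

    object HeaderSketch {
      def main(args: Array[String]) {
        // Pack an 8-byte magic followed by an 8-byte length, in native byte order.
        val bytes = ByteBuffer.allocate(16).order(ByteOrder.nativeOrder())
          .putLong(1L).putLong(3L).array()

        val bb = ByteBuffer.wrap(bytes)
        bb.order(ByteOrder.nativeOrder())
        val magic = bb.getLong   // paren-less call, as in the patch
        val length = bb.getLong  // note: getLong still advances the buffer position
        if (magic != 1) {
          throw new IllegalArgumentException("Magic " + magic + " is wrong.")
        }
        println("length = " + length)
      }
    }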
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
index 3b8f855..831aa76 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.classification
 
-import scala.math.signum
-
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
index cfc81c9..f770707 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala
@@ -19,8 +19,6 @@ package org.apache.spark.mllib.clustering
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 
-import org.apache.spark.mllib.util.MLUtils
-
 /**
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
@@ -39,6 +37,7 @@ class KMeansModel(val clusterCenters: Array[Array[Double]]) extends Serializable
    * model on the given data.
    */
   def computeCost(data: RDD[Array[Double]]): Double = {
-    data.map(p => KMeans.pointCost(clusterCenters, p)).sum
+    data.map(p => KMeans.pointCost(clusterCenters, p)).sum()
+  }
 }
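The KMeansModel hunk goes the opposite way: .sum becomes .sum(), plausibly because summing an RDD runs a Spark job, and methods that do real work conventionally keep their parentheses (on plain Scala collections, sum is defined without them). A local sketch of the same cost computation, with a hypothetical squared-Euclidean stand-in for KMeans.pointCost:

    object CostSketch {
      // Stand-in for KMeans.pointCost: squared Euclidean distance from a
      // point to its closest center (an assumption, for illustration only).
      def pointCost(centers: Array[Array[Double]], p: Array[Double]): Double =
        centers.map { c =>
          c.zip(p).map { case (ci, pi) => (ci - pi) * (ci - pi) }.sum
        }.min

      def main(args: Array[String]) {
        val centers = Array(Array(0.0, 0.0), Array(10.0, 10.0))
        val data = Seq(Array(1.0, 1.0), Array(9.0, 11.0))
        val cost = data.map(p => pointCost(centers, p)).sum
        println("cost = " + cost)  // 2.0 + 2.0 = 4.0
      }
    }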
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 597d55e..6aa63b0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.regression
 
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index b29508d..41b80cc 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.regression
 
-import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.mllib.util.MLUtils
@@ -76,7 +76,7 @@ class RidgeRegressionWithSGD private (
   def createModel(weights: Array[Double], intercept: Double) = {
     val weightsMat = new DoubleMatrix(weights.length + 1, 1, (Array(intercept) ++ weights):_*)
     val weightsScaled = weightsMat.div(xColSd)
-    val interceptScaled = yMean - (weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0))
+    val interceptScaled = yMean - weightsMat.transpose().mmul(xColMean.div(xColSd)).get(0)
     new RidgeRegressionModel(weightsScaled.data, interceptScaled)
   }
@@ -86,7 +86,7 @@ class RidgeRegressionWithSGD private (
       initialWeights: Array[Double])
     : RidgeRegressionModel =
   {
-    val nfeatures: Int = input.first.features.length
+    val nfeatures: Int = input.first().features.length
     val nexamples: Long = input.count()
 
     // To avoid penalizing the intercept, we center and scale the data.
@@ -122,7 +122,7 @@ object RidgeRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
-   * @param initialWeights Initial set of weights to be used. Array should be equal in size to 
+   * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    */
   def train(
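For context on the interceptScaled line above (which only loses redundant parentheses): training runs on standardized features x' = (x - mean) / sd, so fitted weights w' map back to the original space as w = w' / sd, and with centered targets the intercept becomes b = yMean - w' . (mean / sd), which is what createModel computes. A numeric check of that identity with made-up values (not MLlib code):

    object RescaleSketch {
      def main(args: Array[String]) {
        val wScaled = Array(2.0, -1.0)   // hypothetical weights from the scaled problem
        val xColMean = Array(3.0, 5.0)
        val xColSd = Array(2.0, 4.0)
        val yMean = 7.0

        // Map back to the original feature space, as createModel does.
        val weights = wScaled.zip(xColSd).map { case (w, sd) => w / sd }
        val intercept = yMean - wScaled.zip(xColMean.zip(xColSd))
          .map { case (w, (m, sd)) => w * m / sd }.sum

        // y = 1.0 * x1 - 0.25 * x2 + 5.25 in the original space
        println(weights.mkString(", ") + " ; intercept = " + intercept)
      }
    }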
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index bc5045f..2e03684 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -25,7 +25,6 @@ import org.jblas.DoubleMatrix
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.regression.LabeledPoint
 
 /**
  * Generate sample data used for Linear Data. This class generates
@@ -73,7 +72,7 @@ object LinearDataGenerator {
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(2 * rnd.nextDouble - 1.0))
     val y = x.map { xi =>
-      (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
+      new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) + intercept + eps * rnd.nextGaussian()
     }
     y.zip(x).map(p => LabeledPoint(p._1, p._2))
   }
@@ -86,7 +85,6 @@ object LinearDataGenerator {
    * @param nexamples Number of examples that will be contained in the RDD.
    * @param nfeatures Number of features to generate for each example.
    * @param eps Epsilon factor by which examples are scaled.
-   * @param weights Weights associated with the first weights.length features.
    * @param nparts Number of partitions in the RDD. Default value is 2.
    *
    * @return RDD of LabeledPoint containing sample data.

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
index d5f3f6b..348aba1 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.mllib.recommendation
+package org.apache.spark.mllib.util
 
 import scala.util.Random
 
@@ -23,7 +23,6 @@ import org.jblas.DoubleMatrix
 
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.mllib.util.MLUtils
 
 /**
  * Generate RDD(s) containing data for Matrix Factorization.
@@ -31,9 +30,9 @@ import org.apache.spark.mllib.util.MLUtils
 * This method samples training entries according to the oversampling factor
 * 'trainSampFact', which is a multiplicative factor of the number of
 * degrees of freedom of the matrix: rank*(m+n-rank).
-*
-* It optionally samples entries for a testing matrix using
-* 'testSampFact', the percentage of the number of training entries
+*
+* It optionally samples entries for a testing matrix using
+* 'testSampFact', the percentage of the number of training entries
 * to use for testing.
 *
 * This method takes the following inputs:
@@ -73,7 +72,7 @@ object MFDataGenerator{
 
     val A = DoubleMatrix.randn(m, rank)
     val B = DoubleMatrix.randn(rank, n)
-    val z = 1 / (scala.math.sqrt(scala.math.sqrt(rank)))
+    val z = 1 / scala.math.sqrt(scala.math.sqrt(rank))
     A.mmuli(z)
     B.mmuli(z)
     val fullData = A.mmul(B)
@@ -91,7 +90,7 @@ object MFDataGenerator{
       .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
 
     // optionally add gaussian noise
-    if (noise) { 
+    if (noise) {
       trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
     }
 
@@ -107,8 +106,8 @@ object MFDataGenerator{
       .map(x => (fullData.indexRows(x - 1), fullData.indexColumns(x - 1), fullData.get(x - 1)))
     testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)
   }
-  
+
   sc.stop()
-  
+
 }
 }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index d91b74c..64c6136 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -97,7 +97,7 @@ object MLUtils {
     while (col < nfeatures) {
       xColMean.put(col, xColSumsMap(col)._1 / nexamples)
       val variance =
-        (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / (nexamples)
+        (xColSumsMap(col)._2 - (math.pow(xColSumsMap(col)._1, 2) / nexamples)) / nexamples
       xColSd.put(col, math.sqrt(variance))
       col += 1
     }
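The MLUtils hunk is also a pure style fix, but the expression it touches is the one-pass population variance shortcut, var = (sum of squares - (sum)^2 / n) / n, i.e. E[x^2] - E[x]^2. A tiny standalone check of the same shape (illustrative values only):

    object VarianceSketch {
      def main(args: Array[String]) {
        val xs = Array(1.0, 2.0, 3.0, 4.0)
        val n = xs.length
        val sum = xs.sum
        val sumSq = xs.map(x => x * x).sum
        // Same form as the MLUtils expression above.
        val variance = (sumSq - math.pow(sum, 2) / n) / n
        println("variance = " + variance)  // 1.25
      }
    }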
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
index 0702209..c96c94f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala
@@ -56,7 +56,7 @@ object SVMDataGenerator {
       val x = Array.fill[Double](nfeatures) {
         rnd.nextDouble() * 2.0 - 1.0
       }
-      val yD = (new DoubleMatrix(1, x.length, x:_*)).dot(trueWeights) + rnd.nextGaussian() * 0.1
+      val yD = new DoubleMatrix(1, x.length, x: _*).dot(trueWeights) + rnd.nextGaussian() * 0.1
       val y = if (yD < 0) 0.0 else 1.0
       LabeledPoint(y, x)
     }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index 34c6729..f97eaf3 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -80,9 +80,9 @@ class LogisticRegressionSuite extends FunSuite with BeforeAndAfterAll with Shoul
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      (prediction != expected.label)
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      prediction != expected.label
+    }
     // At least 83% of the predictions should be on.
     ((input.length - numOffPredictions).toDouble / input.length) should be > 0.83
   }
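This and the following test-suite hunks all apply the same idiom: predictions.zip(input).filter { ... }.size becomes count { ... }, which states the intent directly and skips materializing the intermediate collection. A minimal sketch of the idiom with hypothetical data:

    object CountSketch {
      def main(args: Array[String]) {
        val predictions = Seq(1.0, 0.0, 1.0, 1.0)
        val labels = Seq(1.0, 1.0, 1.0, 0.0)
        // count(p) is equivalent to filter(p).size, without building the filtered Seq.
        val numOff = predictions.zip(labels).count { case (p, l) => p != l }
        println(numOff + " of " + labels.size + " predictions are off")  // 2 of 4
      }
    }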
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
index 6a957e3..0f24fbb 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.mllib.classification
 
 import scala.util.Random
-import scala.math.signum
 import scala.collection.JavaConversions._
 
 import org.scalatest.BeforeAndAfterAll
@@ -50,7 +49,7 @@ object SVMSuite {
     val x = Array.fill[Array[Double]](nPoints)(
       Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0))
     val y = x.map { xi =>
-      val yD = (new DoubleMatrix(1, xi.length, xi:_*)).dot(weightsMat) +
+      val yD = new DoubleMatrix(1, xi.length, xi: _*).dot(weightsMat) +
         intercept + 0.01 * rnd.nextGaussian()
       if (yD < 0) 0.0 else 1.0
     }
@@ -72,9 +71,9 @@ class SVMSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      (prediction != expected.label)
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      prediction != expected.label
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
index 94245f6..73657ca 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala
@@ -17,15 +17,12 @@
 
 package org.apache.spark.mllib.clustering
 
-import scala.util.Random
 
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 
-import org.jblas._
 
 class KMeansSuite extends FunSuite with BeforeAndAfterAll {
   @transient private var sc: SparkContext = _

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
index e683a90..4e8dbde 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala
@@ -24,7 +24,6 @@ import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 
 import org.jblas._

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
index db980c7..0a6a9f7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.mllib.regression
 
-import scala.collection.JavaConversions._
-import scala.util.Random
 
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
@@ -41,10 +39,10 @@ class LassoSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      // A prediction is off if the prediction is more than 0.5 away from expected value.
-      math.abs(prediction - expected.label) > 0.5
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      // A prediction is off if the prediction is more than 0.5 away from expected value.
+      math.abs(prediction - expected.label) > 0.5
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }
http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
index ef500c7..dd5aa85 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala
@@ -21,7 +21,6 @@ import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.util.LinearDataGenerator
 
 class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
@@ -37,10 +36,10 @@ class LinearRegressionSuite extends FunSuite with BeforeAndAfterAll {
   }
 
   def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) {
-    val numOffPredictions = predictions.zip(input).filter { case (prediction, expected) =>
-      // A prediction is off if the prediction is more than 0.5 away from expected value.
-      math.abs(prediction - expected.label) > 0.5
-    }.size
+    val numOffPredictions = predictions.zip(input).count { case (prediction, expected) =>
+      // A prediction is off if the prediction is more than 0.5 away from expected value.
+      math.abs(prediction - expected.label) > 0.5
+    }
     // At least 80% of the predictions should be on.
     assert(numOffPredictions < input.length / 5)
   }

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/0d94d74e/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
index c18092d..1d6a10b 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala
@@ -17,15 +17,12 @@
 
 package org.apache.spark.mllib.regression
 
-import scala.collection.JavaConversions._
-import scala.util.Random
 
 import org.jblas.DoubleMatrix
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext
-import org.apache.spark.SparkContext._
 import org.apache.spark.mllib.util.LinearDataGenerator
 
 class RidgeRegressionSuite extends FunSuite with BeforeAndAfterAll {
