[2/2] spark git commit: [SPARK-16356][ML] Add testImplicits for ML unit tests and promote toDF()

yliang Mon, 26 Sep 2016 04:20:07 -0700

[SPARK-16356][ML] Add testImplicits for ML unit tests and promote toDF()

## What changes were proposed in this pull request?


This was suggested in 
https://github.com/apache/spark/commit/101663f1ae222a919fc40510aa4f2bad22d1be6f#commitcomment-17114968.

This PR adds `testImplicits` to `MLlibTestSparkContext` so that some implicits 
such as `toDF()` can be used across ml tests.

This PR also changes all the usages of `spark.createDataFrame( ... )` to 
`toDF()` where applicable in ml tests in Scala.

## How was this patch tested?

Existing tests should work.

Author: hyukjinkwon <[email protected]>

Closes #14035 from HyukjinKwon/minor-ml-test.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f234b7cd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f234b7cd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f234b7cd

Branch: refs/heads/master
Commit: f234b7cd795dd9baa3feff541c211b4daf39ccc6
Parents: 50b89d0
Author: hyukjinkwon <[email protected]>
Authored: Mon Sep 26 04:19:39 2016 -0700
Committer: Yanbo Liang <[email protected]>
Committed: Mon Sep 26 04:19:39 2016 -0700

----------------------------------------------------------------------
 .../org/apache/spark/ml/PipelineSuite.scala     |  13 +-
 .../ml/classification/ClassifierSuite.scala     |  16 +--
 .../DecisionTreeClassifierSuite.scala           |   3 +-
 .../ml/classification/GBTClassifierSuite.scala  |   6 +-
 .../LogisticRegressionSuite.scala               |  43 +++----
 .../MultilayerPerceptronClassifierSuite.scala   |  26 ++--
 .../ml/classification/NaiveBayesSuite.scala     |  20 +--
 .../ml/classification/OneVsRestSuite.scala      |   4 +-
 .../RandomForestClassifierSuite.scala           |   3 +-
 .../apache/spark/ml/clustering/LDASuite.scala   |   6 +-
 .../BinaryClassificationEvaluatorSuite.scala    |  14 ++-
 .../evaluation/RegressionEvaluatorSuite.scala   |   8 +-
 .../spark/ml/feature/BinarizerSuite.scala       |  16 +--
 .../spark/ml/feature/BucketizerSuite.scala      |  15 ++-
 .../spark/ml/feature/ChiSqSelectorSuite.scala   |   3 +-
 .../spark/ml/feature/CountVectorizerSuite.scala |  30 ++---
 .../org/apache/spark/ml/feature/DCTSuite.scala  |  10 +-
 .../spark/ml/feature/HashingTFSuite.scala       |  10 +-
 .../org/apache/spark/ml/feature/IDFSuite.scala  |   6 +-
 .../spark/ml/feature/InteractionSuite.scala     |  53 ++++----
 .../spark/ml/feature/MaxAbsScalerSuite.scala    |   5 +-
 .../spark/ml/feature/MinMaxScalerSuite.scala    |  13 +-
 .../apache/spark/ml/feature/NGramSuite.scala    |  35 +++---
 .../spark/ml/feature/NormalizerSuite.scala      |   4 +-
 .../spark/ml/feature/OneHotEncoderSuite.scala   |  10 +-
 .../org/apache/spark/ml/feature/PCASuite.scala  |   4 +-
 .../ml/feature/PolynomialExpansionSuite.scala   |  11 +-
 .../apache/spark/ml/feature/RFormulaSuite.scala | 126 ++++++++-----------
 .../spark/ml/feature/SQLTransformerSuite.scala  |   8 +-
 .../spark/ml/feature/StandardScalerSuite.scala  |  12 +-
 .../ml/feature/StopWordsRemoverSuite.scala      |  29 +++--
 .../spark/ml/feature/StringIndexerSuite.scala   |  32 ++---
 .../spark/ml/feature/TokenizerSuite.scala       |  17 +--
 .../spark/ml/feature/VectorAssemblerSuite.scala |  10 +-
 .../spark/ml/feature/VectorIndexerSuite.scala   |  15 ++-
 .../regression/AFTSurvivalRegressionSuite.scala |  26 ++--
 .../spark/ml/regression/GBTRegressorSuite.scala |   7 +-
 .../GeneralizedLinearRegressionSuite.scala      | 115 ++++++++---------
 .../ml/regression/IsotonicRegressionSuite.scala |  14 +--
 .../ml/regression/LinearRegressionSuite.scala   |  62 +++++----
 .../tree/impl/GradientBoostedTreesSuite.scala   |   6 +-
 .../spark/ml/tuning/CrossValidatorSuite.scala   |  12 +-
 .../ml/tuning/TrainValidationSplitSuite.scala   |  13 +-
 .../apache/spark/mllib/util/MLUtilsSuite.scala  |  18 +--
 .../mllib/util/MLlibTestSparkContext.scala      |  13 +-
 45 files changed, 462 insertions(+), 460 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
index 3b490cd..6413ca1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala
@@ -36,6 +36,8 @@ import org.apache.spark.sql.types.StructType
 
 class PipelineSuite extends SparkFunSuite with MLlibTestSparkContext with 
DefaultReadWriteTest {
 
+  import testImplicits._
+
   abstract class MyModel extends Model[MyModel]
 
   test("pipeline") {
@@ -183,12 +185,11 @@ class PipelineSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defaul
   }
 
   test("pipeline validateParams") {
-    val df = spark.createDataFrame(
-      Seq(
-        (1, Vectors.dense(0.0, 1.0, 4.0), 1.0),
-        (2, Vectors.dense(1.0, 0.0, 4.0), 2.0),
-        (3, Vectors.dense(1.0, 0.0, 5.0), 3.0),
-        (4, Vectors.dense(0.0, 0.0, 5.0), 4.0))
+    val df = Seq(
+      (1, Vectors.dense(0.0, 1.0, 4.0), 1.0),
+      (2, Vectors.dense(1.0, 0.0, 4.0), 2.0),
+      (3, Vectors.dense(1.0, 0.0, 5.0), 3.0),
+      (4, Vectors.dense(0.0, 0.0, 5.0), 4.0)
     ).toDF("id", "features", "label")
 
     intercept[IllegalArgumentException] {

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
index 4db5f03..de71207 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
@@ -29,12 +29,13 @@ import org.apache.spark.sql.{DataFrame, Dataset}
 
 class ClassifierSuite extends SparkFunSuite with MLlibTestSparkContext {
 
-  test("extractLabeledPoints") {
-    def getTestData(labels: Seq[Double]): DataFrame = {
-      val data = labels.map { label: Double => LabeledPoint(label, 
Vectors.dense(0.0)) }
-      spark.createDataFrame(data)
-    }
+  import testImplicits._
+
+  private def getTestData(labels: Seq[Double]): DataFrame = {
+    labels.map { label: Double => LabeledPoint(label, Vectors.dense(0.0)) 
}.toDF()
+  }
 
+  test("extractLabeledPoints") {
     val c = new MockClassifier
     // Valid dataset
     val df0 = getTestData(Seq(0.0, 2.0, 1.0, 5.0))
@@ -70,11 +71,6 @@ class ClassifierSuite extends SparkFunSuite with 
MLlibTestSparkContext {
   }
 
   test("getNumClasses") {
-    def getTestData(labels: Seq[Double]): DataFrame = {
-      val data = labels.map { label: Double => LabeledPoint(label, 
Vectors.dense(0.0)) }
-      spark.createDataFrame(data)
-    }
-
     val c = new MockClassifier
     // Valid dataset
     val df0 = getTestData(Seq(0.0, 2.0, 1.0, 5.0))

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index 089d30a..c711e7f 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -34,6 +34,7 @@ class DecisionTreeClassifierSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
   import DecisionTreeClassifierSuite.compareAPIs
+  import testImplicits._
 
   private var categoricalDataPointsRDD: RDD[LabeledPoint] = _
   private var orderedLabeledPointsWithLabel0RDD: RDD[LabeledPoint] = _
@@ -345,7 +346,7 @@ class DecisionTreeClassifierSuite
   }
 
   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = 
spark.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = TreeTests.featureImportanceData(sc).toDF()
     val dt = new DecisionTreeClassifier().setMaxDepth(1)
     dt.fit(df)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index 8d588cc..3492709 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -39,6 +39,7 @@ import org.apache.spark.util.Utils
 class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {
 
+  import testImplicits._
   import GBTClassifierSuite.compareAPIs
 
   // Combinations for estimators, learning rates and subsamplingRate
@@ -134,15 +135,14 @@ class GBTClassifierSuite extends SparkFunSuite with 
MLlibTestSparkContext
   */
 
   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = 
spark.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = TreeTests.featureImportanceData(sc).toDF()
     val gbt = new GBTClassifier().setMaxDepth(1).setMaxIter(1)
     gbt.fit(df)
   }
 
   test("extractLabeledPoints with bad data") {
     def getTestData(labels: Seq[Double]): DataFrame = {
-      val data = labels.map { label: Double => LabeledPoint(label, 
Vectors.dense(0.0)) }
-      spark.createDataFrame(data)
+      labels.map { label: Double => LabeledPoint(label, Vectors.dense(0.0)) 
}.toDF()
     }
 
     val gbt = new GBTClassifier().setMaxDepth(1).setMaxIter(1)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 2623759..8451e60 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -37,6 +37,8 @@ import org.apache.spark.sql.functions.lit
 class LogisticRegressionSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var smallBinaryDataset: Dataset[_] = _
   @transient var smallMultinomialDataset: Dataset[_] = _
   @transient var binaryDataset: Dataset[_] = _
@@ -46,8 +48,7 @@ class LogisticRegressionSuite
   override def beforeAll(): Unit = {
     super.beforeAll()
 
-    smallBinaryDataset =
-      spark.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, 
seed = 42))
+    smallBinaryDataset = generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 
42).toDF()
 
     smallMultinomialDataset = {
       val nPoints = 100
@@ -61,7 +62,7 @@ class LogisticRegressionSuite
       val testData = generateMultinomialLogisticInput(
         coefficients, xMean, xVariance, addIntercept = true, nPoints, 42)
 
-      val df = spark.createDataFrame(sc.parallelize(testData, 4))
+      val df = sc.parallelize(testData, 4).toDF()
       df.cache()
       df
     }
@@ -76,7 +77,7 @@ class LogisticRegressionSuite
         generateMultinomialLogisticInput(coefficients, xMean, xVariance,
           addIntercept = true, nPoints, 42)
 
-      spark.createDataFrame(sc.parallelize(testData, 4))
+      sc.parallelize(testData, 4).toDF()
     }
 
     multinomialDataset = {
@@ -91,7 +92,7 @@ class LogisticRegressionSuite
       val testData = generateMultinomialLogisticInput(
         coefficients, xMean, xVariance, addIntercept = true, nPoints, 42)
 
-      val df = spark.createDataFrame(sc.parallelize(testData, 4))
+      val df = sc.parallelize(testData, 4).toDF()
       df.cache()
       df
     }
@@ -430,10 +431,10 @@ class LogisticRegressionSuite
     val model = new LogisticRegressionModel("mLogReg",
       Matrices.dense(3, 2, Array(0.0, 0.0, 0.0, 1.0, 2.0, 3.0)),
       Vectors.dense(0.0, 0.0, 0.0), 3, true)
-    val overFlowData = spark.createDataFrame(Seq(
+    val overFlowData = Seq(
       LabeledPoint(1.0, Vectors.dense(0.0, 1000.0)),
       LabeledPoint(1.0, Vectors.dense(0.0, -1.0))
-    ))
+    ).toDF()
     val results = model.transform(overFlowData).select("rawPrediction", 
"probability").collect()
 
     // probabilities are correct when margins have to be adjusted
@@ -1795,9 +1796,9 @@ class LogisticRegressionSuite
     val numPoints = 40
     val outlierData = 
MLTestingUtils.genClassificationInstancesWithWeightedOutliers(spark,
       numClasses, numPoints)
-    val testData = 
spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i =>
+    val testData = Array.tabulate[LabeledPoint](numClasses) { i =>
       LabeledPoint(i.toDouble, Vectors.dense(i.toDouble))
-    })
+    }.toSeq.toDF()
     val lr = new 
LogisticRegression().setFamily("binomial").setWeightCol("weight")
     val model = lr.fit(outlierData)
     val results = model.transform(testData).select("label", 
"prediction").collect()
@@ -1819,9 +1820,9 @@ class LogisticRegressionSuite
     val numPoints = 40
     val outlierData = 
MLTestingUtils.genClassificationInstancesWithWeightedOutliers(spark,
       numClasses, numPoints)
-    val testData = 
spark.createDataFrame(Array.tabulate[LabeledPoint](numClasses) { i =>
+    val testData = Array.tabulate[LabeledPoint](numClasses) { i =>
       LabeledPoint(i.toDouble, Vectors.dense(i.toDouble))
-    })
+    }.toSeq.toDF()
     val mlr = new 
LogisticRegression().setFamily("multinomial").setWeightCol("weight")
     val model = mlr.fit(outlierData)
     val results = model.transform(testData).select("label", 
"prediction").collect()
@@ -1945,11 +1946,10 @@ class LogisticRegressionSuite
   }
 
   test("multiclass logistic regression with all labels the same") {
-    val constantData = spark.createDataFrame(Seq(
+    val constantData = Seq(
       LabeledPoint(4.0, Vectors.dense(0.0)),
       LabeledPoint(4.0, Vectors.dense(1.0)),
-      LabeledPoint(4.0, Vectors.dense(2.0)))
-    )
+      LabeledPoint(4.0, Vectors.dense(2.0))).toDF()
     val mlr = new LogisticRegression().setFamily("multinomial")
     val model = mlr.fit(constantData)
     val results = model.transform(constantData)
@@ -1961,11 +1961,10 @@ class LogisticRegressionSuite
     }
 
     // force the model to be trained with only one class
-    val constantZeroData = spark.createDataFrame(Seq(
+    val constantZeroData = Seq(
       LabeledPoint(0.0, Vectors.dense(0.0)),
       LabeledPoint(0.0, Vectors.dense(1.0)),
-      LabeledPoint(0.0, Vectors.dense(2.0)))
-    )
+      LabeledPoint(0.0, Vectors.dense(2.0))).toDF()
     val modelZeroLabel = mlr.setFitIntercept(false).fit(constantZeroData)
     val resultsZero = modelZeroLabel.transform(constantZeroData)
     resultsZero.select("rawPrediction", "probability", 
"prediction").collect().foreach {
@@ -1990,20 +1989,18 @@ class LogisticRegressionSuite
   }
 
   test("compressed storage") {
-    val moreClassesThanFeatures = spark.createDataFrame(Seq(
+    val moreClassesThanFeatures = Seq(
       LabeledPoint(4.0, Vectors.dense(0.0, 0.0, 0.0)),
       LabeledPoint(4.0, Vectors.dense(1.0, 1.0, 1.0)),
-      LabeledPoint(4.0, Vectors.dense(2.0, 2.0, 2.0)))
-    )
+      LabeledPoint(4.0, Vectors.dense(2.0, 2.0, 2.0))).toDF()
     val mlr = new LogisticRegression().setFamily("multinomial")
     val model = mlr.fit(moreClassesThanFeatures)
     assert(model.coefficientMatrix.isInstanceOf[SparseMatrix])
     assert(model.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length 
=== 4)
-    val moreFeaturesThanClasses = spark.createDataFrame(Seq(
+    val moreFeaturesThanClasses = Seq(
       LabeledPoint(1.0, Vectors.dense(0.0, 0.0, 0.0)),
       LabeledPoint(1.0, Vectors.dense(1.0, 1.0, 1.0)),
-      LabeledPoint(1.0, Vectors.dense(2.0, 2.0, 2.0)))
-    )
+      LabeledPoint(1.0, Vectors.dense(2.0, 2.0, 2.0))).toDF()
     val model2 = mlr.fit(moreFeaturesThanClasses)
     assert(model2.coefficientMatrix.isInstanceOf[SparseMatrix])
     assert(model2.coefficientMatrix.asInstanceOf[SparseMatrix].colPtrs.length 
=== 3)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index e809dd4..c08cb69 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -33,16 +33,18 @@ import org.apache.spark.sql.{Dataset, Row}
 class MultilayerPerceptronClassifierSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var dataset: Dataset[_] = _
 
   override def beforeAll(): Unit = {
     super.beforeAll()
 
-    dataset = spark.createDataFrame(Seq(
-        (Vectors.dense(0.0, 0.0), 0.0),
-        (Vectors.dense(0.0, 1.0), 1.0),
-        (Vectors.dense(1.0, 0.0), 1.0),
-        (Vectors.dense(1.0, 1.0), 0.0))
+    dataset = Seq(
+      (Vectors.dense(0.0, 0.0), 0.0),
+      (Vectors.dense(0.0, 1.0), 1.0),
+      (Vectors.dense(1.0, 0.0), 1.0),
+      (Vectors.dense(1.0, 1.0), 0.0)
     ).toDF("features", "label")
   }
 
@@ -80,11 +82,11 @@ class MultilayerPerceptronClassifierSuite
   }
 
   test("Test setWeights by training restart") {
-    val dataFrame = spark.createDataFrame(Seq(
+    val dataFrame = Seq(
       (Vectors.dense(0.0, 0.0), 0.0),
       (Vectors.dense(0.0, 1.0), 1.0),
       (Vectors.dense(1.0, 0.0), 1.0),
-      (Vectors.dense(1.0, 1.0), 0.0))
+      (Vectors.dense(1.0, 1.0), 0.0)
     ).toDF("features", "label")
     val layers = Array[Int](2, 5, 2)
     val trainer = new MultilayerPerceptronClassifier()
@@ -114,9 +116,9 @@ class MultilayerPerceptronClassifierSuite
     val xMean = Array(5.843, 3.057, 3.758, 1.199)
     val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
     // the input seed is somewhat magic, to make this test pass
-    val rdd = sc.parallelize(generateMultinomialLogisticInput(
-      coefficients, xMean, xVariance, true, nPoints, 1), 2)
-    val dataFrame = spark.createDataFrame(rdd).toDF("label", "features")
+    val data = generateMultinomialLogisticInput(
+      coefficients, xMean, xVariance, true, nPoints, 1).toDS()
+    val dataFrame = data.toDF("label", "features")
     val numClasses = 3
     val numIterations = 100
     val layers = Array[Int](4, 5, 4, numClasses)
@@ -137,9 +139,9 @@ class MultilayerPerceptronClassifierSuite
       .setNumClasses(numClasses)
     lr.optimizer.setRegParam(0.0)
       .setNumIterations(numIterations)
-    val lrModel = lr.run(rdd.map(OldLabeledPoint.fromML))
+    val lrModel = lr.run(data.rdd.map(OldLabeledPoint.fromML))
     val lrPredictionAndLabels =
-      lrModel.predict(rdd.map(p => 
OldVectors.fromML(p.features))).zip(rdd.map(_.label))
+      lrModel.predict(data.rdd.map(p => 
OldVectors.fromML(p.features))).zip(data.rdd.map(_.label))
     // MLP's predictions should not differ a lot from LR's.
     val lrMetrics = new MulticlassMetrics(lrPredictionAndLabels)
     val mlpMetrics = new MulticlassMetrics(mlpPredictionAndLabels)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
index 04c010b..9909932 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala
@@ -35,6 +35,8 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row}
 
 class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with 
DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var dataset: Dataset[_] = _
 
   override def beforeAll(): Unit = {
@@ -47,7 +49,7 @@ class NaiveBayesSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defa
       Array(0.10, 0.10, 0.70, 0.10)  // label 2
     ).map(_.map(math.log))
 
-    dataset = spark.createDataFrame(generateNaiveBayesInput(pi, theta, 100, 
42))
+    dataset = generateNaiveBayesInput(pi, theta, 100, 42).toDF()
   }
 
   def validatePrediction(predictionAndLabels: DataFrame): Unit = {
@@ -131,16 +133,16 @@ class NaiveBayesSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defa
     val pi = Vectors.dense(piArray)
     val theta = new DenseMatrix(3, 4, thetaArray.flatten, true)
 
-    val testDataset = spark.createDataFrame(generateNaiveBayesInput(
-      piArray, thetaArray, nPoints, 42, "multinomial"))
+    val testDataset =
+      generateNaiveBayesInput(piArray, thetaArray, nPoints, 42, 
"multinomial").toDF()
     val nb = new NaiveBayes().setSmoothing(1.0).setModelType("multinomial")
     val model = nb.fit(testDataset)
 
     validateModelFit(pi, theta, model)
     assert(model.hasParent)
 
-    val validationDataset = spark.createDataFrame(generateNaiveBayesInput(
-      piArray, thetaArray, nPoints, 17, "multinomial"))
+    val validationDataset =
+      generateNaiveBayesInput(piArray, thetaArray, nPoints, 17, 
"multinomial").toDF()
 
     val predictionAndLabels = 
model.transform(validationDataset).select("prediction", "label")
     validatePrediction(predictionAndLabels)
@@ -161,16 +163,16 @@ class NaiveBayesSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defa
     val pi = Vectors.dense(piArray)
     val theta = new DenseMatrix(3, 12, thetaArray.flatten, true)
 
-    val testDataset = spark.createDataFrame(generateNaiveBayesInput(
-      piArray, thetaArray, nPoints, 45, "bernoulli"))
+    val testDataset =
+      generateNaiveBayesInput(piArray, thetaArray, nPoints, 45, 
"bernoulli").toDF()
     val nb = new NaiveBayes().setSmoothing(1.0).setModelType("bernoulli")
     val model = nb.fit(testDataset)
 
     validateModelFit(pi, theta, model)
     assert(model.hasParent)
 
-    val validationDataset = spark.createDataFrame(generateNaiveBayesInput(
-      piArray, thetaArray, nPoints, 20, "bernoulli"))
+    val validationDataset =
+      generateNaiveBayesInput(piArray, thetaArray, nPoints, 20, 
"bernoulli").toDF()
 
     val predictionAndLabels = 
model.transform(validationDataset).select("prediction", "label")
     validatePrediction(predictionAndLabels)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
index 99dd585..3f9bcec 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala
@@ -37,6 +37,8 @@ import org.apache.spark.sql.types.Metadata
 
 class OneVsRestSuite extends SparkFunSuite with MLlibTestSparkContext with 
DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var dataset: Dataset[_] = _
   @transient var rdd: RDD[LabeledPoint] = _
 
@@ -55,7 +57,7 @@ class OneVsRestSuite extends SparkFunSuite with 
MLlibTestSparkContext with Defau
     val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
     rdd = sc.parallelize(generateMultinomialLogisticInput(
       coefficients, xMean, xVariance, true, nPoints, 42), 2)
-    dataset = spark.createDataFrame(rdd)
+    dataset = rdd.toDF()
   }
 
   test("params") {

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
index 2e99ee1..44e1585 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
@@ -39,6 +39,7 @@ class RandomForestClassifierSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
   import RandomForestClassifierSuite.compareAPIs
+  import testImplicits._
 
   private var orderedLabeledPoints50_1000: RDD[LabeledPoint] = _
   private var orderedLabeledPoints5_20: RDD[LabeledPoint] = _
@@ -158,7 +159,7 @@ class RandomForestClassifierSuite
   }
 
   test("Fitting without numClasses in metadata") {
-    val df: DataFrame = 
spark.createDataFrame(TreeTests.featureImportanceData(sc))
+    val df: DataFrame = TreeTests.featureImportanceData(sc).toDF()
     val rf = new RandomForestClassifier().setMaxDepth(1).setNumTrees(1)
     rf.fit(df)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
index ddfa875..3f39ded 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala
@@ -62,6 +62,8 @@ object LDASuite {
 
 class LDASuite extends SparkFunSuite with MLlibTestSparkContext with 
DefaultReadWriteTest {
 
+  import testImplicits._
+
   val k: Int = 5
   val vocabSize: Int = 30
   @transient var dataset: Dataset[_] = _
@@ -140,8 +142,8 @@ class LDASuite extends SparkFunSuite with 
MLlibTestSparkContext with DefaultRead
       new LDA().setTopicConcentration(-1.1)
     }
 
-    val dummyDF = spark.createDataFrame(Seq(
-      (1, Vectors.dense(1.0, 2.0)))).toDF("id", "features")
+    val dummyDF = Seq((1, Vectors.dense(1.0, 2.0))).toDF("id", "features")
+
     // validate parameters
     lda.transformSchema(dummyDF.schema)
     lda.setDocConcentration(1.1)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
index 9ee3df5..ede2847 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluatorSuite.scala
@@ -26,6 +26,8 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 class BinaryClassificationEvaluatorSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new BinaryClassificationEvaluator)
   }
@@ -42,25 +44,25 @@ class BinaryClassificationEvaluatorSuite
     val evaluator = new BinaryClassificationEvaluator()
       .setMetricName("areaUnderPR")
 
-    val vectorDF = spark.createDataFrame(Seq(
+    val vectorDF = Seq(
       (0d, Vectors.dense(12, 2.5)),
       (1d, Vectors.dense(1, 3)),
       (0d, Vectors.dense(10, 2))
-    )).toDF("label", "rawPrediction")
+    ).toDF("label", "rawPrediction")
     assert(evaluator.evaluate(vectorDF) === 1.0)
 
-    val doubleDF = spark.createDataFrame(Seq(
+    val doubleDF = Seq(
       (0d, 0d),
       (1d, 1d),
       (0d, 0d)
-    )).toDF("label", "rawPrediction")
+    ).toDF("label", "rawPrediction")
     assert(evaluator.evaluate(doubleDF) === 1.0)
 
-    val stringDF = spark.createDataFrame(Seq(
+    val stringDF = Seq(
       (0d, "0d"),
       (1d, "1d"),
       (0d, "0d")
-    )).toDF("label", "rawPrediction")
+    ).toDF("label", "rawPrediction")
     val thrown = intercept[IllegalArgumentException] {
       evaluator.evaluate(stringDF)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
index 42ff8ad..c1a1569 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala
@@ -27,6 +27,8 @@ import org.apache.spark.mllib.util.TestingUtils._
 class RegressionEvaluatorSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new RegressionEvaluator)
   }
@@ -42,9 +44,9 @@ class RegressionEvaluatorSuite
      * data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1))
      *   .saveAsTextFile("path")
      */
-    val dataset = spark.createDataFrame(
-      sc.parallelize(LinearDataGenerator.generateLinearInput(
-        6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 
0.1), 2).map(_.asML))
+    val dataset = LinearDataGenerator.generateLinearInput(
+      6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1)
+      .map(_.asML).toDF()
 
     /**
      * Using the following R code to load the data, train the model and 
evaluate metrics.

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
index 9cb84a6..4455d35 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala
@@ -26,6 +26,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 
 class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with 
DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var data: Array[Double] = _
 
   override def beforeAll(): Unit = {
@@ -39,8 +41,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
 
   test("Binarize continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(
-      data.zip(defaultBinarized)).toDF("feature", "expected")
+    val dataFrame: DataFrame = data.zip(defaultBinarized).toSeq.toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -55,8 +56,7 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize continuous features with setter") {
     val threshold: Double = 0.2
     val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(
-        data.zip(thresholdBinarized)).toDF("feature", "expected")
+    val dataFrame: DataFrame = data.zip(thresholdBinarized).toSeq.toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -71,9 +71,9 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
 
   test("Binarize vector of continuous features with default parameter") {
     val defaultBinarized: Array[Double] = data.map(x => if (x > 0.0) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(Seq(
+    val dataFrame: DataFrame = Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
-    )).toDF("feature", "expected")
+    ).toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")
@@ -88,9 +88,9 @@ class BinarizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   test("Binarize vector of continuous features with setter") {
     val threshold: Double = 0.2
     val defaultBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0)
-    val dataFrame: DataFrame = spark.createDataFrame(Seq(
+    val dataFrame: DataFrame = Seq(
       (Vectors.dense(data), Vectors.dense(defaultBinarized))
-    )).toDF("feature", "expected")
+    ).toDF("feature", "expected")
 
     val binarizer: Binarizer = new Binarizer()
       .setInputCol("feature")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
index c7f5093..87cdceb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala
@@ -29,6 +29,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 
 class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new Bucketizer)
   }
@@ -38,8 +40,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val splits = Array(-0.5, 0.0, 0.5)
     val validData = Array(-0.5, -0.3, 0.0, 0.2)
     val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")
@@ -55,13 +56,13 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     // Check for exceptions when using a set of invalid feature values.
     val invalidData1: Array[Double] = Array(-0.9) ++ validData
     val invalidData2 = Array(0.51) ++ validData
-    val badDF1 = spark.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx")
+    val badDF1 = invalidData1.zipWithIndex.toSeq.toDF("feature", "idx")
     withClue("Invalid feature value -0.9 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF1).collect()
       }
     }
-    val badDF2 = spark.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx")
+    val badDF2 = invalidData2.zipWithIndex.toSeq.toDF("feature", "idx")
     withClue("Invalid feature value 0.51 was not caught as an invalid feature!") {
       intercept[SparkException] {
         bucketizer.transform(badDF2).collect()
@@ -73,8 +74,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
     val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9)
     val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")
@@ -92,8 +92,7 @@ class BucketizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
     val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
     val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9, Double.NaN, Double.NaN, Double.NaN)
     val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0)
-    val dataFrame: DataFrame =
-      spark.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected")
+    val dataFrame: DataFrame = validData.zip(expectedBuckets).toSeq.toDF("feature", "expected")
 
     val bucketizer: Bucketizer = new Bucketizer()
       .setInputCol("feature")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
index 6b56e42..dfebfc8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ChiSqSelectorSuite.scala
@@ -29,8 +29,7 @@ class ChiSqSelectorSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {
 
   test("Test Chi-Square selector") {
-    val spark = this.spark
-    import spark.implicits._
+    import testImplicits._
     val data = Seq(
       LabeledPoint(0.0, Vectors.sparse(3, Array((0, 8.0), (1, 7.0)))),
       LabeledPoint(1.0, Vectors.sparse(3, Array((1, 9.0), (2, 6.0)))),

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
index 863b66b..69d3033 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/CountVectorizerSuite.scala
@@ -27,6 +27,8 @@ import org.apache.spark.sql.Row
 class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new CountVectorizer)
     ParamsSuite.checkParams(new CountVectorizerModel(Array("empty")))
@@ -35,7 +37,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   private def split(s: String): Seq[String] = s.split("\\s+")
 
   test("CountVectorizerModel common cases") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a b c d"),
         Vectors.sparse(4, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0)))),
       (1, split("a b b c d  a"),
@@ -44,7 +46,7 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
       (3, split(""), Vectors.sparse(4, Seq())), // empty string
       (4, split("a notInDict d"),
         Vectors.sparse(4, Seq((0, 1.0), (3, 1.0))))  // with words not in vocabulary
-    )).toDF("id", "words", "expected")
+    ).toDF("id", "words", "expected")
     val cv = new CountVectorizerModel(Array("a", "b", "c", "d"))
       .setInputCol("words")
       .setOutputCol("features")
@@ -55,13 +57,13 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("CountVectorizer common cases") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a b c d e"),
         Vectors.sparse(5, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0), (4, 1.0)))),
       (1, split("a a a a a a"), Vectors.sparse(5, Seq((0, 6.0)))),
       (2, split("c c"), Vectors.sparse(5, Seq((2, 2.0)))),
       (3, split("d"), Vectors.sparse(5, Seq((3, 1.0)))),
-      (4, split("b b b b b"), Vectors.sparse(5, Seq((1, 5.0)))))
+      (4, split("b b b b b"), Vectors.sparse(5, Seq((1, 5.0))))
     ).toDF("id", "words", "expected")
     val cv = new CountVectorizer()
       .setInputCol("words")
@@ -76,11 +78,11 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("CountVectorizer vocabSize and minDF") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a b c d"), Vectors.sparse(2, Seq((0, 1.0), (1, 1.0)))),
       (1, split("a b c"), Vectors.sparse(2, Seq((0, 1.0), (1, 1.0)))),
       (2, split("a b"), Vectors.sparse(2, Seq((0, 1.0), (1, 1.0)))),
-      (3, split("a"), Vectors.sparse(2, Seq((0, 1.0)))))
+      (3, split("a"), Vectors.sparse(2, Seq((0, 1.0))))
     ).toDF("id", "words", "expected")
     val cvModel = new CountVectorizer()
       .setInputCol("words")
@@ -118,9 +120,9 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
 
   test("CountVectorizer throws exception when vocab is empty") {
     intercept[IllegalArgumentException] {
-      val df = spark.createDataFrame(Seq(
+      val df = Seq(
         (0, split("a a b b c c")),
-        (1, split("aa bb cc")))
+        (1, split("aa bb cc"))
       ).toDF("id", "words")
       val cvModel = new CountVectorizer()
         .setInputCol("words")
@@ -132,11 +134,11 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("CountVectorizerModel with minTF count") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a a a b b c c c d "), Vectors.sparse(4, Seq((0, 3.0), (2, 3.0)))),
       (1, split("c c c c c c"), Vectors.sparse(4, Seq((2, 6.0)))),
       (2, split("a"), Vectors.sparse(4, Seq())),
-      (3, split("e e e e e"), Vectors.sparse(4, Seq())))
+      (3, split("e e e e e"), Vectors.sparse(4, Seq()))
     ).toDF("id", "words", "expected")
 
     // minTF: count
@@ -151,11 +153,11 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("CountVectorizerModel with minTF freq") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a a a b b c c c d "), Vectors.sparse(4, Seq((0, 3.0), (2, 3.0)))),
       (1, split("c c c c c c"), Vectors.sparse(4, Seq((2, 6.0)))),
       (2, split("a"), Vectors.sparse(4, Seq((0, 1.0)))),
-      (3, split("e e e e e"), Vectors.sparse(4, Seq())))
+      (3, split("e e e e e"), Vectors.sparse(4, Seq()))
     ).toDF("id", "words", "expected")
 
     // minTF: set frequency
@@ -170,12 +172,12 @@ class CountVectorizerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("CountVectorizerModel and CountVectorizer with binary") {
-    val df = spark.createDataFrame(Seq(
+    val df = Seq(
       (0, split("a a a a b b b b c d"),
       Vectors.sparse(4, Seq((0, 1.0), (1, 1.0), (2, 1.0), (3, 1.0)))),
       (1, split("c c c"), Vectors.sparse(4, Seq((2, 1.0)))),
       (2, split("a"), Vectors.sparse(4, Seq((0, 1.0))))
-    )).toDF("id", "words", "expected")
+    ).toDF("id", "words", "expected")
 
     // CountVectorizer test
     val cv = new CountVectorizer()

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
index c02e961..8dd3dd7 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala
@@ -32,6 +32,8 @@ case class DCTTestData(vec: Vector, wantedVec: Vector)
 
 class DCTSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("forward transform of discrete cosine matches jTransforms result") {
     val data = Vectors.dense((0 until 128).map(_ => 2D * math.random - 1D).toArray)
     val inverse = false
@@ -57,15 +59,13 @@ class DCTSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
   private def testDCT(data: Vector, inverse: Boolean): Unit = {
     val expectedResultBuffer = data.toArray.clone()
     if (inverse) {
-      (new DoubleDCT_1D(data.size)).inverse(expectedResultBuffer, true)
+      new DoubleDCT_1D(data.size).inverse(expectedResultBuffer, true)
     } else {
-      (new DoubleDCT_1D(data.size)).forward(expectedResultBuffer, true)
+      new DoubleDCT_1D(data.size).forward(expectedResultBuffer, true)
     }
     val expectedResult = Vectors.dense(expectedResultBuffer)
 
-    val dataset = spark.createDataFrame(Seq(
-      DCTTestData(data, expectedResult)
-    ))
+    val dataset = Seq(DCTTestData(data, expectedResult)).toDF()
 
     val transformer = new DCT()
       .setInputCol("vec")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
index 99b8007..1d14866 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
@@ -29,14 +29,14 @@ import org.apache.spark.util.Utils
 
 class HashingTFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new HashingTF)
   }
 
   test("hashingTF") {
-    val df = spark.createDataFrame(Seq(
-      (0, "a a b b c d".split(" ").toSeq)
-    )).toDF("id", "words")
+    val df = Seq((0, "a a b b c d".split(" ").toSeq)).toDF("id", "words")
     val n = 100
     val hashingTF = new HashingTF()
       .setInputCol("words")
@@ -54,9 +54,7 @@ class HashingTFSuite extends SparkFunSuite with MLlibTestSparkContext with Defau
   }
 
   test("applying binary term freqs") {
-    val df = spark.createDataFrame(Seq(
-      (0, "a a b c c c".split(" ").toSeq)
-    )).toDF("id", "words")
+    val df = Seq((0, "a a b c c c".split(" ").toSeq)).toDF("id", "words")
     val n = 100
     val hashingTF = new HashingTF()
         .setInputCol("words")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
index 09dc8b9..5325d95 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala
@@ -29,6 +29,8 @@ import org.apache.spark.sql.Row
 
 class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   def scaleDataWithIDF(dataSet: Array[Vector], model: Vector): Array[Vector] = {
     dataSet.map {
       case data: DenseVector =>
@@ -61,7 +63,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     })
     val expected = scaleDataWithIDF(data, idf)
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
 
     val idfModel = new IDF()
       .setInputCol("features")
@@ -87,7 +89,7 @@ class IDFSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     })
     val expected = scaleDataWithIDF(data, idf)
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
 
     val idfModel = new IDF()
       .setInputCol("features")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
index 3429172..54f059e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InteractionSuite.scala
@@ -28,6 +28,9 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.functions.col
 
 class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
+
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new Interaction())
   }
@@ -59,11 +62,10 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }
 
   test("numeric interaction") {
-    val data = spark.createDataFrame(
-      Seq(
-        (2, Vectors.dense(3.0, 4.0)),
-        (1, Vectors.dense(1.0, 5.0)))
-      ).toDF("a", "b")
+    val data = Seq(
+      (2, Vectors.dense(3.0, 4.0)),
+      (1, Vectors.dense(1.0, 5.0))
+    ).toDF("a", "b")
     val groupAttr = new AttributeGroup(
       "b",
       Array[Attribute](
@@ -74,11 +76,10 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("b").as("b", groupAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = spark.createDataFrame(
-      Seq(
-        (2, Vectors.dense(3.0, 4.0), Vectors.dense(6.0, 8.0)),
-        (1, Vectors.dense(1.0, 5.0), Vectors.dense(1.0, 5.0)))
-      ).toDF("a", "b", "features")
+    val expected = Seq(
+      (2, Vectors.dense(3.0, 4.0), Vectors.dense(6.0, 8.0)),
+      (1, Vectors.dense(1.0, 5.0), Vectors.dense(1.0, 5.0))
+    ).toDF("a", "b", "features")
     assert(res.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(res.schema("features"))
     val expectedAttrs = new AttributeGroup(
@@ -90,11 +91,10 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }
 
   test("nominal interaction") {
-    val data = spark.createDataFrame(
-      Seq(
-        (2, Vectors.dense(3.0, 4.0)),
-        (1, Vectors.dense(1.0, 5.0)))
-      ).toDF("a", "b")
+    val data = Seq(
+      (2, Vectors.dense(3.0, 4.0)),
+      (1, Vectors.dense(1.0, 5.0))
+    ).toDF("a", "b")
     val groupAttr = new AttributeGroup(
       "b",
       Array[Attribute](
@@ -106,11 +106,10 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("b").as("b", groupAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = spark.createDataFrame(
-      Seq(
-        (2, Vectors.dense(3.0, 4.0), Vectors.dense(0, 0, 0, 0, 3, 4)),
-        (1, Vectors.dense(1.0, 5.0), Vectors.dense(0, 0, 1, 5, 0, 0)))
-      ).toDF("a", "b", "features")
+    val expected = Seq(
+      (2, Vectors.dense(3.0, 4.0), Vectors.dense(0, 0, 0, 0, 3, 4)),
+      (1, Vectors.dense(1.0, 5.0), Vectors.dense(0, 0, 1, 5, 0, 0))
+    ).toDF("a", "b", "features")
     assert(res.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(res.schema("features"))
     val expectedAttrs = new AttributeGroup(
@@ -126,10 +125,9 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
   }
 
   test("default attr names") {
-    val data = spark.createDataFrame(
-      Seq(
+    val data = Seq(
         (2, Vectors.dense(0.0, 4.0), 1.0),
-        (1, Vectors.dense(1.0, 5.0), 10.0))
+        (1, Vectors.dense(1.0, 5.0), 10.0)
       ).toDF("a", "b", "c")
     val groupAttr = new AttributeGroup(
       "b",
@@ -142,11 +140,10 @@ class InteractionSuite extends SparkFunSuite with MLlibTestSparkContext with Def
       col("c").as("c", NumericAttribute.defaultAttr.toMetadata()))
     val trans = new Interaction().setInputCols(Array("a", "b", "c")).setOutputCol("features")
     val res = trans.transform(df)
-    val expected = spark.createDataFrame(
-      Seq(
-        (2, Vectors.dense(0.0, 4.0), 1.0, Vectors.dense(0, 0, 0, 0, 0, 0, 1, 0, 4)),
-        (1, Vectors.dense(1.0, 5.0), 10.0, Vectors.dense(0, 0, 0, 0, 10, 50, 0, 0, 0)))
-      ).toDF("a", "b", "c", "features")
+    val expected = Seq(
+      (2, Vectors.dense(0.0, 4.0), 1.0, Vectors.dense(0, 0, 0, 0, 0, 0, 1, 0, 4)),
+      (1, Vectors.dense(1.0, 5.0), 10.0, Vectors.dense(0, 0, 0, 0, 10, 50, 0, 0, 0))
+    ).toDF("a", "b", "c", "features")
     assert(res.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(res.schema("features"))
     val expectedAttrs = new AttributeGroup(

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
index d6400ee..a121744 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MaxAbsScalerSuite.scala
@@ -23,6 +23,9 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.Row
 
 class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
+
+  import testImplicits._
+
   test("MaxAbsScaler fit basic case") {
     val data = Array(
       Vectors.dense(1, 0, 100),
@@ -36,7 +39,7 @@ class MaxAbsScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
       Vectors.sparse(3, Array(0, 2), Array(-1, -1)),
       Vectors.sparse(3, Array(0), Array(-0.75)))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MaxAbsScaler()
       .setInputCol("features")
       .setOutputCol("scaled")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
index 9f376b7..b79eeb2 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinMaxScalerSuite.scala
@@ -25,6 +25,8 @@ import org.apache.spark.sql.Row
 
 class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("MinMaxScaler fit basic case") {
     val data = Array(
       Vectors.dense(1, 0, Long.MinValue),
@@ -38,7 +40,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
       Vectors.sparse(3, Array(0, 2), Array(5, 5)),
       Vectors.sparse(3, Array(0), Array(-2.5)))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaled")
@@ -57,14 +59,13 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
 
   test("MinMaxScaler arguments max must be larger than min") {
     withClue("arguments max must be larger than min") {
-      val dummyDF = spark.createDataFrame(Seq(
-        (1, Vectors.dense(1.0, 2.0)))).toDF("id", "feature")
+      val dummyDF = Seq((1, Vectors.dense(1.0, 2.0))).toDF("id", "features")
       intercept[IllegalArgumentException] {
-        val scaler = new MinMaxScaler().setMin(10).setMax(0).setInputCol("feature")
+        val scaler = new MinMaxScaler().setMin(10).setMax(0).setInputCol("features")
         scaler.transformSchema(dummyDF.schema)
       }
       intercept[IllegalArgumentException] {
-        val scaler = new MinMaxScaler().setMin(0).setMax(0).setInputCol("feature")
+        val scaler = new MinMaxScaler().setMin(0).setMax(0).setInputCol("features")
         scaler.transformSchema(dummyDF.schema)
       }
     }
@@ -104,7 +105,7 @@ class MinMaxScalerSuite extends SparkFunSuite with MLlibTestSparkContext with De
       Vectors.dense(-1.0, Double.NaN, -5.0, -5.0),
       Vectors.dense(5.0, 0.0, 5.0, Double.NaN))
 
-    val df = spark.createDataFrame(data.zip(expected)).toDF("features", "expected")
+    val df = data.zip(expected).toSeq.toDF("features", "expected")
     val scaler = new MinMaxScaler()
       .setInputCol("features")
       .setOutputCol("scaled")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
index e5288d9..d4975c0 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala
@@ -28,17 +28,18 @@ import org.apache.spark.sql.{Dataset, Row}
 case class NGramTestData(inputTokens: Array[String], wantedNGrams: Array[String])
 
 class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
+
   import org.apache.spark.ml.feature.NGramSuite._
+  import testImplicits._
 
   test("default behavior yields bigram features") {
     val nGram = new NGram()
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
-    val dataset = spark.createDataFrame(Seq(
-      NGramTestData(
-        Array("Test", "for", "ngram", "."),
-        Array("Test for", "for ngram", "ngram .")
-    )))
+    val dataset = Seq(NGramTestData(
+      Array("Test", "for", "ngram", "."),
+      Array("Test for", "for ngram", "ngram .")
+    )).toDF()
     testNGram(nGram, dataset)
   }
 
@@ -47,11 +48,10 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(4)
-    val dataset = spark.createDataFrame(Seq(
-      NGramTestData(
-        Array("a", "b", "c", "d", "e"),
-        Array("a b c d", "b c d e")
-      )))
+    val dataset = Seq(NGramTestData(
+      Array("a", "b", "c", "d", "e"),
+      Array("a b c d", "b c d e")
+    )).toDF()
     testNGram(nGram, dataset)
   }
 
@@ -60,11 +60,7 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(4)
-    val dataset = spark.createDataFrame(Seq(
-      NGramTestData(
-        Array(),
-        Array()
-      )))
+    val dataset = Seq(NGramTestData(Array(), Array())).toDF()
     testNGram(nGram, dataset)
   }
 
@@ -73,11 +69,10 @@ class NGramSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRe
       .setInputCol("inputTokens")
       .setOutputCol("nGrams")
       .setN(6)
-    val dataset = spark.createDataFrame(Seq(
-      NGramTestData(
-        Array("a", "b", "c", "d", "e"),
-        Array()
-      )))
+    val dataset = Seq(NGramTestData(
+      Array("a", "b", "c", "d", "e"),
+      Array()
+    )).toDF()
     testNGram(nGram, dataset)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
index b692831..c75027f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NormalizerSuite.scala
@@ -27,6 +27,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 
 class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var data: Array[Vector] = _
   @transient var dataFrame: DataFrame = _
   @transient var normalizer: Normalizer = _
@@ -61,7 +63,7 @@ class NormalizerSuite extends SparkFunSuite with MLlibTestSparkContext with Defa
       Vectors.sparse(3, Seq())
     )
 
-    dataFrame = spark.createDataFrame(sc.parallelize(data, 2).map(NormalizerSuite.FeatureData))
+    dataFrame = data.map(NormalizerSuite.FeatureData).toSeq.toDF()
     normalizer = new Normalizer()
       .setInputCol("features")
       .setOutputCol("normalized_features")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala
index d41eeec..c44c681 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala
@@ -30,9 +30,11 @@ import org.apache.spark.sql.types._
 class OneHotEncoderSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   def stringIndexed(): DataFrame = {
-    val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2)
-    val df = spark.createDataFrame(data).toDF("id", "label")
+    val data = Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
+    val df = data.toDF("id", "label")
     val indexer = new StringIndexer()
       .setInputCol("label")
       .setOutputCol("labelIndex")
@@ -83,7 +85,7 @@ class OneHotEncoderSuite
 
   test("input column with ML attribute") {
     val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large")
-    val df = spark.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("size")
+    val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("size")
       .select(col("size").as("size", attr.toMetadata()))
     val encoder = new OneHotEncoder()
       .setInputCol("size")
@@ -96,7 +98,7 @@ class OneHotEncoderSuite
   }
 
   test("input column without ML attribute") {
-    val df = spark.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("index")
+    val df = Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply).toDF("index")
     val encoder = new OneHotEncoder()
       .setInputCol("index")
       .setOutputCol("encoded")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
index ddb51fb..a60e875 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PCASuite.scala
@@ -29,6 +29,8 @@ import org.apache.spark.sql.Row
 
 class PCASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new PCA)
     val mat = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0)).asInstanceOf[DenseMatrix]
@@ -50,7 +52,7 @@ class PCASuite extends SparkFunSuite with MLlibTestSparkContext with DefaultRead
     val pc = mat.computePrincipalComponents(3)
     val expected = mat.multiply(pc).rows.map(_.asML)
 
-    val df = spark.createDataFrame(dataRDD.zip(expected)).toDF("features", "expected")
+    val df = dataRDD.zip(expected).toDF("features", "expected")
 
     val pca = new PCA()
       .setInputCol("features")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
index 9ecd321..e4b0ddf 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
@@ -30,6 +30,8 @@ import org.apache.spark.sql.Row
 class PolynomialExpansionSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new PolynomialExpansion)
   }
@@ -59,7 +61,7 @@ class PolynomialExpansionSuite
     Vectors.sparse(19, Array.empty, Array.empty))
 
   test("Polynomial expansion with default parameter") {
-    val df = spark.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
+    val df = data.zip(twoDegreeExpansion).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -76,7 +78,7 @@ class PolynomialExpansionSuite
   }
 
   test("Polynomial expansion with setter") {
-    val df = spark.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
+    val df = data.zip(threeDegreeExpansion).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -94,7 +96,7 @@ class PolynomialExpansionSuite
   }
 
   test("Polynomial expansion with degree 1 is identity on vectors") {
-    val df = spark.createDataFrame(data.zip(data)).toDF("features", "expected")
+    val df = data.zip(data).toSeq.toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
       .setInputCol("features")
@@ -124,8 +126,7 @@ class PolynomialExpansionSuite
       (Vectors.dense(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), 8007, 12375)
     )
 
-    val df = spark.createDataFrame(data)
-      .toDF("features", "expectedPoly10size", "expectedPoly11size")
+    val df = data.toSeq.toDF("features", "expectedPoly10size", "expectedPoly11size")
 
     val t = new PolynomialExpansion()
       .setInputCol("features")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
index 0794a04..97c268f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaSuite.scala
@@ -26,22 +26,23 @@ import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.types.DoubleType
 
 class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
+
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new RFormula())
   }
 
   test("transform numeric data") {
     val formula = new RFormula().setFormula("id ~ v1 + v2")
-    val original = spark.createDataFrame(
-      Seq((0, 1.0, 3.0), (2, 2.0, 5.0))).toDF("id", "v1", "v2")
+    val original = Seq((0, 1.0, 3.0), (2, 2.0, 5.0)).toDF("id", "v1", "v2")
     val model = formula.fit(original)
     val result = model.transform(original)
     val resultSchema = model.transformSchema(original.schema)
-    val expected = spark.createDataFrame(
-      Seq(
-        (0, 1.0, 3.0, Vectors.dense(1.0, 3.0), 0.0),
-        (2, 2.0, 5.0, Vectors.dense(2.0, 5.0), 2.0))
-      ).toDF("id", "v1", "v2", "features", "label")
+    val expected = Seq(
+      (0, 1.0, 3.0, Vectors.dense(1.0, 3.0), 0.0),
+      (2, 2.0, 5.0, Vectors.dense(2.0, 5.0), 2.0)
+    ).toDF("id", "v1", "v2", "features", "label")
     // TODO(ekl) make schema comparisons ignore metadata, to avoid .toString
     assert(result.schema.toString == resultSchema.toString)
     assert(resultSchema == expected.schema)
@@ -50,7 +51,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("features column already exists") {
     val formula = new RFormula().setFormula("y ~ x").setFeaturesCol("x")
-    val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y")
+    val original = Seq((0, 1.0), (2, 2.0)).toDF("x", "y")
     intercept[IllegalArgumentException] {
       formula.fit(original)
     }
@@ -58,7 +59,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("label column already exists") {
     val formula = new RFormula().setFormula("y ~ x").setLabelCol("y")
-    val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "y")
+    val original = Seq((0, 1.0), (2, 2.0)).toDF("x", "y")
     val model = formula.fit(original)
     val resultSchema = model.transformSchema(original.schema)
     assert(resultSchema.length == 3)
@@ -67,7 +68,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("label column already exists but is not numeric type") {
     val formula = new RFormula().setFormula("y ~ x").setLabelCol("y")
-    val original = spark.createDataFrame(Seq((0, true), (2, false))).toDF("x", "y")
+    val original = Seq((0, true), (2, false)).toDF("x", "y")
     val model = formula.fit(original)
     intercept[IllegalArgumentException] {
       model.transformSchema(original.schema)
@@ -79,7 +80,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("allow missing label column for test datasets") {
     val formula = new RFormula().setFormula("y ~ x").setLabelCol("label")
-    val original = spark.createDataFrame(Seq((0, 1.0), (2, 2.0))).toDF("x", "_not_y")
+    val original = Seq((0, 1.0), (2, 2.0)).toDF("x", "_not_y")
     val model = formula.fit(original)
     val resultSchema = model.transformSchema(original.schema)
     assert(resultSchema.length == 3)
@@ -88,37 +89,32 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
   }
 
   test("allow empty label") {
-    val original = spark.createDataFrame(
-      Seq((1, 2.0, 3.0), (4, 5.0, 6.0), (7, 8.0, 9.0))
-    ).toDF("id", "a", "b")
+    val original = Seq((1, 2.0, 3.0), (4, 5.0, 6.0), (7, 8.0, 9.0)).toDF("id", "a", "b")
     val formula = new RFormula().setFormula("~ a + b")
     val model = formula.fit(original)
     val result = model.transform(original)
     val resultSchema = model.transformSchema(original.schema)
-    val expected = spark.createDataFrame(
-      Seq(
-        (1, 2.0, 3.0, Vectors.dense(2.0, 3.0)),
-        (4, 5.0, 6.0, Vectors.dense(5.0, 6.0)),
-        (7, 8.0, 9.0, Vectors.dense(8.0, 9.0)))
-      ).toDF("id", "a", "b", "features")
+    val expected = Seq(
+      (1, 2.0, 3.0, Vectors.dense(2.0, 3.0)),
+      (4, 5.0, 6.0, Vectors.dense(5.0, 6.0)),
+      (7, 8.0, 9.0, Vectors.dense(8.0, 9.0))
+    ).toDF("id", "a", "b", "features")
     assert(result.schema.toString == resultSchema.toString)
     assert(result.collect() === expected.collect())
   }
 
   test("encodes string terms") {
     val formula = new RFormula().setFormula("id ~ a + b")
-    val original = spark.createDataFrame(
-      Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5))
-    ).toDF("id", "a", "b")
+    val original = Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5))
+      .toDF("id", "a", "b")
     val model = formula.fit(original)
     val result = model.transform(original)
     val resultSchema = model.transformSchema(original.schema)
-    val expected = spark.createDataFrame(
-      Seq(
+    val expected = Seq(
         (1, "foo", 4, Vectors.dense(0.0, 1.0, 4.0), 1.0),
         (2, "bar", 4, Vectors.dense(1.0, 0.0, 4.0), 2.0),
         (3, "bar", 5, Vectors.dense(1.0, 0.0, 5.0), 3.0),
-        (4, "baz", 5, Vectors.dense(0.0, 0.0, 5.0), 4.0))
+        (4, "baz", 5, Vectors.dense(0.0, 0.0, 5.0), 4.0)
       ).toDF("id", "a", "b", "features", "label")
     assert(result.schema.toString == resultSchema.toString)
     assert(result.collect() === expected.collect())
@@ -126,17 +122,16 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("index string label") {
     val formula = new RFormula().setFormula("id ~ a + b")
-    val original = spark.createDataFrame(
+    val original =
       Seq(("male", "foo", 4), ("female", "bar", 4), ("female", "bar", 5), ("male", "baz", 5))
-    ).toDF("id", "a", "b")
+        .toDF("id", "a", "b")
     val model = formula.fit(original)
     val result = model.transform(original)
-    val expected = spark.createDataFrame(
-      Seq(
+    val expected = Seq(
         ("male", "foo", 4, Vectors.dense(0.0, 1.0, 4.0), 1.0),
         ("female", "bar", 4, Vectors.dense(1.0, 0.0, 4.0), 0.0),
         ("female", "bar", 5, Vectors.dense(1.0, 0.0, 5.0), 0.0),
-        ("male", "baz", 5, Vectors.dense(0.0, 0.0, 5.0), 1.0))
+        ("male", "baz", 5, Vectors.dense(0.0, 0.0, 5.0), 1.0)
     ).toDF("id", "a", "b", "features", "label")
     // assert(result.schema.toString == resultSchema.toString)
     assert(result.collect() === expected.collect())
@@ -144,9 +139,8 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("attribute generation") {
     val formula = new RFormula().setFormula("id ~ a + b")
-    val original = spark.createDataFrame(
-      Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5))
-    ).toDF("id", "a", "b")
+    val original = Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5))
+      .toDF("id", "a", "b")
     val model = formula.fit(original)
     val result = model.transform(original)
     val attrs = AttributeGroup.fromStructField(result.schema("features"))
@@ -161,9 +155,8 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("vector attribute generation") {
     val formula = new RFormula().setFormula("id ~ vec")
-    val original = spark.createDataFrame(
-      Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0)))
-    ).toDF("id", "vec")
+    val original = Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0)))
+      .toDF("id", "vec")
     val model = formula.fit(original)
     val result = model.transform(original)
     val attrs = AttributeGroup.fromStructField(result.schema("features"))
@@ -177,9 +170,8 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("vector attribute generation with unnamed input attrs") {
     val formula = new RFormula().setFormula("id ~ vec2")
-    val base = spark.createDataFrame(
-      Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0)))
-    ).toDF("id", "vec")
+    val base = Seq((1, Vectors.dense(0.0, 1.0)), (2, Vectors.dense(1.0, 2.0)))
+      .toDF("id", "vec")
     val metadata = new AttributeGroup(
       "vec2",
       Array[Attribute](
@@ -199,16 +191,13 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("numeric interaction") {
     val formula = new RFormula().setFormula("a ~ b:c:d")
-    val original = spark.createDataFrame(
-      Seq((1, 2, 4, 2), (2, 3, 4, 1))
-    ).toDF("a", "b", "c", "d")
+    val original = Seq((1, 2, 4, 2), (2, 3, 4, 1)).toDF("a", "b", "c", "d")
     val model = formula.fit(original)
     val result = model.transform(original)
-    val expected = spark.createDataFrame(
-      Seq(
-        (1, 2, 4, 2, Vectors.dense(16.0), 1.0),
-        (2, 3, 4, 1, Vectors.dense(12.0), 2.0))
-      ).toDF("a", "b", "c", "d", "features", "label")
+    val expected = Seq(
+      (1, 2, 4, 2, Vectors.dense(16.0), 1.0),
+      (2, 3, 4, 1, Vectors.dense(12.0), 2.0)
+    ).toDF("a", "b", "c", "d", "features", "label")
     assert(result.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(result.schema("features"))
     val expectedAttrs = new AttributeGroup(
@@ -219,20 +208,19 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("factor numeric interaction") {
     val formula = new RFormula().setFormula("id ~ a:b")
-    val original = spark.createDataFrame(
+    val original =
       Seq((1, "foo", 4), (2, "bar", 4), (3, "bar", 5), (4, "baz", 5), (4, "baz", 5), (4, "baz", 5))
-    ).toDF("id", "a", "b")
+        .toDF("id", "a", "b")
     val model = formula.fit(original)
     val result = model.transform(original)
-    val expected = spark.createDataFrame(
-      Seq(
-        (1, "foo", 4, Vectors.dense(0.0, 0.0, 4.0), 1.0),
-        (2, "bar", 4, Vectors.dense(0.0, 4.0, 0.0), 2.0),
-        (3, "bar", 5, Vectors.dense(0.0, 5.0, 0.0), 3.0),
-        (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0),
-        (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0),
-        (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0))
-      ).toDF("id", "a", "b", "features", "label")
+    val expected = Seq(
+      (1, "foo", 4, Vectors.dense(0.0, 0.0, 4.0), 1.0),
+      (2, "bar", 4, Vectors.dense(0.0, 4.0, 0.0), 2.0),
+      (3, "bar", 5, Vectors.dense(0.0, 5.0, 0.0), 3.0),
+      (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0),
+      (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0),
+      (4, "baz", 5, Vectors.dense(5.0, 0.0, 0.0), 4.0)
+    ).toDF("id", "a", "b", "features", "label")
     assert(result.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(result.schema("features"))
     val expectedAttrs = new AttributeGroup(
@@ -246,17 +234,15 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
 
   test("factor factor interaction") {
     val formula = new RFormula().setFormula("id ~ a:b")
-    val original = spark.createDataFrame(
-      Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz"))
-    ).toDF("id", "a", "b")
+    val original =
+      Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz")).toDF("id", "a", "b")
     val model = formula.fit(original)
     val result = model.transform(original)
-    val expected = spark.createDataFrame(
-      Seq(
-        (1, "foo", "zq", Vectors.dense(0.0, 0.0, 1.0, 0.0), 1.0),
-        (2, "bar", "zq", Vectors.dense(1.0, 0.0, 0.0, 0.0), 2.0),
-        (3, "bar", "zz", Vectors.dense(0.0, 1.0, 0.0, 0.0), 3.0))
-      ).toDF("id", "a", "b", "features", "label")
+    val expected = Seq(
+      (1, "foo", "zq", Vectors.dense(0.0, 0.0, 1.0, 0.0), 1.0),
+      (2, "bar", "zq", Vectors.dense(1.0, 0.0, 0.0, 0.0), 2.0),
+      (3, "bar", "zz", Vectors.dense(0.0, 1.0, 0.0, 0.0), 3.0)
+    ).toDF("id", "a", "b", "features", "label")
     assert(result.collect() === expected.collect())
     val attrs = AttributeGroup.fromStructField(result.schema("features"))
     val expectedAttrs = new AttributeGroup(
@@ -295,9 +281,7 @@ class RFormulaSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul
       }
     }
 
-    val dataset = spark.createDataFrame(
-      Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz"))
-    ).toDF("id", "a", "b")
+    val dataset = Seq((1, "foo", "zq"), (2, "bar", "zq"), (3, "bar", "zz")).toDF("id", "a", "b")
 
     val rFormula = new RFormula().setFormula("id ~ a:b")
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
index 1401ea9..2346407 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
@@ -26,19 +26,19 @@ import org.apache.spark.sql.types.{LongType, StructField, StructType}
 class SQLTransformerSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
+  import testImplicits._
+
   test("params") {
     ParamsSuite.checkParams(new SQLTransformer())
   }
 
   test("transform numeric data") {
-    val original = spark.createDataFrame(
-      Seq((0, 1.0, 3.0), (2, 2.0, 5.0))).toDF("id", "v1", "v2")
+    val original = Seq((0, 1.0, 3.0), (2, 2.0, 5.0)).toDF("id", "v1", "v2")
     val sqlTrans = new SQLTransformer().setStatement(
       "SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")
     val result = sqlTrans.transform(original)
     val resultSchema = sqlTrans.transformSchema(original.schema)
-    val expected = spark.createDataFrame(
-      Seq((0, 1.0, 3.0, 4.0, 3.0), (2, 2.0, 5.0, 7.0, 10.0)))
+    val expected = Seq((0, 1.0, 3.0, 4.0, 3.0), (2, 2.0, 5.0, 7.0, 10.0))
       .toDF("id", "v1", "v2", "v3", "v4")
     assert(result.schema.toString == resultSchema.toString)
     assert(resultSchema == expected.schema)

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala
index 827ecb0..a928f93 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StandardScalerSuite.scala
@@ -28,6 +28,8 @@ import org.apache.spark.sql.{DataFrame, Row}
 class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   with DefaultReadWriteTest {
 
+  import testImplicits._
+
   @transient var data: Array[Vector] = _
   @transient var resWithStd: Array[Vector] = _
   @transient var resWithMean: Array[Vector] = _
@@ -73,7 +75,7 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("Standardization with default parameter") {
-    val df0 = spark.createDataFrame(data.zip(resWithStd)).toDF("features", "expected")
+    val df0 = data.zip(resWithStd).toSeq.toDF("features", "expected")
 
     val standardScaler0 = new StandardScaler()
       .setInputCol("features")
@@ -84,9 +86,9 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
   }
 
   test("Standardization with setter") {
-    val df1 = spark.createDataFrame(data.zip(resWithBoth)).toDF("features", "expected")
-    val df2 = spark.createDataFrame(data.zip(resWithMean)).toDF("features", "expected")
-    val df3 = spark.createDataFrame(data.zip(data)).toDF("features", "expected")
+    val df1 = data.zip(resWithBoth).toSeq.toDF("features", "expected")
+    val df2 = data.zip(resWithMean).toSeq.toDF("features", "expected")
+    val df3 = data.zip(data).toSeq.toDF("features", "expected")
 
     val standardScaler1 = new StandardScaler()
       .setInputCol("features")
@@ -120,7 +122,7 @@ class StandardScalerSuite extends SparkFunSuite with MLlibTestSparkContext
       Vectors.sparse(3, Array(1, 2), Array(-5.1, 1.0)),
       Vectors.dense(1.7, -0.6, 3.3)
     )
-    val df = spark.createDataFrame(someSparseData.zip(resWithMean)).toDF("features", "expected")
+    val df = someSparseData.zip(resWithMean).toSeq.toDF("features", "expected")
     val standardScaler = new StandardScaler()
       .setInputCol("features")
       .setOutputCol("standardized_features")

http://git-wip-us.apache.org/repos/asf/spark/blob/f234b7cd/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
index 125ad02..957cf58 100755
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
@@ -37,19 +37,20 @@ class StopWordsRemoverSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
 
   import StopWordsRemoverSuite._
+  import testImplicits._
 
   test("StopWordsRemover default") {
     val remover = new StopWordsRemover()
       .setInputCol("raw")
       .setOutputCol("filtered")
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("test", "test"), Seq("test", "test")),
       (Seq("a", "b", "c", "d"), Seq("b", "c")),
       (Seq("a", "the", "an"), Seq()),
       (Seq("A", "The", "AN"), Seq()),
       (Seq(null), Seq(null)),
       (Seq(), Seq())
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -60,14 +61,14 @@ class StopWordsRemoverSuite
       .setInputCol("raw")
       .setOutputCol("filtered")
       .setStopWords(stopWords)
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("test", "test"), Seq()),
       (Seq("a", "b", "c", "d"), Seq("b", "c", "d")),
       (Seq("a", "the", "an"), Seq()),
       (Seq("A", "The", "AN"), Seq()),
       (Seq(null), Seq(null)),
       (Seq(), Seq())
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -77,10 +78,10 @@ class StopWordsRemoverSuite
       .setInputCol("raw")
       .setOutputCol("filtered")
       .setCaseSensitive(true)
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("A"), Seq("A")),
       (Seq("The", "the"), Seq("The"))
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -98,10 +99,10 @@ class StopWordsRemoverSuite
       .setInputCol("raw")
       .setOutputCol("filtered")
       .setStopWords(stopWords)
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("acaba", "ama", "biri"), Seq()),
       (Seq("hep", "her", "scala"), Seq("scala"))
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -112,10 +113,10 @@ class StopWordsRemoverSuite
       .setInputCol("raw")
       .setOutputCol("filtered")
       .setStopWords(stopWords.toArray)
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("python", "scala", "a"), Seq("python", "scala", "a")),
       (Seq("Python", "Scala", "swift"), Seq("Python", "Scala", "swift"))
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -126,10 +127,10 @@ class StopWordsRemoverSuite
       .setInputCol("raw")
       .setOutputCol("filtered")
       .setStopWords(stopWords.toArray)
-    val dataSet = spark.createDataFrame(Seq(
+    val dataSet = Seq(
       (Seq("python", "scala", "a"), Seq()),
       (Seq("Python", "Scala", "swift"), Seq("swift"))
-    )).toDF("raw", "expected")
+    ).toDF("raw", "expected")
 
     testStopWordsRemover(remover, dataSet)
   }
@@ -148,9 +149,7 @@ class StopWordsRemoverSuite
     val remover = new StopWordsRemover()
       .setInputCol("raw")
       .setOutputCol(outputCol)
-    val dataSet = spark.createDataFrame(Seq(
-      (Seq("The", "the", "swift"), Seq("swift"))
-    )).toDF("raw", outputCol)
+    val dataSet = Seq((Seq("The", "the", "swift"), Seq("swift"))).toDF("raw", outputCol)
 
     val thrown = intercept[IllegalArgumentException] {
       testStopWordsRemover(remover, dataSet)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[2/2] spark git commit: [SPARK-16356][ML] Add testImplicits for ML unit tests and promote toDF()

Reply via email to