tgravescs commented on a change in pull request #25983: 
[SPARK-29327][MLLIB]Support specifying features via multiple columns
URL: https://github.com/apache/spark/pull/25983#discussion_r332793778
 
 

 ##########
 File path: mllib/src/test/scala/org/apache/spark/ml/PredictorSuite.scala
 ##########
 @@ -55,14 +55,50 @@ class PredictorSuite extends SparkFunSuite with 
MLlibTestSparkContext {
       predictor.fit(df.select(col("label"), col("weight").cast(StringType), 
col("features")))
     }
   }
+
+  test("multiple columns for features should work well without side effect") {
+    // Should fail due to not supporting multiple columns
+    intercept[IllegalArgumentException] {
+      new MockPredictor(false).setFeaturesCol(Array("feature1", "feature2", 
"feature3"))
+    }
+
+    // Only use multiple columns for features
+    val df = spark.createDataFrame(Seq(
+      (0, 1, 0, 2, 3),
+      (1, 2, 0, 3, 9),
+      (0, 3, 0, 2, 6)
+    )).toDF("label", "weight", "feature1", "feature2", "feature3")
+
+    val predictor = new MockPredictor().setWeightCol("weight")
+      .setFeaturesCol(Array("feature1", "feature2", "feature3"))
+    predictor.fit(df)
+
+    // Should fail due to wrong type for column "feature1" in schema
+    intercept[IllegalArgumentException] {
+      predictor.fit(df.select(col("label"), col("weight"),
+        col("feature1").cast(StringType), col("feature2"), col("feature3")))
+    }
+
+    val df2 = df.toDF("label", "weight", "features", "feature2", "feature3")
+    // Should fail due to missing "feature1" in schema
+    intercept[IllegalArgumentException] {
+      predictor.setFeaturesCol(Array("feature1", "feature2", 
"feature3")).fit(df2)
+    }
+
+    // Should fail due to wrong type in schema for single column of features
 
 Review comment:
   Oh, I see. So can you set the original setFeaturesCol(String) to "" if you 
want to use "features" in setFeaturesCol(Array)? It looks like it, but I 
wanted to double-check.
   
   Can you update the comment to say something along the lines of your explanation?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to