Repository: spark Updated Branches: refs/heads/master 7fcbb9b57 -> 56536e999
[SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type ## What changes were proposed in this pull request? add the column name in the exception which is raised by unsupported data type. ## How was this patch tested? + [x] pass all tests. Author: Yan Facai (é¢åæ) <[email protected]> Closes #18523 from facaiy/ENH/vectorassembler_add_col. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/56536e99 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/56536e99 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/56536e99 Branch: refs/heads/master Commit: 56536e9992ac4ea771758463962e49bba410e896 Parents: 7fcbb9b Author: Yan Facai (é¢åæ) <[email protected]> Authored: Fri Jul 7 18:32:01 2017 +0800 Committer: Yanbo Liang <[email protected]> Committed: Fri Jul 7 18:32:01 2017 +0800 ---------------------------------------------------------------------- .../apache/spark/ml/feature/VectorAssembler.scala | 15 +++++++++------ .../spark/ml/feature/VectorAssemblerSuite.scala | 5 ++++- 2 files changed, 13 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/56536e99/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index ca90053..73f27d1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -113,12 +113,15 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String) override def transformSchema(schema: StructType): StructType = { val inputColNames = $(inputCols) val outputColName = $(outputCol) - val inputDataTypes = inputColNames.map(name => schema(name).dataType) - inputDataTypes.foreach { - case _: NumericType | BooleanType => - case t if t.isInstanceOf[VectorUDT] => - case other => - throw new IllegalArgumentException(s"Data type $other is not supported.") + val incorrectColumns = inputColNames.flatMap { name => + schema(name).dataType match { + case _: NumericType | BooleanType => None + case t if t.isInstanceOf[VectorUDT] => None + case other => Some(s"Data type $other of column $name is not supported.") + } + } + if (incorrectColumns.nonEmpty) { + throw new IllegalArgumentException(incorrectColumns.mkString("\n")) } if (schema.fieldNames.contains(outputColName)) { throw new IllegalArgumentException(s"Output column $outputColName already exists.") http://git-wip-us.apache.org/repos/asf/spark/blob/56536e99/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index 46cced3..6aef1c6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -79,7 +79,10 @@ class VectorAssemblerSuite val thrown = intercept[IllegalArgumentException] { assembler.transform(df) } - assert(thrown.getMessage contains "Data type StringType is not supported") + assert(thrown.getMessage contains + "Data type StringType of column a is not supported.\n" + + "Data type StringType of column b is not supported.\n" + + "Data type StringType of column c is not supported.") } test("ML attributes") { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
