This is an automated email from the ASF dual-hosted git repository. lixiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5102ccc [SPARK-26339][SQL][FOLLOW-UP] Issue warning instead of throwing an exception for underscore files 5102ccc is described below commit 5102ccc4ab6e30caa5510131dee7098b4f3ad32e Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Mon Jan 7 15:48:54 2019 -0800 [SPARK-26339][SQL][FOLLOW-UP] Issue warning instead of throwing an exception for underscore files ## What changes were proposed in this pull request? The PR https://github.com/apache/spark/pull/23446 unintentionally introduced a behaviour change: empty dataframes can no longer be read from underscore files. Whether this case should be allowed or disallowed is debatable, so, to be more conservative, this PR issues a warning instead of throwing an exception. **Before** ```scala scala> spark.read.schema("a int").parquet("_tmp*").show() org.apache.spark.sql.AnalysisException: All paths were ignored: file:/.../_tmp file:/.../_tmp1; at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:570) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:360) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:231) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219) at org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:651) at org.apache.spark.sql.DataFrameReader.parquet(DataFrameReader.scala:635) ... 
49 elided scala> spark.read.text("_tmp*").show() org.apache.spark.sql.AnalysisException: All paths were ignored: file:/.../_tmp file:/.../_tmp1; at org.apache.spark.sql.execution.datasources.DataSource.checkAndGlobPathIfNecessary(DataSource.scala:570) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:360) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:231) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:219) at org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:723) at org.apache.spark.sql.DataFrameReader.text(DataFrameReader.scala:695) ... 49 elided ``` **After** ```scala scala> spark.read.schema("a int").parquet("_tmp*").show() 19/01/07 15:14:43 WARN DataSource: All paths were ignored: file:/.../_tmp file:/.../_tmp1 +---+ | a| +---+ +---+ scala> spark.read.text("_tmp*").show() 19/01/07 15:14:51 WARN DataSource: All paths were ignored: file:/.../_tmp file:/.../_tmp1 +-----+ |value| +-----+ +-----+ ``` ## How was this patch tested? Manually tested as above. Closes #23481 from HyukjinKwon/SPARK-26339. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: gatorsmile <gatorsm...@gmail.com> --- .../spark/sql/execution/datasources/DataSource.scala | 6 +++--- sql/core/src/test/resources/test-data/_cars.csv | 7 ------- .../sql/execution/datasources/csv/CSVSuite.scala | 20 -------------------- 3 files changed, 3 insertions(+), 30 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 2a438a5..5dad784 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -567,11 +567,11 @@ case class DataSource( } if (filteredOut.nonEmpty) { if (filteredIn.isEmpty) { - throw new AnalysisException( - s"All paths were ignored:\n${filteredOut.mkString("\n ")}") + logWarning( + s"All paths were ignored:\n ${filteredOut.mkString("\n ")}") } else { logDebug( - s"Some paths were ignored:\n${filteredOut.mkString("\n ")}") + s"Some paths were ignored:\n ${filteredOut.mkString("\n ")}") } } } diff --git a/sql/core/src/test/resources/test-data/_cars.csv b/sql/core/src/test/resources/test-data/_cars.csv deleted file mode 100644 index 40ded57..0000000 --- a/sql/core/src/test/resources/test-data/_cars.csv +++ /dev/null @@ -1,7 +0,0 @@ - -year,make,model,comment,blank -"2012","Tesla","S","No comment", - -1997,Ford,E350,"Go get one now they are going fast", -2015,Chevy,Volt - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index fb1bedf..d9e5d7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -53,7 +53,6 @@ class CSVSuite extends 
QueryTest with SharedSQLContext with SQLTestUtils with Te private val carsEmptyValueFile = "test-data/cars-empty-value.csv" private val carsBlankColName = "test-data/cars-blank-column-name.csv" private val carsCrlf = "test-data/cars-crlf.csv" - private val carsFilteredOutFile = "test-data/_cars.csv" private val emptyFile = "test-data/empty.csv" private val commentsFile = "test-data/comments.csv" private val disableCommentsFile = "test-data/disable_comments.csv" @@ -347,25 +346,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te assert(result.schema.fieldNames.size === 1) } - test("SPARK-26339 Not throw an exception if some of specified paths are filtered in") { - val cars = spark - .read - .option("header", "false") - .csv(testFile(carsFile), testFile(carsFilteredOutFile)) - - verifyCars(cars, withHeader = false, checkTypes = false) - } - - test("SPARK-26339 Throw an exception only if all of the specified paths are filtered out") { - val e = intercept[AnalysisException] { - val cars = spark - .read - .option("header", "false") - .csv(testFile(carsFilteredOutFile)) - }.getMessage - assert(e.contains("All paths were ignored:")) - } - test("DDL test with empty file") { withView("carsTable") { spark.sql( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org