Repository: spark Updated Branches: refs/heads/branch-2.1 a03564418 -> 9483242f4
[SPARK-18760][SQL] Consistent format specification for FileFormats ## What changes were proposed in this pull request? This patch fixes the format specification in explain for file sources (Parquet and Text formats are the only two that are different from the rest): Before: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: org.apache.spark.sql.execution.datasources.text.TextFileFormat@xyz, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` After: ``` scala> spark.read.text("test.text").explain() == Physical Plan == *FileScan text [value#15] Batched: false, Format: Text, Location: InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<value:string> ``` Also closes #14680. ## How was this patch tested? Verified in spark-shell. Author: Reynold Xin <r...@databricks.com> Closes #16187 from rxin/SPARK-18760. 
(cherry picked from commit 5f894d23a54ea99f75f8b722e111e5270f7f80cf) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9483242f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9483242f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9483242f Branch: refs/heads/branch-2.1 Commit: 9483242f4c6cc13001e5a967810718b26beb2361 Parents: a035644 Author: Reynold Xin <r...@databricks.com> Authored: Thu Dec 8 12:52:05 2016 -0800 Committer: Reynold Xin <r...@databricks.com> Committed: Thu Dec 8 12:52:21 2016 -0800 ---------------------------------------------------------------------- .../sql/execution/datasources/parquet/ParquetFileFormat.scala | 2 +- .../spark/sql/execution/datasources/text/TextFileFormat.scala | 2 ++ .../apache/spark/sql/streaming/FileStreamSourceSuite.scala | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9483242f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 031a0fe..0965ffe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -61,7 +61,7 @@ class ParquetFileFormat override def shortName(): String = "parquet" - override def toString: String = "ParquetFormat" + override def toString: String = "Parquet" override def hashCode(): Int = getClass.hashCode() 
http://git-wip-us.apache.org/repos/asf/spark/blob/9483242f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index 8e04396..3e89082 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -43,6 +43,8 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { override def shortName(): String = "text" + override def toString: String = "Text" + private def verifySchema(schema: StructType): Unit = { if (schema.size != 1) { throw new AnalysisException( http://git-wip-us.apache.org/repos/asf/spark/blob/9483242f/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 7b6fe83..267c462 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -class FileStreamSourceTest extends StreamTest with SharedSQLContext with PrivateMethodTester { +abstract class FileStreamSourceTest + extends StreamTest with SharedSQLContext with PrivateMethodTester { import testImplicits._ @@ -848,13 +849,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest { val 
explainWithoutExtended = q.explainInternal(false) // `extended = false` only displays the physical plan. assert("Relation.*text".r.findAllMatchIn(explainWithoutExtended).size === 0) - assert("TextFileFormat".r.findAllMatchIn(explainWithoutExtended).size === 1) + assert(": Text".r.findAllMatchIn(explainWithoutExtended).size === 1) val explainWithExtended = q.explainInternal(true) // `extended = true` displays 3 logical plans (Parsed/Optimized/Optimized) and 1 physical // plan. assert("Relation.*text".r.findAllMatchIn(explainWithExtended).size === 3) - assert("TextFileFormat".r.findAllMatchIn(explainWithExtended).size === 1) + assert(": Text".r.findAllMatchIn(explainWithExtended).size === 1) } finally { q.stop() } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org