spark git commit: [SPARK-18760][SQL] Consistent format specification for FileFormats

rxin Thu, 08 Dec 2016 12:52:57 -0800

Repository: spark
Updated Branches:
  refs/heads/master 26432df9c -> 5f894d23a



[SPARK-18760][SQL] Consistent format specification for FileFormats

## What changes were proposed in this pull request?
This patch fixes the format specification in explain for file sources (Parquet 
and Text formats are the only two that are different from the rest):

Before:
```
scala> spark.read.text("test.text").explain()
== Physical Plan ==
*FileScan text [value#15] Batched: false, Format: 
org.apache.spark.sql.execution.datasources.text.TextFileFormat@xyz, Location: 
InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], 
PushedFilters: [], ReadSchema: struct<value:string>
```

After:
```
scala> spark.read.text("test.text").explain()
== Physical Plan ==
*FileScan text [value#15] Batched: false, Format: Text, Location: 
InMemoryFileIndex[file:/scratch/rxin/spark/test.text], PartitionFilters: [], 
PushedFilters: [], ReadSchema: struct<value:string>
```

Also closes #14680.

## How was this patch tested?
Verified in spark-shell.

Author: Reynold Xin <[email protected]>

Closes #16187 from rxin/SPARK-18760.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f894d23
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f894d23
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f894d23

Branch: refs/heads/master
Commit: 5f894d23a54ea99f75f8b722e111e5270f7f80cf
Parents: 26432df
Author: Reynold Xin <[email protected]>
Authored: Thu Dec 8 12:52:05 2016 -0800
Committer: Reynold Xin <[email protected]>
Committed: Thu Dec 8 12:52:05 2016 -0800

----------------------------------------------------------------------
 .../sql/execution/datasources/parquet/ParquetFileFormat.scala | 2 +-
 .../spark/sql/execution/datasources/text/TextFileFormat.scala | 2 ++
 .../apache/spark/sql/streaming/FileStreamSourceSuite.scala    | 7 ++++---
 3 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5f894d23/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 031a0fe..0965ffe 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -61,7 +61,7 @@ class ParquetFileFormat
 
   override def shortName(): String = "parquet"
 
-  override def toString: String = "ParquetFormat"
+  override def toString: String = "Parquet"
 
   override def hashCode(): Int = getClass.hashCode()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5f894d23/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
index 178160c..897e535 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala
@@ -39,6 +39,8 @@ class TextFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
 
   override def shortName(): String = "text"
 
+  override def toString: String = "Text"
+
   private def verifySchema(schema: StructType): Unit = {
     if (schema.size != 1) {
       throw new AnalysisException(

http://git-wip-us.apache.org/repos/asf/spark/blob/5f894d23/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 7b6fe83..267c462 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -31,7 +31,8 @@ import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
-class FileStreamSourceTest extends StreamTest with SharedSQLContext with 
PrivateMethodTester {
+abstract class FileStreamSourceTest
+  extends StreamTest with SharedSQLContext with PrivateMethodTester {
 
   import testImplicits._
 
@@ -848,13 +849,13 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
         val explainWithoutExtended = q.explainInternal(false)
         // `extended = false` only displays the physical plan.
         assert("Relation.*text".r.findAllMatchIn(explainWithoutExtended).size 
=== 0)
-        assert("TextFileFormat".r.findAllMatchIn(explainWithoutExtended).size 
=== 1)
+        assert(": Text".r.findAllMatchIn(explainWithoutExtended).size === 1)
 
         val explainWithExtended = q.explainInternal(true)
         // `extended = true` displays 3 logical plans 
(Parsed/Optimized/Optimized) and 1 physical
         // plan.
         assert("Relation.*text".r.findAllMatchIn(explainWithExtended).size === 
3)
-        assert("TextFileFormat".r.findAllMatchIn(explainWithExtended).size === 
1)
+        assert(": Text".r.findAllMatchIn(explainWithExtended).size === 1)
       } finally {
         q.stop()
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-18760][SQL] Consistent format specification for FileFormats

Reply via email to