Repository: spark
Updated Branches:
  refs/heads/branch-1.6 29f18b730 -> 62ad81a14
[SPARK-11694][SQL] Backports #9754

The main purpose of this PR is to backport https://github.com/apache/spark/pull/9754. I added several commits, but they are identical to those in the PR. I will cc liancheng so that this is easy to find.

Author: hyukjinkwon <[email protected]>
Author: HyukjinKwon <[email protected]>

Closes #9763 from HyukjinKwon/SPARK-11694-followup-backporting.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62ad81a1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62ad81a1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62ad81a1

Branch: refs/heads/branch-1.6
Commit: 62ad81a14cb917f260877e3252a4aa88b3774005
Parents: 29f18b7
Author: hyukjinkwon <[email protected]>
Authored: Tue Nov 17 22:50:54 2015 +0800
Committer: Cheng Lian <[email protected]>
Committed: Tue Nov 17 22:50:54 2015 +0800

----------------------------------------------------------------------
 .../src/test/resources/dec-in-fixed-len.parquet | Bin 0 -> 460 bytes
 .../datasources/parquet/ParquetIOSuite.scala    |  24 +++++++++---------------
 2 files changed, 9 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/62ad81a1/sql/core/src/test/resources/dec-in-fixed-len.parquet
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/dec-in-fixed-len.parquet b/sql/core/src/test/resources/dec-in-fixed-len.parquet
new file mode 100644
index 0000000..6ad37d5
Binary files /dev/null and b/sql/core/src/test/resources/dec-in-fixed-len.parquet differ


http://git-wip-us.apache.org/repos/asf/spark/blob/62ad81a1/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 2aa5dca..29a5282 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.execution.datasources.parquet
 
-import java.util.Collections
-
 import org.apache.parquet.column.{Encoding, ParquetProperties}
 
 import scala.collection.JavaConverters._
@@ -33,7 +31,7 @@ import org.apache.parquet.example.data.{Group, GroupWriter}
 import org.apache.parquet.hadoop._
 import org.apache.parquet.hadoop.api.WriteSupport
 import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
-import org.apache.parquet.hadoop.metadata.{CompressionCodecName, FileMetaData, ParquetMetadata}
+import org.apache.parquet.hadoop.metadata.CompressionCodecName
 import org.apache.parquet.io.api.RecordConsumer
 import org.apache.parquet.schema.{MessageType, MessageTypeParser}
 
@@ -243,15 +241,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
         """.stripMargin)
 
     withTempPath { location =>
-      val extraMetadata = Map.empty[String, String].asJava
-      val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, "Spark")
       val path = new Path(location.getCanonicalPath)
-      val footer = List(
-        new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))
-      ).asJava
-
-      ParquetFileWriter.writeMetadataFile(sparkContext.hadoopConfiguration, path, footer)
-
+      val conf = sparkContext.hadoopConfiguration
+      writeMetadata(parquetSchema, path, conf)
       val errorMessage = intercept[Throwable] {
         sqlContext.read.parquet(path.toString).printSchema()
       }.toString
@@ -582,10 +574,12 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
       sqlContext.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'i64_dec))
   }
 
-  // TODO Adds test case for reading dictionary encoded decimals written as `FIXED_LEN_BYTE_ARRAY`
-  // The Parquet writer version Spark 1.6 and prior versions use is `PARQUET_1_0`, which doesn't
-  // provide dictionary encoding support for `FIXED_LEN_BYTE_ARRAY`. Should add a test here once
-  // we upgrade to `PARQUET_2_0`.
+  test("read dictionary encoded decimals written as FIXED_LEN_BYTE_ARRAY") {
+    checkAnswer(
+      // Decimal column in this file is encoded using plain dictionary
+      readResourceParquetFile("dec-in-fixed-len.parquet"),
+      sqlContext.range(1 << 4).select('id % 10 cast DecimalType(10, 2) as 'fixed_len_dec))
+  }
 }
 
 class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext)
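----------------------------------------------------------------------
Note on the `writeMetadata` helper the updated test calls: its definition is not part of this diff. Below is a minimal sketch of what it plausibly looks like, reconstructed from the inline code removed above; the helper's name comes from the diff, but its exact home (e.g. the ParquetTest trait) and visibility are assumptions.

import java.util.Collections

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.{Footer, ParquetFileWriter}
import org.apache.parquet.hadoop.metadata.{FileMetaData, ParquetMetadata}
import org.apache.parquet.schema.MessageType

trait ParquetMetadataTestUtils {
  // Writes a Parquet summary file at `path` carrying only a schema and a
  // creator string, with no row groups -- exactly what the removed inline
  // test code did by hand.
  protected def writeMetadata(
      schema: MessageType, path: Path, conf: Configuration): Unit = {
    val extraMetadata = Map.empty[String, String].asJava
    val fileMetadata = new FileMetaData(schema, extraMetadata, "Spark")
    val footers = List(
      new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))).asJava
    ParquetFileWriter.writeMetadataFile(conf, path, footers)
  }
}

Extracting the helper this way keeps the test focused on the behavior under test (failing to read metadata with an unsupported schema) rather than on parquet-mr metadata plumbing.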
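----------------------------------------------------------------------
On the new fixture: the removed TODO explains why the test needs a checked-in file. Spark 1.6 and earlier write with parquet-mr's PARQUET_1_0 writer, which does not support dictionary encoding for FIXED_LEN_BYTE_ARRAY, so dec-in-fixed-len.parquet must come from a PARQUET_2_0 writer. A hypothetical sketch of how such a fixture could be generated in a spark-shell session follows; the writer-version config key is parquet-mr's standard switch, the output path is illustrative, and whether the decimal actually lands as FIXED_LEN_BYTE_ARRAY also depends on the decimal precision and Spark's Parquet write path, so treat this purely as a sketch.

import org.apache.parquet.column.ParquetProperties
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.spark.sql.types.DecimalType
import sqlContext.implicits._

// Ask parquet-mr for the v2 writer, which can dictionary-encode
// FIXED_LEN_BYTE_ARRAY columns.
sqlContext.sparkContext.hadoopConfiguration.set(
  ParquetOutputFormat.WRITER_VERSION,
  ParquetProperties.WriterVersion.PARQUET_2_0.toString)

// The same 16 rows the new test checks against: id % 10 as decimal(10, 2).
sqlContext.range(1 << 4)
  .select('id % 10 cast DecimalType(10, 2) as 'fixed_len_dec)
  .write.parquet("/tmp/dec-in-fixed-len.parquet")  // hypothetical output path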
