Repository: spark Updated Branches: refs/heads/master c056484c0 -> 851e247ca
[SPARK-8928] [SQL] Makes CatalystSchemaConverter sticking to 1.4.x- when handling Parquet LISTs in compatible mode This PR is based on #7209 authored by Sephiroth-Lin. Author: Weizhong Lin <linweizh...@huawei.com> Closes #7314 from liancheng/spark-8928 and squashes the following commits: 75267fe [Cheng Lian] Makes CatalystSchemaConverter sticking to 1.4.x- when handling LISTs in compatible mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/851e247c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/851e247c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/851e247c Branch: refs/heads/master Commit: 851e247caad0977cfd4998254d9602624e06539f Parents: c056484 Author: Weizhong Lin <linweizh...@huawei.com> Authored: Wed Jul 8 22:18:39 2015 -0700 Committer: Cheng Lian <l...@databricks.com> Committed: Wed Jul 8 22:19:19 2015 -0700 ---------------------------------------------------------------------- .../spark/sql/parquet/CatalystSchemaConverter.scala | 6 ++++-- .../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/851e247c/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala index de3a72d..1ea6926 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala @@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter( field.name, Types .buildGroup(REPEATED) - .addField(convertField(StructField("element", elementType, nullable))) + // "array_element" is the name chosen by parquet-hive (1.7.0 and prior version) + .addField(convertField(StructField("array_element", elementType, nullable))) .named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME)) // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level @@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter( ConversionPatterns.listType( repetition, field.name, - convertField(StructField("element", elementType, nullable), REPEATED)) + // "array" is the name chosen by parquet-avro (1.7.0 and prior version) + convertField(StructField("array", elementType, nullable), REPEATED)) // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. http://git-wip-us.apache.org/repos/asf/spark/blob/851e247c/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala index 35d3c33..fa62939 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala @@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { """ |message root { | optional group _1 (LIST) { - | repeated int32 element; + | repeated int32 array; | } |} """.stripMargin) @@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { |message root { | optional group _1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest { | optional binary _1 (UTF8); | optional group _2 (LIST) { | repeated group bag { - | optional group element { + | optional group array_element { | required int32 _1; | required double _2; | } @@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { """message root { | optional group f1 (LIST) { | repeated group bag { - | optional int32 element; + | optional int32 array_element; | } | } |} @@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest { nullable = true))), """message root { | optional group f1 (LIST) { - | repeated int32 element; + | repeated int32 array; | } |} """.stripMargin) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org