This is an automated email from the ASF dual-hosted git repository. huaxingao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new dcfd9f01289 [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils dcfd9f01289 is described below commit dcfd9f01289f26c1a25e97432710a13772b3ad4c Author: Prashant Singh <psing...@amazon.com> AuthorDate: Wed Jun 8 23:08:44 2022 -0700 [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils ### What changes were proposed in this pull request? We should not try casting everything returned by `removeLeadingZerosFromNumberTypePartition` to string, as it returns a null value in cases where the partition has a null value and has already been replaced by `DEFAULT_PARTITION_NAME` ### Why are the changes needed? For null partitions where `removeLeadingZerosFromNumberTypePartition` is called, it would throw an NPE and hence the query would fail. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a UT, which would fail with an NPE otherwise. Closes #36810 from singhpk234/psinghvk/fix-npe. 
Authored-by: Prashant Singh <psing...@amazon.com> Signed-off-by: huaxingao <huaxin_...@apple.com> --- .../spark/sql/execution/datasources/PartitioningUtils.scala | 2 +- .../datasources/parquet/ParquetPartitionDiscoverySuite.scala | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 166fc852899..e856bb5b9c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -359,7 +359,7 @@ object PartitioningUtils extends SQLConfHelper{ def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String = dataType match { case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType => - castPartValueToDesiredType(dataType, value, null).toString + Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull case _ => value } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index b5947a4f820..fb5595322f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1259,6 +1259,14 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { assert("p_int=10/p_float=1.0" === path) } + test("SPARK-39417: Null partition value") { + // null partition value is replaced by DEFAULT_PARTITION_NAME before hitting getPathFragment. 
+ val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME) + val schema = new StructType().add("p_int", "int") + val path = PartitioningUtils.getPathFragment(spec, schema) + assert(s"p_int=${ExternalCatalogUtils.DEFAULT_PARTITION_NAME}" === path) + } + test("read partitioned table - partition key included in Parquet file") { withTempDir { base => for { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org