This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 4e5ada90cfb [SPARK-39417][SQL] Handle Null partition values in 
PartitioningUtils
4e5ada90cfb is described below

commit 4e5ada90cfb89caa25addd8991cec2af843e24a9
Author: Prashant Singh <psing...@amazon.com>
AuthorDate: Wed Jun 8 23:08:44 2022 -0700

    [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils
    
    ### What changes were proposed in this pull request?
    
    We should not try casting everything returned by 
`removeLeadingZerosFromNumberTypePartition` to string, as it returns a null value 
in the cases when the partition has a null value and has already been replaced by 
`DEFAULT_PARTITION_NAME`
    
    ### Why are the changes needed?
    
    For null partitions where `removeLeadingZerosFromNumberTypePartition` is 
called, it would throw an NPE and hence the query would fail.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Added a UT, which would fail with an NPE otherwise.
    
    Closes #36810 from singhpk234/psinghvk/fix-npe.
    
    Authored-by: Prashant Singh <psing...@amazon.com>
    Signed-off-by: huaxingao <huaxin_...@apple.com>
    (cherry picked from commit dcfd9f01289f26c1a25e97432710a13772b3ad4c)
    Signed-off-by: huaxingao <huaxin_...@apple.com>
---
 .../spark/sql/execution/datasources/PartitioningUtils.scala       | 2 +-
 .../datasources/parquet/ParquetPartitionDiscoverySuite.scala      | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 166fc852899..e856bb5b9c2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -359,7 +359,7 @@ object PartitioningUtils extends SQLConfHelper{
   def removeLeadingZerosFromNumberTypePartition(value: String, dataType: 
DataType): String =
     dataType match {
       case ByteType | ShortType | IntegerType | LongType | FloatType | 
DoubleType =>
-        castPartValueToDesiredType(dataType, value, null).toString
+        Option(castPartValueToDesiredType(dataType, value, 
null)).map(_.toString).orNull
       case _ => value
     }
 
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
index ee905fba745..bd908a36401 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala
@@ -1259,6 +1259,14 @@ class ParquetV2PartitionDiscoverySuite extends 
ParquetPartitionDiscoverySuite {
     assert("p_int=10/p_float=1.0" === path)
   }
 
+  test("SPARK-39417: Null partition value") {
+    // null partition value is replaced by DEFAULT_PARTITION_NAME before 
hitting getPathFragment.
+    val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME)
+    val schema = new StructType().add("p_int", "int")
+    val path = PartitioningUtils.getPathFragment(spec, schema)
+    assert(s"p_int=${ExternalCatalogUtils.DEFAULT_PARTITION_NAME}" === path)
+  }
+
   test("read partitioned table - partition key included in Parquet file") {
     withTempDir { base =>
       for {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to