[
https://issues.apache.org/jira/browse/HUDI-9380?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated HUDI-9380:
---------------------------------
Labels: pull-request-available (was: )
> Fix HoodieTableMetadataUtil#collectColumnRangeMetadata to handle null date
> types
> --------------------------------------------------------------------------------
>
> Key: HUDI-9380
> URL: https://issues.apache.org/jira/browse/HUDI-9380
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Voon Hou
> Assignee: Voon Hou
> Priority: Major
> Labels: pull-request-available
>
> HoodieTableMetadataUtil#collectColumnRangeMetadata cannot handle cases
> where date-typed column values are null.
> This case can be triggered with the following test routine:
> {code:java}
> test("Create table for comprehensive type testing") {
> withTempDir { tmp =>
> val tableName = "hudi_type_test_mor"
> spark.sql(
> s"""
> |CREATE TABLE $tableName (
> | uuid STRING,
> | precombine_field LONG,
> | col_double DOUBLE,
> | array_struct ARRAY<STRUCT<inner_f3: TIMESTAMP, inner_f4: STRING>>,
> | part_col STRING
> |) USING hudi
> | LOCATION '${tmp.getCanonicalPath}'
> | TBLPROPERTIES (
> | primaryKey = 'uuid',
> | type = 'mor',
> | preCombineField = 'precombine_field'
> | )
> | PARTITIONED BY (part_col)
> """.stripMargin)
> // directly write to new parquet file
> spark.sql(s"set hoodie.parquet.small.file.limit=0")
> spark.sql(s"set hoodie.metadata.compact.max.delta.commits=1")
> // partition stats index is enabled together with column stats index
> spark.sql(s"set hoodie.metadata.index.column.stats.enable=true")
> spark.sql(s"set hoodie.metadata.record.index.enable=true")
> spark.sql(s"set hoodie.metadata.index.secondary.enable=true")
> // Insert row 1 into partition 'A'
> spark.sql(
> s"""
> | INSERT INTO $tableName VALUES (
> | 'uuid1', 1000L, 1.1,
> | array(struct(cast('2023-11-11 11:11:11' as timestamp), 'asd'),
> struct(cast('2023-11-11 11:11:11' as timestamp), 'ghj')),
> | 'A'
> | )
> """.stripMargin)
> spark.sql(s"CREATE INDEX idx_double ON $tableName (col_double)")
> spark.sql(s"select * from $tableName").show(truncate=false)
> // Generate log files through updates on partition 'A'
> spark.sql(s"UPDATE $tableName SET col_double = col_double + 100,
> precombine_field = precombine_field + 1 WHERE part_col = 'A'")
> }
> } {code}
> Error:
> {code:java}
> Caused by: org.apache.hudi.exception.HoodieAppendException: Failed while
> appending records to
> file:/private/var/folders/vh/zgs02hf51dn7r08pbl5m2jc00000gn/T/spark-23f1d487-3b77-48df-b923-ffb219a4d835/part_col=B/.48c1060a-3d1d-43af-9e9c-abbfd8cca16d-0_20250506045258975.log.1_0-165-479
> at
> org.apache.hudi.io.HoodieAppendHandle.appendDataAndDeleteBlocks(HoodieAppendHandle.java:497)
> at
> org.apache.hudi.io.HoodieAppendHandle.doAppend(HoodieAppendHandle.java:456)
> at
> org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor.handleUpdate(BaseSparkDeltaCommitActionExecutor.java:83)
> at
> org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.handleUpsertPartition(BaseSparkCommitActionExecutor.java:321)
> ... 33 more
> Caused by: java.lang.NullPointerException
> at
> org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$null$1(HoodieTableMetadataUtil.java:277)
> at java.util.ArrayList.forEach(ArrayList.java:1259)
> at
> org.apache.hudi.metadata.HoodieTableMetadataUtil.lambda$collectColumnRangeMetadata$2(HoodieTableMetadataUtil.java:269)
> at java.util.ArrayList.forEach(ArrayList.java:1259)
> at
> org.apache.hudi.metadata.HoodieTableMetadataUtil.collectColumnRangeMetadata(HoodieTableMetadataUtil.java:266)
> at
> org.apache.hudi.io.HoodieAppendHandle.processAppendResult(HoodieAppendHandle.java:435)
> at
> org.apache.hudi.io.HoodieAppendHandle.appendDataAndDeleteBlocks(HoodieAppendHandle.java:490)
> ... 36 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)