Ethan Guo created HUDI-3708:
-------------------------------
Summary: Upsert to metadata table fails due to schema change
Key: HUDI-3708
URL: https://issues.apache.org/jira/browse/HUDI-3708
Project: Apache Hudi
Issue Type: Bug
Reporter: Ethan Guo
Fix For: 0.11.0
Scenario: Deltastreamer in continuous mode, COW table, single writer with async
clustering and cleaning. Only the files partition is enabled in the metadata table.
The table was written before the metadata schema change (adding the "columnName"
field to HoodieMetadataColumnStats). When using the new writer with the new
schema, the upsert to the metadata table fails the schema compatibility check.
{code:java}
22/03/23 23:11:38 WARN CleanActionExecutor: Failed to perform previous clean
operation, instant: [==>20220314172020474__clean__INFLIGHT]
org.apache.hudi.exception.HoodieUpsertException: Failed upsert schema
compatibility check.
at
org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:729)
at
org.apache.hudi.client.SparkRDDWriteClient.upsertPreppedRecords(SparkRDDWriteClient.java:169)
at
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.commit(SparkHoodieBackedTableMetadataWriter.java:154)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.processAndCommit(HoodieBackedTableMetadataWriter.java:670)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.update(HoodieBackedTableMetadataWriter.java:694)
at
org.apache.hudi.table.action.BaseActionExecutor.lambda$writeTableMetadata$1(BaseActionExecutor.java:69)
at org.apache.hudi.common.util.Option.ifPresent(Option.java:97)
at
org.apache.hudi.table.action.BaseActionExecutor.writeTableMetadata(BaseActionExecutor.java:69)
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runClean(CleanActionExecutor.java:211)
at
org.apache.hudi.table.action.clean.CleanActionExecutor.runPendingClean(CleanActionExecutor.java:176)
at
org.apache.hudi.table.action.clean.CleanActionExecutor.lambda$execute$6(CleanActionExecutor.java:238)
at java.util.ArrayList.forEach(ArrayList.java:1259)
at
org.apache.hudi.table.action.clean.CleanActionExecutor.execute(CleanActionExecutor.java:232)
at
org.apache.hudi.table.HoodieSparkCopyOnWriteTable.clean(HoodieSparkCopyOnWriteTable.java:339)
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:781)
at
org.apache.hudi.client.BaseHoodieWriteClient.clean(BaseHoodieWriteClient.java:738)
at
org.apache.hudi.async.AsyncCleanerService.lambda$startService$0(AsyncCleanerService.java:55)
at
java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hudi.exception.HoodieException: Failed schema
compatibility check for writerSchema
:{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A
record saved within the Metadata
Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type
of the metadata
record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size
of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file
has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains information
about partitions and files within the
dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data
file bloom filter
details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom
filter type
code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant
timestamp when this metadata was
created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter
binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter
entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all
data files in the user
table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data
file column
statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File
name for which this column statistics
applies"},{"name":"columnName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Column
name for which this column statistics
applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum
value in the range. Based on user data table schema, we can convert this to
appropriate
type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum
value in the range. Based on user data table schema, we can convert it to
appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total
count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total count
of null values"},{"name":"totalSize","type":["null","long"],"doc":"Total
storage size on
disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total
uncompressed storage size on
disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry
valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all data
files in the user table","default":null}]}, table schema
:{"type":"record","name":"HoodieMetadataRecord","namespace":"org.apache.hudi.avro.model","doc":"A
record saved within the Metadata
Table","fields":[{"name":"_hoodie_commit_time","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_commit_seqno","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_record_key","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_partition_path","type":["null","string"],"doc":"","default":null},{"name":"_hoodie_file_name","type":["null","string"],"doc":"","default":null},{"name":"key","type":{"type":"string","avro.java.string":"String"}},{"name":"type","type":"int","doc":"Type
of the metadata
record"},{"name":"filesystemMetadata","type":["null",{"type":"map","values":{"type":"record","name":"HoodieMetadataFileInfo","fields":[{"name":"size","type":"long","doc":"Size
of the file"},{"name":"isDeleted","type":"boolean","doc":"True if this file
has been deleted"}]},"avro.java.string":"String"}],"doc":"Contains information
about partitions and files within the
dataset"},{"name":"BloomFilterMetadata","type":["null",{"type":"record","name":"HoodieMetadataBloomFilter","doc":"Data
file bloom filter
details","fields":[{"name":"type","type":{"type":"string","avro.java.string":"String"},"doc":"Bloom
filter type
code"},{"name":"timestamp","type":{"type":"string","avro.java.string":"String"},"doc":"Instant
timestamp when this metadata was
created/updated"},{"name":"bloomFilter","type":"bytes","doc":"Bloom filter
binary byte array"},{"name":"isDeleted","type":"boolean","doc":"Bloom filter
entry valid/deleted flag"}]}],"doc":"Metadata Index of bloom filters for all
data files in the user
table","default":null},{"name":"ColumnStatsMetadata","type":["null",{"type":"record","name":"HoodieMetadataColumnStats","doc":"Data
file column
statistics","fields":[{"name":"fileName","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"File
name for which this column statistics
applies"},{"name":"minValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Minimum
value in the range. Based on user data table schema, we can convert this to
appropriate
type"},{"name":"maxValue","type":["null",{"type":"string","avro.java.string":"String"}],"doc":"Maximum
value in the range. Based on user data table schema, we can convert it to
appropriate type"},{"name":"valueCount","type":["null","long"],"doc":"Total
count of values"},{"name":"nullCount","type":["null","long"],"doc":"Total count
of null values"},{"name":"totalSize","type":["null","long"],"doc":"Total
storage size on
disk"},{"name":"totalUncompressedSize","type":["null","long"],"doc":"Total
uncompressed storage size on
disk"},{"name":"isDeleted","type":"boolean","doc":"Column range entry
valid/deleted flag"}]}],"doc":"Metadata Index of column statistics for all data
files in the user table","default":null}]}, base path
:file:/Users/ethan/Work/scripts/mt_rollout_testing/deploy_b_single_writer_async_services/b3_ds_cow_010mt_011mt_conf_fix2/test_table/.hoodie/metadata
at org.apache.hudi.table.HoodieTable.validateSchema(HoodieTable.java:721)
at
org.apache.hudi.table.HoodieTable.validateUpsertSchema(HoodieTable.java:727)
... 20 more {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)