li-ang-666 commented on issue #9804:
URL: https://github.com/apache/hudi/issues/9804#issuecomment-1746650919

   > It should be like this: `--hoodie-conf k1=v1,k2=v2`; for your option, it should be `--hoodie-conf hadoop.parquet.avro.readInt96AsFixed=true`
   
   I changed my Flink SQL to:
   ```sql
   CREATE TABLE source_table (
     id                         DECIMAL(20, 0),
     company_id                 BIGINT,
     shareholder_id             STRING,
     shareholder_entity_type    SMALLINT,
     shareholder_name_id        BIGINT,
     investment_ratio_total     DECIMAL(24, 12),
     is_controller              SMALLINT,
     is_ultimate                SMALLINT,
     is_big_shareholder         SMALLINT,
     is_controlling_shareholder SMALLINT,
     equity_holding_path        STRING,
     create_time                TIMESTAMP(0),
     update_time                TIMESTAMP(0),
     is_deleted                 SMALLINT,
     op_ts TIMESTAMP(3) METADATA FROM 'value.ingestion-timestamp' VIRTUAL,
     PRIMARY KEY (id) NOT ENFORCED
   ) WITH (
     'connector' = 'kafka',
     'topic' = 'e1d4c.json.prism_shareholder_path.ratio_path_company',
     'properties.bootstrap.servers' = '10.99.202.90:9092,10.99.206.80:9092,10.99.199.2:9092',
     'properties.group.id' = 'demo-job',
     'scan.startup.mode' = 'earliest-offset',
     -- canal
     'format' = 'canal-json',
     'canal-json.ignore-parse-errors' = 'true',
     'canal-json.encode.decimal-as-plain-number' = 'true'
   );
   
   ```

   ```sql
   CREATE TABLE ratio_path_company (
     id                         DECIMAL(20, 0),
     company_id                 BIGINT,
     shareholder_id             STRING,
     shareholder_entity_type    SMALLINT,
     shareholder_name_id        BIGINT,
     investment_ratio_total     DECIMAL(24, 12),
     is_controller              SMALLINT,
     is_ultimate                SMALLINT,
     is_big_shareholder         SMALLINT,
     is_controlling_shareholder SMALLINT,
     equity_holding_path        STRING,
     create_time                TIMESTAMP(0),
     update_time                TIMESTAMP(0),
     is_deleted                 SMALLINT,
     op_ts                      TIMESTAMP(0),
     PRIMARY KEY (id) NOT ENFORCED
   ) WITH (
     'connector' = 'hudi',
     'path' = 'obs://hadoop-obs/ods_hudi/ratio_path_company005',
     'table.type' = 'MERGE_ON_READ',
     -- cdc
     'changelog.enabled' = 'true',
     -- index
     'index.type' = 'BUCKET', 
     'hoodie.bucket.index.num.buckets' = '256',
     -- write
     'write.tasks' = '8',
     'write.task.max.size' = '512',
     'write.batch.size' = '12',
     'write.merge.max_memory' = '28',
     'write.log_block.size' = '128', 
     'write.precombine' = 'true',
     'precombine.field' = 'op_ts',
     -- compaction
     'compaction.tasks' = '8',
     'compaction.schedule.enabled' = 'true',
     'compaction.async.enabled' = 'true',
     'compaction.max_memory' = '128',
     'compaction.delta_commits' = '3',
     -- clean
     'clean.async.enabled' = 'true',
     'clean.policy' = 'KEEP_LATEST_BY_HOURS',
     'clean.retain_hours' = '72',
     'hadoop.parquet.avro.readInt96AsFixed' = 'true'
   );
   ```

   ```sql
   insert into ratio_path_company select * from source_table;
   ```
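   For reference, my understanding is that the `hadoop.`-prefixed option above maps to parquet-avro's `parquet.avro.readInt96AsFixed` read property (the `hadoop.` prefix being stripped before the key reaches the Hadoop `Configuration`). A minimal standalone sketch of that switch, purely as an illustration and not Hudi's actual code path:

   ```java
   import org.apache.avro.generic.GenericRecord;
   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.Path;
   import org.apache.parquet.avro.AvroParquetReader;
   import org.apache.parquet.hadoop.ParquetReader;
   import org.apache.parquet.hadoop.util.HadoopInputFile;

   public class ReadInt96AsFixedCheck {
     public static void main(String[] args) throws Exception {
       // Any parquet file of this table, passed on the command line for illustration.
       Path path = new Path(args[0]);
       Configuration conf = new Configuration();
       // Same switch as 'hadoop.parquet.avro.readInt96AsFixed' = 'true' in the DDL.
       conf.setBoolean("parquet.avro.readInt96AsFixed", true);
       try (ParquetReader<GenericRecord> reader =
           AvroParquetReader.<GenericRecord>builder(HadoopInputFile.from(path, conf))
               .withConf(conf)
               .build()) {
         // With the flag on, INT96 columns such as create_time are read back as a
         // 12-byte Avro fixed instead of failing the Avro schema conversion.
         System.out.println(reader.read().get("create_time"));
       }
     }
   }
   ```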
   It did take effect, but I encountered a new problem:
   <img width="1348" alt="image" src="https://github.com/apache/hudi/assets/71430517/a9070b6c-d800-4e2c-af40-44423de2ff10">
   ```
   2023-10-04 19:02:01.793 [ERROR] [pool-12-thread-1] (NonThrownExecutor.java:140) - Executor executes action [Execute compaction for instant 20231003234910347 from task 2] error
   org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
       at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:149) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdateInternal(HoodieFlinkCopyOnWriteTable.java:423) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdate(HoodieFlinkCopyOnWriteTable.java:414) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.compact.CompactionExecutionHelper.writeFileAndGetWriteStats(CompactionExecutionHelper.java:64) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:237) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.sink.compact.CompactOperator.doCompaction(CompactOperator.java:142) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.sink.compact.CompactOperator.lambda$processElement$0(CompactOperator.java:124) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$wrapAction$0(NonThrownExecutor.java:130) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_302]
       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_302]
       at java.lang.Thread.run(Thread.java:748) [?:1.8.0_302]
   Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
       at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:75) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       ... 11 more
   Caused by: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
       at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:54) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       ... 11 more
   Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file obs://hadoop-obs/ods_hudi/ratio_path_company005/00000019-da3a-4963-8d2b-9618b89d2f93_19-128-1_20231003230828396.parquet
       at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       ... 11 more
   Caused by: org.apache.parquet.io.ParquetDecodingException: The requested schema is not compatible with the file schema. incompatible types: optional fixed_len_byte_array(12) create_time != optional int96 create_time
       at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:93) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.schema.PrimitiveType.accept(PrimitiveType.java:602) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:83) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.schema.MessageType.accept(MessageType.java:55) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
       ... 11 more
   ```
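   Reading the last cause, the requested schema now expects `fixed_len_byte_array(12)` for `create_time`, but that older base file (instant 20231003230828396) apparently still stores it as INT96. A minimal sketch I would use to confirm the physical type in that file's footer (my own illustration, assuming parquet-hadoop is on the classpath and the `obs://` filesystem is configured):

   ```java
   import org.apache.hadoop.conf.Configuration;
   import org.apache.hadoop.fs.Path;
   import org.apache.parquet.hadoop.ParquetFileReader;
   import org.apache.parquet.hadoop.util.HadoopInputFile;
   import org.apache.parquet.schema.MessageType;

   public class InspectBaseFileSchema {
     public static void main(String[] args) throws Exception {
       // Base file named in the ParquetDecodingException above.
       Path path = new Path(
           "obs://hadoop-obs/ods_hudi/ratio_path_company005/"
               + "00000019-da3a-4963-8d2b-9618b89d2f93_19-128-1_20231003230828396.parquet");
       // Assumes the obs:// FileSystem implementation is available on the classpath.
       Configuration conf = new Configuration();
       try (ParquetFileReader reader =
           ParquetFileReader.open(HadoopInputFile.from(path, conf))) {
         MessageType fileSchema = reader.getFooter().getFileMetaData().getSchema();
         // Prints e.g. "optional int96 create_time" for files written before the
         // readInt96AsFixed option took effect, vs. fixed_len_byte_array(12) afterwards.
         System.out.println(fileSchema.getType("create_time"));
       }
     }
   }
   ```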

