li-ang-666 commented on issue #9804:
URL: https://github.com/apache/hudi/issues/9804#issuecomment-1746650919
> It should be like this: `--hoodie-conf k1=v1,k2=v2`; for your option, it should be `--hoodie-conf hadoop.parquet.avro.readInt96AsFixed=true`
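Since this job is submitted as Flink SQL rather than through a streamer tool that takes `--hoodie-conf`, my understanding is that the property has to go into the connector's `WITH` options with the `hadoop.` prefix so it is forwarded to the Hadoop configuration used by the writer. A minimal sketch of just that part (the table name and path here are placeholders; my real DDL follows below):
----------------------------------------------------------------------------
-- Minimal sketch: a 'hadoop.'-prefixed key in the WITH clause is passed through
-- to the Hadoop Configuration used by the Hudi Flink writer.
-- 'demo_hudi_table' and its path are placeholders, not my actual table.
CREATE TABLE demo_hudi_table (
    id BIGINT,
    create_time TIMESTAMP(0),
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'hudi',
    'path' = 'obs://hadoop-obs/ods_hudi/demo_hudi_table',
    'table.type' = 'MERGE_ON_READ',
    'hadoop.parquet.avro.readInt96AsFixed' = 'true'
);
----------------------------------------------------------------------------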
I changed my Flink SQL to:
----------------------------------------------------------------------------
CREATE TABLE source_table (
    id DECIMAL(20, 0),
    company_id BIGINT,
    shareholder_id STRING,
    shareholder_entity_type SMALLINT,
    shareholder_name_id BIGINT,
    investment_ratio_total DECIMAL(24, 12),
    is_controller SMALLINT,
    is_ultimate SMALLINT,
    is_big_shareholder SMALLINT,
    is_controlling_shareholder SMALLINT,
    equity_holding_path STRING,
    create_time TIMESTAMP(0),
    update_time TIMESTAMP(0),
    is_deleted SMALLINT,
    op_ts TIMESTAMP(3) METADATA FROM 'value.ingestion-timestamp' VIRTUAL,
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'kafka',
    'topic' = 'e1d4c.json.prism_shareholder_path.ratio_path_company',
    'properties.bootstrap.servers' = '10.99.202.90:9092,10.99.206.80:9092,10.99.199.2:9092',
    'properties.group.id' = 'demo-job',
    'scan.startup.mode' = 'earliest-offset',
    -- canal
    'format' = 'canal-json',
    'canal-json.ignore-parse-errors' = 'true',
    'canal-json.encode.decimal-as-plain-number' = 'true'
);
-------------------------------------------------------------------------------------------
CREATE TABLE ratio_path_company (
    id DECIMAL(20, 0),
    company_id BIGINT,
    shareholder_id STRING,
    shareholder_entity_type SMALLINT,
    shareholder_name_id BIGINT,
    investment_ratio_total DECIMAL(24, 12),
    is_controller SMALLINT,
    is_ultimate SMALLINT,
    is_big_shareholder SMALLINT,
    is_controlling_shareholder SMALLINT,
    equity_holding_path STRING,
    create_time TIMESTAMP(0),
    update_time TIMESTAMP(0),
    is_deleted SMALLINT,
    op_ts TIMESTAMP(0),
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'hudi',
    'path' = 'obs://hadoop-obs/ods_hudi/ratio_path_company005',
    'table.type' = 'MERGE_ON_READ',
    -- cdc
    'changelog.enabled' = 'true',
    -- index
    'index.type' = 'BUCKET',
    'hoodie.bucket.index.num.buckets' = '256',
    -- write
    'write.tasks' = '8',
    'write.task.max.size' = '512',
    'write.batch.size' = '12',
    'write.merge.max_memory' = '28',
    'write.log_block.size' = '128',
    'write.precombine' = 'true',
    'precombine.field' = 'op_ts',
    -- compaction
    'compaction.tasks' = '8',
    'compaction.schedule.enabled' = 'true',
    'compaction.async.enabled' = 'true',
    'compaction.max_memory' = '128',
    'compaction.delta_commits' = '3',
    -- clean
    'clean.async.enabled' = 'true',
    'clean.policy' = 'KEEP_LATEST_BY_HOURS',
    'clean.retain_hours' = '72',
    'hadoop.parquet.avro.readInt96AsFixed' = 'true'
);
---------------------------------------------------------------------
INSERT INTO ratio_path_company SELECT * FROM source_table;
-------------------------------------------------------------------
The config did take effect, but I encountered a new problem during compaction:
<img width="1348" alt="image" src="https://github.com/apache/hudi/assets/71430517/a9070b6c-d800-4e2c-af40-44423de2ff10">
2023-10-04 19:02:01.793 [ERROR] [pool-12-thread-1] (NonThrownExecutor.java:140) - Executor executes action [Execute compaction for instant 20231003234910347 from task 2] error
org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
    at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:149) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdateInternal(HoodieFlinkCopyOnWriteTable.java:423) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.HoodieFlinkCopyOnWriteTable.handleUpdate(HoodieFlinkCopyOnWriteTable.java:414) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.compact.CompactionExecutionHelper.writeFileAndGetWriteStats(CompactionExecutionHelper.java:64) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:237) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.compact.HoodieCompactor.compact(HoodieCompactor.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.sink.compact.CompactOperator.doCompaction(CompactOperator.java:142) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.sink.compact.CompactOperator.lambda$processElement$0(CompactOperator.java:124) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.sink.utils.NonThrownExecutor.lambda$wrapAction$0(NonThrownExecutor.java:130) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_302]
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_302]
    at java.lang.Thread.run(Thread.java:748) [?:1.8.0_302]
Caused by: org.apache.hudi.exception.HoodieException: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
    at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:75) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    ... 11 more
Caused by: org.apache.hudi.exception.HoodieException: unable to read next record from parquet file
    at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:54) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    ... 11 more
Caused by: org.apache.parquet.io.ParquetDecodingException: Can not read value at 0 in block -1 in file obs://hadoop-obs/ods_hudi/ratio_path_company005/00000019-da3a-4963-8d2b-9618b89d2f93_19-128-1_20231003230828396.parquet
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:254) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    ... 11 more
Caused by: org.apache.parquet.io.ParquetDecodingException: The requested schema is not compatible with the file schema. incompatible types: optional fixed_len_byte_array(12) create_time != optional int96 create_time
    at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:93) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.schema.PrimitiveType.accept(PrimitiveType.java:602) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:83) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.schema.MessageType.accept(MessageType.java:55) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:132) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.parquet.hadoop.ParquetReader.read(ParquetReader.java:136) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.ParquetReaderIterator.hasNext(ParquetReaderIterator.java:49) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.collection.MappingIterator.hasNext(MappingIterator.java:39) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.common.util.queue.SimpleExecutor.execute(SimpleExecutor.java:67) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    at org.apache.hudi.table.action.commit.HoodieMergeHelper.runMerge(HoodieMergeHelper.java:147) ~[blob_p-d52f9bdde15fcc5202b1e5453d346309de61818c-dff7e5bfe71d3391b3a22a198a642f47:?]
    ... 11 more