adward32135 commented on issue #7058:
URL: https://github.com/apache/hudi/issues/7058#issuecomment-1291413206
> @adward32135 In addition, there are only four fields in your example. Can
> you provide the exception information corresponding to the example?
-- Hudi-backed table T1: composite key (id, shard), partitioned by ftime.
CREATE TABLE T1 (
    ftime              STRING,
    `id`               BIGINT,
    `cid`              INT,
    `shard`            STRING,
    `productCode`      STRING,
    `subProductCode`   STRING,
    `serialId`         STRING,
    `uin`              BIGINT,
    `appId`            INT,
    `payerUin`         BIGINT,
    `payerAppId`       INT,
    `platform`         INT,
    `projectId`        INT,
    `regionId`         INT,
    `zoneId`           INT,
    `payMode`          INT,
    `pid`              INT,
    `actId`            INT,
    `calcDate`         STRING,
    `startTime`        STRING,
    `endTime`          STRING,
    `calcNum`          STRING,
    `timeSpan`         STRING,
    `calcStatus`       INT,
    `calcChannel`      INT,
    `dataStatus`       INT,
    `settleType`       INT,
    `params`           STRING,
    `parts`            STRING,
    `billingParams`    STRING,
    `reduceInfo`       STRING,
    `currency`         STRING,
    `price`            DOUBLE,
    `money`            DOUBLE,
    `totalCost`        DOUBLE,
    `taxRate`          DOUBLE,
    `taxAmount`        DOUBLE,
    `orderId`          STRING,
    `feeBillId`        STRING,
    `addTime`          STRING,
    `modifyTime`       STRING,
    `errorMsg`         STRING,
    `riOrderId`        STRING,
    `timeUnit`         STRING,
    record_time        TIMESTAMP(3),
    PRIMARY KEY (`id`, `shard`) NOT ENFORCED
)
PARTITIONED BY (`ftime`)
WITH (
    -- Connector, storage location and table type
    'connector' = 'hudi',
    'path' = 'hdfs://xxx/xx.db/T1',
    'table.type' = 'MERGE_ON_READ',

    -- Record key, precombine field and write settings
    'hoodie.datasource.write.recordkey.field' = 'id,shard',
    'write.precombine.field' = 'addTime',
    'write.operation' = 'upsert',
    'write.bucket_assign.tasks' = '20',
    'write.tasks' = '20',
    'hoodie.logfile.to.parquet.compression.ratio' = '0.5',

    -- Index
    'index.type' = 'BUCKET',
    'hoodie.bucket.index.num.buckets' = '50',

    -- Changelog and streaming read
    'changelog.enabled' = 'true',
    'read.streaming.enabled' = 'true',
    'read.streaming.check-interval' = '10',

    -- Compaction
    'compaction.schedule.enabled' = 'true',
    'compaction.async.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_or_time',
    'compaction.delta_commits' = '30',
    'compaction.delta_seconds' = '600',
    'compaction.tasks' = '8',
    'compaction.max_memory' = '4096',

    -- Cleaning
    'clean.retain_commits' = '30',

    -- Hive sync
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'xxx',
    'hive_sync.db' = 'xxx',
    'hive_sync.table' = 'T1'
)
T2 is a Hive T+1 table.
Spark SQL is used to insert the data of one partition into the Hudi table. The
statement is as follows:
-- Load partition ftime = '20221005' of T1 from the T2 rows with the same stat_date.
-- No column list is given, so the select expressions are matched to T1's
-- non-partition columns by position.
INSERT INTO T1 PARTITION (ftime = '20221005')
SELECT
    id,
    cid,
    shard,
    productcode,
    subproductcode,
    serialid,
    uin,
    appid,
    payeruin,
    payerappid,
    platform,
    projectid,
    regionid,
    zoneid,
    paymode,
    pid,
    actid,
    calcdate,
    starttime,
    endtime,
    calcnum,
    timespan,
    calcstatus,
    calcchannel,
    datastatus,
    settletype,
    params,
    parts,
    billingparams,
    reduceinfo,
    currency,
    price,
    money,
    totalcost,
    taxrate,
    taxamount,
    orderid,
    feebillid,
    addtime,
    modifytime,
    errormsg,
    riorderid,
    -- NOTE(review): the two trailing literals presumably fill timeUnit and
    -- record_time from the DDL above; 0 is an integer literal while record_time
    -- is declared TIMESTAMP(3) there. Confirm against the schema Spark sees.
    '',
    0
FROM T2
WHERE stat_date = '20221005'
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]