adward32135 commented on issue #7058:
URL: https://github.com/apache/hudi/issues/7058#issuecomment-1291413206

   > @adward32135 In addition, there are only four fields in your example. Can 
you provide the exception information corresponding to the example?
   -- Flink SQL DDL for the Hudi table T1: a MERGE_ON_READ table stored under
   -- hdfs://xxx/xx.db/T1, partitioned by ftime and keyed on (id, shard).
   -- Column order is significant for the positional INSERT ... SELECT that loads this table.
   CREATE TABLE T1 (
       -- Partition column (see PARTITIONED BY below).
       `ftime`          STRING,
       -- Primary-key columns are `id` and `shard`.
       `id`             BIGINT,
       `cid`            INT,
       `shard`          STRING,
       `productCode`    STRING,
       `subProductCode` STRING,
       `serialId`       STRING,
       `uin`            BIGINT,
       `appId`          INT,
       `payerUin`       BIGINT,
       `payerAppId`     INT,
       `platform`       INT,
       `projectId`      INT,
       `regionId`       INT,
       `zoneId`         INT,
       `payMode`        INT,
       `pid`            INT,
       `actId`          INT,
       `calcDate`       STRING,
       `startTime`      STRING,
       `endTime`        STRING,
       `calcNum`        STRING,
       `timeSpan`       STRING,
       `calcStatus`     INT,
       `calcChannel`    INT,
       `dataStatus`     INT,
       `settleType`     INT,
       `params`         STRING,
       `parts`          STRING,
       `billingParams`  STRING,
       `reduceInfo`     STRING,
       `currency`       STRING,
       `price`          DOUBLE,
       `money`          DOUBLE,
       `totalCost`      DOUBLE,
       `taxRate`        DOUBLE,
       `taxAmount`      DOUBLE,
       `orderId`        STRING,
       `feeBillId`      STRING,
       -- `addTime` is also configured as the precombine field in the options below.
       `addTime`        STRING,
       `modifyTime`     STRING,
       `errorMsg`       STRING,
       `riOrderId`      STRING,
       `timeUnit`       STRING,
       `record_time`    TIMESTAMP(3),
       PRIMARY KEY (`id`, `shard`) NOT ENFORCED
   )
   PARTITIONED BY (`ftime`)
   WITH (
       -- Connector, table type and storage location.
       'connector'  = 'hudi',
       'table.type' = 'MERGE_ON_READ',
       'path'       = 'hdfs://xxx/xx.db/T1',

       -- Record key and precombine field; the record key repeats the PRIMARY KEY columns.
       'hoodie.datasource.write.recordkey.field' = 'id,shard',
       'write.precombine.field'                  = 'addTime',

       -- Write settings.
       'write.operation'                             = 'upsert',
       'write.bucket_assign.tasks'                   = '20',
       'write.tasks'                                 = '20',
       'hoodie.logfile.to.parquet.compression.ratio' = '0.5',
       'changelog.enabled'                           = 'true',

       -- Bucket index.
       'index.type'                      = 'BUCKET',
       'hoodie.bucket.index.num.buckets' = '50',

       -- Streaming read.
       'read.streaming.enabled'        = 'true',
       'read.streaming.check-interval' = '10',

       -- Compaction: scheduling and async execution both enabled, 'num_or_time' trigger.
       'compaction.schedule.enabled' = 'true',
       'compaction.async.enabled'    = 'true',
       'compaction.trigger.strategy' = 'num_or_time',
       'compaction.delta_commits'    = '30',
       'compaction.delta_seconds'    = '600',
       'compaction.tasks'            = '8',
       'compaction.max_memory'       = '4096',

       -- Cleaning.
       'clean.retain_commits' = '30',

       -- Hive sync through the metastore ('hms' mode).
       'hive_sync.enable'         = 'true',
       'hive_sync.mode'           = 'hms',
       'hive_sync.metastore.uris' = 'xxx',
       'hive_sync.db'             = 'xxx',
       'hive_sync.table'          = 'T1'
   )
   T2 is a Hive T+1 table.
   
   I use Spark SQL to insert the data of one partition into the Hudi table. The 
statement is as follows:
   
   -- Spark SQL: copy the T2 rows with stat_date = '20221005' into the
   -- ftime = '20221005' partition of T1.
   -- No target column list is given; the 44 select expressions are presumably
   -- matched to T1's 44 non-partition columns by position — confirm against the T1 DDL.
   INSERT INTO T1 PARTITION (ftime = '20221005')
   SELECT
       id,
       cid,
       shard,
       productcode,
       subproductcode,
       serialid,
       uin,
       appid,
       payeruin,
       payerappid,
       platform,
       projectid,
       regionid,
       zoneid,
       paymode,
       pid,
       actid,
       calcdate,
       starttime,
       endtime,
       calcnum,
       timespan,
       calcstatus,
       calcchannel,
       datastatus,
       settletype,
       params,
       parts,
       billingparams,
       reduceinfo,
       currency,
       price,
       money,
       totalcost,
       taxrate,
       taxamount,
       orderid,
       feebillid,
       addtime,
       modifytime,
       errormsg,
       riorderid,
       -- NOTE(review): 43rd expression; by position this presumably fills `timeUnit` — confirm.
       '',
       -- NOTE(review): 44th expression; by position this presumably fills `record_time`,
       -- which T1 declares as TIMESTAMP(3), with an integer literal — confirm this is intended.
       0
   FROM T2
   WHERE stat_date = '20221005'
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to