[ 
https://issues.apache.org/jira/browse/HUDI-7271?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HUDI-7271:
---------------------------------
    Labels: pull-request-available  (was: )

> copy a conf let following modification not to impact the global conf
> --------------------------------------------------------------------
>
>                 Key: HUDI-7271
>                 URL: https://issues.apache.org/jira/browse/HUDI-7271
>             Project: Apache Hudi
>          Issue Type: Bug
>            Reporter: jack Lei
>            Priority: Major
>              Labels: pull-request-available
>
> When creating a table with a CREATE TABLE statement like:
> {code:java}
> CREATE TABLE IF NOT EXISTS hudi_catalog.tmp.table
>     (
>         bb STRING,
>         dt STRING
>     )
>     PARTITIONED BY
>     (
>         dt
>     )
> WITH
>     (
>         'connector' = 'hudi',
>         'path' = '',
>         'table.type' = 'COPY_ON_WRITE',
>         'write.operation' = 'insert',
>         'hoodie.datasource.write.recordkey.field' = 'dt',
>         'hoodie.parquet.max.file.size' = '536870912', -- 512M
>         'clustering.async.enabled' = 'true',
>         'clustering.schedule.enabled' = 'true',
>         'hoodie.clustering.async.max.commits' = '1000000',
>         'clustering.plan.strategy.small.file.limit' = '268435456', -- 256M
>         'clustering.plan.strategy.target.file.max.bytes' = '1610612736', -- 1.5G
>         'clustering.plan.partition.filter.mode' = 'RECENT_DAYS',
>         'clustering.plan.strategy.daybased.lookback.partitions' = '2',
>         'clustering.plan.strategy.daybased.skipfromlatest.partitions' = '0',
>         'clustering.plan.strategy.max.num.groups'='750',
>         'hoodie.cleaner.policy' = 'KEEP_LATEST_COMMITS',
>         'clean.retain_commits' = '300',
>         'archive.max_commits' = '500',
>         'archive.min_commits' = '400',
>         'write.precombine' = 'false'
>     ) ; {code}
> Then set 'clustering.async.enabled'='true' and
> 'clustering.schedule.enabled'='false' through a query hint:
> {code:java}
> INSERT 
> INTO  hudi_catalog.tmp.table
> /*+ 
> OPTIONS('clustering.async.enabled'='true','clustering.schedule.enabled'='false')*/
> (     bb,     dt)
> SELECT     log,    DATE_FORMAT(`timestamp`, 'yyyy-MM-dd') AS dt
> FROM    kafkatable {code}
> This will modify hoodie.parquet.max.file.size, and a file can roll over
> only when it reaches that size.
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to