jack Lei created HUDI-7271:
------------------------------

             Summary: Copy the conf so that subsequent modifications do not impact the 
global conf
                 Key: HUDI-7271
                 URL: https://issues.apache.org/jira/browse/HUDI-7271
             Project: Apache Hudi
          Issue Type: Bug
            Reporter: jack Lei


When a table is created with SQL like the following:
{code:java}
CREATE TABLE IF NOT EXISTS hudi_catalog.tmp.table
    (
        bb STRING,
        dt STRING
    )
    PARTITIONED BY
    (
        dt
    )
WITH
    (
        'connector' = 'hudi',
        'path' = '',
        'table.type' = 'COPY_ON_WRITE',
        'write.operation' = 'insert',
        'hoodie.datasource.write.recordkey.field' = 'dt',
        'hoodie.parquet.max.file.size' = '536870912', -- 512M
        'clustering.async.enabled' = 'true',
        'clustering.schedule.enabled' = 'true',
        'hoodie.clustering.async.max.commits' = '1000000',
        'clustering.plan.strategy.small.file.limit' = '268435456', -- 256M
        'clustering.plan.strategy.target.file.max.bytes' = '1610612736', -- 1.5G
        'clustering.plan.partition.filter.mode' = 'RECENT_DAYS',
        'clustering.plan.strategy.daybased.lookback.partitions' = '2',
        'clustering.plan.strategy.daybased.skipfromlatest.partitions' = '0',
        'clustering.plan.strategy.max.num.groups'='750',
        'hoodie.cleaner.policy' = 'KEEP_LATEST_COMMITS',
        'clean.retain_commits' = '300',
        'archive.max_commits' = '500',
        'archive.min_commits' = '400',
        'write.precombine' = 'false'
    ) ; {code}
and then 'clustering.async.enabled'='true' and 'clustering.schedule.enabled'='false' are set through an OPTIONS hint on the insert:
{code:java}
INSERT 
INTO    hudi_catalog.tmp.table
/*+ 
OPTIONS('clustering.async.enabled'='true','clustering.schedule.enabled'='false')*/
(       bb,     dt)
SELECT  log,    DATE_FORMAT(`timestamp`, 'yyyy-MM-dd') AS dt
FROM    kafkatable {code}
This will modify hoodie.parquet.max.file.size, and a file can then be rolled over 
only once it reaches that size.

 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to