PankajKaushal commented on issue #9613:
URL: https://github.com/apache/hudi/issues/9613#issuecomment-1705954047

   hive_sync.support_timestamp=true
   hoodie.archive.async=true
   hoodie.archive.automatic=true
   hoodie.archivelog.folder=archived
   hoodie.bulkinsert.shuffle.parallelism=200
   hoodie.clean.async=true
   hoodie.clean.automatic=true
   hoodie.cleaner.commits.retained=2
   hoodie.cleaner.policy.failed.writes=EAGER
   hoodie.clustering.async.enabled=false
   hoodie.clustering.inline=false
   hoodie.datasource.compaction.async.enable=true
   hoodie.datasource.hive_sync.base_file_format=PARQUET
   hoodie.datasource.hive_sync.create_managed_table=false
   hoodie.datasource.hive_sync.database=<db_name>
   hoodie.datasource.hive_sync.enable=true
   hoodie.datasource.hive_sync.jdbcurl=
   
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.MultiPartKeysValueExtractor
   hoodie.datasource.hive_sync.partition_fields=dt
   hoodie.datasource.hive_sync.password=hive
   hoodie.datasource.hive_sync.schema_string_length_thresh=4000
   hoodie.datasource.hive_sync.support_timestamp=true
   hoodie.datasource.hive_sync.sync_as_datasource=true
   hoodie.datasource.hive_sync.table=table_name
   hoodie.datasource.hive_sync.use_jdbc=true
   hoodie.datasource.hive_sync.username=hive
   hoodie.datasource.meta.sync.base.path=
   hoodie.datasource.meta.sync.enable=true
   hoodie.datasource.write.commitmeta.key.prefix=_
   hoodie.datasource.write.drop.partition.columns=false
   hoodie.datasource.write.hive_style_partitioning=true
   hoodie.datasource.write.insert.drop.duplicates=false
   
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator
   
hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled=false
   hoodie.datasource.write.operation=upsert
   hoodie.datasource.write.partitionpath.field=dt
   hoodie.datasource.write.partitionpath.urlencode=false
   
hoodie.datasource.write.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
   hoodie.datasource.write.precombine.field=ingestedat
   hoodie.datasource.write.reconcile.schema=false
   hoodie.datasource.write.recordkey.field=id
   hoodie.datasource.write.row.writer.enable=true
   hoodie.datasource.write.streaming.ignore.failed.batch=false
   hoodie.datasource.write.streaming.retry.count=3
   
[hoodie.datasource.write.streaming.retry.interval.ms](http://hoodie.datasource.write.streaming.retry.interval.ms/)=2000
   hoodie.datasource.write.table.type=COPY_ON_WRITE
   hoodie.fail.on.timeline.archiving=false
   hoodie.finalize.write.parallelism=200
   hoodie.insert.shuffle.parallelism=200
   hoodie.keep.max.commits=4
   hoodie.keep.min.commits=3
   hoodie.meta.sync.client.tool.class=org.apache.hudi.hive.HiveSyncTool
   hoodie.meta.sync.metadata_file_listing=true
   hoodie.meta_sync.spark.version=3.3.2-amzn-0
   hoodie.metadata.clean.async=true
   hoodie.metadata.cleaner.commits.retained=4
   hoodie.metadata.enable=true
   hoodie.metadata.keep.max.commits=7
   hoodie.metadata.keep.min.commits=5
   hoodie.metrics.pushgateway.host=
   hoodie.metrics.pushgateway.port=9091
   hoodie.parquet.max.file.size=128000000
   hoodie.parquet.small.file.limit=100000000
   hoodie.payload.ordering.field=ingestedat
   hoodie.table.base.file.format=PARQUET
   hoodie.table.checksum=1229177767
   hoodie.table.keygenerator.class=org.apache.hudi.keygen.ComplexKeyGenerator
   hoodie.table.metadata.partitions=files
   hoodie.table.name=table_name
   hoodie.table.partition.fields=dt
   hoodie.table.precombine.field=ingestedat
   hoodie.table.recordkey.fields=id
   hoodie.table.type=COPY_ON_WRITE
   hoodie.table.version=5
   hoodie.timeline.layout.version=1
   hoodie.upsert.shuffle.parallelism=200
   hoodie.write.concurrency.mode=single_writer
   
hoodie.write.lock.provider=org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider
   hoodie.write.lock.zookeeper.base_path=/hudi
   hoodie.write.lock.zookeeper.port=
   hoodie.write.lock.zookeeper.url=
   
   We also tried upgrading to 0.13.1, but the batch was taking too much time to
finish.
   **Getting small files from partitions** - the job was taking too much time.
   We even checked the data that we are writing to Hudi and how many partitions
it is touching; there was no change in behaviour. For this particular table, we
reverted to 0.12.2, and with that version this step takes very little time (a few
seconds to a minute).
   
   
![Screenshot](https://github.com/apache/hudi/assets/22418097/f4789e1a-672f-40a9-be96-82558e7f0c34)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to