peng-xin commented on issue #2448: URL: https://github.com/apache/hudi/issues/2448#issuecomment-761540885
> Can you provide the full dump of the logs and .hoodie/ folder ? The logs have been cleared, but I still have some pictures.     When I set `hoodie.auto.commit = false`, the error is gone. But how can I limit the log file size? My log file is very large (3GB+), and the log file version is always 1.  When I change to `hoodie.cleaner.policy = KEEP_LATEST_FILE_VERSIONS` and `hoodie.cleaner.fileversions.retained = 1`, the old data files are cleaned, but how do I clean the old log files (or the old log file commits)? Setting `hoodie.cleaner.policy = KEEP_LATEST_COMMITS` and `hoodie.cleaner.commits.retained = 1` has no effect. My current config is: > hoodie.filesystem.view.incr.timeline.sync.enable -> false, hoodie.bulkinsert.sort.mode -> GLOBAL_SORT, hoodie.avro.schema.externalTransformation -> false, hoodie.bootstrap.parallelism -> 1500, hoodie.delete.shuffle.parallelism -> 1500, hoodie.simple.index.use.caching -> true, hoodie.bloom.index.filter.type -> DYNAMIC_V0, hoodie.filesystem.view.remote.port -> 26754, hoodie.datasource.write.operation -> upsert, hoodie.cleaner.parallelism -> 200, hoodie.global.simple.index.parallelism -> 100, hoodie.bootstrap.mode.selector.regex -> .*, hoodie.parquet.page.size -> 1048576, hoodie.datasource.write.table.type -> MERGE_ON_READ, hoodie.datasource.hive_sync.table -> f_mid_business_card, hoodie.compaction.daybased.target.partitions -> 10, hoodie.metrics.reporter.class -> , hoodie.parquet.block.size -> 125829120, hoodie.cleaner.delete.bootstrap.base.file -> false, hoodie.consistency.check.max_interval_ms -> 300000, hoodie.insert.shuffle.parallelism -> 100, hoodie.upsert.shuffle.parallelism -> 100, hoodie.bulkinsert.shuffle.parallelism -> 1500, hoodie.write.commit.callback.on -> false, hoodie.cleaner.fileversions.retained -> 1, hoodie.datasource.hive_sync.partition_extractor_class -> org.apache.hudi.hive.NonPartitionedExtractor, hoodie.parquet.compression.codec -> gzip, hoodie.datasource.write.hive_style_partitioning -> true, hoodie.copyonwrite.insert.split.size -> 500000, 
hoodie.optimistic.consistency.guard.sleep_time_ms -> 500, hoodie.datasource.hive_sync.use_jdbc -> true, hoodie.metrics.reporter.type -> GRAPHITE, hoodie.bootstrap.index.class -> org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex, hoodie.filesystem.remote.backup.view.enable -> true, hoodie.logfile.to.parquet.compression.ratio -> 0.35, hoodie.filesystem.view.spillable.mem -> 104857600, hoodie.write.status.storage.level -> MEMORY_AND_DISK_SER, hoodie.write.commit.callback.http.timeout.seconds -> 3, hoodie.copyonwrite.insert.auto.split -> true, hoodie.logfile.data.block.max.size -> 268435456, hoodie.index.type -> BLOOM, hoodie.keep.min.commits -> 6, hoodie.memory.spillable.map.path -> /tmp/, hoodie.filesystem.view.rocksdb.base.path -> /tmp/hoodie_timeline_rocksdb, hoodie.compact.inline -> false, hoodie.clean.async -> true, hoodie.record.size.estimation.threshold -> 1.0, hoodie.metrics.graphite.host -> localhost, hoodie.simple.index.update.partition.path -> false, hoodie.bloom.index.filter.dynamic.max.entries -> 100000, hoodie.compaction.reverse.log.read -> false, hoodie.metrics.jmx.port -> 9889, hoodie.writestatus.class -> org.apache.hudi.client.WriteStatus, hoodie.datasource.hive_sync.enable -> true, hoodie.finalize.write.parallelism -> 1500, hoodie.rollback.parallelism -> 100, hoodie.index.bloom.num_entries -> 60000, hoodie.memory.merge.max.size -> 134217728, hoodie.bootstrap.mode.selector.regex.mode -> METADATA_ONLY, hoodie.rollback.using.markers -> false, hoodie.copyonwrite.record.size.estimate -> 1024, hoodie.bloom.index.input.storage.level -> MEMORY_AND_DISK_SER, hoodie.simple.index.parallelism -> 50, hoodie.consistency.check.enabled -> false, hoodie.bloom.index.use.caching -> true, hoodie.metrics.on -> false, hoodie.memory.compaction.max.size -> 1073741824, hoodie.parquet.small.file.limit -> 104857600, hoodie.combine.before.insert -> false, hoodie.cleaner.commits.retained -> 1, hoodie.embed.timeline.server -> true, hoodie.bootstrap.mode.selector -> 
org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector, hoodie.filesystem.view.secondary.type -> MEMORY, _.hoodie.allow.multi.write.on.same.instant -> false, hoodie.datasource.write.partitionpath.field -> , _hoodie.optimistic.consistency.guard.enable -> true, hoodie.datasource.hive_sync.database -> hbase, hoodie.bloom.index.update.partition.path -> true, hoodie.fail.on.timeline.archiving -> true, hoodie.markers.delete.parallelism -> 100, hoodie.filesystem.view.type -> MEMORY, hoodie.parquet.max.file.size -> 125829120, hoodie.datasource.write.keygenerator.class -> org.apache.hudi.keygen.NonpartitionedKeyGenerator, hoodie.bootstrap.partitionpath.translator.class -> org.apache.hudi.client.bootstrap.translator.IdentityBootstrapPartitionPathTranslator, hoodie.bloom.index.prune.by.ranges -> true, hoodie.base.path -> /user/datalake/hudi/hbase/f_mid_business_card, hoodie.index.class -> , hoodie.clean.automatic -> true, hoodie.filesystem.view.remote.host -> localhost, hoodie.compaction.lazy.block.read -> false, hoodie.memory.writestatus.failure.fraction -> 0.1, hoodie.metrics.graphite.port -> 4756, hoodie.cleaner.policy -> KEEP_LATEST_FILE_VERSIONS, hoodie.logfile.max.size -> 1073741824, hoodie.filesystem.view.spillable.compaction.mem.fraction -> 0.01, hoodie.datasource.write.recordkey.field -> datalake_rowkey, hoodie.avro.schema.validate -> false, hoodie.simple.index.input.storage.level -> MEMORY_AND_DISK_SER, hoodie.timeline.layout.version -> 1, hoodie.consistency.check.max_checks -> 7, hoodie.consistency.check.initial_interval_ms -> 2000, hoodie.keep.max.commits -> 8, hoodie.compact.inline.max.delta.commits -> 5, hoodie.parquet.compression.ratio -> 0.1, hoodie.memory.dfs.buffer.max.size -> 16777216, hoodie.auto.commit -> false, hoodie.write.commit.callback.http.api.key -> hudi_write_commit_http_callback, hoodie.assume.date.partitioning -> false, hoodie.filesystem.view.spillable.dir -> /tmp/view_map/, hoodie.compaction.strategy -> 
org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy, hoodie.combine.before.upsert -> true, hoodie.bloom.index.keys.per.bucket -> 10000000, hoodie.write.commit.callback.class -> org.apache.hudi.callback.impl.HoodieWriteCommitHttpCallback, hoodie.bloom.index.parallelism -> 0, hoodie.cleaner.incremental.mode -> true, hoodie.commits.archival.batch -> 5, hoodie.datasource.hive_sync.partition_fields -> , hoodie.compaction.target.io -> 512000, hoodie.table.name -> f_mid_business_card, hoodie.bloom.index.bucketized.checking -> true, hoodie.compaction.payload.class -> org.apache.hudi.common.model.OverwriteWithLatestAvroPayload, hoodie.combine.before.delete -> true, hoodie.datasource.write.precombine.field -> ts, hoodie.filesystem.view.spillable.bootstrap.base.file.mem.fraction -> 0.05, hoodie.metrics.jmx.host -> localhost, hoodie.index.bloom.fpp -> 0.000000001, hoodie.datasource.hive_sync.jdbcurl -> jdbc:hive2://172.16.116.102:10000, hoodie.bloom.index.use.treebased.filter -> true ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
