[
https://issues.apache.org/jira/browse/HUDI-2911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Alexey Kudinkin updated HUDI-2911:
----------------------------------
Fix Version/s: 0.11.0
> Writing non-partitioned table produces incorrect "hoodie.properties" file
> -------------------------------------------------------------------------
>
> Key: HUDI-2911
> URL: https://issues.apache.org/jira/browse/HUDI-2911
> Project: Apache Hudi
> Issue Type: Bug
> Reporter: Alexey Kudinkin
> Priority: Major
> Fix For: 0.11.0
>
>
> After ingesting a Hudi table with the following configuration, I'm still getting
> "hoodie.table.partition.fields=partitionpath" in the "hoodie.properties" file,
> which blocks this table from being read.
>
> Example table config:
> {code:java}
> val commonOpts =
> Map(
> "hoodie.compact.inline" -> "false",
> "hoodie.bulk_insert.shuffle.parallelism" -> "10"
> )
> spark.sparkContext.setLogLevel("DEBUG")
> ////////////////////////////////////////////////////////////////
> // Writing to Hudi
> ////////////////////////////////////////////////////////////////
> val fs = FSUtils.getFs(outputPath, spark.sparkContext.hadoopConfiguration)
> if (!fs.exists(new Path(outputPath))) {
> val df = spark.read.parquet(inputPath)
> df.write.format("hudi")
> .option(DataSourceWriteOptions.TABLE_TYPE.key(), COW_TABLE_TYPE_OPT_VAL)
> .option("hoodie.table.name", tableName)
> .option(PRECOMBINE_FIELD.key(), "review_id")
> .option(RECORDKEY_FIELD.key(), "review_id")
> //.option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(),
> "product_category")
> .option("hoodie.clustering.inline", "true")
> .option("hoodie.clustering.inline.max.commits", "1")
> // NOTE: Small file limit is intentionally kept _ABOVE_ target file-size
> max threshold for Clustering,
> // to force re-clustering
> .option("hoodie.clustering.plan.strategy.small.file.limit",
> String.valueOf(1024 * 1024 * 1024)) // 1Gb
> .option("hoodie.clustering.plan.strategy.target.file.max.bytes",
> String.valueOf(128 * 1024 * 1024)) // 128Mb
> .option("hoodie.clustering.plan.strategy.max.num.groups",
> String.valueOf(4096))
> .option(HoodieClusteringConfig.LAYOUT_OPTIMIZE_ENABLE.key, "true")
> .option(HoodieClusteringConfig.LAYOUT_OPTIMIZE_STRATEGY.key,
> layoutOptStrategy)
> .option(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key,
> "product_id,customer_id")
> .option(DataSourceWriteOptions.OPERATION.key(),
> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
> .option(BULK_INSERT_SORT_MODE.key(), "NONE")
> .options(commonOpts)
> .mode(ErrorIfExists)
> .save(outputPath)
> } {code}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)