This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/asf-site by this push:
new f47da4bde7f [DOCS] Hardcode config names instead of params (#10888)
f47da4bde7f is described below
commit f47da4bde7f3c627dd8ee845d18e81008374e551
Author: Bhavani Sudha Saktheeswaran <[email protected]>
AuthorDate: Tue Mar 19 07:56:42 2024 -0700
[DOCS] Hardcode config names instead of params (#10888)
---
website/docs/basic_configurations.md | 2 +-
website/docs/clustering.md | 16 ++---
website/docs/compaction.md | 16 ++---
website/docs/concurrency_control.md | 8 +--
website/docs/configurations.md | 2 +-
website/docs/deployment.md | 18 +++---
website/docs/disaster_recovery.md | 24 +++----
website/docs/faq_storage.md | 8 +--
website/docs/faq_writing_tables.md | 2 +-
website/docs/flink-quick-start-guide.md | 14 ++--
website/docs/precommit_validator.md | 6 +-
website/docs/querying_data.md | 6 +-
website/docs/quick-start-guide.md | 46 ++++++-------
website/docs/reading_tables_streaming_reads.md | 8 +--
website/docs/schema_evolution.md | 16 ++---
website/docs/syncing_metastore.md | 12 ++--
website/docs/troubleshooting.md | 6 +-
website/docs/write_operations.md | 2 +-
website/docs/writing_data.md | 86 ++++++++++++-------------
website/docs/writing_tables_streaming_writes.md | 8 +--
20 files changed, 151 insertions(+), 155 deletions(-)
diff --git a/website/docs/basic_configurations.md
b/website/docs/basic_configurations.md
index 6761f1b83c1..38e3e8511d2 100644
--- a/website/docs/basic_configurations.md
+++ b/website/docs/basic_configurations.md
@@ -50,7 +50,7 @@ inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
-.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
+.option("hoodie.datasource.write.partitionpath.field", "partition")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
.option(HoodieWriteConfig.TABLE_NAME, tableName)
.mode(SaveMode.Append)
diff --git a/website/docs/clustering.md b/website/docs/clustering.md
index 149b690ff3b..3052e171b6e 100644
--- a/website/docs/clustering.md
+++ b/website/docs/clustering.md
@@ -191,10 +191,10 @@ import org.apache.hudi.config.HoodieWriteConfig._
val df = //generate data frame
df.write.format("org.apache.hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, "tableName").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", "tableName").
option("hoodie.parquet.small.file.limit", "0").
option("hoodie.clustering.inline", "true").
option("hoodie.clustering.inline.max.commits", "4").
@@ -293,10 +293,10 @@ We can also enable asynchronous clustering with Spark
structured streaming sink
val commonOpts = Map(
"hoodie.insert.shuffle.parallelism" -> "4",
"hoodie.upsert.shuffle.parallelism" -> "4",
- DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
- DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
- DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp",
- HoodieWriteConfig.TBL_NAME.key -> "hoodie_test"
+ "hoodie.datasource.write.recordkey.field" -> "_row_key",
+ "hoodie.datasource.write.partitionpath.field" -> "partition",
+ "hoodie.datasource.write.precombine.field" -> "timestamp",
+ "hoodie.table.name" -> "hoodie_test"
)
def getAsyncClusteringOpts(isAsyncClustering: String,
diff --git a/website/docs/compaction.md b/website/docs/compaction.md
index 49126fd2108..c3504236da7 100644
--- a/website/docs/compaction.md
+++ b/website/docs/compaction.md
@@ -137,14 +137,14 @@ import org.apache.spark.sql.streaming.ProcessingTime;
DataStreamWriter<Row> writer =
streamingInput.writeStream().format("org.apache.hudi")
- .option(DataSourceWriteOptions.OPERATION_OPT_KEY(), operationType)
- .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY(), tableType)
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP,
"10")
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "true")
- .option(HoodieWriteConfig.TABLE_NAME,
tableName).option("checkpointLocation", checkpointLocation)
+ .option("hoodie.datasource.write.operation", operationType)
+ .option("hoodie.datasource.write.table.type", tableType)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.compact.inline.max.delta.commits", "10")
+ .option("hoodie.datasource.compaction.async.enable", "true")
+ .option("hoodie.table.name", tableName).option("checkpointLocation",
checkpointLocation)
.outputMode(OutputMode.Append());
writer.trigger(new ProcessingTime(30000)).start(tablePath);
```
diff --git a/website/docs/concurrency_control.md
b/website/docs/concurrency_control.md
index 461f2d1dd3c..64c9af85b66 100644
--- a/website/docs/concurrency_control.md
+++ b/website/docs/concurrency_control.md
@@ -215,15 +215,15 @@ Following is an example of how to use
optimistic_concurrency_control via spark d
```java
inputDF.write.format("hudi")
.options(getQuickstartWriteConfigs)
- .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
+ .option("hoodie.datasource.write.precombine.field", "ts")
.option("hoodie.cleaner.policy.failed.writes", "LAZY")
.option("hoodie.write.concurrency.mode",
"optimistic_concurrency_control")
.option("hoodie.write.lock.zookeeper.url", "zookeeper")
.option("hoodie.write.lock.zookeeper.port", "2181")
.option("hoodie.write.lock.zookeeper.base_path", "/test")
- .option(RECORDKEY_FIELD_OPT_KEY, "uuid")
- .option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath")
- .option(TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "uuid")
+ .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
+ .option("hoodie.table.name", tableName)
.mode(Overwrite)
.save(basePath)
```
diff --git a/website/docs/configurations.md b/website/docs/configurations.md
index 0f0ba7df7e7..f473d9ce6ba 100644
--- a/website/docs/configurations.md
+++ b/website/docs/configurations.md
@@ -93,7 +93,7 @@ inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
-.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
+.option("hoodie.datasource.write.partitionpath.field", "partition")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
.option(HoodieWriteConfig.TABLE_NAME, tableName)
.mode(SaveMode.Append)
diff --git a/website/docs/deployment.md b/website/docs/deployment.md
index b6d56b9937e..9bafde59c46 100644
--- a/website/docs/deployment.md
+++ b/website/docs/deployment.md
@@ -144,10 +144,10 @@ Here is an example invocation using spark datasource
inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as
well
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieWriteConfig.TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.table.name", tableName)
.mode(SaveMode.Append)
.save(basePath);
```
@@ -205,11 +205,11 @@ val inserts =
convertToStringList(dataGen.generateInserts(100)).toList
val insertDf = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
insertDf.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
- option(OPERATION.key(), INSERT_OPERATION_OPT_VAL).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
+ option("hoodie.datasource.write.operation", "insert").
mode(Append).
save(basePath)
```
diff --git a/website/docs/disaster_recovery.md
b/website/docs/disaster_recovery.md
index 889f339dad5..a264b7d3615 100644
--- a/website/docs/disaster_recovery.md
+++ b/website/docs/disaster_recovery.md
@@ -46,10 +46,10 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -61,10 +61,10 @@ for (_ <- 1 to 4) {
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
}
@@ -159,10 +159,10 @@ for (_ <- 1 to 3) {
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
}
diff --git a/website/docs/faq_storage.md b/website/docs/faq_storage.md
index 43ca76817a8..d74e65dfc3c 100644
--- a/website/docs/faq_storage.md
+++ b/website/docs/faq_storage.md
@@ -71,8 +71,8 @@ spark.read.parquet("your_data_set/path/to/month")
.write.format("org.apache.hudi")
.option("hoodie.datasource.write.operation", "bulk_insert")
.option("hoodie.datasource.write.storage.type", "storage_type") //
COPY_ON_WRITE or MERGE_ON_READ
- .option(RECORDKEY_FIELD_OPT_KEY, "<your key>").
- .option(PARTITIONPATH_FIELD_OPT_KEY, "<your_partition>")
+ .option("hoodie.datasource.write.recordkey.field", "<your key>").
+ .option("hoodie.datasource.write.partitionpath.field", "<your_partition>")
...
.mode(SaveMode.Append)
.save(basePath);
@@ -84,8 +84,8 @@ Once you have the initial copy, you can simply run upsert
operations on this by
spark.read.parquet("your_data_set/path/to/month").limit(n) // Limit n records
.write.format("org.apache.hudi")
.option("hoodie.datasource.write.operation", "upsert")
- .option(RECORDKEY_FIELD_OPT_KEY, "<your key>").
- .option(PARTITIONPATH_FIELD_OPT_KEY, "<your_partition>")
+ .option("hoodie.datasource.write.recordkey.field", "<your key>").
+ .option("hoodie.datasource.write.partitionpath.field", "<your_partition>")
...
.mode(SaveMode.Append)
.save(basePath);
diff --git a/website/docs/faq_writing_tables.md
b/website/docs/faq_writing_tables.md
index bb1c1a01f74..90874efbf4f 100644
--- a/website/docs/faq_writing_tables.md
+++ b/website/docs/faq_writing_tables.md
@@ -89,7 +89,7 @@ Hudi configuration options covering the datasource and low
level Hudi write clie
```scala
inputDF.write().format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
...
```
diff --git a/website/docs/flink-quick-start-guide.md
b/website/docs/flink-quick-start-guide.md
index bdca36c6853..ed9f7fe9371 100644
--- a/website/docs/flink-quick-start-guide.md
+++ b/website/docs/flink-quick-start-guide.md
@@ -195,9 +195,9 @@ String targetTable = "hudi_table";
String basePath = "file:///tmp/hudi_table";
Map<String, String> options = new HashMap<>();
-options.put(FlinkOptions.PATH.key(), basePath);
-options.put(FlinkOptions.TABLE_TYPE.key(),
HoodieTableType.MERGE_ON_READ.name());
-options.put(FlinkOptions.PRECOMBINE_FIELD.key(), "ts");
+options.put("path", basePath);
+options.put("table.type", HoodieTableType.MERGE_ON_READ.name());
+options.put("precombine.field", "ts");
DataStream<RowData> dataStream = env.addSource(...);
HoodiePipeline.Builder builder = HoodiePipeline.builder(targetTable)
@@ -253,10 +253,10 @@ String targetTable = "hudi_table";
String basePath = "file:///tmp/hudi_table";
Map<String, String> options = new HashMap<>();
-options.put(FlinkOptions.PATH.key(), basePath);
-options.put(FlinkOptions.TABLE_TYPE.key(),
HoodieTableType.MERGE_ON_READ.name());
-options.put(FlinkOptions.READ_AS_STREAMING.key(), "true"); // this option
enable the streaming read
-options.put(FlinkOptions.READ_START_COMMIT.key(), "20210316134557"); //
specifies the start commit instant time
+options.put("path", basePath);
+options.put("table.type", HoodieTableType.MERGE_ON_READ.name());
+options.put("read.streaming.enabled", "true"); // this option enable the
streaming read
+options.put("read.start-commit", "20210316134557"); // specifies the start
commit instant time
HoodiePipeline.Builder builder = HoodiePipeline.builder(targetTable)
.column("uuid VARCHAR(20)")
diff --git a/website/docs/precommit_validator.md
b/website/docs/precommit_validator.md
index 6f6806a3fa9..5e13fca3dc0 100644
--- a/website/docs/precommit_validator.md
+++ b/website/docs/precommit_validator.md
@@ -33,7 +33,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQuerySingleResultPreCommitValidator").
option("hoodie.precommit.validators.single.value.sql.queries", "select
count(*) from <TABLE_NAME> where col is null#0").
save(basePath)
@@ -56,7 +56,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQueryEqualityPreCommitValidator").
option("hoodie.precommit.validators.equality.sql.queries", "select count(*)
from <TABLE_NAME> where col is null").
save(basePath)
@@ -74,7 +74,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQueryInequalityPreCommitValidator").
option("hoodie.precommit.validators.inequality.sql.queries", "select
count(*) from <TABLE_NAME> where col is null").
save(basePath)
diff --git a/website/docs/querying_data.md b/website/docs/querying_data.md
index c43ee1fd7f4..31069822df7 100644
--- a/website/docs/querying_data.md
+++ b/website/docs/querying_data.md
@@ -34,7 +34,7 @@ Retrieve the data table at the present point in time.
val hudiSnapshotQueryDF = spark
.read
.format("hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL())
+ .option("hoodie.datasource.query.type", "snapshot")
.load(tablePath)
```
@@ -47,8 +47,8 @@ The following snippet shows how to obtain all records changed
after `beginInstan
```java
Dataset<Row> hudiIncQueryDF = spark.read()
.format("org.apache.hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
- .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(),
<beginInstantTime>)
+ .option("hoodie.datasource.query.type", "incremental")
+ .option("hoodie.datasource.read.begin.instanttime", <beginInstantTime>)
.option(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY(),
"/year=2020/month=*/day=*") // Optional, use glob pattern if querying certain
partitions
.load(tablePath); // For incremental query, pass in the root/base path of
table
diff --git a/website/docs/quick-start-guide.md
b/website/docs/quick-start-guide.md
index 9104edee8a7..1359c5c2957 100644
--- a/website/docs/quick-start-guide.md
+++ b/website/docs/quick-start-guide.md
@@ -260,8 +260,8 @@ val data =
var inserts = spark.createDataFrame(data).toDF(columns:_*)
inserts.write.format("hudi").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -404,9 +404,9 @@ values={[
val updatesDf = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-D").withColumn("fare", col("fare") * 10)
updatesDf.write.format("hudi").
- option(OPERATION_OPT_KEY, "upsert").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
```
@@ -560,9 +560,9 @@ values={[
val deletesDF = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-F")
deletesDF.write.format("hudi").
- option(OPERATION_OPT_KEY, "delete").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "delete").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -717,8 +717,8 @@ val beginTime = commits(commits.length - 2) // commit time
we are interested in
// incrementally query data
val tripsIncrementalDF = spark.read.format("hudi").
- option(QUERY_TYPE.key(), QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(BEGIN_INSTANTTIME.key(), 0).
+ option("hoodie.datasource.query.type", "incremental").
+ option("hoodie.datasource.read.begin.instanttime", 0).
load(basePath)
tripsIncrementalDF.createOrReplaceTempView("trips_incremental")
@@ -812,9 +812,9 @@ var df = spark.createDataFrame(data).toDF(columns:_*)
// Insert data
df.write.format("hudi").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(CDC_ENABLED.key(), "true").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.cdc.enabled", "true").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
@@ -822,18 +822,18 @@ df.write.format("hudi").
val updatesDf = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-A" || $"rider" === "rider-B").withColumn("fare", col("fare") * 10)
updatesDf.write.format("hudi").
- option(OPERATION_OPT_KEY, "upsert").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(CDC_ENABLED.key(), "true").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.cdc.enabled", "true").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
// Query CDC data
-spark.read.option(BEGIN_INSTANTTIME.key(), 0).
- option(QUERY_TYPE.key(), QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(INCREMENTAL_FORMAT.key(), "cdc").
+spark.read.option("hoodie.datasource.read.begin.instanttime", 0).
+ option("hoodie.datasource.query.type", "incremental").
+ option("hoodie.datasource.query.incremental.format", "cdc").
format("hudi").load(basePath).show(false)
```
</TabItem>
@@ -929,7 +929,7 @@ values={[
// spark-shell
inserts.write.format("hudi").
...
- option(TABLE_TYPE.key(), "MERGE_ON_READ").
+ option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
...
```
</TabItem>
@@ -992,7 +992,7 @@ values={[
// spark-shell
inserts.write.format("hudi").
...
-option(RECORDKEY_FIELD.key(), "uuid").
+option("hoodie.datasource.write.recordkey.field", "uuid").
...
```
@@ -1063,7 +1063,7 @@ values={[
// spark-shell
updatesDf.write.format("hudi").
...
- option(PRECOMBINE_FIELD_NAME.key(), "ts").
+ option("hoodie.datasource.write.precombine.field", "ts").
...
```
diff --git a/website/docs/reading_tables_streaming_reads.md
b/website/docs/reading_tables_streaming_reads.md
index 57c3e2c4702..5e73524e14d 100644
--- a/website/docs/reading_tables_streaming_reads.md
+++ b/website/docs/reading_tables_streaming_reads.md
@@ -26,10 +26,10 @@ values={[
// reload data
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
diff --git a/website/docs/schema_evolution.md b/website/docs/schema_evolution.md
index 68ad442e434..31dd73662fc 100755
--- a/website/docs/schema_evolution.md
+++ b/website/docs/schema_evolution.md
@@ -207,11 +207,11 @@ val data1 = Seq(Row("row_1", "part_0", 0L, "bob", "v_0",
0),
var dfFromData1 = spark.createDataFrame(data1, schema)
dfFromData1.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD.key, "preComb").
- option(RECORDKEY_FIELD.key, "rowId").
- option(PARTITIONPATH_FIELD.key, "partitionId").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
option("hoodie.index.type","SIMPLE").
- option(TBL_NAME.key, tableName).
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
@@ -266,11 +266,11 @@ val data2 = Seq(Row("row_2", "part_0", 5L, "john", "v_3",
3L, "newField_1"),
var dfFromData2 = spark.createDataFrame(data2, newSchema)
dfFromData2.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD.key, "preComb").
- option(RECORDKEY_FIELD.key, "rowId").
- option(PARTITIONPATH_FIELD.key, "partitionId").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
option("hoodie.index.type","SIMPLE").
- option(TBL_NAME.key, tableName).
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
diff --git a/website/docs/syncing_metastore.md
b/website/docs/syncing_metastore.md
index 2c5866b82a0..e39c5f39337 100644
--- a/website/docs/syncing_metastore.md
+++ b/website/docs/syncing_metastore.md
@@ -229,12 +229,12 @@ var dfFromData0 = spark.createDataFrame(data0,schema)
dfFromData0.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "preComb").
- option(RECORDKEY_FIELD_OPT_KEY, "rowId").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionId").
- option(TABLE_NAME, tableName).
- option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL).
- option(OPERATION_OPT_KEY, "upsert").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
+ option("hoodie.table.name", tableName).
+ option("hoodie.datasource.write.table.type", "COPY_ON_WRITE").
+ option("hoodie.datasource.write.operation", "upsert").
option("hoodie.index.type","SIMPLE").
option("hoodie.datasource.write.hive_style_partitioning","true").
option("hoodie.datasource.hive_sync.jdbcurl","jdbc:hive2://hiveserver:10000/").
diff --git a/website/docs/troubleshooting.md b/website/docs/troubleshooting.md
index 6398cfc7245..db93a76d187 100644
--- a/website/docs/troubleshooting.md
+++ b/website/docs/troubleshooting.md
@@ -101,11 +101,11 @@ Unless Hive sync is enabled, the dataset written by Hudi
using one of the method
val hudiSnapshotQueryDF = spark
.read()
.format("hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL())
+ .option("hoodie.datasource.query.type", "snapshot")
.load(basePath)
val hudiIncQueryDF = spark.read().format("hudi")
- .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY(),
DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL())
- .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(),
<beginInstantTime>)
+ .option("hoodie.datasource.query.type", "incremental")
+ .option("hoodie.datasource.read.begin.instanttime", <beginInstantTime>)
.load(basePath);
```
diff --git a/website/docs/write_operations.md b/website/docs/write_operations.md
index 056d99c5f2b..04a7a8b63a8 100644
--- a/website/docs/write_operations.md
+++ b/website/docs/write_operations.md
@@ -29,7 +29,7 @@ Hudi supports implementing two types of deletes on data
stored in Hudi tables, b
- **Soft Deletes** : Retain the record key and just null out the values for
all the other fields.
This can be achieved by ensuring the appropriate fields are nullable in the
table schema and simply upserting the table after setting these fields to null.
- **Hard Deletes** : This method entails completely eradicating all evidence
of a record from the table, including any duplicates. There are three distinct
approaches to accomplish this:
- - Using DataSource, set `OPERATION_OPT_KEY` to `DELETE_OPERATION_OPT_VAL`.
This will remove all the records in the DataSet being submitted.
+ - Using DataSource, set `"hoodie.datasource.write.operation"` to `"delete"`.
This will remove all the records in the DataSet being submitted.
- Using DataSource, set `PAYLOAD_CLASS_OPT_KEY` to
`"org.apache.hudi.EmptyHoodieRecordPayload"`. This will remove all the records
in the DataSet being submitted.
- Using DataSource or Hudi Streamer, add a column named `_hoodie_is_deleted`
to DataSet. The value of this column must be set to `true` for all the records
to be deleted and either `false` or left null for any records which are to be
upserted.
diff --git a/website/docs/writing_data.md b/website/docs/writing_data.md
index 582d13bbd06..308de8ca78a 100644
--- a/website/docs/writing_data.md
+++ b/website/docs/writing_data.md
@@ -14,33 +14,29 @@ There are a number of options available:
**`HoodieWriteConfig`**:
-**TABLE_NAME** (Required)<br/>
+**TABLE_NAME** <br/>
**`DataSourceWriteOptions`**:
-**RECORDKEY_FIELD_OPT_KEY** (Required): Primary key field(s). Record keys
uniquely identify a record/row within each partition. If one wants to have a
global uniqueness, there are two options. You could either make the dataset
non-partitioned, or, you can leverage Global indexes to ensure record keys are
unique irrespective of the partition path. Record keys can either be a single
column or refer to multiple columns. `KEYGENERATOR_CLASS_OPT_KEY` property
should be set accordingly based o [...]
+**RECORDKEY_FIELD**: Primary key field(s). Record keys uniquely identify a
record/row within each partition. If one wants to have a global uniqueness,
there are two options. You could either make the dataset non-partitioned, or,
you can leverage Global indexes to ensure record keys are unique irrespective
of the partition path. Record keys can either be a single column or refer to
multiple columns. `KEYGENERATOR_CLASS_OPT_KEY` property should be set
accordingly based on whether it is a s [...]
Default value: `"uuid"`<br/>
-**PARTITIONPATH_FIELD_OPT_KEY** (Required): Columns to be used for
partitioning the table. To prevent partitioning, provide empty string as value
eg: `""`. Specify partitioning/no partitioning using
`KEYGENERATOR_CLASS_OPT_KEY`. If partition path needs to be url encoded, you
can set `URL_ENCODE_PARTITIONING_OPT_KEY`. If synchronizing to hive, also
specify using `HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.`<br/>
+**PARTITIONPATH_FIELD**: Columns to be used for partitioning the table. To
prevent partitioning, provide empty string as value eg: `""`. Specify
partitioning/no partitioning using `KEYGENERATOR_CLASS_OPT_KEY`. If partition
path needs to be url encoded, you can set `URL_ENCODE_PARTITIONING_OPT_KEY`. If
synchronizing to hive, also specify using
`HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.`<br/>
Default value: `"partitionpath"`<br/>
-**PRECOMBINE_FIELD_OPT_KEY** (Required): When two records within the same
batch have the same key value, the record with the largest value from the field
specified will be choosen. If you are using default payload of
OverwriteWithLatestAvroPayload for HoodieRecordPayload (`WRITE_PAYLOAD_CLASS`),
an incoming record will always takes precendence compared to the one in storage
ignoring this `PRECOMBINE_FIELD_OPT_KEY`. <br/>
+**PRECOMBINE_FIELD**: When two records within the same batch have the same key
value, the record with the largest value from the field specified will be
choosen. If you are using default payload of OverwriteWithLatestAvroPayload for
HoodieRecordPayload (`WRITE_PAYLOAD_CLASS`), an incoming record will always
takes precendence compared to the one in storage ignoring this
`PRECOMBINE_FIELD_OPT_KEY`. <br/>
Default value: `"ts"`<br/>
-**OPERATION_OPT_KEY**: The [write operations](/docs/write_operations) to
use.<br/>
+**OPERATION**: The [write operations](/docs/write_operations) to use.<br/>
Available values:<br/>
-`UPSERT_OPERATION_OPT_VAL` (default), `BULK_INSERT_OPERATION_OPT_VAL`,
`INSERT_OPERATION_OPT_VAL`, `DELETE_OPERATION_OPT_VAL`
+`"upsert"` (default), `"bulk_insert"`, `"insert"`, `"delete"`
-**TABLE_TYPE_OPT_KEY**: The [type of table](/docs/concepts#table-types) to
write to. Note: After the initial creation of a table, this value must stay
consistent when writing to (updating) the table using the Spark
`SaveMode.Append` mode.<br/>
+**TABLE_TYPE**: The [type of table](/docs/concepts#table-types) to write to.
Note: After the initial creation of a table, this value must stay consistent
when writing to (updating) the table using the Spark `SaveMode.Append`
mode.<br/>
Available values:<br/>
[`COW_TABLE_TYPE_OPT_VAL`](/docs/concepts#copy-on-write-table) (default),
[`MOR_TABLE_TYPE_OPT_VAL`](/docs/concepts#merge-on-read-table)
-**KEYGENERATOR_CLASS_OPT_KEY**: Refer to [Key
Generation](/docs/key_generation) section below.
-
-**HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY**: If using hive, specify if the
table should or should not be partitioned.<br/>
-Available values:<br/>
-`classOf[MultiPartKeysValueExtractor].getCanonicalName` (default),
`classOf[SlashEncodedDayPartitionValueExtractor].getCanonicalName`,
`classOf[TimestampBasedKeyGenerator].getCanonicalName`,
`classOf[NonPartitionedExtractor].getCanonicalName`,
`classOf[GlobalDeleteKeyGenerator].getCanonicalName` (to be used when
`OPERATION_OPT_KEY` is set to `DELETE_OPERATION_OPT_VAL`)
+**KEYGENERATOR_CLASS_NAME**: Refer to [Key Generation](/docs/key_generation)
section below.
Example:
@@ -50,10 +46,10 @@ Upsert a DataFrame, specifying the necessary field names
for `recordKey => _row_
inputDF.write()
.format("hudi")
.options(clientOpts) //Where clientOpts is of type Map[String, String].
clientOpts can include any other options necessary.
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieWriteConfig.TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.table.name", tableName)
.mode(SaveMode.Append)
.save(basePath);
```
@@ -75,10 +71,10 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -200,11 +196,11 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY,"insert_overwrite_table").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation","insert_overwrite_table").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -258,11 +254,11 @@ val df = spark.
filter("partitionpath = 'americas/united_states/san_francisco'")
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY,"insert_overwrite").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation","insert_overwrite").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -315,18 +311,18 @@ val softDeleteDf = nullifyColumns.
// simply upsert the table after setting these fields to null
softDeleteDf.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY, "upsert").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
```
- **Hard Deletes** : A stronger form of deletion is to physically remove any
trace of the record from the table. This can be achieved in 3 different ways.
-1. Using Datasource, set `OPERATION_OPT_KEY` to `DELETE_OPERATION_OPT_VAL`.
This will remove all the records in the DataSet being submitted.
+1. Using Datasource, set `"hoodie.datasource.write.operation"` to `"delete"`.
This will remove all the records in the DataSet being submitted.
Example, first read in a dataset:
```scala
@@ -349,11 +345,11 @@ val deletes = dataGen.generateDeletes(df.collectAsList())
val df = spark.read.json(spark.sparkContext.parallelize(deletes, 2));
df.write.format("org.apache.hudi").
options(getQuickstartWriteConfigs).
-option(OPERATION_OPT_KEY,"delete").
-option(PRECOMBINE_FIELD_OPT_KEY, "ts").
-option(RECORDKEY_FIELD_OPT_KEY, "uuid").
-option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
-option(TABLE_NAME, tableName).
+option("hoodie.datasource.write.operation","delete").
+option("hoodie.datasource.write.precombine.field", "ts").
+option("hoodie.datasource.write.recordkey.field", "uuid").
+option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+option("hoodie.table.name", tableName).
mode(Append).
save(basePath);
```
@@ -430,16 +426,16 @@ Read more in-depth details about concurrency control in
the [concurrency control
```java
inputDF.write.format("hudi")
.options(getQuickstartWriteConfigs)
- .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
+ .option("hoodie.datasource.write.precombine.field", "ts")
.option("hoodie.cleaner.policy.failed.writes", "LAZY")
.option("hoodie.write.concurrency.mode",
"optimistic_concurrency_control")
.option("hoodie.write.lock.zookeeper.url", "zookeeper")
.option("hoodie.write.lock.zookeeper.port", "2181")
.option("hoodie.write.lock.zookeeper.lock_key", "test_table")
.option("hoodie.write.lock.zookeeper.base_path", "/test")
- .option(RECORDKEY_FIELD_OPT_KEY, "uuid")
- .option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath")
- .option(TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "uuid")
+ .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
+ .option("hoodie.table.name", tableName)
.mode(Overwrite)
.save(basePath)
```
diff --git a/website/docs/writing_tables_streaming_writes.md
b/website/docs/writing_tables_streaming_writes.md
index 4bb43bb696b..77ff044ca63 100644
--- a/website/docs/writing_tables_streaming_writes.md
+++ b/website/docs/writing_tables_streaming_writes.md
@@ -36,10 +36,10 @@ val df = spark.readStream.
// write stream to new hudi table
df.writeStream.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, streamingTableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", streamingTableName).
outputMode("append").
option("path", baseStreamingPath).
option("checkpointLocation", checkpointLocation).