This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/asf-site by this push:
new f47da4bde7f [DOCS] Hardcode config names instead of params (#10888)
f47da4bde7f is described below
commit f47da4bde7f3c627dd8ee845d18e81008374e551
Author: Bhavani Sudha Saktheeswaran <[email protected]>
AuthorDate: Tue Mar 19 07:56:42 2024 -0700
[DOCS] Hardcode config names instead of params (#10888)
---
website/docs/basic_configurations.md | 2 +-
website/docs/clustering.md | 16 ++---
website/docs/compaction.md | 16 ++---
website/docs/concurrency_control.md | 8 +--
website/docs/configurations.md | 2 +-
website/docs/deployment.md | 18 +++---
website/docs/disaster_recovery.md | 24 +++----
website/docs/faq_storage.md | 8 +--
website/docs/faq_writing_tables.md | 2 +-
website/docs/flink-quick-start-guide.md | 14 ++--
website/docs/precommit_validator.md | 6 +-
website/docs/querying_data.md | 6 +-
website/docs/quick-start-guide.md | 46 ++++++-------
website/docs/reading_tables_streaming_reads.md | 8 +--
website/docs/schema_evolution.md | 16 ++---
website/docs/syncing_metastore.md | 12 ++--
website/docs/troubleshooting.md | 6 +-
website/docs/write_operations.md | 2 +-
website/docs/writing_data.md | 86 ++++++++++++-------------
website/docs/writing_tables_streaming_writes.md | 8 +--
20 files changed, 151 insertions(+), 155 deletions(-)
diff --git a/website/docs/basic_configurations.md
b/website/docs/basic_configurations.md
index 6761f1b83c1..38e3e8511d2 100644
--- a/website/docs/basic_configurations.md
+++ b/website/docs/basic_configurations.md
@@ -50,7 +50,7 @@ inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
-.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
+.option("hoodie.datasource.write.partitionpath.field", "partition")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
.option(HoodieWriteConfig.TABLE_NAME, tableName)
.mode(SaveMode.Append)
diff --git a/website/docs/clustering.md b/website/docs/clustering.md
index 149b690ff3b..3052e171b6e 100644
--- a/website/docs/clustering.md
+++ b/website/docs/clustering.md
@@ -191,10 +191,10 @@ import org.apache.hudi.config.HoodieWriteConfig._
val df = //generate data frame
df.write.format("org.apache.hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, "tableName").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", "tableName").
option("hoodie.parquet.small.file.limit", "0").
option("hoodie.clustering.inline", "true").
option("hoodie.clustering.inline.max.commits", "4").
@@ -293,10 +293,10 @@ We can also enable asynchronous clustering with Spark
structured streaming sink
val commonOpts = Map(
"hoodie.insert.shuffle.parallelism" -> "4",
"hoodie.upsert.shuffle.parallelism" -> "4",
- DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
- DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
- DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp",
- HoodieWriteConfig.TBL_NAME.key -> "hoodie_test"
+ "hoodie.datasource.write.recordkey.field" -> "_row_key",
+ "hoodie.datasource.write.partitionpath.field" -> "partition",
+ "hoodie.datasource.write.precombine.field" -> "timestamp",
+ "hoodie.table.name" -> "hoodie_test"
)
def getAsyncClusteringOpts(isAsyncClustering: String,
diff --git a/website/docs/compaction.md b/website/docs/compaction.md
index 49126fd2108..c3504236da7 100644
--- a/website/docs/compaction.md
+++ b/website/docs/compaction.md
@@ -137,14 +137,14 @@ import org.apache.spark.sql.streaming.ProcessingTime;
DataStreamWriter<Row> writer =
streamingInput.writeStream().format("org.apache.hudi")
- .option(DataSourceWriteOptions.OPERATION_OPT_KEY(), operationType)
- .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY(), tableType)
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP,
"10")
- .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY(), "true")
- .option(HoodieWriteConfig.TABLE_NAME,
tableName).option("checkpointLocation", checkpointLocation)
+ .option("hoodie.datasource.write.operation", operationType)
+ .option("hoodie.datasource.write.table.type", tableType)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.compact.inline.max.delta.commits", "10")
+ .option("hoodie.datasource.compaction.async.enable", "true")
+ .option("hoodie.table.name", tableName).option("checkpointLocation",
checkpointLocation)
.outputMode(OutputMode.Append());
writer.trigger(new ProcessingTime(30000)).start(tablePath);
```
diff --git a/website/docs/concurrency_control.md
b/website/docs/concurrency_control.md
index 461f2d1dd3c..64c9af85b66 100644
--- a/website/docs/concurrency_control.md
+++ b/website/docs/concurrency_control.md
@@ -215,15 +215,15 @@ Following is an example of how to use
optimistic_concurrency_control via spark d
```java
inputDF.write.format("hudi")
.options(getQuickstartWriteConfigs)
- .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
+ .option("hoodie.datasource.write.precombine.field", "ts")
.option("hoodie.cleaner.policy.failed.writes", "LAZY")
.option("hoodie.write.concurrency.mode",
"optimistic_concurrency_control")
.option("hoodie.write.lock.zookeeper.url", "zookeeper")
.option("hoodie.write.lock.zookeeper.port", "2181")
.option("hoodie.write.lock.zookeeper.base_path", "/test")
- .option(RECORDKEY_FIELD_OPT_KEY, "uuid")
- .option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath")
- .option(TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "uuid")
+ .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
+ .option("hoodie.table.name", tableName)
.mode(Overwrite)
.save(basePath)
```
diff --git a/website/docs/configurations.md b/website/docs/configurations.md
index 0f0ba7df7e7..f473d9ce6ba 100644
--- a/website/docs/configurations.md
+++ b/website/docs/configurations.md
@@ -93,7 +93,7 @@ inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
.option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
-.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition")
+.option("hoodie.datasource.write.partitionpath.field", "partition")
.option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
.option(HoodieWriteConfig.TABLE_NAME, tableName)
.mode(SaveMode.Append)
diff --git a/website/docs/deployment.md b/website/docs/deployment.md
index b6d56b9937e..9bafde59c46 100644
--- a/website/docs/deployment.md
+++ b/website/docs/deployment.md
@@ -144,10 +144,10 @@ Here is an example invocation using spark datasource
inputDF.write()
.format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as
well
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieWriteConfig.TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.table.name", tableName)
.mode(SaveMode.Append)
.save(basePath);
```
@@ -205,11 +205,11 @@ val inserts =
convertToStringList(dataGen.generateInserts(100)).toList
val insertDf = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
insertDf.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
- option(OPERATION.key(), INSERT_OPERATION_OPT_VAL).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
+ option("hoodie.datasource.write.operation", "insert").
mode(Append).
save(basePath)
```
diff --git a/website/docs/disaster_recovery.md
b/website/docs/disaster_recovery.md
index 889f339dad5..a264b7d3615 100644
--- a/website/docs/disaster_recovery.md
+++ b/website/docs/disaster_recovery.md
@@ -46,10 +46,10 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -61,10 +61,10 @@ for (_ <- 1 to 4) {
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
}
@@ -159,10 +159,10 @@ for (_ <- 1 to 3) {
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
}
diff --git a/website/docs/faq_storage.md b/website/docs/faq_storage.md
index 43ca76817a8..d74e65dfc3c 100644
--- a/website/docs/faq_storage.md
+++ b/website/docs/faq_storage.md
@@ -71,8 +71,8 @@ spark.read.parquet("your_data_set/path/to/month")
.write.format("org.apache.hudi")
.option("hoodie.datasource.write.operation", "bulk_insert")
.option("hoodie.datasource.write.storage.type", "storage_type") //
COPY_ON_WRITE or MERGE_ON_READ
- .option(RECORDKEY_FIELD_OPT_KEY, "<your key>").
- .option(PARTITIONPATH_FIELD_OPT_KEY, "<your_partition>")
+ .option("hoodie.datasource.write.recordkey.field", "<your key>").
+ .option("hoodie.datasource.write.partitionpath.field", "<your_partition>")
...
.mode(SaveMode.Append)
.save(basePath);
@@ -84,8 +84,8 @@ Once you have the initial copy, you can simply run upsert
operations on this by
spark.read.parquet("your_data_set/path/to/month").limit(n) // Limit n records
.write.format("org.apache.hudi")
.option("hoodie.datasource.write.operation", "upsert")
- .option(RECORDKEY_FIELD_OPT_KEY, "<your key>").
- .option(PARTITIONPATH_FIELD_OPT_KEY, "<your_partition>")
+ .option("hoodie.datasource.write.recordkey.field", "<your key>").
+ .option("hoodie.datasource.write.partitionpath.field", "<your_partition>")
...
.mode(SaveMode.Append)
.save(basePath);
diff --git a/website/docs/faq_writing_tables.md
b/website/docs/faq_writing_tables.md
index bb1c1a01f74..90874efbf4f 100644
--- a/website/docs/faq_writing_tables.md
+++ b/website/docs/faq_writing_tables.md
@@ -89,7 +89,7 @@ Hudi configuration options covering the datasource and low
level Hudi write clie
```scala
inputDF.write().format("org.apache.hudi")
.options(clientOpts) // any of the Hudi client opts can be passed in as well
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
...
```
diff --git a/website/docs/flink-quick-start-guide.md
b/website/docs/flink-quick-start-guide.md
index bdca36c6853..ed9f7fe9371 100644
--- a/website/docs/flink-quick-start-guide.md
+++ b/website/docs/flink-quick-start-guide.md
@@ -195,9 +195,9 @@ String targetTable = "hudi_table";
String basePath = "file:///tmp/hudi_table";
Map<String, String> options = new HashMap<>();
-options.put(FlinkOptions.PATH.key(), basePath);
-options.put(FlinkOptions.TABLE_TYPE.key(),
HoodieTableType.MERGE_ON_READ.name());
-options.put(FlinkOptions.PRECOMBINE_FIELD.key(), "ts");
+options.put("path", basePath);
+options.put("table.type", HoodieTableType.MERGE_ON_READ.name());
+options.put("precombine.field", "ts");
DataStream<RowData> dataStream = env.addSource(...);
HoodiePipeline.Builder builder = HoodiePipeline.builder(targetTable)
@@ -253,10 +253,10 @@ String targetTable = "hudi_table";
String basePath = "file:///tmp/hudi_table";
Map<String, String> options = new HashMap<>();
-options.put(FlinkOptions.PATH.key(), basePath);
-options.put(FlinkOptions.TABLE_TYPE.key(),
HoodieTableType.MERGE_ON_READ.name());
-options.put(FlinkOptions.READ_AS_STREAMING.key(), "true"); // this option
enable the streaming read
-options.put(FlinkOptions.READ_START_COMMIT.key(), "20210316134557"); //
specifies the start commit instant time
+options.put("path", basePath);
+options.put("table.type", HoodieTableType.MERGE_ON_READ.name());
+options.put("read.streaming.enabled", "true"); // this option enable the
streaming read
+options.put("read.start-commit", "20210316134557"); // specifies the start
commit instant time
HoodiePipeline.Builder builder = HoodiePipeline.builder(targetTable)
.column("uuid VARCHAR(20)")
diff --git a/website/docs/precommit_validator.md
b/website/docs/precommit_validator.md
index 6f6806a3fa9..5e13fca3dc0 100644
--- a/website/docs/precommit_validator.md
+++ b/website/docs/precommit_validator.md
@@ -33,7 +33,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQuerySingleResultPreCommitValidator").
option("hoodie.precommit.validators.single.value.sql.queries", "select
count(*) from <TABLE_NAME> where col is null#0").
save(basePath)
@@ -56,7 +56,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQueryEqualityPreCommitValidator").
option("hoodie.precommit.validators.equality.sql.queries", "select count(*)
from <TABLE_NAME> where col is null").
save(basePath)
@@ -74,7 +74,7 @@ Example:
import org.apache.hudi.config.HoodiePreCommitValidatorConfig._
df.write.format("hudi").mode(Overwrite).
- option(TABLE_NAME, tableName).
+ option("hoodie.table.name", tableName).
option("hoodie.precommit.validators",
"org.apache.hudi.client.validator.SqlQueryInequalityPreCommitValidator").
option("hoodie.precommit.validators.inequality.sql.queries", "select
count(*) from <TABLE_NAME> where col is null").
save(basePath)
diff --git a/website/docs/querying_data.md b/website/docs/querying_data.md
index c43ee1fd7f4..31069822df7 100644
--- a/website/docs/querying_data.md
+++ b/website/docs/querying_data.md
@@ -34,7 +34,7 @@ Retrieve the data table at the present point in time.
val hudiSnapshotQueryDF = spark
.read
.format("hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL())
+ .option("hoodie.datasource.query.type", "snapshot")
.load(tablePath)
```
@@ -47,8 +47,8 @@ The following snippet shows how to obtain all records changed
after `beginInstan
```java
Dataset<Row> hudiIncQueryDF = spark.read()
.format("org.apache.hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
- .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(),
<beginInstantTime>)
+ .option("hoodie.datasource.query.type", "incremental")
+ .option("hoodie.datasource.read.begin.instanttime", <beginInstantTime>)
.option(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY(),
"/year=2020/month=*/day=*") // Optional, use glob pattern if querying certain
partitions
.load(tablePath); // For incremental query, pass in the root/base path of
table
diff --git a/website/docs/quick-start-guide.md
b/website/docs/quick-start-guide.md
index 9104edee8a7..1359c5c2957 100644
--- a/website/docs/quick-start-guide.md
+++ b/website/docs/quick-start-guide.md
@@ -260,8 +260,8 @@ val data =
var inserts = spark.createDataFrame(data).toDF(columns:_*)
inserts.write.format("hudi").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -404,9 +404,9 @@ values={[
val updatesDf = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-D").withColumn("fare", col("fare") * 10)
updatesDf.write.format("hudi").
- option(OPERATION_OPT_KEY, "upsert").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
```
@@ -560,9 +560,9 @@ values={[
val deletesDF = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-F")
deletesDF.write.format("hudi").
- option(OPERATION_OPT_KEY, "delete").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "delete").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -717,8 +717,8 @@ val beginTime = commits(commits.length - 2) // commit time
we are interested in
// incrementally query data
val tripsIncrementalDF = spark.read.format("hudi").
- option(QUERY_TYPE.key(), QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(BEGIN_INSTANTTIME.key(), 0).
+ option("hoodie.datasource.query.type", "incremental").
+ option("hoodie.datasource.read.begin.instanttime", 0).
load(basePath)
tripsIncrementalDF.createOrReplaceTempView("trips_incremental")
@@ -812,9 +812,9 @@ var df = spark.createDataFrame(data).toDF(columns:_*)
// Insert data
df.write.format("hudi").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(CDC_ENABLED.key(), "true").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.cdc.enabled", "true").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
@@ -822,18 +822,18 @@ df.write.format("hudi").
val updatesDf = spark.read.format("hudi").load(basePath).filter($"rider" ===
"rider-A" || $"rider" === "rider-B").withColumn("fare", col("fare") * 10)
updatesDf.write.format("hudi").
- option(OPERATION_OPT_KEY, "upsert").
- option(PARTITIONPATH_FIELD_NAME.key(), "city").
- option(CDC_ENABLED.key(), "true").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.partitionpath.field", "city").
+ option("hoodie.table.cdc.enabled", "true").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
// Query CDC data
-spark.read.option(BEGIN_INSTANTTIME.key(), 0).
- option(QUERY_TYPE.key(), QUERY_TYPE_INCREMENTAL_OPT_VAL).
- option(INCREMENTAL_FORMAT.key(), "cdc").
+spark.read.option("hoodie.datasource.read.begin.instanttime", 0).
+ option("hoodie.datasource.query.type", "incremental").
+ option("hoodie.datasource.query.incremental.format", "cdc").
format("hudi").load(basePath).show(false)
```
</TabItem>
@@ -929,7 +929,7 @@ values={[
// spark-shell
inserts.write.format("hudi").
...
- option(TABLE_TYPE.key(), "MERGE_ON_READ").
+ option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
...
```
</TabItem>
@@ -992,7 +992,7 @@ values={[
// spark-shell
inserts.write.format("hudi").
...
-option(RECORDKEY_FIELD.key(), "uuid").
+option("hoodie.datasource.write.recordkey.field", "uuid").
...
```
@@ -1063,7 +1063,7 @@ values={[
// spark-shell
updatesDf.write.format("hudi").
...
- option(PRECOMBINE_FIELD_NAME.key(), "ts").
+ option("hoodie.datasource.write.precombine.field", "ts").
...
```
diff --git a/website/docs/reading_tables_streaming_reads.md
b/website/docs/reading_tables_streaming_reads.md
index 57c3e2c4702..5e73524e14d 100644
--- a/website/docs/reading_tables_streaming_reads.md
+++ b/website/docs/reading_tables_streaming_reads.md
@@ -26,10 +26,10 @@ values={[
// reload data
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
diff --git a/website/docs/schema_evolution.md b/website/docs/schema_evolution.md
index 68ad442e434..31dd73662fc 100755
--- a/website/docs/schema_evolution.md
+++ b/website/docs/schema_evolution.md
@@ -207,11 +207,11 @@ val data1 = Seq(Row("row_1", "part_0", 0L, "bob", "v_0",
0),
var dfFromData1 = spark.createDataFrame(data1, schema)
dfFromData1.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD.key, "preComb").
- option(RECORDKEY_FIELD.key, "rowId").
- option(PARTITIONPATH_FIELD.key, "partitionId").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
option("hoodie.index.type","SIMPLE").
- option(TBL_NAME.key, tableName).
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
@@ -266,11 +266,11 @@ val data2 = Seq(Row("row_2", "part_0", 5L, "john", "v_3",
3L, "newField_1"),
var dfFromData2 = spark.createDataFrame(data2, newSchema)
dfFromData2.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD.key, "preComb").
- option(RECORDKEY_FIELD.key, "rowId").
- option(PARTITIONPATH_FIELD.key, "partitionId").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
option("hoodie.index.type","SIMPLE").
- option(TBL_NAME.key, tableName).
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
diff --git a/website/docs/syncing_metastore.md
b/website/docs/syncing_metastore.md
index 2c5866b82a0..e39c5f39337 100644
--- a/website/docs/syncing_metastore.md
+++ b/website/docs/syncing_metastore.md
@@ -229,12 +229,12 @@ var dfFromData0 = spark.createDataFrame(data0,schema)
dfFromData0.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "preComb").
- option(RECORDKEY_FIELD_OPT_KEY, "rowId").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionId").
- option(TABLE_NAME, tableName).
- option(TABLE_TYPE.key, COW_TABLE_TYPE_OPT_VAL).
- option(OPERATION_OPT_KEY, "upsert").
+ option("hoodie.datasource.write.precombine.field", "preComb").
+ option("hoodie.datasource.write.recordkey.field", "rowId").
+ option("hoodie.datasource.write.partitionpath.field", "partitionId").
+ option("hoodie.table.name", tableName).
+ option("hoodie.datasource.write.table.type", "COPY_ON_WRITE").
+ option("hoodie.datasource.write.operation", "upsert").
option("hoodie.index.type","SIMPLE").
option("hoodie.datasource.write.hive_style_partitioning","true").
option("hoodie.datasource.hive_sync.jdbcurl","jdbc:hive2://hiveserver:10000/").
diff --git a/website/docs/troubleshooting.md b/website/docs/troubleshooting.md
index 6398cfc7245..db93a76d187 100644
--- a/website/docs/troubleshooting.md
+++ b/website/docs/troubleshooting.md
@@ -101,11 +101,11 @@ Unless Hive sync is enabled, the dataset written by Hudi
using one of the method
val hudiSnapshotQueryDF = spark
.read()
.format("hudi")
- .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(),
DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL())
+ .option("hoodie.datasource.query.type", "snapshot")
.load(basePath)
val hudiIncQueryDF = spark.read().format("hudi")
- .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY(),
DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL())
- .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(),
<beginInstantTime>)
+ .option("hoodie.datasource.query.type", "incremental")
+ .option("hoodie.datasource.read.begin.instanttime", <beginInstantTime>)
.load(basePath);
```
diff --git a/website/docs/write_operations.md b/website/docs/write_operations.md
index 056d99c5f2b..04a7a8b63a8 100644
--- a/website/docs/write_operations.md
+++ b/website/docs/write_operations.md
@@ -29,7 +29,7 @@ Hudi supports implementing two types of deletes on data
stored in Hudi tables, b
- **Soft Deletes** : Retain the record key and just null out the values for
all the other fields.
This can be achieved by ensuring the appropriate fields are nullable in the
table schema and simply upserting the table after setting these fields to null.
- **Hard Deletes** : This method entails completely eradicating all evidence
of a record from the table, including any duplicates. There are three distinct
approaches to accomplish this:
- - Using DataSource, set `OPERATION_OPT_KEY` to `DELETE_OPERATION_OPT_VAL`.
This will remove all the records in the DataSet being submitted.
+ - Using DataSource, set `"hoodie.datasource.write.operation"` to `"delete"`.
This will remove all the records in the DataSet being submitted.
- Using DataSource, set `PAYLOAD_CLASS_OPT_KEY` to
`"org.apache.hudi.EmptyHoodieRecordPayload"`. This will remove all the records
in the DataSet being submitted.
- Using DataSource or Hudi Streamer, add a column named `_hoodie_is_deleted`
to DataSet. The value of this column must be set to `true` for all the records
to be deleted and either `false` or left null for any records which are to be
upserted.
diff --git a/website/docs/writing_data.md b/website/docs/writing_data.md
index 582d13bbd06..308de8ca78a 100644
--- a/website/docs/writing_data.md
+++ b/website/docs/writing_data.md
@@ -14,33 +14,29 @@ There are a number of options available:
**`HoodieWriteConfig`**:
-**TABLE_NAME** (Required)<br/>
+**TABLE_NAME** <br/>
**`DataSourceWriteOptions`**:
-**RECORDKEY_FIELD_OPT_KEY** (Required): Primary key field(s). Record keys
uniquely identify a record/row within each partition. If one wants to have a
global uniqueness, there are two options. You could either make the dataset
non-partitioned, or, you can leverage Global indexes to ensure record keys are
unique irrespective of the partition path. Record keys can either be a single
column or refer to multiple columns. `KEYGENERATOR_CLASS_OPT_KEY` property
should be set accordingly based o [...]
+**RECORDKEY_FIELD**: Primary key field(s). Record keys uniquely identify a
record/row within each partition. If one wants to have a global uniqueness,
there are two options. You could either make the dataset non-partitioned, or,
you can leverage Global indexes to ensure record keys are unique irrespective
of the partition path. Record keys can either be a single column or refer to
multiple columns. `KEYGENERATOR_CLASS_OPT_KEY` property should be set
accordingly based on whether it is a s [...]
Default value: `"uuid"`<br/>
-**PARTITIONPATH_FIELD_OPT_KEY** (Required): Columns to be used for
partitioning the table. To prevent partitioning, provide empty string as value
eg: `""`. Specify partitioning/no partitioning using
`KEYGENERATOR_CLASS_OPT_KEY`. If partition path needs to be url encoded, you
can set `URL_ENCODE_PARTITIONING_OPT_KEY`. If synchronizing to hive, also
specify using `HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.`<br/>
+**PARTITIONPATH_FIELD**: Columns to be used for partitioning the table. To
prevent partitioning, provide empty string as value eg: `""`. Specify
partitioning/no partitioning using `KEYGENERATOR_CLASS_OPT_KEY`. If partition
path needs to be url encoded, you can set `URL_ENCODE_PARTITIONING_OPT_KEY`. If
synchronizing to hive, also specify using
`HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.`<br/>
Default value: `"partitionpath"`<br/>
-**PRECOMBINE_FIELD_OPT_KEY** (Required): When two records within the same
batch have the same key value, the record with the largest value from the field
specified will be choosen. If you are using default payload of
OverwriteWithLatestAvroPayload for HoodieRecordPayload (`WRITE_PAYLOAD_CLASS`),
an incoming record will always takes precendence compared to the one in storage
ignoring this `PRECOMBINE_FIELD_OPT_KEY`. <br/>
+**PRECOMBINE_FIELD**: When two records within the same batch have the same key
value, the record with the largest value from the field specified will be
choosen. If you are using default payload of OverwriteWithLatestAvroPayload for
HoodieRecordPayload (`WRITE_PAYLOAD_CLASS`), an incoming record will always
takes precendence compared to the one in storage ignoring this
`PRECOMBINE_FIELD_OPT_KEY`. <br/>
Default value: `"ts"`<br/>
-**OPERATION_OPT_KEY**: The [write operations](/docs/write_operations) to
use.<br/>
+**OPERATION**: The [write operations](/docs/write_operations) to use.<br/>
Available values:<br/>
-`UPSERT_OPERATION_OPT_VAL` (default), `BULK_INSERT_OPERATION_OPT_VAL`,
`INSERT_OPERATION_OPT_VAL`, `DELETE_OPERATION_OPT_VAL`
+`"upsert"` (default), `"bulk_insert"`, `"insert"`, `"delete"`
-**TABLE_TYPE_OPT_KEY**: The [type of table](/docs/concepts#table-types) to
write to. Note: After the initial creation of a table, this value must stay
consistent when writing to (updating) the table using the Spark
`SaveMode.Append` mode.<br/>
+**TABLE_TYPE**: The [type of table](/docs/concepts#table-types) to write to.
Note: After the initial creation of a table, this value must stay consistent
when writing to (updating) the table using the Spark `SaveMode.Append`
mode.<br/>
Available values:<br/>
[`COW_TABLE_TYPE_OPT_VAL`](/docs/concepts#copy-on-write-table) (default),
[`MOR_TABLE_TYPE_OPT_VAL`](/docs/concepts#merge-on-read-table)
-**KEYGENERATOR_CLASS_OPT_KEY**: Refer to [Key
Generation](/docs/key_generation) section below.
-
-**HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY**: If using hive, specify if the
table should or should not be partitioned.<br/>
-Available values:<br/>
-`classOf[MultiPartKeysValueExtractor].getCanonicalName` (default),
`classOf[SlashEncodedDayPartitionValueExtractor].getCanonicalName`,
`classOf[TimestampBasedKeyGenerator].getCanonicalName`,
`classOf[NonPartitionedExtractor].getCanonicalName`,
`classOf[GlobalDeleteKeyGenerator].getCanonicalName` (to be used when
`OPERATION_OPT_KEY` is set to `DELETE_OPERATION_OPT_VAL`)
+**KEYGENERATOR_CLASS_NAME**: Refer to [Key Generation](/docs/key_generation)
section below.
Example:
@@ -50,10 +46,10 @@ Upsert a DataFrame, specifying the necessary field names
for `recordKey => _row_
inputDF.write()
.format("hudi")
.options(clientOpts) //Where clientOpts is of type Map[String, String].
clientOpts can include any other options necessary.
- .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key")
- .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(),
"partition")
- .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp")
- .option(HoodieWriteConfig.TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "_row_key")
+ .option("hoodie.datasource.write.partitionpath.field", "partition")
+ .option("hoodie.datasource.write.precombine.field", "timestamp")
+ .option("hoodie.table.name", tableName)
.mode(SaveMode.Append)
.save(basePath);
```
@@ -75,10 +71,10 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Overwrite).
save(basePath)
```
@@ -200,11 +196,11 @@ val inserts =
convertToStringList(dataGen.generateInserts(10))
val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2))
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY,"insert_overwrite_table").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation","insert_overwrite_table").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -258,11 +254,11 @@ val df = spark.
filter("partitionpath = 'americas/united_states/san_francisco'")
df.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY,"insert_overwrite").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation","insert_overwrite").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
@@ -315,18 +311,18 @@ val softDeleteDf = nullifyColumns.
// simply upsert the table after setting these fields to null
softDeleteDf.write.format("hudi").
options(getQuickstartWriteConfigs).
- option(OPERATION_OPT_KEY, "upsert").
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, tableName).
+ option("hoodie.datasource.write.operation", "upsert").
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", tableName).
mode(Append).
save(basePath)
```
- **Hard Deletes** : A stronger form of deletion is to physically remove any
trace of the record from the table. This can be achieved in 3 different ways.
-1. Using Datasource, set `OPERATION_OPT_KEY` to `DELETE_OPERATION_OPT_VAL`.
This will remove all the records in the DataSet being submitted.
+1. Using Datasource, set `"hoodie.datasource.write.operation"` to `"delete"`.
This will remove all the records in the DataSet being submitted.
Example, first read in a dataset:
```scala
@@ -349,11 +345,11 @@ val deletes = dataGen.generateDeletes(df.collectAsList())
val df = spark.read.json(spark.sparkContext.parallelize(deletes, 2));
df.write.format("org.apache.hudi").
options(getQuickstartWriteConfigs).
-option(OPERATION_OPT_KEY,"delete").
-option(PRECOMBINE_FIELD_OPT_KEY, "ts").
-option(RECORDKEY_FIELD_OPT_KEY, "uuid").
-option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
-option(TABLE_NAME, tableName).
+option("hoodie.datasource.write.operation","delete").
+option("hoodie.datasource.write.precombine.field", "ts").
+option("hoodie.datasource.write.recordkey.field", "uuid").
+option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+option("hoodie.table.name", tableName).
mode(Append).
save(basePath);
```
@@ -430,16 +426,16 @@ Read more in-depth details about concurrency control in
the [concurrency control
```java
inputDF.write.format("hudi")
.options(getQuickstartWriteConfigs)
- .option(PRECOMBINE_FIELD_OPT_KEY, "ts")
+ .option("hoodie.datasource.write.precombine.field", "ts")
.option("hoodie.cleaner.policy.failed.writes", "LAZY")
.option("hoodie.write.concurrency.mode",
"optimistic_concurrency_control")
.option("hoodie.write.lock.zookeeper.url", "zookeeper")
.option("hoodie.write.lock.zookeeper.port", "2181")
.option("hoodie.write.lock.zookeeper.lock_key", "test_table")
.option("hoodie.write.lock.zookeeper.base_path", "/test")
- .option(RECORDKEY_FIELD_OPT_KEY, "uuid")
- .option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath")
- .option(TABLE_NAME, tableName)
+ .option("hoodie.datasource.write.recordkey.field", "uuid")
+ .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
+ .option("hoodie.table.name", tableName)
.mode(Overwrite)
.save(basePath)
```
diff --git a/website/docs/writing_tables_streaming_writes.md
b/website/docs/writing_tables_streaming_writes.md
index 4bb43bb696b..77ff044ca63 100644
--- a/website/docs/writing_tables_streaming_writes.md
+++ b/website/docs/writing_tables_streaming_writes.md
@@ -36,10 +36,10 @@ val df = spark.readStream.
// write stream to new hudi table
df.writeStream.format("hudi").
options(getQuickstartWriteConfigs).
- option(PRECOMBINE_FIELD_OPT_KEY, "ts").
- option(RECORDKEY_FIELD_OPT_KEY, "uuid").
- option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath").
- option(TABLE_NAME, streamingTableName).
+ option("hoodie.datasource.write.precombine.field", "ts").
+ option("hoodie.datasource.write.recordkey.field", "uuid").
+ option("hoodie.datasource.write.partitionpath.field", "partitionpath").
+ option("hoodie.table.name", streamingTableName).
outputMode("append").
option("path", baseStreamingPath).
option("checkpointLocation", checkpointLocation).