nsivabalan commented on code in PR #9203:
URL: https://github.com/apache/hudi/pull/9203#discussion_r1268662043
##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndexFactory.java:
##########
@@ -44,8 +43,7 @@
*/
public final class SparkHoodieIndexFactory {
public static HoodieIndex createIndex(HoodieWriteConfig config) {
- boolean mergeIntoWrites =
config.getProps().getBoolean(HoodieInternalConfig.SQL_MERGE_INTO_WRITES.key(),
- HoodieInternalConfig.SQL_MERGE_INTO_WRITES.defaultValue());
+ boolean mergeIntoWrites =
config.getProps().getBoolean(HoodieWriteConfig.SPARK_SQL_MERGE_INTO_PREPPED_KEY,
false);
Review Comment:
Let's align the variable name with the config name, e.g.:
`boolean sqlMergeIntoPrepped`
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala:
##########
@@ -82,7 +82,7 @@ object HoodieWriterUtils {
hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED)
- hoodieConfig.setDefaultValue(ENABLE_OPTIMIZED_SQL_WRITES)
+ hoodieConfig.setDefaultValue(SPARK_SQL_OPTIMIZED_WRITES)
Review Comment:
Let's check whether setting this default is really required here.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -348,9 +356,8 @@ object HoodieSparkSqlWriter {
}
// Remove meta columns from writerSchema if isPrepped is true.
- val isPrepped =
hoodieConfig.getBooleanOrDefault(DATASOURCE_WRITE_PREPPED_KEY, false)
- val mergeIntoWrites =
parameters.getOrDefault(SQL_MERGE_INTO_WRITES.key(),
- SQL_MERGE_INTO_WRITES.defaultValue.toString).toBoolean
+ val isPrepped =
hoodieConfig.getBooleanOrDefault(SPARK_SQL_WRITE_PREPPED_KEY, false)
+ val mergeIntoWrites =
parameters.getOrDefault(SPARK_SQL_MERGE_INTO_PREPPED_KEY, "false").toBoolean
Review Comment:
Let's fix the variable name as well, so it matches the renamed config.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala:
##########
@@ -313,7 +313,7 @@ object DataSourceWriteOptions {
/**
* Config key with boolean value that indicates whether record being written
is already prepped.
*/
- val DATASOURCE_WRITE_PREPPED_KEY = "_hoodie.datasource.write.prepped";
+ val SPARK_SQL_WRITE_PREPPED_KEY = "_hoodie.spark.sql.writes.prepped";
Review Comment:
Minor: rename the constant to
`SPARK_SQL_WRITES_PREPPED_KEY` (plural "writes", matching the config string).
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestDeleteTable.scala:
##########
@@ -47,7 +47,7 @@ class TestDeleteTable extends HoodieSparkSqlTestBase {
""".stripMargin)
// test with optimized sql writes enabled / disabled.
- spark.sql(s"set
hoodie.spark.sql.writes.optimized.enable=$optimizedSqlEnabled")
+ spark.sql(s"set
hoodie.spark.sql.optimized.writes.enable=$optimizedSqlEnabled")
Review Comment:
Let's see if we can reference the config constant (instead of the hard-coded string) to avoid any missteps.
##########
hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java:
##########
@@ -161,7 +161,7 @@ public void run() throws Exception {
.option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "false")
.option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true")
.option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true")
- .option(DataSourceWriteOptions.ENABLE_OPTIMIZED_SQL_WRITES().key(),
"true")
+ .option(DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES().key(),
"true")
Review Comment:
For the default, let's use the actual default value instead of hard-coding it, e.g.:
`DataSourceWriteOptions.SPARK_SQL_OPTIMIZED_WRITES().defaultValue()`
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala:
##########
@@ -617,6 +622,8 @@ case class MergeIntoHoodieTableCommand(mergeInto:
MergeIntoTable) extends Hoodie
val hiveSyncConfig = buildHiveSyncConfig(sparkSession, hoodieCatalogTable,
tableConfig)
+ val enableOptimizedMerge =
sparkSession.sqlContext.conf.getConfString(SPARK_SQL_OPTIMIZED_WRITES.key(),
+ SPARK_SQL_OPTIMIZED_WRITES.defaultValue())
Review Comment:
Let's introduce
`val keyGeneratorClassName = ...`
and reuse it at line 636.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]