vinothchandar commented on a change in pull request #2225:
URL: https://github.com/apache/hudi/pull/2225#discussion_r528958557
##########
File path: hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -117,6 +134,8 @@ private[hudi] object HoodieSparkSqlWriter {
}
val commitActionType = DataSourceUtils.getCommitActionType(operation,
tableConfig.getTableType)
+ val partition_key =
parameters.get(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY).get
Review comment:
Could this follow camelCase naming (e.g. `partitionKey`), per Scala convention?
##########
File path: hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -55,6 +55,23 @@ private[hudi] object HoodieSparkSqlWriter {
private var tableExists: Boolean = false
private var asyncCompactionTriggerFnDefined: Boolean = false
+ def generateSchemaWithoutPartitionColumns(partitionParam: String, oldSchema:
Schema): Schema = {
+ val fieldsToRemove = new util.ArrayList[String]()
+ partitionParam.split(",").map(partitionField => partitionField.trim)
+ .filter(s => !s.isEmpty).map(field => fieldsToRemove.add(field))
+ HoodieAvroUtils.removeFields(oldSchema, fieldsToRemove)
+ }
+
+ def generateNewRecordForPartitionColumnsDrop(enableDropPartitionColumns:
Boolean,
Review comment:
Could we add the boolean as the third arg? That makes both
method signatures easier to read.
##########
File path: hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -55,6 +55,23 @@ private[hudi] object HoodieSparkSqlWriter {
private var tableExists: Boolean = false
private var asyncCompactionTriggerFnDefined: Boolean = false
+ def generateSchemaWithoutPartitionColumns(partitionParam: String, oldSchema:
Schema): Schema = {
+ val fieldsToRemove = new util.ArrayList[String]()
+ partitionParam.split(",").map(partitionField => partitionField.trim)
+ .filter(s => !s.isEmpty).map(field => fieldsToRemove.add(field))
+ HoodieAvroUtils.removeFields(oldSchema, fieldsToRemove)
+ }
+
+ def generateNewRecordForPartitionColumnsDrop(enableDropPartitionColumns:
Boolean,
+ oldRecord: GenericRecord,
partitionParam: String): GenericRecord = {
+ var record = oldRecord
+ if (enableDropPartitionColumns) {
+ val newSchema = generateSchemaWithoutPartitionColumns(partitionParam,
oldRecord.getSchema)
Review comment:
Doing this for every record is going to be expensive.
##########
File path: hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
##########
@@ -117,6 +134,8 @@ private[hudi] object HoodieSparkSqlWriter {
}
val commitActionType = DataSourceUtils.getCommitActionType(operation,
tableConfig.getTableType)
+ val partition_key =
parameters.get(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY).get
Review comment:
Does this just cover the case where a key generator is not used?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]