Indhumathi27 commented on a change in pull request #4107:
URL: https://github.com/apache/carbondata/pull/4107#discussion_r597099633
##########
File path:
integration/spark/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala
##########
@@ -276,7 +290,25 @@ class CarbonTableCompactor(
segmentMetaDataAccumulator)
} else {
if (mergeRDD != null) {
- mergeRDD.collect
+ val result = mergeRDD.collect
Review comment:
The current code does not handle multiple partitions properly: the Add/Drop
partition commands are invoked once per partition instead of once for all of
them. Please change the code as below:
```scala
if (!updatePartitionSpecs.isEmpty) {
val tableIdentifier = new
TableIdentifier(carbonTable.getTableName,
Some(carbonTable.getDatabaseName))
// To update partitionSpec in hive metastore, drop and add with
latest path.
val oldPartition: util.List[TablePartitionSpec] =
new util.ArrayList[TablePartitionSpec]()
val newPartition: util.List[TablePartitionSpec] =
new util.ArrayList[TablePartitionSpec]()
updatePartitionSpecs.asScala.foreach {
partitionSpec =>
var spec = PartitioningUtils.parsePathFragment(
String.join(CarbonCommonConstants.FILE_SEPARATOR,
partitionSpec.getPartitions))
oldPartition.add(spec)
val addPartition =
mergeRDD.checkAndUpdatePartitionLocation(partitionSpec)
spec = PartitioningUtils.parsePathFragment(
String.join(CarbonCommonConstants.FILE_SEPARATOR,
addPartition.getPartitions))
newPartition.add(spec)
}
AlterTableDropPartitionCommand(
tableIdentifier,
oldPartition.asScala,
true, false, true).run(sqlContext.sparkSession)
AlterTableAddPartitionCommand(tableIdentifier,
newPartition.asScala.map(p => (p, None)),
false).run(sqlContext.sparkSession)
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]