ravipesala commented on a change in pull request #3210: [CARBONDATA-3375] 
[CARBONDATA-3376] Fix GC Overhead limit exceeded issue and partition column as 
range column issue
URL: https://github.com/apache/carbondata/pull/3210#discussion_r282474383
 
 

 ##########
 File path: 
integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
 ##########
 @@ -433,75 +442,118 @@ class CarbonMergerRDD[K, V](
     val newRanges = allRanges.filter { range =>
       range != null
     }
-    carbonInputSplits.foreach { split =>
-      var dataFileFooter: DataFileFooter = null
-      if (null == rangeColumn) {
-        val taskNo = getTaskNo(split, partitionTaskMap, counter)
-        var sizeOfSplit = split.getDetailInfo.getBlockSize
-        val splitList = taskIdMapping.get(taskNo)
-        noOfBlocks += 1
+    val noOfSplitsPerTask = Math.ceil(carbonInputSplits.size / 
defaultParallelism)
+    var taskCount = 0
+    // In case of range column if only one data value is present then we try to
+    // divide the splits to different tasks in order to avoid single task 
creation
+    // and load on single executor
+    if (singleRange) {
+      var filterExpr = CarbonCompactionUtil
 
 Review comment:
   For a single range, there is no need to add a filter expression.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to