This is an automated email from the ASF dual-hosted git repository. jackylk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push: new edfb30a [HOTFIX] Fixed task distribution issue in SegmentPruneRDD edfb30a is described below commit edfb30aff5e4231e4229808e0edacd603bc2366d Author: kunal642 <kunalkapoor...@gmail.com> AuthorDate: Thu Apr 16 11:29:36 2020 +0530 [HOTFIX] Fixed task distribution issue in SegmentPruneRDD Why is this PR needed? SI queries are degraded because getPreferredLocations is not overridden in SegmentPruneRDD, so tasks are dispatched to arbitrary executors instead of the executors that hold the relevant data. What changes were proposed in this PR? Override getPreferredLocations so that tasks are dispatched to the correct executors. Does this PR introduce any user interface change? No Is any new testcase added? No This closes #3712 --- .../org/apache/carbondata/indexserver/SegmentPruneRDD.scala | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/integration/spark/src/main/scala/org/apache/carbondata/indexserver/SegmentPruneRDD.scala b/integration/spark/src/main/scala/org/apache/carbondata/indexserver/SegmentPruneRDD.scala index 0fabb8d..cc882ff 100644 --- a/integration/spark/src/main/scala/org/apache/carbondata/indexserver/SegmentPruneRDD.scala +++ b/integration/spark/src/main/scala/org/apache/carbondata/indexserver/SegmentPruneRDD.scala @@ -32,6 +32,15 @@ class SegmentPruneRDD(@transient private val ss: SparkSession, indexInputFormat: IndexInputFormat) extends CarbonRDD[(String, SegmentWrapper)](ss, Nil) { + override protected def getPreferredLocations(split: Partition): Seq[String] = { + val locations = split.asInstanceOf[IndexRDDPartition].getLocations + if (locations != null) { + locations.toSeq + } else { + Seq() + } + } + override protected def internalGetPartitions: Array[Partition] = { new DistributedPruneRDD(ss, indexInputFormat).partitions }