[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-17 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r633543389



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +797,40 @@ case class Range(
 }
   }
 
+  private def computeHistogramStatistics() = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+val binArray = new Array[HistogramBin](numBins)
+val lowerIndexInitial = percentileArray.head
+val lowerBinValueInitial = getRangeValue(0)
+percentileArray.tail.zipWithIndex
+  .foldLeft[(Double, Long)]((lowerIndexInitial, lowerBinValueInitial)) {
+case ((lowerIndex, lowerBinValue), (upperIndex, binId)) =>
+  // Integer index for upper and lower values in the bin.
+  val upperIndexPos = math.ceil(upperIndex).toInt - 1
+  val lowerIndexPos = math.ceil(lowerIndex).toInt - 1
+
+  val upperBinValue = getRangeValue(math.max(upperIndexPos, 0))
+  val ndv = math.max(upperIndexPos - lowerIndexPos, 1)
+  binArray(binId) = HistogramBin(lowerBinValue, upperBinValue, ndv)
+  // Update the lowerIndex and lowerBinValue with upper ones for the 
next iteration.
+  (upperIndex, upperBinValue)
+  }
+Histogram(height, binArray)

Review comment:
   nit: how about rewriting it like this?
   ```
   
   val lowerIndexInitial = percentileArray.head
   val lowerBinValueInitial = getRangeValue(0)
   val (_, _, binArray) = percentileArray.tail
 .foldLeft((lowerIndexInitial, lowerBinValueInitial, 
Seq.empty[HistogramBin])) {
 case ((lowerIndex, lowerBinValue, binAr), upperIndex) =>
   // Integer index for upper and lower values in the bin.
   val upperIndexPos = math.ceil(upperIndex).toInt - 1
   val lowerIndexPos = math.ceil(lowerIndex).toInt - 1
   
   val upperBinValue = getRangeValue(math.max(upperIndexPos, 0))
   val ndv = math.max(upperIndexPos - lowerIndexPos, 1)
   // Update the lowerIndex and lowerBinValue with upper ones for the 
next iteration.
   (upperIndex, upperBinValue, binAr :+ HistogramBin(lowerBinValue, 
upperBinValue, ndv))
   }
   Histogram(height, binArray.toArray)
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-17 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r633543389



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +797,40 @@ case class Range(
 }
   }
 
+  private def computeHistogramStatistics() = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+val binArray = new Array[HistogramBin](numBins)
+val lowerIndexInitial = percentileArray.head
+val lowerBinValueInitial = getRangeValue(0)
+percentileArray.tail.zipWithIndex
+  .foldLeft[(Double, Long)]((lowerIndexInitial, lowerBinValueInitial)) {
+case ((lowerIndex, lowerBinValue), (upperIndex, binId)) =>
+  // Integer index for upper and lower values in the bin.
+  val upperIndexPos = math.ceil(upperIndex).toInt - 1
+  val lowerIndexPos = math.ceil(lowerIndex).toInt - 1
+
+  val upperBinValue = getRangeValue(math.max(upperIndexPos, 0))
+  val ndv = math.max(upperIndexPos - lowerIndexPos, 1)
+  binArray(binId) = HistogramBin(lowerBinValue, upperBinValue, ndv)
+  // Update the lowerIndex and lowerBinValue with upper ones for the 
next iteration.
+  (upperIndex, upperBinValue)
+  }
+Histogram(height, binArray)

Review comment:
   nit: how about rewriting it like this?
   ```
   
   val lowerIndexInitial = percentileArray.head
   val lowerBinValueInitial = getRangeValue(0)
   val (_, _, binArray) = percentileArray.tail
 .foldLeft((lowerIndexInitial, lowerBinValueInitial, 
Seq.empty[HistogramBin])) {
 case ((lowerIndex, lowerBinValue, binAr), upperIndex) =>
   // Integer index for upper and lower values in the bin.
   val upperIndexPos = math.ceil(upperIndex).toInt - 1
   val lowerIndexPos = math.ceil(lowerIndex).toInt - 1
   
   val upperBinValue = getRangeValue(math.max(upperIndexPos, 0))
   val ndv = math.max(upperIndexPos - lowerIndexPos, 1)
   // Update the lowerIndex and lowerBinValue with upper ones for the 
next iteration.
   (upperIndex, upperBinValue, binAr :+ HistogramBin(lowerBinValue, 
upperBinValue, ndv))
   }
   Histogram(height, binArray.toArray)
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631552581



##
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
##
@@ -77,12 +92,21 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
 max = Some(4),
 nullCount = Some(0),
 maxLen = Some(LongType.defaultSize),
-avgLen = Some(LongType.defaultSize))
-checkStats(range, expectedStatsCboOn = rangeStats, expectedStatsCboOff = 
rangeStats)
+avgLen = Some(LongType.defaultSize),
+histogram = histogram)
+val extraConfig = Map(SQLConf.HISTOGRAM_ENABLED.key -> "true",
+  SQLConf.HISTOGRAM_NUM_BINS.key -> "3")
+checkStats(range, expectedStatsCboOn = rangeStats,
+  expectedStatsCboOff = rangeStats, extraConfig)
   }
 
   test("range with negative step") {
 val range = Range(-10, -20, -2, None)
+val histogramBins = new Array[HistogramBin](3)
+histogramBins(0) = HistogramBin(-18.0, -16.0, 2)
+histogramBins(1) = HistogramBin(-16.0, -12.0, 2)
+histogramBins(2) = HistogramBin(-12.0, -10.0, 1)

Review comment:
   Could you check if `range.numElements` and the total `ndv` are the same?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631552121



##
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
##
@@ -283,14 +326,17 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
   private def checkStats(
   plan: LogicalPlan,
   expectedStatsCboOn: Statistics,
-  expectedStatsCboOff: Statistics): Unit = {
-withSQLConf(SQLConf.CBO_ENABLED.key -> "true") {
+  expectedStatsCboOff: Statistics,
+  extraConfigs: Map[String, String] = Map.empty): Unit = {
+

Review comment:
   nit: remove this unnecessary change




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631552022



##
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
##
@@ -97,12 +121,24 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
 max = Some(-10),
 nullCount = Some(0),
 maxLen = Some(LongType.defaultSize),
-avgLen = Some(LongType.defaultSize))
-checkStats(range, expectedStatsCboOn = rangeStats, expectedStatsCboOff = 
rangeStats)
+avgLen = Some(LongType.defaultSize),
+histogram = histogram)
+val extraConfig = Map(SQLConf.HISTOGRAM_ENABLED.key -> "true",
+  SQLConf.HISTOGRAM_NUM_BINS.key -> "3")
+checkStats(range, expectedStatsCboOn = rangeStats,
+  expectedStatsCboOff = rangeStats, extraConfig)
   }
 
   test("range with negative step where end minus start not divisible by step") 
{
+
 val range = Range(-10, -20, -3, None)
+
+val histogramBins = new Array[HistogramBin](3)
+histogramBins(0) = HistogramBin(-19.0, -16.0, 2)
+histogramBins(1) = HistogramBin(-16.0, -13.0, 1)
+histogramBins(2) = HistogramBin(-13.0, -10.0, 1)

Review comment:
   ditto




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631551951



##
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
##
@@ -77,12 +92,21 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
 max = Some(4),
 nullCount = Some(0),
 maxLen = Some(LongType.defaultSize),
-avgLen = Some(LongType.defaultSize))
-checkStats(range, expectedStatsCboOn = rangeStats, expectedStatsCboOff = 
rangeStats)
+avgLen = Some(LongType.defaultSize),
+histogram = histogram)
+val extraConfig = Map(SQLConf.HISTOGRAM_ENABLED.key -> "true",
+  SQLConf.HISTOGRAM_NUM_BINS.key -> "3")
+checkStats(range, expectedStatsCboOn = rangeStats,
+  expectedStatsCboOff = rangeStats, extraConfig)
   }
 
   test("range with negative step") {
 val range = Range(-10, -20, -2, None)
+val histogramBins = new Array[HistogramBin](3)
+histogramBins(0) = HistogramBin(-18.0, -16.0, 2)
+histogramBins(1) = HistogramBin(-16.0, -12.0, 2)
+histogramBins(2) = HistogramBin(-12.0, -10.0, 1)

Review comment:
   nit:
   ```
   val histogramBins = Array(
 HistogramBin(-18.0, -16.0, 2),
 HistogramBin(-16.0, -12.0, 2),
 HistogramBin(-12.0, -10.0, 1))
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631551107



##
File path: 
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala
##
@@ -97,12 +121,24 @@ class BasicStatsEstimationSuite extends PlanTest with 
StatsEstimationTestBase {
 max = Some(-10),
 nullCount = Some(0),
 maxLen = Some(LongType.defaultSize),
-avgLen = Some(LongType.defaultSize))
-checkStats(range, expectedStatsCboOn = rangeStats, expectedStatsCboOff = 
rangeStats)
+avgLen = Some(LongType.defaultSize),
+histogram = histogram)
+val extraConfig = Map(SQLConf.HISTOGRAM_ENABLED.key -> "true",
+  SQLConf.HISTOGRAM_NUM_BINS.key -> "3")
+checkStats(range, expectedStatsCboOn = rangeStats,
+  expectedStatsCboOff = rangeStats, extraConfig)
   }
 
   test("range with negative step where end minus start not divisible by step") 
{
+

Review comment:
   nit: please revert this change.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631548474



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +797,38 @@ case class Range(
 }
   }
 
+  private def computeHistogramStatistics() = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+val binArray = new Array[HistogramBin](numBins)
+var lowerIndex = percentileArray.head
+var lowerBinValue = getRangeValue(0)
+percentileArray.tail.zipWithIndex.foreach { case (upperIndex, binId) =>
+  // Integer index for upper and lower values in the bin.
+  val upperIndexPos = math.ceil(upperIndex).toInt - 1
+  val lowerIndexPos = math.ceil(lowerIndex).toInt - 1
+
+  val upperBinValue = getRangeValue(math.max(upperIndexPos, 0))
+  val ndv = math.max(upperIndexPos - lowerIndexPos, 1)
+  binArray(binId) = HistogramBin(lowerBinValue, upperBinValue, ndv)
+
+  lowerBinValue = upperBinValue
+  lowerIndex = upperIndex
+}
+Histogram(height, binArray)
+  }
+
+  // Utility method to compute histogram
+  private def getRangeValue(index: Int): Long = {

Review comment:
   ```
 private def getRangeValue(index: Int): Long = {
   assert(index >= 0, "index must be greater than and equal to 0")
   if (step == 0) {
 start + (numElements.toLong - index - 1) * step
   } else {
 start + index * step
   }
 }
   ```
   ?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631546439



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +797,38 @@ case class Range(
 }
   }
 
+  private def computeHistogramStatistics() = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+val binArray = new Array[HistogramBin](numBins)
+var lowerIndex = percentileArray.head
+var lowerBinValue = getRangeValue(0)

Review comment:
   It looks we can remove `var` by using `foldLeft` instead of `foreach`. 
Could you?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r631512825



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +793,36 @@ case class Range(
 }
   }
 
+  private def getHistogramStatistics = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+var binId = 0

Review comment:
   This is not performance-intensive code, so could you use 
`zipWithIndex+map` instead of `while`?

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +793,36 @@ case class Range(
 }
   }
 
+  private def getHistogramStatistics = {
+val numBins = conf.histogramNumBins
+val height = numElements.toDouble / numBins
+val percentileArray = (0 to numBins).map(i => i * height).toArray
+
+var binId = 0
+val binArray = new Array[HistogramBin](numBins)
+var lowerIndex = percentileArray(0)
+var lowerBinValue = getRangeValue(0)
+while (binId < numBins) {
+  val upperIndex = percentileArray(binId + 1)
+  val upperBinValue = getRangeValue(math.max(math.ceil(upperIndex).toInt - 
1, 0))
+  val ndv = math.max(math.ceil(upperIndex).toInt - 
math.ceil(lowerIndex).toInt, 1)

Review comment:
   Please do not repeat `math.ceil(upperIndex)` here.

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -789,6 +793,36 @@ case class Range(
 }
   }
 
+  private def getHistogramStatistics = {

Review comment:
   `getHistogramStatistics` -> `computeHistogramStatistics()`

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -774,13 +774,17 @@ case class Range(
   } else {
 (start + (numElements - 1) * step, start)
   }
+
+  val histogram = getHistogramStatistics

Review comment:
   I feel its better to wrap a return value of `computeHistogramStatistics 
` with `Some` in the caller side because the method always return a value.
   
https://github.com/apache/spark/pull/32498/files#diff-0ac2c89f8cc0d00e8fe7717b01697f36f20fe8abf2def09bcfde0ad84b30e467R814

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -774,13 +774,17 @@ case class Range(
   } else {
 (start + (numElements - 1) * step, start)
   }
+
+  val histogram = getHistogramStatistics

Review comment:
   Let's check `conf.histogramEnabled` here;
   ```
   val histogram = if (conf.histogramEnabled) {
 Some(computeHistogramStatistics())
   } else {
 None
   }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] maropu commented on a change in pull request #32498: [SPARK-35368][SQL] Update histogram statistics for RANGE operator for stats estimation

2021-05-12 Thread GitBox


maropu commented on a change in pull request #32498:
URL: https://github.com/apache/spark/pull/32498#discussion_r630844778



##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -759,6 +759,10 @@ case class Range(
   }
 
   override def computeStats(): Statistics = {
+if (!conf.cboEnabled) {
+  return Statistics(sizeInBytes = LongType.defaultSize * numElements)
+}
+

Review comment:
   We need this?

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -767,14 +771,44 @@ case class Range(
   } else {
 (start + (numElements - 1) * step, start)
   }
-  val colStat = ColumnStat(
+  var colStat = ColumnStat(

Review comment:
   Please avoid using `var` where possible.

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -767,14 +771,44 @@ case class Range(
   } else {
 (start + (numElements - 1) * step, start)
   }
-  val colStat = ColumnStat(
+  var colStat = ColumnStat(
 distinctCount = Some(numElements),
 max = Some(maxVal),
 min = Some(minVal),
 nullCount = Some(0),
 avgLen = Some(LongType.defaultSize),
 maxLen = Some(LongType.defaultSize))
 
+  if (conf.histogramEnabled) {
+

Review comment:
   please remove this blank.

##
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
##
@@ -767,14 +771,44 @@ case class Range(
   } else {
 (start + (numElements - 1) * step, start)
   }
-  val colStat = ColumnStat(
+  var colStat = ColumnStat(
 distinctCount = Some(numElements),
 max = Some(maxVal),
 min = Some(minVal),
 nullCount = Some(0),
 avgLen = Some(LongType.defaultSize),
 maxLen = Some(LongType.defaultSize))
 
+  if (conf.histogramEnabled) {
+
+def getRangeValue(index: Int): Long = {

Review comment:
   How about extracting this inner method as a private method?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org