cloud-fan commented on a change in pull request #24068: [SPARK-27105][SQL]
Optimize away exponential complexity in ORC predicate conversion
URL: https://github.com/apache/spark/pull/24068#discussion_r288217387
##########
File path:
sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
##########
@@ -63,55 +64,28 @@ private[sql] object OrcFilters extends OrcFiltersBase {
*/
def createFilter(schema: StructType, filters: Seq[Filter]):
Option[SearchArgument] = {
val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap
+ val orcFilterConverter = new OrcFilterConverter(dataTypeMap)
for {
// Combines all convertible filters using `And` to produce a single
conjunction
conjunction <- buildTree(convertibleFilters(schema, dataTypeMap,
filters))
// Then tries to build a single ORC `SearchArgument` for the conjunction
predicate
- builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
+ builder <- orcFilterConverter.buildSearchArgument(conjunction,
newBuilder)
} yield builder.build()
}
def convertibleFilters(
schema: StructType,
dataTypeMap: Map[String, DataType],
filters: Seq[Filter]): Seq[Filter] = {
- import org.apache.spark.sql.sources._
+ val orcFilterConverter = new OrcFilterConverter(dataTypeMap)
+ filters.flatMap(orcFilterConverter.trimUnconvertibleFilters)
+ }
- def convertibleFiltersHelper(
- filter: Filter,
- canPartialPushDown: Boolean): Option[Filter] = filter match {
- case And(left, right) =>
- val leftResultOptional = convertibleFiltersHelper(left,
canPartialPushDown)
- val rightResultOptional = convertibleFiltersHelper(right,
canPartialPushDown)
- (leftResultOptional, rightResultOptional) match {
- case (Some(leftResult), Some(rightResult)) => Some(And(leftResult,
rightResult))
- case (Some(leftResult), None) if canPartialPushDown =>
Some(leftResult)
- case (None, Some(rightResult)) if canPartialPushDown =>
Some(rightResult)
- case _ => None
- }
+}
- case Or(left, right) =>
- val leftResultOptional = convertibleFiltersHelper(left,
canPartialPushDown)
- val rightResultOptional = convertibleFiltersHelper(right,
canPartialPushDown)
- if (leftResultOptional.isEmpty || rightResultOptional.isEmpty) {
- None
- } else {
- Some(Or(leftResultOptional.get, rightResultOptional.get))
- }
- case Not(pred) =>
- val resultOptional = convertibleFiltersHelper(pred, canPartialPushDown
= false)
- resultOptional.map(Not)
- case other =>
- if (buildSearchArgument(dataTypeMap, other, newBuilder()).isDefined) {
- Some(other)
- } else {
- None
- }
- }
- filters.flatMap { filter =>
- convertibleFiltersHelper(filter, true)
- }
- }
+private class OrcFilterConverter(
+ val dataTypeMap: Map[String, DataType]
Review comment:
nit: keep the class definition on one line if possible
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org