liancheng commented on a change in pull request #24910: [SPARK-28108][SQL] 
Simplify OrcFilters
URL: https://github.com/apache/spark/pull/24910#discussion_r295427015
 
 

 ##########
 File path: 
sql/core/v1.2.1/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
 ##########
 @@ -64,26 +64,79 @@ private[sql] object OrcFilters extends OrcFiltersBase {
    */
   def createFilter(schema: StructType, filters: Seq[Filter]): 
Option[SearchArgument] = {
     val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap
-    val orcFilterConverter = new OrcFilterConverter(dataTypeMap)
     for {
-      // Combines all filters using `And` to produce a single conjunction
-      conjunction <- buildTree(filters)
-      // Then tries to build a single ORC `SearchArgument` for the conjunction 
predicate
-      builder <- orcFilterConverter.buildSearchArgument(conjunction, 
newBuilder)
+      // Combines all convertible filters using `And` to produce a single 
conjunction
+      conjunction <- buildTree(convertibleFilters(schema, dataTypeMap, 
filters))
+      // Then tries to build a single ORC `SearchArgument` for the conjunction 
predicate.
+      // The input predicate is fully convertible. There should not be any 
empty result in the
+      // following recursive method call `buildSearchArgument`.
+      builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
     } yield builder.build()
   }
 
   def convertibleFilters(
       schema: StructType,
       dataTypeMap: Map[String, DataType],
       filters: Seq[Filter]): Seq[Filter] = {
-    val orcFilterConverter = new OrcFilterConverter(dataTypeMap)
-    filters.flatMap(orcFilterConverter.trimUnconvertibleFilters)
-  }
+    import org.apache.spark.sql.sources._
 
-}
+    def convertibleFiltersHelper(
+        filter: Filter,
+        canPartialPushDown: Boolean): Option[Filter] = filter match {
+      // At here, it is not safe to just convert one side and remove the other 
side
+      // if we do not understand what the parent filters are.
+      //
+      // Here is an example used to explain the reason.
+      // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand 
how to
+      // convert b in ('1'). If we only convert a = 2, we will end up with a 
filter
+      // NOT(a = 2), which will generate wrong results.
+      //
+      // Pushing one side of AND down is only safe to do at the top level or 
in the child
+      // AND before hitting NOT or OR conditions, and in this case, the 
unsupported predicate
+      // can be safely removed.
+      case And(left, right) =>
+        val leftResultOptional = convertibleFiltersHelper(left, 
canPartialPushDown)
+        val rightResultOptional = convertibleFiltersHelper(right, 
canPartialPushDown)
+        (leftResultOptional, rightResultOptional) match {
+          case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, 
rightResult))
+          case (Some(leftResult), None) if canPartialPushDown => 
Some(leftResult)
+          case (None, Some(rightResult)) if canPartialPushDown => 
Some(rightResult)
+          case _ => None
+        }
 
-private class OrcFilterConverter(val dataTypeMap: Map[String, DataType]) {
+      // The Or predicate is convertible when both of its children can be 
pushed down.
+      // That is to say, if one/both of the children can be partially pushed 
down, the Or
+      // predicate can be partially pushed down as well.
+      //
+      // Here is an example used to explain the reason.
+      // Let's say we have
+      // (a1 AND a2) OR (b1 AND b2),
+      // a1 and b1 is convertible, while a2 and b2 is not.
+      // The predicate can be converted as
+      // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
+      // As per the logical in And predicate, we can push down (a1 OR b1).
+      case Or(left, right) =>
+        val leftResultOptional = convertibleFiltersHelper(left, 
canPartialPushDown)
+        val rightResultOptional = convertibleFiltersHelper(right, 
canPartialPushDown)
+        if (leftResultOptional.isEmpty || rightResultOptional.isEmpty) {
+          None
+        } else {
+          Some(Or(leftResultOptional.get, rightResultOptional.get))
+        }
 
 Review comment:
   This part can be simplified to:
   
   ```scala
   for {
     lhs <- convertibleFiltersHelper(left, canPartialPushDown)
     rhs <- convertibleFiltersHelper(right, canPartialPushDown)
   } yield Or(lhs, rhs)
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to