[GitHub] spark pull request #23176: [SPARK-26211][SQL] Fix InSet for binary, and stru...

cloud-fan Thu, 29 Nov 2018 00:03:01 -0800

Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23176#discussion_r237382990
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
 ---
    @@ -367,11 +367,29 @@ case class InSet(child: Expression, hset: Set[Any]) 
extends UnaryExpression with
       }
     
       @transient lazy val set: Set[Any] = child.dataType match {
    -    case _: AtomicType => hset
    +    case t: AtomicType if !t.isInstanceOf[BinaryType] => hset
         case _: NullType => hset
         case _ =>
    +      val ord = TypeUtils.getInterpretedOrdering(child.dataType)
    +      val ordering = if (hasNull) {
    +        new Ordering[Any] {
    +          override def compare(x: Any, y: Any): Int = {
    +            if (x == null && y == null) {
    +              0
    +            } else if (x == null) {
    +              -1
    +            } else if (y == null) {
    +              1
    +            } else {
    +              ord.compare(x, y)
    +            }
    +          }
    +        }
    +      } else {
    +        ord
    +      }
           // for structs use interpreted ordering to be able to compare 
UnsafeRows with non-UnsafeRows
    -      TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ 
hset
    +      TreeSet.empty(ordering) ++ hset
    --- End diff --
    
    and update eval to
    ```
    if (value == null) {
      null
    } else if (set.contains(value)) {
      true
    } else if (hasNull) {
      null
    } else {
      false
    }
    ```



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #23176: [SPARK-26211][SQL] Fix InSet for binary, and stru...

Reply via email to