Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/23176#discussion_r237382990 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala --- @@ -367,11 +367,29 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with } @transient lazy val set: Set[Any] = child.dataType match { - case _: AtomicType => hset + case t: AtomicType if !t.isInstanceOf[BinaryType] => hset case _: NullType => hset case _ => + val ord = TypeUtils.getInterpretedOrdering(child.dataType) + val ordering = if (hasNull) { + new Ordering[Any] { + override def compare(x: Any, y: Any): Int = { + if (x == null && y == null) { + 0 + } else if (x == null) { + -1 + } else if (y == null) { + 1 + } else { + ord.compare(x, y) + } + } + } + } else { + ord + } // for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows - TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset + TreeSet.empty(ordering) ++ hset --- End diff -- and update eval to ``` if (value == null) { null } else if (set.contains(value)) { true } else if (hasNull) { null } else { false } ```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org