Github user gengliangwang commented on a diff in the pull request:
https://github.com/apache/spark/pull/21442#discussion_r191585661
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
---
@@ -219,10 +219,15 @@ object ReorderAssociativeOperator extends
Rule[LogicalPlan] {
object OptimizeIn extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
case q: LogicalPlan => q transformExpressionsDown {
- case In(v, list) if list.isEmpty && !v.nullable => FalseLiteral
+ case In(v, list) if list.isEmpty =>
+ // When v is not nullable, the following expression will be optimized
+ // to FalseLiteral which is tested in OptimizeInSuite.scala
+ If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType))
case expr @ In(v, list) if expr.inSetConvertible =>
val newList = ExpressionSet(list).toSeq
- if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) {
+ if (newList.length == 1) {
+ EqualTo(v, newList.head)
+ } else if (newList.size >
SQLConf.get.optimizerInSetConversionThreshold) {
val hSet = newList.map(e => e.eval(EmptyRow))
InSet(v, HashSet() ++ hSet)
} else if (newList.size < list.size) {
--- End diff --
nit: On line 235, the comment
```// newList.length == list.length```
can be updated to
```// newList.length == list.length && newList.length > 1```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]