Xiao Li created SPARK-25714:
-------------------------------

             Summary: Null Handling in the Optimizer rule BooleanSimplification
                 Key: SPARK-25714
                 URL: https://issues.apache.org/jira/browse/SPARK-25714
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 2.3.2, 2.2.2, 2.1.3, 2.0.2, 1.6.3, 2.4.0
            Reporter: Xiao Li
            Assignee: Xiao Li


{code}
scala> val df = Seq(("abc", 1), (null, 3)).toDF("col1", "col2")
df: org.apache.spark.sql.DataFrame = [col1: string, col2: int]

scala> df.write.mode("overwrite").parquet("/tmp/test1")
                                                                                
scala> val df2 = spark.read.parquet("/tmp/test1");
df2: org.apache.spark.sql.DataFrame = [col1: string, col2: int]

scala> df2.filter("col1 = 'abc' OR (col1 != 'abc' AND col2 == 3)").show()
+----+----+
|col1|col2|
+----+----+
| abc|   1|
|null|   3|
+----+----+
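{code}

The result above is incorrect: the row (null, 3) should not be returned. When col1 is NULL, both {{col1 = 'abc'}} and {{col1 != 'abc'}} evaluate to NULL, so the whole predicate evaluates to NULL rather than true and the filter should drop the row. The expected output contains only the row (abc, 1). The optimizer rule BooleanSimplification does not take NULL values into account when it simplifies this predicate.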


--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to