Davies Liu created SPARK-10859:
----------------------------------

             Summary: Predicates pushed to InMemoryColumnarTableScan are not 
evaluated correctly
                 Key: SPARK-10859
                 URL: https://issues.apache.org/jira/browse/SPARK-10859
             Project: Spark
          Issue Type: Bug
          Components: SQL
    Affects Versions: 1.5.0, 1.5.1
            Reporter: Davies Liu
            Assignee: Davies Liu
            Priority: Blocker


{code}
var data01 = sqlContext.sql("select 1 as id, \"{\\\"animal\\\":{\\\"type\\\": 
\\\"cat\\\"}},{\\\"animal\\\":{\\\"type\\\": 
\\\"dog\\\"}},{\\\"animal\\\":{\\\"type\\\": 
\\\"donkey\\\"}},{\\\"animal\\\":{\\\"type\\\": 
\\\"turkey\\\"}},{\\\"animal\\\":{\\\"type\\\": 
\\\"cat\\\"}},{\\\"animal\\\":{\\\"NOTANIMAL\\\": \\\"measuring tape\\\"}}\" as 
field")
case class SubField(fieldling: String)
var data02 = data01.explode(data01("field")){ case Row(field: String) => 
field.split(",").map(SubField(_))}
  .selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as 
animal") 
var data03 = data01.explode(data01("field")){ case Row(field: String) => 
field.split(",").map(SubField(_))}
  .selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as 
animal")
data02.cache()

data02.select($"animal" === "cat").explain
== Physical Plan ==
Project [(animal#25 = cat) AS (animal = cat)#263]
 InMemoryColumnarTableScan [animal#25], (InMemoryRelation 
[id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false, 
true, 1), (TungstenProject 
[id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), 
None)

data02.select($"animal" === "cat").show
+--------------+
|(animal = cat)|
+--------------+
|          true|
|         false|
|         false|
|         false|
|          true|
|          null|
+--------------+

data02.filter($"animal" === "cat").explain
== Physical Plan ==
Filter (animal#25 = cat)
 InMemoryColumnarTableScan [id#20,fieldling#24,animal#25], [(animal#25 = cat)], 
(InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000, 
StorageLevel(true, true, false, true, 1), (TungstenProject 
[id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]), 
None)

data02.filter($"animal" === "cat").show
+---+---------+------+
| id|fieldling|animal|
+---+---------+------+
+---+---------+------+
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to