Davies Liu created SPARK-10859:
----------------------------------
Summary: Predicates pushed to InMemoryColumnarTableScan are not
evaluated correctly
Key: SPARK-10859
URL: https://issues.apache.org/jira/browse/SPARK-10859
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 1.5.0, 1.5.1
Reporter: Davies Liu
Assignee: Davies Liu
Priority: Blocker
{code}
var data01 = sqlContext.sql("select 1 as id, \"{\\\"animal\\\":{\\\"type\\\":
\\\"cat\\\"}},{\\\"animal\\\":{\\\"type\\\":
\\\"dog\\\"}},{\\\"animal\\\":{\\\"type\\\":
\\\"donkey\\\"}},{\\\"animal\\\":{\\\"type\\\":
\\\"turkey\\\"}},{\\\"animal\\\":{\\\"type\\\":
\\\"cat\\\"}},{\\\"animal\\\":{\\\"NOTANIMAL\\\": \\\"measuring tape\\\"}}\" as
field")
case class SubField(fieldling: String)
var data02 = data01.explode(data01("field")){ case Row(field: String) =>
field.split(",").map(SubField(_))}
.selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as
animal")
var data03 = data01.explode(data01("field")){ case Row(field: String) =>
field.split(",").map(SubField(_))}
.selectExpr("id","fieldling","get_json_object(fieldling,\"$.animal.type\") as
animal")
data02.cache()
data02.select($"animal" === "cat").explain
== Physical Plan ==
Project [(animal#25 = cat) AS (animal = cat)#263]
InMemoryColumnarTableScan [animal#25], (InMemoryRelation
[id#20,fieldling#24,animal#25], true, 10000, StorageLevel(true, true, false,
true, 1), (TungstenProject
[id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]),
None)
data02.select($"animal" === "cat").show
+--------------+
|(animal = cat)|
+--------------+
|          true|
|         false|
|         false|
|         false|
|          true|
|          null|
+--------------+
data02.filter($"animal" === "cat").explain
== Physical Plan ==
Filter (animal#25 = cat)
InMemoryColumnarTableScan [id#20,fieldling#24,animal#25], [(animal#25 = cat)],
(InMemoryRelation [id#20,fieldling#24,animal#25], true, 10000,
StorageLevel(true, true, false, true, 1), (TungstenProject
[id#20,fieldling#24,get_json_object(fieldling#24,$.animal.type) AS animal#25]),
None)
data02.filter($"animal" === "cat").show
+---+---------+------+
| id|fieldling|animal|
+---+---------+------+
+---+---------+------+
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org