Bob Rumsby created DRILL-1417:
---------------------------------
Summary: Query against directory of JSON files no longer works in
0.5
Key: DRILL-1417
URL: https://issues.apache.org/jira/browse/DRILL-1417
Project: Apache Drill
Issue Type: Bug
Components: Query Planning & Optimization
Affects Versions: 0.5.0
Environment: Apache Drill Sandbox
(http://builds.qa.lab/vm/ova/?C=M;O=A) running on a VirtualBox VM.
You can also access the same logs data set as follows:
ssh root@andypdemo0 [pswd = mapr]
cd /mapr/demorig/data/flat
Reporter: Bob Rumsby
This query was working on an earlier build (somewhere between the 0.4 and 0.5
releases) but no longer works:
0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where
dir0=2014 group by dir1 order by dir1;
Query failed: Failure while running fragment. json record reader requires at
least a column [90feae61-fcfa-427f-b0bf-4a06563aa9a9]
Error: exception while executing query: Failure while trying to get next result
batch. (state=,code=0)
Let me know if I should open a Jira, or if I need to rewrite the query. I am
using the latest build of the MapR Sandbox for Drill from
http://builds.qa.lab/vm/ova/?C=M;O=A
The result returned by the earlier, working build was:
0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where
dir0=2014 group by dir1 order by dir1;
+------------+-------------+
| month_no   | month_count |
+------------+-------------+
| 1          | 1741        |
| 2          | 1538        |
| 3          | 1689        |
| 4          | 1675        |
| 5          | 1738        |
| 6          | 1653        |
| 7          | 1745        |
| 8          | 221         |
+------------+-------------+
8 rows selected
EXPLAIN TEXT
---------------------
0: jdbc:drill:> explain plan for select dir1 month_no, count(*) month_count
from logs where dir0=2014 group by dir1 order by dir1;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(month_no=[$0], month_count=[$1])
00-02 SelectionVectorRemover
00-03 Sort(sort0=[$0], dir0=[ASC])
00-04 Project(month_no=[$0], month_count=[$1])
00-05 HashAgg(group=[{0}], month_count=[COUNT()])
00-06 Project(dir1=[$0])
00-07 SelectionVectorRemover
00-08 Filter(condition=[=(CAST($1):INTEGER, 2014)])
00-09 Scan(groupscan=[EasyGroupScan
[selectionRoot=/mapr/demo.mapr.com/data/flat/logs, columns = [SchemaPath
[`dir1`], SchemaPath [`dir0`]]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 9,
"files" : [ "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/8/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/11/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/6/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/12/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/3/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/4/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/5/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/1/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/7/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/2/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/9/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/10/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/8/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/11/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/6/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/12/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/3/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/4/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/5/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/1/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/7/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/2/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/9/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/10/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/8/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/6/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/3/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/4/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/5/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/1/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/7/log.json",
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/2/log.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "maprfs:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"data" : {
"location" : "/mapr/demo.mapr.com/data",
"writable" : false,
"storageformat" : null
},
"clicks" : {
"location" : "/mapr/demo.mapr.com/data/nested",
"writable" : true,
"storageformat" : "parquet"
},
"logs" : {
"location" : "/mapr/demo.mapr.com/data/flat",
"writable" : true,
"storageformat" : "parquet"
},
"views" : {
"location" : "/mapr/demo.mapr.com/data/views",
"writable" : true,
"storageformat" : "parquet"
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`dir1`", "`dir0`" ],
"selectionRoot" : "/mapr/demo.mapr.com/data/flat/logs",
"cost" : 7731.0
}, {
"pop" : "filter",
"@id" : 8,
"child" : 9,
"expr" : "equal(cast( (`dir0` ) as INT ), 2014) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1159.6499999999999
}, {
"pop" : "selection-vector-remover",
"@id" : 7,
"child" : 8,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1159.6499999999999
}, {
"pop" : "project",
"@id" : 6,
"exprs" : [ {
"ref" : "`dir1`",
"expr" : "`dir1`"
} ],
"child" : 7,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1159.6499999999999
}, {
"pop" : "hash-aggregate",
"@id" : 5,
"child" : 6,
"cardinality" : 1.0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 579.8249999999999,
"groupByExprs" : [ {
"ref" : "`dir1`",
"expr" : "`dir1`"
} ],
"aggrExprs" : [ {
"ref" : "`month_count`",
"expr" : "count(1) "
} ]
}, {
"pop" : "project",
"@id" : 4,
"exprs" : [ {
"ref" : "`month_no`",
"expr" : "`dir1`"
}, {
"ref" : "`month_count`",
"expr" : "`month_count`"
} ],
"child" : 5,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 115.96499999999999
}, {
"pop" : "external-sort",
"@id" : 3,
"child" : 4,
"orderings" : [ {
"order" : "ASC",
"expr" : "`month_no`",
"nullDirection" : "UNSPECIFIED"
} ],
"reverse" : false,
"initialAllocation" : 20000000,
"maxAllocation" : 10000000000,
"cost" : 115.96499999999999
}, {
"pop" : "selection-vector-remover",
"@id" : 2,
"child" : 3,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 115.96499999999999
}, {
"pop" : "project",
"@id" : 1,
"exprs" : [ {
"ref" : "`month_no`",
"expr" : "`month_no`"
}, {
"ref" : "`month_count`",
"expr" : "`month_count`"
} ],
"child" : 2,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 115.96499999999999
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 115.96499999999999
} ]
} |
+------------+------------+
1 row selected (0.86 seconds)
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)