[ https://issues.apache.org/jira/browse/DRILL-1417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14133416#comment-14133416 ]
Aman Sinha commented on DRILL-1417: ----------------------------------- I was able to run the query successfully on the tutorial dataset used by Bob and doing my own build with the 0.5-r1 branch (details of commit level below). It's not clear to me what commit level is being used for the RPM build where this query failed. Can someone check and update this bug? git.commit.id.abbrev=f8d38b6 git.commit.id=f8d38b64e6e19367da22f5e65fde01f050102296 git.commit.message.short=DRILL-1407\: Add scan size calculator option to HBase storage plugin configuration 0: jdbc:drill:zk=local> select dir1 month_no, count(*) month_count from dfs.`/Users/asinha/drill-beta-demo/data/output/flat/logs` where dir0=2014 group by dir1 order by dir1; +------------+-------------+ | month_no | month_count | +------------+-------------+ | 1 | 1741 | | 2 | 1538 | | 3 | 1689 | | 4 | 1675 | | 5 | 1738 | | 6 | 1653 | | 7 | 1745 | | 8 | 221 | +------------+-------------+ 8 rows selected (4.974 seconds) > Query against directory of JSON files no longer works in 0.5 > ------------------------------------------------------------ > > Key: DRILL-1417 > URL: https://issues.apache.org/jira/browse/DRILL-1417 > Project: Apache Drill > Issue Type: Bug > Components: Query Planning & Optimization > Affects Versions: 0.5.0 > Environment: Apache Drill Sandbox > (http://builds.qa.lab/vm/ova/?C=M;O=A) running on VirtualBox VM. > You can also access the same logs data set as follows: > ssh root@andypdemo0 [pswd = mapr] > cd /mapr/demorig/data/flat > Reporter: Bob Rumsby > > This query was working on an earlier build (somewhere between the 0.4 and 0.5 > releases) but no longer works: > 0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where > dir0=2014 group by dir1 order by dir1; > Query failed: Failure while running fragment. 
json record reader requires at > least a column [90feae61-fcfa-427f-b0bf-4a06563aa9a9] > Error: exception while executing query: Failure while trying to get next > result batch. (state=,code=0) > Let me know if I should open a Jira, or if I need to rewrite the query. I am > using the latest build of the MapR Sandbox for Drill from > http://builds.qa.lab/vm/ova/?C=M;O=A > The old result of the query is here: > 0: jdbc:drill:> select dir1 month_no, count(*) month_count from logs where > dir0=2014 group by dir1 order by dir1; > +------------+-------------+ > | month_no | month_count | > +------------+-------------+ > | 1 | 1741 | > | 2 | 1538 | > | 3 | 1689 | > | 4 | 1675 | > | 5 | 1738 | > | 6 | 1653 | > | 7 | 1745 | > | 8 | 221 | > +------------+-------------+ > 8 rows selected > EXPLAIN TEXT > --------------------- > 0: jdbc:drill:> explain plan for select dir1 month_no, count(*) month_count > from logs where dir0=2014 group by dir1 order by dir1; > +------------+------------+ > | text | json | > +------------+------------+ > | 00-00 Screen > 00-01 Project(month_no=[$0], month_count=[$1]) > 00-02 SelectionVectorRemover > 00-03 Sort(sort0=[$0], dir0=[ASC]) > 00-04 Project(month_no=[$0], month_count=[$1]) > 00-05 HashAgg(group=[{0}], month_count=[COUNT()]) > 00-06 Project(dir1=[$0]) > 00-07 SelectionVectorRemover > 00-08 Filter(condition=[=(CAST($1):INTEGER, 2014)]) > 00-09 Scan(groupscan=[EasyGroupScan > [selectionRoot=/mapr/demo.mapr.com/data/flat/logs, columns = [SchemaPath > [`dir1`], SchemaPath [`dir0`]]]]) > | { > "head" : { > "version" : 1, > "generator" : { > "type" : "ExplainHandler", > "info" : "" > }, > "type" : "APACHE_DRILL_PHYSICAL", > "options" : [ ], > "queue" : 0, > "resultMode" : "EXEC" > }, > "graph" : [ { > "pop" : "fs-scan", > "@id" : 9, > "files" : [ "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/8/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/11/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/6/log.json", > 
"maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/12/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/3/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/4/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/5/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/1/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/7/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/2/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/9/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2012/10/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/8/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/11/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/6/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/12/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/3/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/4/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/5/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/1/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/7/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/2/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/9/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2013/10/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/8/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/6/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/3/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/4/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/5/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/1/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/7/log.json", > "maprfs:/mapr/demo.mapr.com/data/flat/logs/2014/2/log.json" ], > "storage" : { > "type" : "file", > "enabled" : true, > "connection" : "maprfs:///", > "workspaces" : { > 
"root" : { > "location" : "/", > "writable" : false, > "storageformat" : null > }, > "data" : { > "location" : "/mapr/demo.mapr.com/data", > "writable" : false, > "storageformat" : null > }, > "clicks" : { > "location" : "/mapr/demo.mapr.com/data/nested", > "writable" : true, > "storageformat" : "parquet" > }, > "logs" : { > "location" : "/mapr/demo.mapr.com/data/flat", > "writable" : true, > "storageformat" : "parquet" > }, > "views" : { > "location" : "/mapr/demo.mapr.com/data/views", > "writable" : true, > "storageformat" : "parquet" > }, > "tmp" : { > "location" : "/tmp", > "writable" : true, > "storageformat" : "csv" > } > }, > "formats" : { > "psv" : { > "type" : "text", > "extensions" : [ "tbl" ], > "delimiter" : "|" > }, > "csv" : { > "type" : "text", > "extensions" : [ "csv" ], > "delimiter" : "," > }, > "tsv" : { > "type" : "text", > "extensions" : [ "tsv" ], > "delimiter" : "\t" > }, > "parquet" : { > "type" : "parquet" > }, > "json" : { > "type" : "json" > } > } > }, > "format" : { > "type" : "json" > }, > "columns" : [ "`dir1`", "`dir0`" ], > "selectionRoot" : "/mapr/demo.mapr.com/data/flat/logs", > "cost" : 7731.0 > }, { > "pop" : "filter", > "@id" : 8, > "child" : 9, > "expr" : "equal(cast( (`dir0` ) as INT ), 2014) ", > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 1159.6499999999999 > }, { > "pop" : "selection-vector-remover", > "@id" : 7, > "child" : 8, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 1159.6499999999999 > }, { > "pop" : "project", > "@id" : 6, > "exprs" : [ { > "ref" : "`dir1`", > "expr" : "`dir1`" > } ], > "child" : 7, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 1159.6499999999999 > }, { > "pop" : "hash-aggregate", > "@id" : 5, > "child" : 6, > "cardinality" : 1.0, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 579.8249999999999, > "groupByExprs" : [ { > "ref" : "`dir1`", > "expr" : "`dir1`" > } ], > "aggrExprs" 
: [ { > "ref" : "`month_count`", > "expr" : "count(1) " > } ] > }, { > "pop" : "project", > "@id" : 4, > "exprs" : [ { > "ref" : "`month_no`", > "expr" : "`dir1`" > }, { > "ref" : "`month_count`", > "expr" : "`month_count`" > } ], > "child" : 5, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 115.96499999999999 > }, { > "pop" : "external-sort", > "@id" : 3, > "child" : 4, > "orderings" : [ { > "order" : "ASC", > "expr" : "`month_no`", > "nullDirection" : "UNSPECIFIED" > } ], > "reverse" : false, > "initialAllocation" : 20000000, > "maxAllocation" : 10000000000, > "cost" : 115.96499999999999 > }, { > "pop" : "selection-vector-remover", > "@id" : 2, > "child" : 3, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 115.96499999999999 > }, { > "pop" : "project", > "@id" : 1, > "exprs" : [ { > "ref" : "`month_no`", > "expr" : "`month_no`" > }, { > "ref" : "`month_count`", > "expr" : "`month_count`" > } ], > "child" : 2, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 115.96499999999999 > }, { > "pop" : "screen", > "@id" : 0, > "child" : 1, > "initialAllocation" : 1000000, > "maxAllocation" : 10000000000, > "cost" : 115.96499999999999 > } ] > } | > +------------+------------+ > 1 row selected (0.86 seconds) -- This message was sent by Atlassian JIRA (v6.3.4#6332)