[
https://issues.apache.org/jira/browse/DRILL-1629?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14194274#comment-14194274
]
Tomer Shiran commented on DRILL-1629:
-------------------------------------
The plans were being truncated. Here they are again:
{code}
0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*)
reviews FROM mongo.yelp.users u, dfs.yelp.`yelp_academic_dataset_review.json` r
WHERE u.user_id = r.user_id GROUP BY u.user_id, u.name ORDER BY reviews DESC
LIMIT 10;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(user_id=[$0], name=[$1], reviews=[$2])
00-02 SelectionVectorRemover
00-03 Limit(fetch=[10])
00-04 SingleMergeExchange(sort0=[2 DESC])
01-01 SelectionVectorRemover
01-02 TopN(limit=[10])
01-03 HashToRandomExchange(dist0=[[$2]])
02-01 HashAgg(group=[{0, 1}], reviews=[COUNT()])
02-02 Project(user_id=[$0], name=[$1])
02-03 HashJoin(condition=[=($0, $2)], joinType=[inner])
02-05 HashToRandomExchange(dist0=[[$0]])
03-01 Scan(groupscan=[MongoGroupScan
[MongoScanSpec=MongoScanSpec [dbName=yelp, collectionName=users, filters=null],
columns=[SchemaPath [`user_id`], SchemaPath [`name`]]]])
02-04 HashToRandomExchange(dist0=[[$0]])
04-01 Project(T9¦¦*=[$0])
04-02 Project(T9¦¦*=[$0], T9¦¦user_id=[$1])
04-03 Scan(groupscan=[EasyGroupScan
[selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json,
numFiles=1, columns = [SchemaPath [`*`]]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "mongo-scan",
"@id" : 196609,
"mongoScanSpec" : {
"dbName" : "yelp",
"collectionName" : "users",
"filters" : null
},
"storage" : {
"type" : "mongo",
"connection" : "mongodb://localhost:27017/",
"enabled" : true
},
"columns" : [ "`user_id`", "`name`" ],
"cost" : 252898.0
}, {
"pop" : "hash-to-random-exchange",
"@id" : 131077,
"child" : 196609,
"expr" : "hash(`user_id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 252898.0
}, {
"pop" : "fs-scan",
"@id" : 262147,
"files" : [
"file:/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "file:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"yelp" : {
"location" : "/Users/tshiran/Development/yelp",
"writable" : false,
"storageformat" : "json"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`*`" ],
"selectionRoot" :
"/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json",
"cost" : 1021244.0
}, {
"pop" : "project",
"@id" : 262146,
"exprs" : [ {
"ref" : "`T9¦¦*`",
"expr" : "`*`"
} ],
"child" : 262147,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "project",
"@id" : 262145,
"exprs" : [ {
"ref" : "`T9¦¦*`",
"expr" : "`T9¦¦*`"
} ],
"child" : 262146,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "hash-to-random-exchange",
"@id" : 131076,
"child" : 262145,
"expr" : "hash(`T9¦¦*`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "hash-join",
"@id" : 131075,
"left" : 131077,
"right" : 131076,
"conditions" : [ {
"relationship" : "==",
"left" : "`user_id`",
"right" : "`T9¦¦*`"
} ],
"joinType" : "INNER",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "project",
"@id" : 131074,
"exprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
} ],
"child" : 131075,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "hash-aggregate",
"@id" : 131073,
"child" : 131074,
"cardinality" : 1.0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 765933.0,
"groupByExprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
} ],
"aggrExprs" : [ {
"ref" : "`reviews`",
"expr" : "count(1) "
} ]
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65539,
"child" : 131073,
"expr" : "hash(`reviews`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "top-n",
"@id" : 65538,
"child" : 65539,
"orderings" : [ {
"order" : "DESC",
"expr" : "`reviews`",
"nullDirection" : "UNSPECIFIED"
} ],
"reverse" : false,
"limit" : 10,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "selection-vector-remover",
"@id" : 65537,
"child" : 65538,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "single-merge-exchange",
"@id" : 4,
"child" : 65537,
"orderings" : [ {
"order" : "DESC",
"expr" : "`reviews`",
"nullDirection" : "UNSPECIFIED"
} ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "limit",
"@id" : 3,
"child" : 4,
"first" : 0,
"last" : 10,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "selection-vector-remover",
"@id" : 2,
"child" : 3,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "project",
"@id" : 1,
"exprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
}, {
"ref" : "`reviews`",
"expr" : "`reviews`"
} ],
"child" : 2,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
} ]
} |
+------------+------------+
1 row selected (0.342 seconds)
0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*)
reviews FROM dfs.yelp.`yelp_academic_dataset_user.json` u,
dfs.yelp.`yelp_academic_dataset_review.json` r WHERE u.user_id = r.user_id
GROUP BY u.user_id, u.name ORDER BY reviews DESC LIMIT 10;
+------------+------------+
| text | json |
+------------+------------+
| 00-00 Screen
00-01 Project(user_id=[$0], name=[$1], reviews=[$2])
00-02 SelectionVectorRemover
00-03 Limit(fetch=[10])
00-04 SingleMergeExchange(sort0=[2 DESC])
01-01 SelectionVectorRemover
01-02 TopN(limit=[10])
01-03 HashToRandomExchange(dist0=[[$2]])
02-01 HashAgg(group=[{0, 1}], reviews=[COUNT()])
02-02 Project(user_id=[$0], name=[$1])
02-03 HashJoin(condition=[=($0, $2)], joinType=[inner])
02-05 HashToRandomExchange(dist0=[[$0]])
03-01 Scan(groupscan=[EasyGroupScan
[selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json,
numFiles=1, columns = [SchemaPath [`user_id`], SchemaPath [`name`]]]])
02-04 Project(user_id0=[$0])
02-06 HashToRandomExchange(dist0=[[$0]])
04-01 Scan(groupscan=[EasyGroupScan
[selectionRoot=/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json,
numFiles=1, columns = [SchemaPath [`user_id`]]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 262145,
"files" : [
"file:/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "file:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"yelp" : {
"location" : "/Users/tshiran/Development/yelp",
"writable" : false,
"storageformat" : "json"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`user_id`" ],
"selectionRoot" :
"/Users/tshiran/Development/yelp/yelp_academic_dataset_review.json",
"cost" : 1021244.0
}, {
"pop" : "hash-to-random-exchange",
"@id" : 131078,
"child" : 262145,
"expr" : "hash(`user_id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "project",
"@id" : 131076,
"exprs" : [ {
"ref" : "`user_id0`",
"expr" : "`user_id`"
} ],
"child" : 131078,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "fs-scan",
"@id" : 196609,
"files" : [
"file:/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "file:///",
"workspaces" : {
"root" : {
"location" : "/",
"writable" : false,
"storageformat" : null
},
"tmp" : {
"location" : "/tmp",
"writable" : true,
"storageformat" : "csv"
},
"yelp" : {
"location" : "/Users/tshiran/Development/yelp",
"writable" : false,
"storageformat" : "json"
}
},
"formats" : {
"psv" : {
"type" : "text",
"extensions" : [ "tbl" ],
"delimiter" : "|"
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"tsv" : {
"type" : "text",
"extensions" : [ "tsv" ],
"delimiter" : "\t"
},
"parquet" : {
"type" : "parquet"
},
"json" : {
"type" : "json"
}
}
},
"format" : {
"type" : "json"
},
"columns" : [ "`user_id`", "`name`" ],
"selectionRoot" :
"/Users/tshiran/Development/yelp/yelp_academic_dataset_user.json",
"cost" : 115995.0
}, {
"pop" : "hash-to-random-exchange",
"@id" : 131077,
"child" : 196609,
"expr" : "hash(`user_id`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 115995.0
}, {
"pop" : "hash-join",
"@id" : 131075,
"left" : 131077,
"right" : 131076,
"conditions" : [ {
"relationship" : "==",
"left" : "`user_id`",
"right" : "`user_id0`"
} ],
"joinType" : "INNER",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "project",
"@id" : 131074,
"exprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
} ],
"child" : 131075,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 1021244.0
}, {
"pop" : "hash-aggregate",
"@id" : 131073,
"child" : 131074,
"cardinality" : 1.0,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 765933.0,
"groupByExprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
} ],
"aggrExprs" : [ {
"ref" : "`reviews`",
"expr" : "count(1) "
} ]
}, {
"pop" : "hash-to-random-exchange",
"@id" : 65539,
"child" : 131073,
"expr" : "hash(`reviews`) ",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "top-n",
"@id" : 65538,
"child" : 65539,
"orderings" : [ {
"order" : "DESC",
"expr" : "`reviews`",
"nullDirection" : "UNSPECIFIED"
} ],
"reverse" : false,
"limit" : 10,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "selection-vector-remover",
"@id" : 65537,
"child" : 65538,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "single-merge-exchange",
"@id" : 4,
"child" : 65537,
"orderings" : [ {
"order" : "DESC",
"expr" : "`reviews`",
"nullDirection" : "UNSPECIFIED"
} ],
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "limit",
"@id" : 3,
"child" : 4,
"first" : 0,
"last" : 10,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "selection-vector-remover",
"@id" : 2,
"child" : 3,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "project",
"@id" : 1,
"exprs" : [ {
"ref" : "`user_id`",
"expr" : "`user_id`"
}, {
"ref" : "`name`",
"expr" : "`name`"
}, {
"ref" : "`reviews`",
"expr" : "`reviews`"
} ],
"child" : 2,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
}, {
"pop" : "screen",
"@id" : 0,
"child" : 1,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 102124.4
} ]
} |
+------------+------------+
1 row selected (0.26 seconds)
0: jdbc:drill:zk=local>
{code}
> Cast issue during query
> -----------------------
>
> Key: DRILL-1629
> URL: https://issues.apache.org/jira/browse/DRILL-1629
> Project: Apache Drill
> Issue Type: Bug
> Reporter: Tomer Shiran
>
> Imported Yelp user.json file into a local MongoDB instance (via mongoimport).
> Ran the following query to find the names of the users who had the most
> reviews:
> {code}
> 0: jdbc:drill:zk=local> SELECT u.user_id, u.name, count(*) reviews FROM
> mongo.yelp.users u, dfs.yelp.`yelp_academic_dataset_review.json` r WHERE
> u.user_id = r.user_id GROUP BY u.user_id, u.name ORDER BY reviews DESC LIMIT
> 10;
> Query failed: Failure while running fragment. Failure finding function that
> runtime code generation expected. Signature: compare_to(
> VARCHAR:OPTIONALINT:OPTIONAL, ) returns INT:REQUIRED
> [c796f1fb-b73a-49e8-8b4a-ed973d89b7d3]
> Error: exception while executing query: Failure while trying to get next
> result batch. (state=,code=0)
> {code}
> Here's the explain plan:
> {code}
> 0: jdbc:drill:zk=local> EXPLAIN PLAN FOR SELECT u.user_id, u.name, count(*)
> reviews FROM mongo.yelp.users u, dfs.yelp.`yelp_academic_dataset_review.json`
> r WHERE u.user_id = r.user_id GROUP BY u.user_id, u.name ORDER BY reviews
> DESC LIMIT 10;
> +------------+------------+
> | text | json |
> +------------+------------+
> | 00-00 Screen
> 00-01 Project(user_id=[$0], name=[$1], reviews=[$2])
> 00-02 SelectionVectorRemover
> 00-03 Limit(fetch=[10])
> 00-04 SingleMergeExchange(sort0=[2 DESC])
> 01-01 SelectionVectorRemover
> 01-02 TopN(limit=[10])
> 01-03 HashToRandomExchange(dist0=[[$2]])
> 02-01 HashAgg(group=[{0, 1}], reviews=[COUNT()])
> 02-02 Project(user_id=[$0], name=[$1])
> 02-03 HashJoin(condition=[=($0, $2)], joinType=[inner])
> 02-05 HashToRandomExchange(dist0=[[$0]])
> 03-01 Scan(groupscan=[MongoGroupScan
> [MongoScanSpec=MongoScanSpec [dbName=yelp, collectionName=users,
> filters=null], columns=[SchemaPath [`user_id`], SchemaPath [`name`]]]])
> 02-04 HashToRandomExchange(dist0=[[$0]])
> 04-01 Project(T8¦¦*=[$0])
> 04-02 Project(T8¦¦*=[$0], T8¦¦user_id=[$ |
> +------------+------------+
> 1 row selected (0.42 seconds)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)