Hello all,
I am using Drill 1.8 to query about 1GB of data, stored as a .tsv file in
Amazon S3. I am using Drill's default configuration with the S3 storage
plugin that comes out of the box. The Drillbits are configured on a 5-node
cluster with 32GB RAM and 4 vCPUs.
I see that the query `select * from xxx;` takes 23 minutes to fetch 1,040,000 rows.
Is this the expected behaviour?
I am looking for any quick tuning that can improve the performance or any
other suggestions.
Attached is the JSON profile for this query.
Regards,
Projjwal
{
"id": {
"part1": 2834241350655354400,
"part2": -4719640768589854000
},
"type": 1,
"start": 1487585409966,
"end": 1487586748105,
"query": "select * from `xxx`",
"plan": "00-00 Screen : rowType = RecordType(ANY *): rowcount = 1.0704562E7, cumulative cost = {1.17750182E7 rows, 1.17750182E7 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 187\n00-01 Project(*=[$0]) : rowType = RecordType(ANY *): rowcount = 1.0704562E7, cumulative cost = {1.0704562E7 rows, 1.0704562E7 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 186\n00-02 Scan(groupscan=[EasyGroupScan [selectionRoot=s3a://xxx.tsv, numFiles=1, columns=[`*`], files=[s3a://xxx.tsv]]]) : rowType = (DrillRecordRow[*]): rowcount = 1.0704562E7, cumulative cost = {1.0704562E7 rows, 1.0704562E7 cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 185\n",
"foreman": {
"address": "xxx",
"userPort": 31010,
"controlPort": 31011,
"dataPort": 31012
},
"state": 2,
"totalFragments": 1,
"finishedFragments": 0,
"fragmentProfile": [
{
"majorFragmentId": 0,
"minorFragmentProfile": [
{
"state": 3,
"minorFragmentId": 0,
"operatorProfile": [
{
"inputProfile": [
{
"records": 1040000,
"batches": 129,
"schemas": 1
}
],
"operatorId": 2,
"operatorType": 28,
"setupNanos": 0,
"processNanos": 50858446809,
"peakLocalMemoryAllocated": 15646720,
"waitNanos": 1257947908700
},
{
"inputProfile": [
{
"records": 1040000,
"batches": 129,
"schemas": 1
}
],
"operatorId": 1,
"operatorType": 10,
"setupNanos": 3929932,
"processNanos": 26307751,
"peakLocalMemoryAllocated": 9142272,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 1040000,
"batches": 129,
"schemas": 1
}
],
"operatorId": 0,
"operatorType": 13,
"setupNanos": 0,
"processNanos": 38391526,
"peakLocalMemoryAllocated": 9142272,
"metric": [
{
"metricId": 0,
"longValue": 1095420252
}
],
"waitNanos": 19474468
}
],
"startTime": 1487585439164,
"endTime": 1487586748101,
"memoryUsed": 0,
"maxMemoryUsed": 21979712,
"endpoint": {
"address": "xxx",
"userPort": 31010,
"controlPort": 31011,
"dataPort": 31012
},
"lastUpdate": 1487586748102,
"lastProgress": 1487586748102
}
]
}
],
"user": "anonymous"
}