Did an explain plan on the query since the query never completes successfully
(I can't see the profile because of that), Here is the result of the explain
plan query
"head":{
"version":1,
"generator":{
"type":"ExplainHandler",
"info":""
},
"type":"APACHE_DRILL_PHYSICAL",
"options":[
],
"queue":0,
"hasResourcePlan":false,
"resultMode":"EXEC"
},
"graph":[
{
"pop":"parquet-scan",
"@id":327683,
"userName":"mapr",
"entries":[
{
"path":"maprfs:///iswdata/rj"
}
],
"storage":{
"type":"file",
"enabled":true,
"connection":"maprfs:///",
"config":null,
"workspaces":{
"root":{
"location":"/",
"writable":false,
"defaultInputFormat":null,
"allowAccessOutsideWorkspace":false
},
"tmp":{
"location":"/tmp",
"writable":true,
"defaultInputFormat":null,
"allowAccessOutsideWorkspace":false
}
},
"formats":{
"psv":{
"type":"text",
"extensions":[
"tbl"
],
"delimiter":"|"
},
"csv":{
"type":"text",
"extensions":[
"csv"
],
"delimiter":","
},
"tsv":{
"type":"text",
"extensions":[
"tsv"
],
"delimiter":"\t"
},
"parquet":{
"type":"parquet"
},
"json":{
"type":"json",
"extensions":[
"json"
]
},
"maprdb":{
"type":"maprdb"
}
}
},
"format":{
"type":"parquet"
},
"columns":[
"`unique_key`"
],
"selectionRoot":"maprfs:/iswdata/rj",
"filter":"true",
"fileSet":[
"/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
"/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
"/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
"/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
"/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
"/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
"/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
"/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
"/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
],
"cost":4.73824196E8
},
{
"pop":"hash-aggregate",
"@id":327682,
"child":327683,
"cardinality":1.0,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"aggPhase":"PHASE_1of2",
"groupByExprs":[
{
"ref":"`unique_key`",
"expr":"`unique_key`"
}
],
"aggrExprs":[
{
"ref":"`$f1`",
"expr":"count(`unique_key`) "
}
],
"cost":2.36912098E8
},
{
"pop":"project",
"@id":327681,
"exprs":[
{
"ref":"`unique_key`",
"expr":"`unique_key`"
},
{
"ref":"`$f1`",
"expr":"`$f1`"
},
{
"ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"expr":"hash32asdouble(`unique_key`, 1301011) "
}
],
"child":327682,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E7
},
{
"pop":"unordered-mux-exchange",
"@id":196609,
"child":327681,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E7
},
{
"pop":"hash-to-random-exchange",
"@id":65548,
"child":196609,
"expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E7
},
{
"pop":"project",
"@id":65547,
"exprs":[
{
"ref":"`unique_key`",
"expr":"`unique_key`"
},
{
"ref":"`$f1`",
"expr":"`$f1`"
}
],
"child":65548,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E7
},
{
"pop":"hash-aggregate",
"@id":65546,
"child":65547,
"cardinality":1.0,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"aggPhase":"PHASE_2of2",
"groupByExprs":[
{
"ref":"`unique_key`",
"expr":"`unique_key`"
}
],
"aggrExprs":[
{
"ref":"`$f1`",
"expr":"$sum0(`$f1`) "
}
],
"cost":2.36912098E7
},
{
"pop":"filter",
"@id":65545,
"child":65546,
"expr":"greater_than(`$f1`, 1) ",
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":2369120.98
},
{
"pop":"selection-vector-remover",
"@id":65544,
"child":65545,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":2369120.98
},
{
"pop":"project",
"@id":65542,
"exprs":[
{
"ref":"`unique_key`",
"expr":"`unique_key`"
}
],
"child":65544,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":2369120.98
},
{
"pop":"project",
"@id":65540,
"exprs":[
{
"ref":"`unique_key0`",
"expr":"`unique_key`"
}
],
"child":65542,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":2369120.98
},
{
"pop":"parquet-scan",
"@id":262147,
"userName":"mapr",
"entries":[
{
"path":"maprfs:///iswdata/rj"
}
],
"storage":{
"type":"file",
"enabled":true,
"connection":"maprfs:///",
"config":null,
"workspaces":{
"root":{
"location":"/",
"writable":false,
"defaultInputFormat":null,
"allowAccessOutsideWorkspace":false
},
"tmp":{
"location":"/tmp",
"writable":true,
"defaultInputFormat":null,
"allowAccessOutsideWorkspace":false
}
},
"formats":{
"psv":{
"type":"text",
"extensions":[
"tbl"
],
"delimiter":"|"
},
"csv":{
"type":"text",
"extensions":[
"csv"
],
"delimiter":","
},
"tsv":{
"type":"text",
"extensions":[
"tsv"
],
"delimiter":"\t"
},
"parquet":{
"type":"parquet"
},
"json":{
"type":"json",
"extensions":[
"json"
]
},
"maprdb":{
"type":"maprdb"
}
}
},
"format":{
"type":"parquet"
},
"columns":[
"`**`"
],
"selectionRoot":"maprfs:/iswdata/rj",
"filter":"true",
"fileSet":[
"/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet",
"/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet",
"/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet",
"/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet",
"/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet",
"/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet",
"/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet",
"/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet",
"/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet"
],
"cost":4.73824196E8
},
{
"pop":"project",
"@id":262146,
"exprs":[
{
"ref":"`T0¦¦**`",
"expr":"`**`"
},
{
"ref":"`unique_key`",
"expr":"`unique_key`"
}
],
"child":262147,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"project",
"@id":262145,
"exprs":[
{
"ref":"`T0¦¦**`",
"expr":"`T0¦¦**`"
},
{
"ref":"`unique_key`",
"expr":"`unique_key`"
},
{
"ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"expr":"hash32asdouble(`unique_key`, 1301011) "
}
],
"child":262146,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"unordered-mux-exchange",
"@id":131073,
"child":262145,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"hash-to-random-exchange",
"@id":65543,
"child":131073,
"expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`",
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"project",
"@id":65541,
"exprs":[
{
"ref":"`T0¦¦**`",
"expr":"`T0¦¦**`"
},
{
"ref":"`unique_key`",
"expr":"`unique_key`"
}
],
"child":65543,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"hash-join",
"@id":65539,
"left":65541,
"right":65540,
"conditions":[
{
"relationship":"EQUALS",
"left":"`unique_key`",
"right":"`unique_key0`"
}
],
"joinType":"INNER",
"isRowKeyJoin":false,
"joinControl":0,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":4.73824196E8
},
{
"pop":"limit",
"@id":65538,
"child":65539,
"first":0,
"last":40,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"selection-vector-remover",
"@id":65537,
"child":65538,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"union-exchange",
"@id":5,
"child":65537,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"limit",
"@id":4,
"child":5,
"first":0,
"last":40,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"selection-vector-remover",
"@id":3,
"child":4,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"project",
"@id":2,
"exprs":[
{
"ref":"`T0¦¦**`",
"expr":"`T0¦¦**`"
}
],
"child":3,
"outputProj":true,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"project",
"@id":1,
"exprs":[
{
"ref":"`**`",
"expr":"`T0¦¦**`"
}
],
"child":2,
"outputProj":false,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
},
{
"pop":"screen",
"@id":0,
"child":1,
"initialAllocation":1000000,
"maxAllocation":10000000000,
"cost":40.0
}
]
}
Please what exactly Am I looking for as to why it never completes. I can see
outofmemory errors but then Drill is running with 16gb on each node and there
are five nodes in the cluster
Best regards,
Peter Edike
Senior Software Engineer
Interswitch
Tel. | Mobile. | IP Phone.
Fax. | mailto:[email protected] | http://
http://www.interswitchgroup.com
InterswitchThis e-mail and all attachments transmitted with it remain the
property of InterSwitch Limited , the information contained herein are private
confidential and intended solely for the use of the addressee. If you have
received this e-mail in error, kindly notify the sender. If you are not the
addressee, you should not disseminate, distribute or copy this e-mail. Kindly
notify InterSwitch immediately by email if you have received this email in
error and delete this email and any attachment from your system Emails cannot
be guaranteed to be secure or error free as the message and any attachments
could be intercepted, corrupted, lost, delayed, incomplete or amended. the
contents of this email or its attachments have been scanned for all viruses and
all reasonable measures have been taken to ensure that no viruses are present.
InterSwitch Limited and its subsidiaries do not accept liability for damage
caused by this email or any attachments.
This message has been marked as CONFIDENTIAL on Tuesday, August 7, 2018 @
4:54:16 PM
-----Original Message-----
From: Kunal Khatua <[email protected]>
Sent: Monday, August 6, 2018 6:41 PM
To: [email protected]
Subject: Re: This query is so slow
Hi Peter
What does the profile for the query indicate?
Take a look at the operator overview. It will indicate which operator is using
the most CPU cycles. If the average and max processing times vary wildly, it
might be a problem of skew, where some fragments are doing a relatively
excessive work.
If that is the case, within that operator's profile segment, you can see the
distribution of the fragments and see which ones are the long pole in your run.
~ KK
On 8/6/2018 9:01:25 AM, Peter Edike <[email protected]> wrote:
Hello all,
The following query takes ages to complete in drill and more often that not,
fails
select * from dfs.`/iswdata/rj/201805` where unique_key in
(
select unique_key from dfs.`/iswdata/rj/201805` group by unique_key
having count(unique_key) > 1
)
limit 40
Please what can I do to improve the performance of this query
Kind Regards
Peter Edike
Senior Software Engineer
Research and Development, ENG
Engineering
Office NO:
Mobile NO:
Email: [email protected]
[mailto:[email protected]]
Interswitch Limited
1648C Oko-Awo Street, Victoria Island Lagos Customer Contact Centre
0700-9065000 ü http://www.interswitchgroup.com
[http://www.interswitchgroup.com/]
[https://www.quickteller.com/loan-request]
This e-mail and all attachments transmitted with it remain the property of
Interswitch Limited , the information contained herein are private
confidential and intended solely for the use of the addressee. If you have
received this e-mail in error, kindly notify the sender. If you are not the
addressee, you should not disseminate, distribute or copy this e-mail. Kindly
notify Interswitch immediately by email if you have received this email in
error and delete this email and any attachment from your system Emails cannot
be guaranteed to be secure or error free as the message and any attachments
could be intercepted, corrupted, lost, delayed, incomplete or amended. the
contents of this email or its attachments have been scanned for all viruses and
all reasonable measures have been taken to ensure that no viruses are present.
Interswitch Limited and its subsidiaries do not accept liability for damage
caused by this email or any attachments.