Depending how many unique keys are there it can cause issues can you try following query with tweaking settings around hash_join
select * from dfs.`/iswdata/rj/201805` t1 join (select unique_key, count(unique_key) as total from dfs.`/iswdata/rj/201805` group by unique_key)t on t1.unique_key = t.unique_key where t.total > 1 On Tue, Aug 7, 2018 at 9:23 PM, Peter Edike < peter.ed...@interswitchgroup.com> wrote: > Did an explain plan on the query since the query never completes > successfully (I can't see the profile because of that), Here is the result > of the explain plan query > > "head":{ > "version":1, > "generator":{ > "type":"ExplainHandler", > "info":"" > }, > "type":"APACHE_DRILL_PHYSICAL", > "options":[ > > ], > "queue":0, > "hasResourcePlan":false, > "resultMode":"EXEC" > }, > "graph":[ > { > "pop":"parquet-scan", > "@id":327683, > "userName":"mapr", > "entries":[ > { > "path":"maprfs:///iswdata/rj" > } > ], > "storage":{ > "type":"file", > "enabled":true, > "connection":"maprfs:///", > "config":null, > "workspaces":{ > "root":{ > "location":"/", > "writable":false, > "defaultInputFormat":null, > "allowAccessOutsideWorkspace":false > }, > "tmp":{ > "location":"/tmp", > "writable":true, > "defaultInputFormat":null, > "allowAccessOutsideWorkspace":false > } > }, > "formats":{ > "psv":{ > "type":"text", > "extensions":[ > "tbl" > ], > "delimiter":"|" > }, > "csv":{ > "type":"text", > "extensions":[ > "csv" > ], > "delimiter":"," > }, > "tsv":{ > "type":"text", > "extensions":[ > "tsv" > ], > "delimiter":"\t" > }, > "parquet":{ > "type":"parquet" > }, > "json":{ > "type":"json", > "extensions":[ > "json" > ] > }, > "maprdb":{ > "type":"maprdb" > } > } > }, > "format":{ > "type":"parquet" > }, > "columns":[ > "`unique_key`" > ], > "selectionRoot":"maprfs:/iswdata/rj", > "filter":"true", > "fileSet":[ > "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet", > "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet", > "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet", > "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet", > "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet", > "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet", > "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet", > "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet", > "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet" > ], > "cost":4.73824196E8 > }, > { > "pop":"hash-aggregate", > "@id":327682, > "child":327683, > "cardinality":1.0, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "aggPhase":"PHASE_1of2", > "groupByExprs":[ > { > "ref":"`unique_key`", > "expr":"`unique_key`" > } > ], > "aggrExprs":[ > { > "ref":"`$f1`", > "expr":"count(`unique_key`) " > } > ], > "cost":2.36912098E8 > }, > { > "pop":"project", > "@id":327681, > "exprs":[ > { > "ref":"`unique_key`", > "expr":"`unique_key`" > }, > { > "ref":"`$f1`", > "expr":"`$f1`" > }, > { > "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`", > "expr":"hash32asdouble(`unique_key`, 1301011) " > } > ], > "child":327682, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E7 > }, > { > "pop":"unordered-mux-exchange", > "@id":196609, > "child":327681, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E7 > }, > { > "pop":"hash-to-random-exchange", > "@id":65548, > "child":196609, > "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`", > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E7 > }, > { > "pop":"project", > "@id":65547, > "exprs":[ > { > "ref":"`unique_key`", > "expr":"`unique_key`" > }, > { > "ref":"`$f1`", > "expr":"`$f1`" > } > ], > "child":65548, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E7 > }, > { > "pop":"hash-aggregate", > "@id":65546, > "child":65547, > "cardinality":1.0, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "aggPhase":"PHASE_2of2", > "groupByExprs":[ > { > "ref":"`unique_key`", > "expr":"`unique_key`" > } > ], > "aggrExprs":[ > { > "ref":"`$f1`", > "expr":"$sum0(`$f1`) " > } > ], > "cost":2.36912098E7 > }, > { > "pop":"filter", > "@id":65545, > "child":65546, > "expr":"greater_than(`$f1`, 1) ", > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":2369120.98 > }, > { > "pop":"selection-vector-remover", > "@id":65544, > "child":65545, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":2369120.98 > }, > { > "pop":"project", > "@id":65542, > "exprs":[ > { > "ref":"`unique_key`", > "expr":"`unique_key`" > } > ], > "child":65544, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":2369120.98 > }, > { > "pop":"project", > "@id":65540, > "exprs":[ > { > "ref":"`unique_key0`", > "expr":"`unique_key`" > } > ], > "child":65542, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":2369120.98 > }, > { > "pop":"parquet-scan", > "@id":262147, > "userName":"mapr", > "entries":[ > { > "path":"maprfs:///iswdata/rj" > } > ], > "storage":{ > "type":"file", > "enabled":true, > "connection":"maprfs:///", > "config":null, > "workspaces":{ > "root":{ > "location":"/", > "writable":false, > "defaultInputFormat":null, > "allowAccessOutsideWorkspace":false > }, > "tmp":{ > "location":"/tmp", > "writable":true, > "defaultInputFormat":null, > "allowAccessOutsideWorkspace":false > } > }, > "formats":{ > "psv":{ > "type":"text", > "extensions":[ > "tbl" > ], > "delimiter":"|" > }, > "csv":{ > "type":"text", > "extensions":[ > "csv" > ], > "delimiter":"," > }, > "tsv":{ > "type":"text", > "extensions":[ > "tsv" > ], > "delimiter":"\t" > }, > "parquet":{ > "type":"parquet" > }, > "json":{ > "type":"json", > "extensions":[ > "json" > ] > }, > "maprdb":{ > "type":"maprdb" > } > } > }, > "format":{ > "type":"parquet" > }, > "columns":[ > "`**`" > ], > "selectionRoot":"maprfs:/iswdata/rj", > "filter":"true", > "fileSet":[ > "/iswdata/rj/d43b262e-b5bf-4b70-b891-03e5403186aa.parquet", > "/iswdata/rj/7a3a0be9-4ab7-42e7-b5a4-da15ae6cbd8e.parquet", > "/iswdata/rj/f4ecfe34-0a96-4582-bc0e-569486015bc2.parquet", > "/iswdata/rj/438b1548-89fb-4eb4-8499-982cebcff80b.parquet", > "/iswdata/rj/07a60a9e-46f0-4274-82c8-4ea46aaf10bb.parquet", > "/iswdata/rj/57eb07ef-63b4-4b43-83df-719bcf10e364.parquet", > "/iswdata/rj/8248c70a-3579-4166-a4ce-707db8e4960c.parquet", > "/iswdata/rj/a018b4d7-6891-48e6-958a-8239ee0c0d64.parquet", > "/iswdata/rj/8ff9f8fd-8631-4283-b3ce-e2f90a89e5bc.parquet" > ], > "cost":4.73824196E8 > }, > { > "pop":"project", > "@id":262146, > "exprs":[ > { > "ref":"`T0¦¦**`", > "expr":"`**`" > }, > { > "ref":"`unique_key`", > "expr":"`unique_key`" > } > ], > "child":262147, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"project", > "@id":262145, > "exprs":[ > { > "ref":"`T0¦¦**`", > "expr":"`T0¦¦**`" > }, > { > "ref":"`unique_key`", > "expr":"`unique_key`" > }, > { > "ref":"`E_X_P_R_H_A_S_H_F_I_E_L_D`", > "expr":"hash32asdouble(`unique_key`, 1301011) " > } > ], > "child":262146, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"unordered-mux-exchange", > "@id":131073, > "child":262145, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"hash-to-random-exchange", > "@id":65543, > "child":131073, > "expr":"`E_X_P_R_H_A_S_H_F_I_E_L_D`", > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"project", > "@id":65541, > "exprs":[ > { > "ref":"`T0¦¦**`", > "expr":"`T0¦¦**`" > }, > { > "ref":"`unique_key`", > "expr":"`unique_key`" > } > ], > "child":65543, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"hash-join", > "@id":65539, > "left":65541, > "right":65540, > "conditions":[ > { > "relationship":"EQUALS", > "left":"`unique_key`", > "right":"`unique_key0`" > } > ], > "joinType":"INNER", > "isRowKeyJoin":false, > "joinControl":0, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":4.73824196E8 > }, > { > "pop":"limit", > "@id":65538, > "child":65539, > "first":0, > "last":40, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"selection-vector-remover", > "@id":65537, > "child":65538, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"union-exchange", > "@id":5, > "child":65537, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"limit", > "@id":4, > "child":5, > "first":0, > "last":40, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"selection-vector-remover", > "@id":3, > "child":4, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"project", > "@id":2, > "exprs":[ > { > "ref":"`T0¦¦**`", > "expr":"`T0¦¦**`" > } > ], > "child":3, > "outputProj":true, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"project", > "@id":1, > "exprs":[ > { > "ref":"`**`", > "expr":"`T0¦¦**`" > } > ], > "child":2, > "outputProj":false, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > }, > { > "pop":"screen", > "@id":0, > "child":1, > "initialAllocation":1000000, > "maxAllocation":10000000000, > "cost":40.0 > } > ] > } > > Please what exactly Am I looking for as to why it never completes. I can > see outofmemory errors but then Drill is running with 16gb on each node and > there are five nodes in the cluster > > Best regards, > Peter Edike > > Senior Software Engineer > Interswitch > > Tel. | Mobile. | IP Phone. > Fax. | mailto:peter.ed...@interswitchgroup.com | http:// > > http://www.interswitchgroup.com > > InterswitchThis e-mail and all attachments transmitted with it remain the > property of InterSwitch Limited , the information contained herein are > private confidential and intended solely for the use of the addressee. If > you have received this e-mail in error, kindly notify the sender. If you > are not the addressee, you should not disseminate, distribute or copy this > e-mail. Kindly notify InterSwitch immediately by email if you have received > this email in error and delete this email and any attachment from your > system Emails cannot be guaranteed to be secure or error free as the > message and any attachments could be intercepted, corrupted, lost, delayed, > incomplete or amended. the contents of this email or its attachments have > been scanned for all viruses and all reasonable measures have been taken to > ensure that no viruses are present. InterSwitch Limited and its > subsidiaries do not accept liability for damage caused by this email or any > attachments. > > > > This message has been marked as CONFIDENTIAL on Tuesday, August 7, 2018 @ > 4:54:16 PM > > -----Original Message----- > From: Kunal Khatua <ku...@apache.org> > Sent: Monday, August 6, 2018 6:41 PM > To: user@drill.apache.org > Subject: Re: This query is so slow > > Hi Peter > > What does the profile for the query indicate? > > Take a look at the operator overview. It will indicate which operator is > using the most CPU cycles. If the average and max processing times vary > wildly, it might be a problem of skew, where some fragments are doing a > relatively excessive work. > If that is the case, within that operator's profile segment, you can see > the distribution of the fragments and see which ones are the long pole in > your run. > > ~ KK > > On 8/6/2018 9:01:25 AM, Peter Edike <peter.ed...@interswitchgroup.com> > wrote: > Hello all, > > The following query takes ages to complete in drill and more often that > not, fails > > select * from dfs.`/iswdata/rj/201805` where unique_key in > ( > select unique_key from dfs.`/iswdata/rj/201805` group by > unique_key having count(unique_key) > 1 > ) > limit 40 > > Please what can I do to improve the performance of this query > > Kind Regards > > Peter Edike > Senior Software Engineer > Research and Development, ENG > Engineering > > Office NO: > Mobile NO: > Email: peter.ed...@interswitchgroup.com [mailto:peter.edike@ > interswitchgroup.com] > Interswitch Limited > 1648C Oko-Awo Street, Victoria Island Lagos Customer Contact Centre > 0700-9065000 ü http://www.interswitchgroup.com [ > http://www.interswitchgroup.com/] > [https://www.quickteller.com/loan-request] > This e-mail and all attachments transmitted with it remain the property > of Interswitch Limited , the information contained herein are private > confidential and intended solely for the use of the addressee. If you have > received this e-mail in error, kindly notify the sender. If you are not the > addressee, you should not disseminate, distribute or copy this e-mail. > Kindly notify Interswitch immediately by email if you have received this > email in error and delete this email and any attachment from your system > Emails cannot be guaranteed to be secure or error free as the message and > any attachments could be intercepted, corrupted, lost, delayed, incomplete > or amended. the contents of this email or its attachments have been scanned > for all viruses and all reasonable measures have been taken to ensure that > no viruses are present. Interswitch Limited and its subsidiaries do not > accept liability for damage caused by this email or any attachments. > -- Nitin Pawar