[
https://issues.apache.org/jira/browse/DRILL-4234?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15075404#comment-15075404
]
Rahul Challapalli commented on DRILL-4234:
------------------------------------------
JSON profile of the failed query:
{code}
{
"id": {
"part1": 2989184731599643600,
"part2": 6781135993040242000
},
"type": 1,
"start": 1451509842238,
"end": 1451509846328,
"query": "create table `tpch_single_partition/lineitem` partition by
(l_moddate) as select l.*, l_shipdate - extract(day from l_shipdate) + 1
l_moddate from cp.`tpch/lineitem.parquet` l",
"plan": "00-00 Screen : rowType = RecordType(VARCHAR(255) Fragment,
BIGINT Number of records written): rowcount = 60175.0, cumulative cost =
{367067.5 rows, 4549681.701531188 cpu, 0.0 io, 0.0 network, 962800.0 memory},
id = 306\n00-01 Project(Fragment=[$0], Number of records written=[$1]) :
rowType = RecordType(VARCHAR(255) Fragment, BIGINT Number of records written):
rowcount = 60175.0, cumulative cost = {361050.0 rows, 4543664.201531188 cpu,
0.0 io, 0.0 network, 962800.0 memory}, id = 305\n00-02 Writer : rowType
= RecordType(VARCHAR(255) Fragment, BIGINT Number of records written): rowcount
= 60175.0, cumulative cost = {361050.0 rows, 4543664.201531188 cpu, 0.0 io, 0.0
network, 962800.0 memory}, id = 304\n00-03 ProjectAllowDup(*=[$0],
l_moddate=[$1], P_A_R_T_I_T_I_O_N_C_O_M_P_A_R_A_T_O_R=[newPartitionValue($1)])
: rowType = RecordType(ANY *, ANY l_moddate, ANY
P_A_R_T_I_T_I_O_N_C_O_M_P_A_R_A_T_O_R): rowcount = 60175.0, cumulative cost =
{300875.0 rows, 4483489.201531188 cpu, 0.0 io, 0.0 network, 962800.0 memory},
id = 303\n00-04 Project(T0¦¦*=[$0], l_moddate=[$1]) : rowType =
RecordType(ANY T0¦¦*, ANY l_moddate): rowcount = 60175.0, cumulative cost =
{240700.0 rows, 4242789.201531188 cpu, 0.0 io, 0.0 network, 962800.0 memory},
id = 302\n00-05 SelectionVectorRemover : rowType = RecordType(ANY
T0¦¦*, ANY l_moddate): rowcount = 60175.0, cumulative cost = {240700.0 rows,
4242789.201531188 cpu, 0.0 io, 0.0 network, 962800.0 memory}, id = 301\n00-06
Sort(sort0=[$1], dir0=[ASC]) : rowType = RecordType(ANY T0¦¦*, ANY
l_moddate): rowcount = 60175.0, cumulative cost = {180525.0 rows,
4182614.2015311876 cpu, 0.0 io, 0.0 network, 962800.0 memory}, id = 300\n00-07
Project(T0¦¦*=[$0], l_moddate=[+(-($1, EXTRACT(FLAG(DAY), $1)),
1)]) : rowType = RecordType(ANY T0¦¦*, ANY l_moddate): rowcount = 60175.0,
cumulative cost = {120350.0 rows, 361050.0 cpu, 0.0 io, 0.0 network, 0.0
memory}, id = 299\n00-08 Project(T0¦¦*=[$0],
l_shipdate=[$1]) : rowType = RecordType(ANY T0¦¦*, ANY l_shipdate): rowcount =
60175.0, cumulative cost = {60175.0 rows, 120350.0 cpu, 0.0 io, 0.0 network,
0.0 memory}, id = 298\n00-09
Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath
[path=classpath:/tpch/lineitem.parquet]],
selectionRoot=classpath:/tpch/lineitem.parquet, numFiles=1,
usedMetadataFile=false, columns=[`*`]]]) : rowType = (DrillRecordRow[*,
l_shipdate]): rowcount = 60175.0, cumulative cost = {60175.0 rows, 120350.0
cpu, 0.0 io, 0.0 network, 0.0 memory}, id = 297\n",
"foreman": {
"address": "qa-node190.qa.lab",
"userPort": 31010,
"controlPort": 31011,
"dataPort": 31012
},
"state": 4,
"totalFragments": 1,
"finishedFragments": 0,
"fragmentProfile": [
{
"majorFragmentId": 0,
"minorFragmentProfile": [
{
"state": 2,
"minorFragmentId": 0,
"operatorProfile": [
{
"inputProfile": [
{
"records": 4369,
"batches": 1,
"schemas": 1
}
],
"operatorId": 9,
"operatorType": 21,
"setupNanos": 0,
"processNanos": 802213847,
"peakLocalMemoryAllocated": 23068672,
"waitNanos": 1048813
},
{
"inputProfile": [
{
"records": 4369,
"batches": 1,
"schemas": 1
}
],
"operatorId": 8,
"operatorType": 10,
"setupNanos": 463141066,
"processNanos": 24178056,
"peakLocalMemoryAllocated": 14221312,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 4369,
"batches": 1,
"schemas": 1
}
],
"operatorId": 7,
"operatorType": 10,
"setupNanos": 178356093,
"processNanos": 44856303,
"peakLocalMemoryAllocated": 14221312,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 4369,
"batches": 1,
"schemas": 1
}
],
"operatorId": 6,
"operatorType": 17,
"setupNanos": 0,
"processNanos": 11534376,
"peakLocalMemoryAllocated": 10573440,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 1,
"schemas": 1
}
],
"operatorId": 5,
"operatorType": 14,
"setupNanos": 1302455,
"processNanos": 14797234,
"peakLocalMemoryAllocated": 573440,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 1,
"schemas": 1
}
],
"operatorId": 4,
"operatorType": 10,
"setupNanos": 51157354,
"processNanos": 13293815,
"peakLocalMemoryAllocated": 540672,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 1,
"schemas": 1
}
],
"operatorId": 3,
"operatorType": 10,
"setupNanos": 0,
"processNanos": 5349,
"peakLocalMemoryAllocated": 0,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 0,
"schemas": 0
}
],
"operatorId": 2,
"operatorType": 25,
"setupNanos": 0,
"processNanos": 0,
"peakLocalMemoryAllocated": 0,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 0,
"schemas": 0
}
],
"operatorId": 2,
"operatorType": 25,
"setupNanos": 0,
"processNanos": 5780,
"peakLocalMemoryAllocated": 0,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 0,
"schemas": 0
}
],
"operatorId": 1,
"operatorType": 10,
"setupNanos": 0,
"processNanos": 856802,
"peakLocalMemoryAllocated": 0,
"waitNanos": 0
},
{
"inputProfile": [
{
"records": 0,
"batches": 0,
"schemas": 0
}
],
"operatorId": 0,
"operatorType": 13,
"setupNanos": 0,
"processNanos": 1225,
"peakLocalMemoryAllocated": 0,
"waitNanos": 0
}
],
"startTime": 1451509843205,
"endTime": 1451509845227,
"memoryUsed": 50872064,
"maxMemoryUsed": 52969216,
"endpoint": {
"address": "qa-node190.qa.lab",
"userPort": 31010,
"controlPort": 31011,
"dataPort": 31012
},
"lastUpdate": 1451509845231,
"lastProgress": 1451509845231
}
]
}
],
"user": "root",
"error": "RESOURCE ERROR: Drill Remote Exception\n\n",
"verboseError": "RESOURCE ERROR: Drill Remote Exception\n\n\n\n",
"errorId": "e0a9da86-b862-4521-93f8-1e151c35d782",
"errorNode": "qa-node190.qa.lab:31010"
}
{code}
> Drill running out of memory for a simple CTAS query
> ---------------------------------------------------
>
> Key: DRILL-4234
> URL: https://issues.apache.org/jira/browse/DRILL-4234
> Project: Apache Drill
> Issue Type: Bug
> Components: Storage - Writer
> Affects Versions: 1.5.0
> Reporter: Rahul Challapalli
> Priority: Critical
>
> git.commit.id.abbrev=6dea429
> Memory Settings on the cluster
> {code}
> DRILL_MAX_DIRECT_MEMORY="32G"
> DRILL_MAX_HEAP="4G"
> export DRILL_JAVA_OPTS="-Xms1G -Xmx$DRILL_MAX_HEAP
> -XX:MaxDirectMemorySize=$DRILL_MAX_DIRECT_MEMORY -XX:MaxPermSize=512M
> -XX:ReservedCodeCacheSize=1G -ea"
> {code}
> The below query runs out of memory
> {code}
> create table `tpch_single_partition/lineitem` partition by (l_moddate) as
> select l.*, l_shipdate - extract(day from l_shipdate) + 1 l_moddate from
> cp.`tpch/lineitem.parquet` l;
> Error: RESOURCE ERROR: One or more nodes ran out of memory while executing
> the query.
> {code}
> Below is the information from the logs
> {code}
> 2015-12-30 17:44:00,164 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.f.FragmentStatusReporter -
> 297be821-460b-689f-5b40-8897a6013ffb:0:0: State to report: RUNNING
> 2015-12-30 17:44:00,868 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor - User Error Occurred
> org.apache.drill.common.exceptions.UserException: RESOURCE ERROR: One or more
> nodes ran out of memory while executing the query.
> [Error Id: 1843305a-cc43-42c5-9b96-e1a887972999 ]
> at
> org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:534)
> ~[drill-common-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:266)
> [drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38)
> [drill-common-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> [na:1.7.0_71]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> [na:1.7.0_71]
> at java.lang.Thread.run(Thread.java:745) [na:1.7.0_71]
> Caused by: org.apache.drill.exec.exception.OutOfMemoryException:
> org.apache.drill.exec.exception.OutOfMemoryException: Unable to allocate sv2,
> and not enough batchGroups to spill
> at
> org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:356)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.innerNext(RemovingRecordBatch.java:94)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:132)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:132)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:91)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:119)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:109)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:51)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:132)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:104)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.ScreenCreator$ScreenRoot.innerNext(ScreenCreator.java:81)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:94)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:256)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:250)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at java.security.AccessController.doPrivileged(Native Method)
> ~[na:1.7.0_71]
> at javax.security.auth.Subject.doAs(Subject.java:415) ~[na:1.7.0_71]
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
> ~[hadoop-common-2.7.0-mapr-1506.jar:na]
> at
> org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:250)
> [drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> ... 4 common frames omitted
> Caused by: org.apache.drill.exec.exception.OutOfMemoryException: Unable to
> allocate sv2, and not enough batchGroups to spill
> at
> org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.newSV2(ExternalSortBatch.java:620)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> at
> org.apache.drill.exec.physical.impl.xsort.ExternalSortBatch.innerNext(ExternalSortBatch.java:352)
> ~[drill-java-exec-1.5.0-SNAPSHOT.jar:1.5.0-SNAPSHOT]
> ... 44 common frames omitted
> 2015-12-30 17:44:00,868 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 297be821-460b-689f-5b40-8897a6013ffb:0:0: State change requested RUNNING -->
> FAILED
> 2015-12-30 17:44:00,881 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 297be821-460b-689f-5b40-8897a6013ffb:0:0: State change requested FAILED -->
> FAILED
> 2015-12-30 17:44:00,883 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 297be821-460b-689f-5b40-8897a6013ffb:0:0: State change requested FAILED -->
> FAILED
> 2015-12-30 17:44:00,883 [297be821-460b-689f-5b40-8897a6013ffb:frag:0:0] INFO
> o.a.d.e.w.fragment.FragmentExecutor -
> 297be821-460b-689f-5b40-8897a6013ffb:0:0: State change requested FAILED -->
> FINISHED
> 2015-12-30 17:44:00,927 [CONTROL-rpc-event-queue] WARN
> o.a.drill.exec.work.foreman.Foreman - Dropping request to move to COMPLETED
> state as query is already at FAILED state (which is terminal).
> 2015-12-30 17:44:00,930 [CONTROL-rpc-event-queue] WARN
> o.a.d.e.w.b.ControlMessageHandler - Dropping request to cancel fragment.
> 297be821-460b-689f-5b40-8897a6013ffb:0:0 does not exist.
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)