I am running a moderate-sized data reduction task and getting a strange crash
with Drill 1.16. The stack trace is shown below.
The query is this:
```
-- Rewrite the MRMS GRIB CSV extracts as a Parquet table partitioned by `box`.
create table dfs.home.`mrms/grib-07.parquet`
partition by (box)
as
with
-- CSV rows read with a header line, coordinates cast to double.
-- The row cap is the knob being raised between runs (see surrounding text).
readings as (
    select
        value as precip,
        datetime as t,
        cast(latitude as double) as latitude,
        cast(longitude as double) as longitude
    from table(
        dfs.home.`mrms/*grib*csv`(
            type => 'text',
            fieldDelimiter => ',',
            extractHeader => true
        )
    )
    limit 400000000
)
select
    precip,
    latitude,
    longitude,
    -- Partition key: combines the floored latitude and floored longitude.
    floor(latitude) * 100 - floor(longitude) as box
from readings
-- `t` is used for ordering only; it is not written to the output table.
order by
    box,
    latitude,
    longitude,
    t
```
The basic idea is that we are scanning 740 CSV files containing about 19GB of
data and I want to write them to a partitioned parquet dataset. I am
progressively increasing the number of lines processed to verify things are
working. The process worked fine at 200M rows of data and fails at 400M. The
text of the error is disconcerting because it claims that there is an index
error, but the index given is in the specified range.
Does anybody have any ideas on this? I haven't tried more recent versions.
Fragment 3:0
Please, refer to logs for more information.
[Error Id: e681aca3-78b7-496a-9af1-7ec34fcf31a9 on nodec:31010]
at
org.apache.drill.common.exceptions.UserException$Builder.build(UserException.java:630)
~[drill-common-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.work.fragment.FragmentExecutor.sendFinalState(FragmentExecutor.java:363)
[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.work.fragment.FragmentExecutor.cleanup(FragmentExecutor.java:219)
[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:329)
[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38)
[drill-common-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[na:1.8.0_292]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[na:1.8.0_292]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_292]
Caused by: java.lang.IllegalStateException:
java.lang.IndexOutOfBoundsException: index: 131071, length: 19 (expected:
range(0, 131072))
at
org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.doWork(RemovingRecordBatch.java:69)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractUnaryRecordBatch.innerNext(AbstractUnaryRecordBatch.java:117)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:186)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:126)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:116)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractUnaryRecordBatch.innerNext(AbstractUnaryRecordBatch.java:63)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:141)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:186)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:126)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:116)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractUnaryRecordBatch.innerNext(AbstractUnaryRecordBatch.java:63)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:141)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:186)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:104)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.partitionsender.PartitionSenderRootExec.innerNext(PartitionSenderRootExec.java:152)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:94)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:296)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:283)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at java.security.AccessController.doPrivileged(Native Method)
~[na:1.8.0_292]
at javax.security.auth.Subject.doAs(Subject.java:422) ~[na:1.8.0_292]
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1669)
~[hadoop-common-2.7.0-mapr-1808.jar:na]
at
org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:283)
[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
... 4 common frames omitted
Caused by: java.lang.IndexOutOfBoundsException: index: 131071, length: 19
(expected: range(0, 131072))
at
io.netty.buffer.AbstractByteBuf.checkIndex0(AbstractByteBuf.java:1125)
~[netty-buffer-4.0.48.Final.jar:4.0.48.Final]
at
io.netty.buffer.AbstractByteBuf.checkIndex(AbstractByteBuf.java:1120)
~[netty-buffer-4.0.48.Final.jar:4.0.48.Final]
at
io.netty.buffer.UnsafeByteBufUtil.getBytes(UnsafeByteBufUtil.java:280)
~[netty-buffer-4.0.48.Final.jar:4.0.48.Final]
at
io.netty.buffer.PooledUnsafeDirectByteBuf.getBytes(PooledUnsafeDirectByteBuf.java:104)
~[netty-buffer-4.0.48.Final.jar:4.0.48.Final]
at io.netty.buffer.WrappedByteBuf.getBytes(WrappedByteBuf.java:284)
~[netty-buffer-4.0.48.Final.jar:4.0.48.Final]
at
io.netty.buffer.UnsafeDirectLittleEndian.getBytes(UnsafeDirectLittleEndian.java:34)
~[drill-memory-base-1.16.0.10-mapr.jar:4.0.48.Final]
at io.netty.buffer.DrillBuf.getBytes(DrillBuf.java:684)
~[drill-memory-base-1.16.0.10-mapr.jar:4.0.48.Final]
at
org.apache.drill.exec.vector.VarCharVector.copyFromSafe(VarCharVector.java:274)
~[vector-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.vector.VarCharVector.copyEntry(VarCharVector.java:280)
~[vector-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.GenericSV2Copier.copyEntry(GenericSV2Copier.java:39)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.AbstractSV2Copier.copyEntryIndirect(AbstractSV2Copier.java:54)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.AbstractCopier.insertRecords(AbstractCopier.java:68)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.AbstractCopier.copyRecords(AbstractCopier.java:47)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.AbstractSV2Copier.copyRecords(AbstractSV2Copier.java:67)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
at
org.apache.drill.exec.physical.impl.svremover.RemovingRecordBatch.doWork(RemovingRecordBatch.java:67)
~[drill-java-exec-1.16.0.10-mapr.jar:1.16.0.10-mapr]
... 25 common frames omitted