[ https://issues.apache.org/jira/browse/TEZ-3793?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Ayush Saxena resolved TEZ-3793. ------------------------------- Resolution: Cannot Reproduce > Consider reducing the number of times "DiskChecker.doDiskIo" needs to be > invoked for writing to local folders > ------------------------------------------------------------------------------------------------------------- > > Key: TEZ-3793 > URL: https://issues.apache.org/jira/browse/TEZ-3793 > Project: Apache Tez > Issue Type: Improvement > Reporter: Rajesh Balamohan > Priority: Major > > For large jobs, {{LocalDirAllocator}} comes up as a bottleneck fairly often as > it tries to do a minimal write operation. It would be good to consider reducing > the number of times directories are checked for errors/issues. > Some sample stack traces are given below: > {noformat} > at java.io.FileDescriptor.sync(Native Method) > at > org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249) > at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220) > at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424) > at > org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151) > at > org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132) > at > org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getOutputFileForWrite(TezTaskOutputFiles.java:91) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:721) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.mergeAll(UnorderedPartitionedKVWriter.java:748) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.close(UnorderedPartitionedKVWriter.java:545) > at > 
org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput.close(UnorderedPartitionedKVOutput.java:105) > - locked <0x00007f4823d5dc88> (a > org.apache.tez.runtime.library.output.UnorderedPartitionedKVOutput) > at > org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.close(LogicalIOProcessorRuntimeTask.java:393) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:83) > at > org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61) > at > org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at > org.apache.hadoop.hive.llap.daemon.impl.StatsRecordingThreadPool$WrappedCallable.call(StatsRecordingThreadPool.java:110) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:748) > > at java.io.FileDescriptor.sync(Native Method) > at > org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249) > at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220) > at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424) > at > org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151) > at > 
org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132) > at > org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getInputFileForWrite(TezTaskOutputFiles.java:250) > at > org.apache.tez.runtime.library.common.shuffle.DiskFetchedInput.<init>(DiskFetchedInput.java:52) > at > org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator.allocate(SimpleFetchedInputAllocator.java:140) > - locked <0x00007f4891288858> (a > org.apache.tez.runtime.library.common.shuffle.impl.SimpleFetchedInputAllocator) > at > org.apache.tez.runtime.library.common.shuffle.Fetcher.fetchInputs(Fetcher.java:793) > at > org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:539) > at > org.apache.tez.runtime.library.common.shuffle.Fetcher.doHttpFetch(Fetcher.java:428) > at > org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:226) > at > org.apache.tez.runtime.library.common.shuffle.Fetcher.callInternal(Fetcher.java:73) > at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:748) > at java.io.FileDescriptor.sync(Native Method) > at > org.apache.hadoop.util.DiskChecker.diskIoCheckWithoutNativeIo(DiskChecker.java:249) > at org.apache.hadoop.util.DiskChecker.doDiskIo(DiskChecker.java:220) > at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:82) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.createPath(LocalDirAllocator.java:350) > at > org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:424) > at > org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:151) > at > 
org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:132) > at > org.apache.tez.runtime.library.common.task.local.output.TezTaskOutputFiles.getSpillFileForWrite(TezTaskOutputFiles.java:207) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.getSpillPathDetails(UnorderedPartitionedKVWriter.java:728) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.setupNextBuffer(UnorderedPartitionedKVWriter.java:356) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:299) > at > org.apache.tez.runtime.library.common.writers.UnorderedPartitionedKVWriter.write(UnorderedPartitionedKVWriter.java:269) > at > org.apache.hadoop.hive.ql.exec.tez.TezProcessor$TezKVOutputCollector.collect(TezProcessor.java:260) > at > org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.collect(VectorReduceSinkCommonOperator.java:432) > at > org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator.process(VectorReduceSinkCommonOperator.java:397) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:123) > at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:897) > at > org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130) > at > org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:783) > at > org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:86) > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)