[ https://issues.apache.org/jira/browse/HIVE-25958?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ASF GitHub Bot updated HIVE-25958:
----------------------------------
Labels: pull-request-available (was: )
> Optimise BasicStatsNoJobTask
> ----------------------------
>
> Key: HIVE-25958
> URL: https://issues.apache.org/jira/browse/HIVE-25958
> Project: Hive
> Issue Type: Improvement
> Reporter: Rajesh Balamohan
> Priority: Major
> Labels: pull-request-available
> Time Spent: 10m
> Remaining Estimate: 0h
>
> When a large number of files is present, analyzing a table (for stats) takes a
> long time, especially on cloud platforms. Each file is read sequentially when
> computing stats, which can be optimized.
>
> {code:java}
> at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:293)
> at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:506)
> - locked <0x0000000642995b10> (a org.apache.hadoop.fs.s3a.S3AInputStream)
> at org.apache.hadoop.fs.s3a.S3AInputStream.readFully(S3AInputStream.java:775)
> - locked <0x0000000642995b10> (a org.apache.hadoop.fs.s3a.S3AInputStream)
> at org.apache.hadoop.fs.FSDataInputStream.readFully(FSDataInputStream.java:116)
> at org.apache.orc.impl.RecordReaderUtils.readDiskRanges(RecordReaderUtils.java:574)
> at org.apache.orc.impl.RecordReaderUtils$DefaultDataReader.readFileData(RecordReaderUtils.java:282)
> at org.apache.orc.impl.RecordReaderImpl.readAllDataStreams(RecordReaderImpl.java:1172)
> at org.apache.orc.impl.RecordReaderImpl.readStripe(RecordReaderImpl.java:1128)
> at org.apache.orc.impl.RecordReaderImpl.advanceStripe(RecordReaderImpl.java:1281)
> at org.apache.orc.impl.RecordReaderImpl.advanceToNextRow(RecordReaderImpl.java:1316)
> at org.apache.orc.impl.RecordReaderImpl.<init>(RecordReaderImpl.java:302)
> at org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.<init>(RecordReaderImpl.java:68)
> at org.apache.hadoop.hive.ql.io.orc.ReaderImpl.rowsOptions(ReaderImpl.java:83)
> at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.createReaderFromFile(OrcInputFormat.java:367)
> at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$OrcRecordReader.<init>(OrcInputFormat.java:276)
> at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.getRecordReader(OrcInputFormat.java:2027)
> at org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask$FooterStatCollector.run(BasicStatsNoJobTask.java:235)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
>
> "HiveServer2-Background-Pool: Thread-5161" #5161 prio=5 os_prio=0 tid=0x00007f271217d800 nid=0x21b7 waiting on condition [0x00007f26fce88000]
> java.lang.Thread.State: TIMED_WAITING (parking)
> at sun.misc.Unsafe.park(Native Method)
> - parking to wait for <0x00000006bee1b3a0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
> at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
> at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
> at java.util.concurrent.ThreadPoolExecutor.awaitTermination(ThreadPoolExecutor.java:1475)
> at org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask.shutdownAndAwaitTermination(BasicStatsNoJobTask.java:426)
> at org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask.aggregateStats(BasicStatsNoJobTask.java:338)
> at org.apache.hadoop.hive.ql.stats.BasicStatsNoJobTask.process(BasicStatsNoJobTask.java:121)
> at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:107)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:361)
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:334)
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:250)
> {code}
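>
> For illustration only (this is not the actual patch): one possible direction the
> description points at is sketched below, reading just the ORC footer metadata per
> file and overlapping the per-file reads with a small thread pool instead of
> touching the data streams one file after another. The class name, file paths, and
> pool size are made up for the example; only the Hadoop/ORC calls
> (OrcFile.createReader, Reader.getNumberOfRows, Reader.getRawDataSize) are real APIs.
>
> {code:java}
> import java.util.ArrayList;
> import java.util.Arrays;
> import java.util.List;
> import java.util.concurrent.ExecutorService;
> import java.util.concurrent.Executors;
> import java.util.concurrent.Future;
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.Path;
> import org.apache.orc.OrcFile;
> import org.apache.orc.Reader;
>
> // Hypothetical sketch, not the HIVE-25958 implementation.
> public class FooterStatsSketch {
>   public static void main(String[] args) throws Exception {
>     Configuration conf = new Configuration();
>     // Example paths only.
>     List<Path> files = Arrays.asList(
>         new Path("s3a://bucket/warehouse/t/part-00000.orc"),
>         new Path("s3a://bucket/warehouse/t/part-00001.orc"));
>
>     // One task per file so slow object-store round trips overlap.
>     ExecutorService pool = Executors.newFixedThreadPool(8);
>     List<Future<long[]>> results = new ArrayList<>();
>     for (Path file : files) {
>       results.add(pool.submit(() -> {
>         // Reading only the footer metadata avoids pulling the data streams.
>         // (Reader cleanup omitted to keep the sketch short.)
>         Reader reader = OrcFile.createReader(file, OrcFile.readerOptions(conf));
>         return new long[] { reader.getNumberOfRows(), reader.getRawDataSize() };
>       }));
>     }
>
>     // Aggregate the per-file numbers into table-level basic stats.
>     long numRows = 0, rawDataSize = 0;
>     for (Future<long[]> f : results) {
>       long[] stats = f.get();
>       numRows += stats[0];
>       rawDataSize += stats[1];
>     }
>     pool.shutdown();
>
>     System.out.println("numRows=" + numRows + ", rawDataSize=" + rawDataSize);
>   }
> }
> {code}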
--
This message was sent by Atlassian Jira
(v8.20.1#820001)