[
https://issues.apache.org/jira/browse/HIVE-25980?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Chiran Ravani updated HIVE-25980:
---------------------------------
Description:
MSCK Repair table for high partition table can perform slow on Cloud Storage
such as S3, one of the case we found where slowness was observed in
HiveMetaStoreChecker.checkTable.
{code:java}
"HiveServer2-Background-Pool: Thread-382" #382 prio=5 os_prio=0
tid=0x00007f97fc4a4000 nid=0x5c2a runnable [0x00007f97c41a8000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at
sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:464)
at
sun.security.ssl.SSLSocketInputRecord.bytesInCompletePacket(SSLSocketInputRecord.java:68)
at
sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1341)
at sun.security.ssl.SSLSocketImpl.access$300(SSLSocketImpl.java:73)
at
sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:957)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
at
com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
at
com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at
com.amazonaws.thirdparty.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at
com.amazonaws.thirdparty.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at
com.amazonaws.thirdparty.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at
com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at
com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:82)
at
com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at
com.amazonaws.thirdparty.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at
com.amazonaws.thirdparty.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at
com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at
com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at
com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56)
at
com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1331)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1145)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:802)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:770)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:744)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:704)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:686)
at
com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:550)
at
com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:530)
at
com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5437)
at
com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5384)
at
com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1367)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$getObjectMetadata$10(S3AFileSystem.java:2458)
at
org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$437/835000758.apply(Unknown
Source)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:414)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:377)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2446)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2426)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3689)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:3601)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$exists$34(S3AFileSystem.java:4604)
at
org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$436/741067886.apply(Unknown
Source)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.lambda$trackDurationOfOperation$5(IOStatisticsBinding.java:499)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding$$Lambda$404/116715871.apply(Unknown
Source)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration(IOStatisticsBinding.java:444)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2250)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2269)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.exists(S3AFileSystem.java:4602)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:338)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:288)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:153)
at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:139)
at
org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:77)
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:82)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:756)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:505)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:499)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:230)
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:329)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:349)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000003c4180c98> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
- <0x00000003f74448c0> (a
java.util.concurrent.ThreadPoolExecutor$Worker)
{code}
Currently HiveMetaStoreChecker.checkTable checks fs.exists for all the existing
partitions one by one, This can be a costly operation for Object/Cloud storage
if there are 1000s of partitions overcall it could run for long time. Changing
it to run multithreaded will improve performance.
We currently get all the partition directories in table path as part of
https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java#L398
Output from this call can be used to get the difference and reduce fs.exists
call significantly.
PS: In worse case if user removes all partitions dirs from table and runs msck
repair table there would not be any perf gain, we can still add multi threaded
calls to check multiple paths at once, however it is not usual to see 1000s of
partitions being removed and then run msck.
was:
MSCK Repair table for high partition table can perform slow on Cloud Storage
such as S3, one of the case we found where slowness was observed in
HiveMetaStoreChecker.checkTable.
{code:java}
"HiveServer2-Background-Pool: Thread-382" #382 prio=5 os_prio=0
tid=0x00007f97fc4a4000 nid=0x5c2a runnable [0x00007f97c41a8000]
java.lang.Thread.State: RUNNABLE
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at
sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:464)
at
sun.security.ssl.SSLSocketInputRecord.bytesInCompletePacket(SSLSocketInputRecord.java:68)
at
sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1341)
at sun.security.ssl.SSLSocketImpl.access$300(SSLSocketImpl.java:73)
at
sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:957)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at
com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
at
com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
at
com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
at
com.amazonaws.thirdparty.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at
com.amazonaws.thirdparty.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at
com.amazonaws.thirdparty.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
at
com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at
com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:82)
at
com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at
com.amazonaws.thirdparty.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
at
com.amazonaws.thirdparty.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
at
com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
at
com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at
com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56)
at
com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1331)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1145)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:802)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:770)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:744)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:704)
at
com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:686)
at
com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:550)
at
com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:530)
at
com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5437)
at
com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5384)
at
com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1367)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$getObjectMetadata$10(S3AFileSystem.java:2458)
at
org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$437/835000758.apply(Unknown
Source)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:414)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:377)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2446)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2426)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3689)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:3601)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$exists$34(S3AFileSystem.java:4604)
at
org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$436/741067886.apply(Unknown
Source)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.lambda$trackDurationOfOperation$5(IOStatisticsBinding.java:499)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding$$Lambda$404/116715871.apply(Unknown
Source)
at
org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration(IOStatisticsBinding.java:444)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2250)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2269)
at
org.apache.hadoop.fs.s3a.S3AFileSystem.exists(S3AFileSystem.java:4602)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:338)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:288)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:153)
at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:139)
at
org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:77)
at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:82)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:756)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:505)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:499)
at
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
at
org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:230)
at
org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:329)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
at
org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:349)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Locked ownable synchronizers:
- <0x00000003c4180c98> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
- <0x00000003f74448c0> (a
java.util.concurrent.ThreadPoolExecutor$Worker)
{code}
Proposal is to introduce multi threaded run for checkTable to improve
performance for Cloud Storage.
My initial thought is to introduce new config property
hive.metastore.msck.fshandler.threads or use existing config property
hive.metastore.fshandler.threads
> Reduce fs calls in HiveMetaStoreChecker.checkTable
> --------------------------------------------------
>
> Key: HIVE-25980
> URL: https://issues.apache.org/jira/browse/HIVE-25980
> Project: Hive
> Issue Type: Improvement
> Components: Standalone Metastore
> Affects Versions: 3.1.2, 4.0.0
> Reporter: Chiran Ravani
> Assignee: Chiran Ravani
> Priority: Major
> Labels: pull-request-available
> Time Spent: 4h 10m
> Remaining Estimate: 0h
>
> MSCK Repair table for high partition table can perform slow on Cloud Storage
> such as S3, one of the case we found where slowness was observed in
> HiveMetaStoreChecker.checkTable.
> {code:java}
> "HiveServer2-Background-Pool: Thread-382" #382 prio=5 os_prio=0
> tid=0x00007f97fc4a4000 nid=0x5c2a runnable [0x00007f97c41a8000]
> java.lang.Thread.State: RUNNABLE
> at java.net.SocketInputStream.socketRead0(Native Method)
> at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
> at java.net.SocketInputStream.read(SocketInputStream.java:171)
> at java.net.SocketInputStream.read(SocketInputStream.java:141)
> at
> sun.security.ssl.SSLSocketInputRecord.read(SSLSocketInputRecord.java:464)
> at
> sun.security.ssl.SSLSocketInputRecord.bytesInCompletePacket(SSLSocketInputRecord.java:68)
> at
> sun.security.ssl.SSLSocketImpl.readApplicationRecord(SSLSocketImpl.java:1341)
> at sun.security.ssl.SSLSocketImpl.access$300(SSLSocketImpl.java:73)
> at
> sun.security.ssl.SSLSocketImpl$AppInputStream.read(SSLSocketImpl.java:957)
> at
> com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
> at
> com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
> at
> com.amazonaws.thirdparty.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:280)
> at
> com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:138)
> at
> com.amazonaws.thirdparty.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:56)
> at
> com.amazonaws.thirdparty.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
> at
> com.amazonaws.thirdparty.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
> at
> com.amazonaws.thirdparty.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:157)
> at
> com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
> at
> com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:82)
> at
> com.amazonaws.thirdparty.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
> at
> com.amazonaws.thirdparty.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:272)
> at
> com.amazonaws.thirdparty.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:186)
> at
> com.amazonaws.thirdparty.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:185)
> at
> com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
> at
> com.amazonaws.thirdparty.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56)
> at
> com.amazonaws.http.apache.client.impl.SdkHttpClient.execute(SdkHttpClient.java:72)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1331)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1145)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:802)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:770)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:744)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:704)
> at
> com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:686)
> at
> com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:550)
> at
> com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:530)
> at
> com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5437)
> at
> com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5384)
> at
> com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1367)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$getObjectMetadata$10(S3AFileSystem.java:2458)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$437/835000758.apply(Unknown
> Source)
> at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:414)
> at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:377)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2446)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.getObjectMetadata(S3AFileSystem.java:2426)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.s3GetFileStatus(S3AFileSystem.java:3689)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.innerGetFileStatus(S3AFileSystem.java:3601)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$exists$34(S3AFileSystem.java:4604)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem$$Lambda$436/741067886.apply(Unknown
> Source)
> at
> org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.lambda$trackDurationOfOperation$5(IOStatisticsBinding.java:499)
> at
> org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding$$Lambda$404/116715871.apply(Unknown
> Source)
> at
> org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration(IOStatisticsBinding.java:444)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2250)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.trackDurationAndSpan(S3AFileSystem.java:2269)
> at
> org.apache.hadoop.fs.s3a.S3AFileSystem.exists(S3AFileSystem.java:4602)
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:338)
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkTable(HiveMetaStoreChecker.java:288)
> at
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker.checkMetastore(HiveMetaStoreChecker.java:153)
> at org.apache.hadoop.hive.metastore.Msck.repair(Msck.java:139)
> at
> org.apache.hadoop.hive.ql.ddl.misc.msck.MsckOperation.execute(MsckOperation.java:77)
> at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:82)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213)
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105)
> at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357)
> at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:330)
> at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246)
> at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:756)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:505)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:499)
> at
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:166)
> at
> org.apache.hive.service.cli.operation.SQLOperation.runQuery(SQLOperation.java:230)
> at
> org.apache.hive.service.cli.operation.SQLOperation.access$700(SQLOperation.java:87)
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork$1.run(SQLOperation.java:329)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at
> org.apache.hive.service.cli.operation.SQLOperation$BackgroundWork.run(SQLOperation.java:349)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:748)
> Locked ownable synchronizers:
> - <0x00000003c4180c98> (a
> java.util.concurrent.locks.ReentrantLock$NonfairSync)
> - <0x00000003f74448c0> (a
> java.util.concurrent.ThreadPoolExecutor$Worker)
> {code}
> Currently HiveMetaStoreChecker.checkTable checks fs.exists for all the
> existing partitions one by one, This can be a costly operation for
> Object/Cloud storage if there are 1000s of partitions overcall it could run
> for long time. Changing it to run multithreaded will improve performance.
> We currently get all the partition directories in table path as part of
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java#L398
> Output from this call can be used to get the difference and reduce fs.exists
> call significantly.
> PS: In worse case if user removes all partitions dirs from table and runs
> msck repair table there would not be any perf gain, we can still add multi
> threaded calls to check multiple paths at once, however it is not usual to
> see 1000s of partitions being removed and then run msck.
--
This message was sent by Atlassian Jira
(v8.20.1#820001)