[ https://issues.apache.org/jira/browse/HADOOP-18911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

konwu updated HADOOP-18911:
---------------------------
    Environment: 
hadoop 3.3.1

yarn 3.3.1

spark 3.1.3

 

  was:
hadoop 3.3.1

yarn 3.3.1

 


> DfsClientShmManager allocSlot hang on awaitUninterruptibly itself
> -----------------------------------------------------------------
>
>                 Key: HADOOP-18911
>                 URL: https://issues.apache.org/jira/browse/HADOOP-18911
>             Project: Hadoop Common
>          Issue Type: Bug
>          Components: hdfs-client
>    Affects Versions: 3.3.1
>         Environment: hadoop 3.3.1
> yarn 3.3.1
> spark 3.1.3
>  
>            Reporter: konwu
>            Priority: Major
>
> The Spark driver thread hangs on awaitUninterruptibly. The thread stack is as follows:
> {code:java}
> sun.misc.Unsafe.park(Native Method)
> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360)
> org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755)
> org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> java.io.DataInputStream.read(DataInputStream.java:100)
> java.nio.file.Files.copy(Files.java:2908)
> java.nio.file.Files.copy(Files.java:3027)
> sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220)
> sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216)
> java.security.AccessController.doPrivileged(Native Method)
> sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215)
> sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71)
> sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84)
> sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122)
> sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89)
> sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944)
> sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801)
> sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886)
> sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879)
> java.security.AccessController.doPrivileged(Native Method)
> sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878)
> sun.misc.URLClassPath$JarLoader.<init>(URLClassPath.java:829)
> sun.misc.URLClassPath$3.run(URLClassPath.java:575)
> sun.misc.URLClassPath$3.run(URLClassPath.java:565)
> java.security.AccessController.doPrivileged(Native Method)
> sun.misc.URLClassPath.getLoader(URLClassPath.java:564)
> sun.misc.URLClassPath.getLoader(URLClassPath.java:529)
> sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251)
> sun.misc.URLClassPath.findResource(URLClassPath.java:224)
> java.net.URLClassLoader$2.run(URLClassLoader.java:572)
> java.net.URLClassLoader$2.run(URLClassLoader.java:570)
> java.security.AccessController.doPrivileged(Native Method)
> java.net.URLClassLoader.findResource(URLClassLoader.java:569)
> java.lang.ClassLoader.getResource(ClassLoader.java:1096)
> java.lang.ClassLoader.getResource(ClassLoader.java:1091)
> org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809)
> org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081)
> org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040)
> org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013)
> org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736)
> org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736)
> org.apache.hadoop.conf.Configuration.get(Configuration.java:1225)
> org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279)
> org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686)
> org.apache.hadoop.io.nativeio.NativeIO$POSIX.<clinit>(NativeIO.java:334)
> org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method)
> org.apache.hadoop.io.nativeio.NativeIO.<clinit>(NativeIO.java:831)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.<init>(ShortCircuitShm.java:469)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.<init>(DfsClientShm.java:70)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:251)
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786)
> org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483)
> org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360)
> org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755)
> org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309)
> java.io.DataInputStream.read(DataInputStream.java:100)
> java.nio.file.Files.copy(Files.java:2908)
> java.nio.file.Files.copy(Files.java:3027)
> sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220)
> sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216)
> java.security.AccessController.doPrivileged(Native Method)
> sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215)
> sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71)
> sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84)
> sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122)
> sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89)
> sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944)
> sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801)
> sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886)
> sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879)
> java.security.AccessController.doPrivileged(Native Method)
> sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878)
> sun.misc.URLClassPath$JarLoader.<init>(URLClassPath.java:829)
> sun.misc.URLClassPath$3.run(URLClassPath.java:575)
> sun.misc.URLClassPath$3.run(URLClassPath.java:565)
> java.security.AccessController.doPrivileged(Native Method)
> sun.misc.URLClassPath.getLoader(URLClassPath.java:564)
> sun.misc.URLClassPath.getLoader(URLClassPath.java:529)
> sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251)
> sun.misc.URLClassPath.access$100(URLClassPath.java:66)
> sun.misc.URLClassPath$1.next(URLClassPath.java:276)
> sun.misc.URLClassPath$1.hasMoreElements(URLClassPath.java:287)
> java.net.URLClassLoader$3$1.run(URLClassLoader.java:604)
> java.net.URLClassLoader$3$1.run(URLClassLoader.java:602)
> java.security.AccessController.doPrivileged(Native Method)
> java.net.URLClassLoader$3.next(URLClassLoader.java:601)
> java.net.URLClassLoader$3.hasMoreElements(URLClassLoader.java:626)
> sun.misc.CompoundEnumeration.next(CompoundEnumeration.java:45)
> sun.misc.CompoundEnumeration.hasMoreElements(CompoundEnumeration.java:54)
> org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.parseDefinitions(ClassLoaderWeavingAdaptor.java:282)
> org.aspectj.weaver.loadtime.DefaultWeavingContext.getDefinitions(DefaultWeavingContext.java:130)
> org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.initialize(ClassLoaderWeavingAdaptor.java:173)
> org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.initialize(Aj.java:344)
> org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.getWeavingAdaptor(Aj.java:349)
> org.aspectj.weaver.loadtime.Aj$WeaverContainer.getWeaver(Aj.java:323)
> org.aspectj.weaver.loadtime.Aj.preProcess(Aj.java:115) => holding Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@879400492)
> org.aspectj.weaver.loadtime.ClassPreProcessorAgentAdapter.transform(ClassPreProcessorAgentAdapter.java:51)
> sun.instrument.TransformerManager.transform(TransformerManager.java:188)
> sun.instrument.InstrumentationImpl.transform(InstrumentationImpl.java:428)
> java.lang.ClassLoader.defineClass1(Native Method)
> java.lang.ClassLoader.defineClass(ClassLoader.java:763)
> java.lang.ClassLoader.defineClass(ClassLoader.java:642)
> org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedClientLoader.scala:244)
> org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.loadClass(IsolatedClientLoader.scala:236)
> java.lang.ClassLoader.loadClass(ClassLoader.java:411) => holding Monitor(java.lang.Object@815422741)
> java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:291) => holding Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader@1509517497)
> org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:492)
> org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:352)
> org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:71) => holding Monitor(org.apache.spark.sql.hive.HiveExternalCatalog@1343720469)
> org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:70)
>  {code}
> The hang occurs because
> {code:java}
> org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181){code}
> ends up calling allocSlot twice on the same thread. When only one free slot is left, the re-entrant inner call blocks on awaitUninterruptibly, waiting for a notification that only the outer call on this very thread could trigger, so the thread waits on itself forever.
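> For illustration, here is a minimal, self-contained sketch of that pattern (hypothetical names and simplified logic, not the actual Hadoop implementation): one thread starts an allocation, the allocation path re-enters the allocator on the same thread, and the inner call parks on awaitUninterruptibly waiting for a signal only the outer call could send. Running it hangs by design.
> {code:java}
> import java.util.concurrent.locks.Condition;
> import java.util.concurrent.locks.ReentrantLock;
>
> public class SelfWaitDemo {
>     private final ReentrantLock lock = new ReentrantLock();
>     private final Condition finishedLoading = lock.newCondition();
>     private boolean loading = false;
>     private int freeSlots = 0; // the single free slot is already taken
>
>     Object allocSlot() {
>         lock.lock();
>         try {
>             while (freeSlots == 0) {
>                 if (loading) {
>                     // An allocation is already in flight -- wait for it.
>                     // But the in-flight allocation belongs to THIS thread,
>                     // so finishedLoading.signalAll() can never be reached.
>                     System.out.println("parking on awaitUninterruptibly...");
>                     finishedLoading.awaitUninterruptibly(); // hangs forever
>                 } else {
>                     loading = true;
>                     lock.unlock();
>                     try {
>                         requestNewShm(); // side effect: re-enters allocSlot()
>                     } finally {
>                         lock.lock();
>                         loading = false;
>                         finishedLoading.signalAll();
>                     }
>                 }
>             }
>             freeSlots--;
>             return new Object();
>         } finally {
>             lock.unlock();
>         }
>     }
>
>     void requestNewShm() {
>         // Stand-in for DfsClientShm construction triggering NativeIO
>         // class initialization, which loads Configuration from a jar on
>         // HDFS and issues another short-circuit read on the same thread.
>         allocSlot();
>     }
>
>     public static void main(String[] args) {
>         new SelfWaitDemo().allocSlot(); // never returns
>     }
> }
> {code}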
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
