[
https://issues.apache.org/jira/browse/AMBARI-4930?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13969275#comment-13969275
]
Ximo Guanter commented on AMBARI-4930:
--------------------------------------
OK, I was able to get a jstack dump by modifying the main method of
AmbariServer.java to call jstack when an exception is thrown. It is not ideal
since it does not capture the precise moment the exception happens, but it
should be close enough:
{code}
"Attach Listener" daemon prio=10 tid=0x00007f14cc001000 nid=0x42fc waiting on
condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"process reaper" daemon prio=10 tid=0x00007f14fc804000 nid=0x42ef runnable
[0x00007f14d216d000]
java.lang.Thread.State: RUNNABLE
at java.lang.UNIXProcess.waitForProcessExit(Native Method)
at java.lang.UNIXProcess.access$900(UNIXProcess.java:20)
at java.lang.UNIXProcess$1$1.run(UNIXProcess.java:132)
Locked ownable synchronizers:
- None
"Thread-2" prio=10 tid=0x00007f14fc82a000 nid=0x42ed runnable
[0x00007f14f0293000]
java.lang.Thread.State: RUNNABLE
at
org.eclipse.persistence.internal.helper.ConcurrencyManager.getDeferredLockManagers(ConcurrencyManager.java:288)
at
org.eclipse.persistence.internal.helper.ConcurrencyManager.getDeferredLockManager(ConcurrencyManager.java:281)
at
org.eclipse.persistence.internal.helper.ConcurrencyManager.releaseDeferredLock(ConcurrencyManager.java:431)
at
org.eclipse.persistence.internal.identitymaps.CacheKey.releaseDeferredLock(CacheKey.java:419)
at
org.eclipse.persistence.internal.descriptors.ObjectBuilder.buildObject(ObjectBuilder.java:872)
at
org.eclipse.persistence.internal.descriptors.ObjectBuilder.buildObject(ObjectBuilder.java:611)
at
org.eclipse.persistence.internal.descriptors.ObjectBuilder.buildObjectsInto(ObjectBuilder.java:1112)
at
org.eclipse.persistence.queries.ReadAllQuery.executeObjectLevelReadQuery(ReadAllQuery.java:437)
at
org.eclipse.persistence.queries.ObjectLevelReadQuery.executeDatabaseQuery(ObjectLevelReadQuery.java:1150)
at
org.eclipse.persistence.queries.DatabaseQuery.execute(DatabaseQuery.java:852)
at
org.eclipse.persistence.queries.ObjectLevelReadQuery.execute(ObjectLevelReadQuery.java:1109)
at
org.eclipse.persistence.queries.ReadAllQuery.execute(ReadAllQuery.java:393)
at
org.eclipse.persistence.internal.sessions.AbstractSession.internalExecuteQuery(AbstractSession.java:2977)
at
org.eclipse.persistence.internal.sessions.AbstractSession.executeQuery(AbstractSession.java:1607)
at
org.eclipse.persistence.internal.sessions.AbstractSession.executeQuery(AbstractSession.java:1589)
at
org.eclipse.persistence.internal.indirection.QueryBasedValueHolder.instantiate(QueryBasedValueHolder.java:112)
at
org.eclipse.persistence.internal.indirection.QueryBasedValueHolder.instantiate(QueryBasedValueHolder.java:99)
at
org.eclipse.persistence.internal.indirection.DatabaseValueHolder.getValue(DatabaseValueHolder.java:88)
- locked <0x000000078191a1c0> (a
org.eclipse.persistence.internal.indirection.QueryBasedValueHolder)
at
org.eclipse.persistence.internal.indirection.UnitOfWorkValueHolder.instantiateImpl(UnitOfWorkValueHolder.java:161)
at
org.eclipse.persistence.internal.indirection.UnitOfWorkValueHolder.instantiate(UnitOfWorkValueHolder.java:222)
at
org.eclipse.persistence.internal.indirection.DatabaseValueHolder.getValue(DatabaseValueHolder.java:88)
- locked <0x000000078171ecc0> (a
org.eclipse.persistence.internal.indirection.UnitOfWorkQueryValueHolder)
at
org.eclipse.persistence.indirection.IndirectList.buildDelegate(IndirectList.java:252)
at
org.eclipse.persistence.indirection.IndirectList.getDelegate(IndirectList.java:423)
- locked <0x00000007816cc9b0> (a
org.eclipse.persistence.indirection.IndirectList)
at
org.eclipse.persistence.indirection.IndirectList$1.<init>(IndirectList.java:551)
at
org.eclipse.persistence.indirection.IndirectList.listIterator(IndirectList.java:550)
at
org.eclipse.persistence.indirection.IndirectList.iterator(IndirectList.java:514)
at
org.apache.ambari.server.state.cluster.ClusterImpl.getDesiredConfigByType(ClusterImpl.java:1067)
at
org.apache.ambari.server.agent.HeartbeatMonitor.generateStatusCommands(HeartbeatMonitor.java:202)
at
org.apache.ambari.server.agent.HeartbeatMonitor.doWork(HeartbeatMonitor.java:164)
at
org.apache.ambari.server.agent.HeartbeatMonitor.run(HeartbeatMonitor.java:90)
at java.lang.Thread.run(Thread.java:662)
Locked ownable synchronizers:
- None
"HBaseMasterPortScanner" prio=10 tid=0x00007f14fc589000 nid=0x42ec in
Object.wait() [0x00007f14f0395000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x000000078172a8f8> (a java.lang.Object)
at java.lang.Object.wait(Object.java:485)
at
org.apache.ambari.server.state.svccomphost.HBaseMasterPortScanner.run(HBaseMasterPortScanner.java:220)
- locked <0x000000078172a8f8> (a java.lang.Object)
at java.lang.Thread.run(Thread.java:662)
Locked ownable synchronizers:
- None
"Timer-0" prio=10 tid=0x00007f14fc91f800 nid=0x42eb in Object.wait()
[0x00007f14f0496000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x000000078172a9d0> (a java.util.TaskQueue)
at java.lang.Object.wait(Object.java:485)
at java.util.TimerThread.mainLoop(Timer.java:483)
- locked <0x000000078172a9d0> (a java.util.TaskQueue)
at java.util.TimerThread.run(Timer.java:462)
Locked ownable synchronizers:
- None
"com.google.inject.internal.util.$Finalizer" daemon prio=10
tid=0x00007f14fc65d800 nid=0x42e8 in Object.wait() [0x00007f14f819b000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x000000078172aa68> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:118)
- locked <0x000000078172aa68> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:134)
at com.google.inject.internal.util.$Finalizer.run(Finalizer.java:114)
Locked ownable synchronizers:
- None
"Low Memory Detector" daemon prio=10 tid=0x00007f14fc0f0800 nid=0x42e6 runnable
[0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"C2 CompilerThread1" daemon prio=10 tid=0x00007f14fc0ee000 nid=0x42e5 runnable
[0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"C2 CompilerThread0" daemon prio=10 tid=0x00007f14fc0ec000 nid=0x42e4 waiting
on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"Signal Dispatcher" daemon prio=10 tid=0x00007f14fc0e9800 nid=0x42e3 runnable
[0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"Surrogate Locker Thread (Concurrent GC)" daemon prio=10 tid=0x00007f14fc0e7800
nid=0x42e2 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
"Finalizer" daemon prio=10 tid=0x00007f14fc0cb000 nid=0x42e1 in Object.wait()
[0x00007f14f8b9c000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x000000078172ae20> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:118)
- locked <0x000000078172ae20> (a java.lang.ref.ReferenceQueue$Lock)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:134)
at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:159)
Locked ownable synchronizers:
- None
"Reference Handler" daemon prio=10 tid=0x00007f14fc0c9000 nid=0x42e0 in
Object.wait() [0x00007f14f8c9d000]
java.lang.Thread.State: WAITING (on object monitor)
at java.lang.Object.wait(Native Method)
- waiting on <0x0000000781480120> (a java.lang.ref.Reference$Lock)
at java.lang.Object.wait(Object.java:485)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:116)
- locked <0x0000000781480120> (a java.lang.ref.Reference$Lock)
Locked ownable synchronizers:
- None
"main" prio=10 tid=0x00007f14fc00e000 nid=0x42d7 runnable [0x00007f1502215000]
java.lang.Thread.State: RUNNABLE
at java.io.FileInputStream.readBytes(Native Method)
at java.io.FileInputStream.read(FileInputStream.java:220)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
- locked <0x000000077bce7508> (a java.io.BufferedInputStream)
at sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:264)
at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:306)
at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:158)
- locked <0x000000077ba13948> (a java.io.InputStreamReader)
at java.io.InputStreamReader.read(InputStreamReader.java:167)
at java.io.Reader.read(Reader.java:83)
at java.util.Scanner.readInput(Scanner.java:780)
at java.util.Scanner.hasNext(Scanner.java:1315)
at
org.apache.ambari.server.controller.AmbariServer.main(AmbariServer.java:468)
Locked ownable synchronizers:
- None
"VM Thread" prio=10 tid=0x00007f14fc0c2800 nid=0x42df runnable
"Gang worker#0 (Parallel GC Threads)" prio=10 tid=0x00007f14fc01c800 nid=0x42d8
runnable
"Gang worker#1 (Parallel GC Threads)" prio=10 tid=0x00007f14fc01e800 nid=0x42d9
runnable
"Concurrent Mark-Sweep GC Thread" prio=10 tid=0x00007f14fc07d000 nid=0x42dd
runnable
"VM Periodic Task Thread" prio=10 tid=0x00007f14fc0fb800 nid=0x42e7 waiting on
condition
JNI global references: 1729
{code}
> Ambari initialization problems after upgrade to 1.4.1
> -----------------------------------------------------
>
> Key: AMBARI-4930
> URL: https://issues.apache.org/jira/browse/AMBARI-4930
> Project: Ambari
> Issue Type: Bug
> Affects Versions: 1.4.1
> Reporter: Ximo Guanter
>
> Starting the Ambari Server sometimes fails with the following error:
> {code}
> 04:44:56,972 INFO [main] Configuration:511 - Web App DIR test
> /usr/lib/ambari-server/web
> 04:44:56,975 INFO [main] CertificateManager:70 - Initialization of root
> certificate
> 04:44:56,975 INFO [main] CertificateManager:72 - Certificate exists:true
> 04:44:57,003 INFO [main] AmbariServer:338 - ********* Initializing Clusters
> **********
> 04:44:57,285 WARN [Thread-2] HeartbeatMonitor:123 - Heartbeat lost from host
> andromeda-compute02.hi.inet
> 04:44:57,295 WARN [Thread-2] HeartbeatMonitor:123 - Heartbeat lost from host
> andromeda-compute03.hi.inet
> 04:44:57,296 WARN [Thread-2] HeartbeatMonitor:123 - Heartbeat lost from host
> andromeda-compute06.hi.inet
> 04:44:57,296 WARN [Thread-2] HeartbeatMonitor:123 - Heartbeat lost from host
> andromeda-compute04.hi.inet
> 04:44:57,297 WARN [Thread-2] HeartbeatMonitor:123 - Heartbeat lost from host
> andromeda-data99.hi.inet
> 04:44:57,318 ERROR [main] AmbariServer:461 - Failed to run the Ambari Server
> Local Exception Stack:
> Exception [EclipseLink-2004] (Eclipse Persistence Services -
> 2.4.0.v20120608-r11652):
> org.eclipse.persistence.exceptions.ConcurrencyException
> Exception Description: A signal was attempted before wait() on
> ConcurrencyManager. This normally means that an attempt was made to
> commit or rollback a transaction before it was started, or to rollback a
> transaction twice.
> at
> org.eclipse.persistence.exceptions.ConcurrencyException.signalAttemptedBeforeWait(ConcurrencyException.java:84)
> at
> org.eclipse.persistence.internal.helper.ConcurrencyManager.releaseReadLock(ConcurrencyManager.java:489)
> at
> org.eclipse.persistence.internal.identitymaps.CacheKey.releaseReadLock(CacheKey.java:392)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkImpl.cloneAndRegisterObject(UnitOfWorkImpl.java:1022)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkImpl.cloneAndRegisterObject(UnitOfWorkImpl.java:933)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkIdentityMapAccessor.getAndCloneCacheKeyFromParent(UnitOfWorkIdentityMapAccessor.java:193)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkIdentityMapAccessor.getFromIdentityMap(UnitOfWorkIdentityMapAccessor.java:121)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkImpl.registerExistingObject(UnitOfWorkImpl.java:3906)
> at
> org.eclipse.persistence.internal.sessions.UnitOfWorkImpl.registerExistingObject(UnitOfWorkImpl.java:3861)
> at
> org.eclipse.persistence.mappings.CollectionMapping.buildElementUnitOfWorkClone(CollectionMapping.java:296)
> at
> org.eclipse.persistence.mappings.CollectionMapping.buildElementClone(CollectionMapping.java:309)
> at
> org.eclipse.persistence.internal.queries.ContainerPolicy.addNextValueFromIteratorInto(ContainerPolicy.java:214)
> at
> org.eclipse.persistence.mappings.CollectionMapping.buildCloneForPartObject(CollectionMapping.java:222)
> at
> org.eclipse.persistence.internal.indirection.UnitOfWorkQueryValueHolder.buildCloneFor(UnitOfWorkQueryValueHolder.java:56)
> at
> org.eclipse.persistence.internal.indirection.UnitOfWorkValueHolder.instantiateImpl(UnitOfWorkValueHolder.java:161)
> at
> org.eclipse.persistence.internal.indirection.UnitOfWorkValueHolder.instantiate(UnitOfWorkValueHolder.java:222)
> at
> org.eclipse.persistence.internal.indirection.DatabaseValueHolder.getValue(DatabaseValueHolder.java:88)
> at
> org.eclipse.persistence.indirection.IndirectList.buildDelegate(IndirectList.java:244)
> at
> org.eclipse.persistence.indirection.IndirectList.getDelegate(IndirectList.java:415)
> at
> org.eclipse.persistence.indirection.IndirectList.isEmpty(IndirectList.java:490)
> at
> org.apache.ambari.server.state.ServiceImpl.<init>(ServiceImpl.java:125)
> at
> org.apache.ambari.server.state.ServiceImpl$$EnhancerByGuice$$807a405e.<init>(<generated>)
> at
> org.apache.ambari.server.state.ServiceImpl$$EnhancerByGuice$$807a405e$$FastClassByGuice$$1c1221ad.newInstance(<generated>)
> at
> com.google.inject.internal.cglib.reflect.$FastConstructor.newInstance(FastConstructor.java:40)
> at
> com.google.inject.internal.ProxyFactory$ProxyConstructor.newInstance(ProxyFactory.java:260)
> at
> com.google.inject.internal.ConstructorInjector.construct(ConstructorInjector.java:85)
> at
> com.google.inject.internal.ConstructorBindingImpl$Factory.get(ConstructorBindingImpl.java:254)
> at
> com.google.inject.internal.InjectorImpl$4$1.call(InjectorImpl.java:978)
> at
> com.google.inject.internal.InjectorImpl.callInContext(InjectorImpl.java:1024)
> at
> com.google.inject.internal.InjectorImpl$4.get(InjectorImpl.java:974)
> at
> com.google.inject.assistedinject.FactoryProvider2.invoke(FactoryProvider2.java:632)
> at $Proxy12.createExisting(Unknown Source)
> at
> org.apache.ambari.server.state.cluster.ClusterImpl.loadServices(ClusterImpl.java:218)
> at
> org.apache.ambari.server.state.cluster.ClusterImpl.debugDump(ClusterImpl.java:808)
> at
> org.apache.ambari.server.state.cluster.ClustersImpl.debugDump(ClustersImpl.java:566)
> at
> org.apache.ambari.server.controller.AmbariServer.run(AmbariServer.java:341)
> at
> org.apache.ambari.server.controller.AmbariServer.main(AmbariServer.java:458)
> {code}
> The issue seems to be related to the amount of data in the {{ambarirca}}
> database: it reproduces 80-90% of the time when we try to start the Ambari
> Server in an environment where that DB is 1GB+, and it basically never
> reproduces in environments with a small DB.
> Running the {{VACUUM FULL}} command does not help minimize the problem.
--
This message was sent by Atlassian JIRA
(v6.2#6252)