[ 
https://issues.apache.org/jira/browse/HDDS-5756?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ritesh H Shukla updated HDDS-5756:
----------------------------------
    Description: 
During load testing, the following crash was observed. 

bq. Current thread (0x00007f4e7b083800):  JavaThread 
"grpc-default-executor-1243" daemon [_thread_in_native, id=30646, 
stack(0x00007f4de67b6000,0x00007f4de68b7000)]
bq. 
bq. Stack: [0x00007f4de67b6000,0x00007f4de68b7000],  sp=0x00007f4de68b4c90,  
free space=1019k
bq. Native frames: (J=compiled Java code, A=aot compiled Java code, 
j=interpreted, Vv=VM code, C=native code)
bq. C  [librocksdbjni7210046854294290562.so+0x470f9e]  
rocksdb::ReadFileToString(rocksdb::FileSystem*, std::string const&, 
std::string*)+0x8e
bq. C  [librocksdbjni7210046854294290562.so+0x41a500]  
rocksdb::VersionSet::GetCurrentManifestPath(std::string const&, 
rocksdb::FileSystem*, std::string*, unsigned long*)+0x60
bq. C  [librocksdbjni7210046854294290562.so+0x431776]  
rocksdb::VersionSet::ListColumnFamilies(std::vector<std::string, 
std::allocator<std::string> >*, std::string const&, rocksdb::FileSystem*)+0x96
bq. C  [librocksdbjni7210046854294290562.so+0x300d90]  
rocksdb::DB::ListColumnFamilies(rocksdb::DBOptions const&, std::string const&, 
std::vector<std::string, std::allocator<std::string> >*)+0x30
bq. C  [librocksdbjni7210046854294290562.so+0x241e49]  
Java_org_rocksdb_RocksDB_listColumnFamilies+0x89
bq. J 4802  org.rocksdb.RocksDB.listColumnFamilies(JLjava/lang/String;)[[B (0 
bytes) @ 0x00007f4e8485dda2 [0x00007f4e8485dcc0+0x00000000000000e2]
bq. J 13730 c2 
org.apache.hadoop.hdds.utils.db.RDBStore.<init>(Ljava/io/File;Lorg/rocksdb/DBOptions;Lorg/rocksdb/WriteOptions;Ljava/util/Set;Lorg/apache/hadoop/hdds/utils/db/CodecRegistry;Z)V
 (717 bytes) @ 0x00007f4e85557484 [0x00007f4e85556b80+0x0000000000000904]
bq. J 14818 c2 
org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(JLjava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;Z)Lorg/apache/hadoop/ozone/container/metadata/DatanodeStore;
 (84 bytes) @ 0x00007f4e8530cbac [0x00007f4e85309960+0x000000000000324c]
bq. J 21714 c2 
org.apache.hadoop.ozone.container.common.utils.ContainerCache.getDB(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;)Lorg/apache/hadoop/ozone/container/common/utils/ReferenceCountedDB;
 (339 bytes) @ 0x00007f4e85f200fc [0x00007f4e85f1ec00+0x00000000000014fc]
bq. J 21274 c2 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.getBlock(Lorg/apache/hadoop/ozone/container/common/interfaces/Container;Lorg/apache/hadoop/hdds/client/BlockID;)Lorg/apache/hadoop/ozone/container/common/helpers/BlockData;
 (299 bytes) @ 0x00007f4e85d95cf8 [0x00007f4e85d95b80+0x0000000000000178]
bq. J 21490 c2 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
 (1105 bytes) @ 0x00007f4e85ebb89c [0x00007f4e85eb9100+0x000000000000279c]
bq. J 20711 c2 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
 (38 bytes) @ 0x00007f4e85bacf00 [0x00007f4e85baccc0+0x0000000000000240]
bq. J 20718 c2 
org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;)V
 (52 bytes) @ 0x00007f4e85bb71c8 [0x00007f4e85bb7160+0x0000000000000068]
bq. J 20363 c2 
org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext()V
 (77 bytes) @ 0x00007f4e8583e290 [0x00007f4e8583cda0+0x00000000000014f0]
bq. J 16086 c2 
org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run()V (35 bytes) 
@ 0x00007f4e855d0fb0 [0x00007f4e855d0b00+0x00000000000004b0]
bq. J 21355 c2 
org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run()V (99 
bytes) @ 0x00007f4e85e7469c [0x00007f4e85e745a0+0x00000000000000fc]
bq. J 20042 c2 
java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V
 java.base@[version redacted] (187 bytes) @ 0x00007f4e855b7780 
[0x00007f4e855b75a0+0x00000000000001e0]
bq. J 17003 c1 java.util.concurrent.ThreadPoolExecutor$Worker.run()V 
java.base@[version redacted] (9 bytes) @ 0x00007f4e7d27e9e4 
[0x00007f4e7d27e940+0x00000000000000a4]
bq. J 11466 c1 java.lang.Thread.run()V java.base@[version redacted] (17 bytes) @ 
0x00007f4e7e77fcd4 [0x00007f4e7e77fb60+0x0000000000000174]
bq. v  ~StubRoutines::call_stub
bq. V  [libjvm.so+0x885ac9]  JavaCalls::call_helper(JavaValue*, methodHandle 
const&, JavaCallArguments*, Thread*)+0x3b9
bq. V  [libjvm.so+0x883a6d]  JavaCalls::call_virtual(JavaValue*, Handle, 
Klass*, Symbol*, Symbol*, Thread*)+0x1ed
bq. V  [libjvm.so+0x92cd4c]  thread_entry(JavaThread*, Thread*)+0x6c
bq. V  [libjvm.so+0xdbea53]  JavaThread::thread_main_inner()+0x103
bq. V  [libjvm.so+0xdbed25]  JavaThread::run()+0x2a5
bq. V  [libjvm.so+0xdbaa3a]  Thread::call_run()+0x13a
bq. V  [libjvm.so+0xc0ad2e]  thread_native_entry(Thread*)+0xee

  was:
During load testing, the following crash was observed. 

Current thread (0x00007f4e7b083800):  JavaThread "grpc-default-executor-1243" 
daemon [_thread_in_native, id=30646, 
stack(0x00007f4de67b6000,0x00007f4de68b7000)]

Stack: [0x00007f4de67b6000,0x00007f4de68b7000],  sp=0x00007f4de68b4c90,  free 
space=1019k
Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted, 
Vv=VM code, C=native code)
C  [librocksdbjni7210046854294290562.so+0x470f9e]  
rocksdb::ReadFileToString(rocksdb::FileSystem*, std::string const&, 
std::string*)+0x8e
C  [librocksdbjni7210046854294290562.so+0x41a500]  
rocksdb::VersionSet::GetCurrentManifestPath(std::string const&, 
rocksdb::FileSystem*, std::string*, unsigned long*)+0x60
C  [librocksdbjni7210046854294290562.so+0x431776]  
rocksdb::VersionSet::ListColumnFamilies(std::vector<std::string, 
std::allocator<std::string> >*, std::string const&, rocksdb::FileSystem*)+0x96
C  [librocksdbjni7210046854294290562.so+0x300d90]  
rocksdb::DB::ListColumnFamilies(rocksdb::DBOptions const&, std::string const&, 
std::vector<std::string, std::allocator<std::string> >*)+0x30
C  [librocksdbjni7210046854294290562.so+0x241e49]  
Java_org_rocksdb_RocksDB_listColumnFamilies+0x89
J 4802  org.rocksdb.RocksDB.listColumnFamilies(JLjava/lang/String;)[[B (0 
bytes) @ 0x00007f4e8485dda2 [0x00007f4e8485dcc0+0x00000000000000e2]
J 13730 c2 
org.apache.hadoop.hdds.utils.db.RDBStore.<init>(Ljava/io/File;Lorg/rocksdb/DBOptions;Lorg/rocksdb/WriteOptions;Ljava/util/Set;Lorg/apache/hadoop/hdds/utils/db/CodecRegistry;Z)V
 (717 bytes) @ 0x00007f4e85557484 [0x00007f4e85556b80+0x0000000000000904]
J 14818 c2 
org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(JLjava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;Z)Lorg/apache/hadoop/ozone/container/metadata/DatanodeStore;
 (84 bytes) @ 0x00007f4e8530cbac [0x00007f4e85309960+0x000000000000324c]
J 21714 c2 
org.apache.hadoop.ozone.container.common.utils.ContainerCache.getDB(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;)Lorg/apache/hadoop/ozone/container/common/utils/ReferenceCountedDB;
 (339 bytes) @ 0x00007f4e85f200fc [0x00007f4e85f1ec00+0x00000000000014fc]
J 21274 c2 
org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.getBlock(Lorg/apache/hadoop/ozone/container/common/interfaces/Container;Lorg/apache/hadoop/hdds/client/BlockID;)Lorg/apache/hadoop/ozone/container/common/helpers/BlockData;
 (299 bytes) @ 0x00007f4e85d95cf8 [0x00007f4e85d95b80+0x0000000000000178]
J 21490 c2 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
 (1105 bytes) @ 0x00007f4e85ebb89c [0x00007f4e85eb9100+0x000000000000279c]
J 20711 c2 
org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
 (38 bytes) @ 0x00007f4e85bacf00 [0x00007f4e85baccc0+0x0000000000000240]
J 20718 c2 
org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;)V
 (52 bytes) @ 0x00007f4e85bb71c8 [0x00007f4e85bb7160+0x0000000000000068]
J 20363 c2 
org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext()V
 (77 bytes) @ 0x00007f4e8583e290 [0x00007f4e8583cda0+0x00000000000014f0]
J 16086 c2 org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run()V 
(35 bytes) @ 0x00007f4e855d0fb0 [0x00007f4e855d0b00+0x00000000000004b0]
J 21355 c2 
org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run()V (99 
bytes) @ 0x00007f4e85e7469c [0x00007f4e85e745a0+0x00000000000000fc]
J 20042 c2 
java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V
 java.base@[version redacted] (187 bytes) @ 0x00007f4e855b7780 
[0x00007f4e855b75a0+0x00000000000001e0]
J 17003 c1 java.util.concurrent.ThreadPoolExecutor$Worker.run()V 
java.base@[version redacted] (9 bytes) @ 0x00007f4e7d27e9e4 
[0x00007f4e7d27e940+0x00000000000000a4]
J 11466 c1 java.lang.Thread.run()V java.base@[version redacted] (17 bytes) @ 
0x00007f4e7e77fcd4 [0x00007f4e7e77fb60+0x0000000000000174]
v  ~StubRoutines::call_stub
V  [libjvm.so+0x885ac9]  JavaCalls::call_helper(JavaValue*, methodHandle 
const&, JavaCallArguments*, Thread*)+0x3b9
V  [libjvm.so+0x883a6d]  JavaCalls::call_virtual(JavaValue*, Handle, Klass*, 
Symbol*, Symbol*, Thread*)+0x1ed
V  [libjvm.so+0x92cd4c]  thread_entry(JavaThread*, Thread*)+0x6c
V  [libjvm.so+0xdbea53]  JavaThread::thread_main_inner()+0x103
V  [libjvm.so+0xdbed25]  JavaThread::run()+0x2a5
V  [libjvm.so+0xdbaa3a]  Thread::call_run()+0x13a
V  [libjvm.so+0xc0ad2e]  thread_native_entry(Thread*)+0xee


> SIGSEGV RocksDB crash in Datanode
> ---------------------------------
>
>                 Key: HDDS-5756
>                 URL: https://issues.apache.org/jira/browse/HDDS-5756
>             Project: Apache Ozone
>          Issue Type: Bug
>          Components: Ozone Datanode
>            Reporter: Ritesh H Shukla
>            Assignee: Ritesh H Shukla
>            Priority: Major
>
> During load testing, the following crash was observed. 
> bq. Current thread (0x00007f4e7b083800):  JavaThread 
> "grpc-default-executor-1243" daemon [_thread_in_native, id=30646, 
> stack(0x00007f4de67b6000,0x00007f4de68b7000)]
> bq. 
> bq. Stack: [0x00007f4de67b6000,0x00007f4de68b7000],  sp=0x00007f4de68b4c90,  
> free space=1019k
> bq. Native frames: (J=compiled Java code, A=aot compiled Java code, 
> j=interpreted, Vv=VM code, C=native code)
> bq. C  [librocksdbjni7210046854294290562.so+0x470f9e]  
> rocksdb::ReadFileToString(rocksdb::FileSystem*, std::string const&, 
> std::string*)+0x8e
> bq. C  [librocksdbjni7210046854294290562.so+0x41a500]  
> rocksdb::VersionSet::GetCurrentManifestPath(std::string const&, 
> rocksdb::FileSystem*, std::string*, unsigned long*)+0x60
> bq. C  [librocksdbjni7210046854294290562.so+0x431776]  
> rocksdb::VersionSet::ListColumnFamilies(std::vector<std::string, 
> std::allocator<std::string> >*, std::string const&, rocksdb::FileSystem*)+0x96
> bq. C  [librocksdbjni7210046854294290562.so+0x300d90]  
> rocksdb::DB::ListColumnFamilies(rocksdb::DBOptions const&, std::string 
> const&, std::vector<std::string, std::allocator<std::string> >*)+0x30
> bq. C  [librocksdbjni7210046854294290562.so+0x241e49]  
> Java_org_rocksdb_RocksDB_listColumnFamilies+0x89
> bq. J 4802  org.rocksdb.RocksDB.listColumnFamilies(JLjava/lang/String;)[[B (0 
> bytes) @ 0x00007f4e8485dda2 [0x00007f4e8485dcc0+0x00000000000000e2]
> bq. J 13730 c2 
> org.apache.hadoop.hdds.utils.db.RDBStore.<init>(Ljava/io/File;Lorg/rocksdb/DBOptions;Lorg/rocksdb/WriteOptions;Ljava/util/Set;Lorg/apache/hadoop/hdds/utils/db/CodecRegistry;Z)V
>  (717 bytes) @ 0x00007f4e85557484 [0x00007f4e85556b80+0x0000000000000904]
> bq. J 14818 c2 
> org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils.getUncachedDatanodeStore(JLjava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;Z)Lorg/apache/hadoop/ozone/container/metadata/DatanodeStore;
>  (84 bytes) @ 0x00007f4e8530cbac [0x00007f4e85309960+0x000000000000324c]
> bq. J 21714 c2 
> org.apache.hadoop.ozone.container.common.utils.ContainerCache.getDB(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Lorg/apache/hadoop/hdds/conf/ConfigurationSource;)Lorg/apache/hadoop/ozone/container/common/utils/ReferenceCountedDB;
>  (339 bytes) @ 0x00007f4e85f200fc [0x00007f4e85f1ec00+0x00000000000014fc]
> bq. J 21274 c2 
> org.apache.hadoop.ozone.container.keyvalue.impl.BlockManagerImpl.getBlock(Lorg/apache/hadoop/ozone/container/common/interfaces/Container;Lorg/apache/hadoop/hdds/client/BlockID;)Lorg/apache/hadoop/ozone/container/common/helpers/BlockData;
>  (299 bytes) @ 0x00007f4e85d95cf8 [0x00007f4e85d95b80+0x0000000000000178]
> bq. J 21490 c2 
> org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatchRequest(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
>  (1105 bytes) @ 0x00007f4e85ebb89c [0x00007f4e85eb9100+0x000000000000279c]
> bq. J 20711 c2 
> org.apache.hadoop.ozone.container.common.impl.HddsDispatcher.dispatch(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;Lorg/apache/hadoop/ozone/container/common/transport/server/ratis/DispatcherContext;)Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandResponseProto;
>  (38 bytes) @ 0x00007f4e85bacf00 [0x00007f4e85baccc0+0x0000000000000240]
> bq. J 20718 c2 
> org.apache.hadoop.ozone.container.common.transport.server.GrpcXceiverService$1.onNext(Lorg/apache/hadoop/hdds/protocol/datanode/proto/ContainerProtos$ContainerCommandRequestProto;)V
>  (52 bytes) @ 0x00007f4e85bb71c8 [0x00007f4e85bb7160+0x0000000000000068]
> bq. J 20363 c2 
> org.apache.ratis.thirdparty.io.grpc.internal.ServerImpl$JumpToApplicationThreadServerStreamListener$1MessagesAvailable.runInContext()V
>  (77 bytes) @ 0x00007f4e8583e290 [0x00007f4e8583cda0+0x00000000000014f0]
> bq. J 16086 c2 
> org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run()V (35 
> bytes) @ 0x00007f4e855d0fb0 [0x00007f4e855d0b00+0x00000000000004b0]
> bq. J 21355 c2 
> org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run()V (99 
> bytes) @ 0x00007f4e85e7469c [0x00007f4e85e745a0+0x00000000000000fc]
> bq. J 20042 c2 
> java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V
>  java.base@[version redacted] (187 bytes) @ 0x00007f4e855b7780 
> [0x00007f4e855b75a0+0x00000000000001e0]
> bq. J 17003 c1 java.util.concurrent.ThreadPoolExecutor$Worker.run()V 
> java.base@[version redacted] (9 bytes) @ 0x00007f4e7d27e9e4 
> [0x00007f4e7d27e940+0x00000000000000a4]
> bq. J 11466 c1 java.lang.Thread.run()V java.base@[version redacted] (17 bytes) @ 
> 0x00007f4e7e77fcd4 [0x00007f4e7e77fb60+0x0000000000000174]
> bq. v  ~StubRoutines::call_stub
> bq. V  [libjvm.so+0x885ac9]  JavaCalls::call_helper(JavaValue*, methodHandle 
> const&, JavaCallArguments*, Thread*)+0x3b9
> bq. V  [libjvm.so+0x883a6d]  JavaCalls::call_virtual(JavaValue*, Handle, 
> Klass*, Symbol*, Symbol*, Thread*)+0x1ed
> bq. V  [libjvm.so+0x92cd4c]  thread_entry(JavaThread*, Thread*)+0x6c
> bq. V  [libjvm.so+0xdbea53]  JavaThread::thread_main_inner()+0x103
> bq. V  [libjvm.so+0xdbed25]  JavaThread::run()+0x2a5
> bq. V  [libjvm.so+0xdbaa3a]  Thread::call_run()+0x13a
> bq. V  [libjvm.so+0xc0ad2e]  thread_native_entry(Thread*)+0xee



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to