This is an automated email from the ASF dual-hosted git repository.
arp pushed a commit to branch ozone-0.5.0
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git
The following commit(s) were added to refs/heads/ozone-0.5.0 by this push:
new cf3f6ed HDDS-3116. Datanode sometimes fails to start with NPE when
starting Ratis xceiver server (#630)
cf3f6ed is described below
commit cf3f6ed0da13ba7d71c2ae181e369eaf5e51aa6b
Author: Stephen O'Donnell <[email protected]>
AuthorDate: Thu Mar 12 08:40:31 2020 +0000
HDDS-3116. Datanode sometimes fails to start with NPE when starting Ratis
xceiver server (#630)
(cherry picked from commit c1997218a4e1a6695a275c73cf85360cd046329c)
---
.../common/statemachine/DatanodeStateMachine.java | 27 +++++++++++++++++++---
1 file changed, 24 insertions(+), 3 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index 5229ae8..dc39025 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -90,6 +92,11 @@ public class DatanodeStateMachine implements Closeable {
private JvmPauseMonitor jvmPauseMonitor;
private CertificateClient dnCertClient;
private final HddsDatanodeStopService hddsDatanodeStopService;
+ /**
+ * Used to synchronize access to the OzoneContainer object, which is created
+ * in the constructor in a non-thread-safe way - see HDDS-3116.
+ */
+ private final ReadWriteLock constructionLock = new ReentrantReadWriteLock();
/**
* Constructs a a datanode state machine.
@@ -113,8 +120,16 @@ public class DatanodeStateMachine implements Closeable {
.setNameFormat("Datanode State Machine Thread - %d").build());
connectionManager = new SCMConnectionManager(conf);
context = new StateContext(this.conf, DatanodeStates.getInitState(), this);
- container = new OzoneContainer(this.datanodeDetails,
- ozoneConf, context, certClient);
+ // OzoneContainer instance is used in a non-thread safe way by the context
+ // passed to its constructor, so we must synchronize its access. See
+ // HDDS-3116 for more details.
+ constructionLock.writeLock().lock();
+ try {
+ container = new OzoneContainer(this.datanodeDetails,
+ ozoneConf, context, certClient);
+ } finally {
+ constructionLock.writeLock().unlock();
+ }
dnCertClient = certClient;
nextHB = new AtomicLong(Time.monotonicNow());
@@ -173,7 +188,13 @@ public class DatanodeStateMachine implements Closeable {
}
public OzoneContainer getContainer() {
- return this.container;
+ // See HDDS-3116 to explain the need for this lock
+ constructionLock.readLock().lock();
+ try {
+ return this.container;
+ } finally {
+ constructionLock.readLock().unlock();
+ }
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]