kerneltime commented on code in PR #7083:
URL: https://github.com/apache/ozone/pull/7083#discussion_r1733731119
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -133,42 +167,47 @@ private Lock getWriteLock(long containerID) {
return fileLock.get(containerID).writeLock();
}
- private ContainerProtos.ContainerChecksumInfo read(ContainerData data)
throws IOException {
+ private Optional<ContainerProtos.ContainerChecksumInfo.Builder>
read(ContainerData data) throws IOException {
long containerID = data.getContainerID();
+ File checksumFile = getContainerChecksumFile(data);
Lock readLock = getReadLock(containerID);
readLock.lock();
try {
- File checksumFile = getContainerChecksumFile(data);
- // If the checksum file has not been created yet, return an empty
instance.
- // Since all writes happen as part of an atomic read-modify-write cycle
that requires a write lock, two empty
- // instances for the same container obtained only under the read lock
will not conflict.
if (!checksumFile.exists()) {
- LOG.debug("No checksum file currently exists for container {} at the
path {}. Returning an empty instance.",
- containerID, checksumFile);
- return ContainerProtos.ContainerChecksumInfo.newBuilder()
- .setContainerID(containerID)
- .build();
+ LOG.debug("No checksum file currently exists for container {} at the
path {}", containerID, checksumFile);
+ return Optional.empty();
}
try (FileInputStream inStream = new FileInputStream(checksumFile)) {
return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(),
- () -> ContainerProtos.ContainerChecksumInfo.parseFrom(inStream));
+ () ->
Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream).toBuilder()));
}
} catch (IOException ex) {
metrics.incrementMerkleTreeReadFailures();
throw new IOException("Error occurred when reading container merkle tree
for containerID "
- + data.getContainerID(), ex);
+ + data.getContainerID() + " at path " + checksumFile, ex);
} finally {
readLock.unlock();
}
}
private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo
checksumInfo) throws IOException {
+ // Make sure callers filled in required fields before writing.
+ Preconditions.assertTrue(checksumInfo.hasContainerID());
+
+ File checksumFile = getContainerChecksumFile(data);
+ File tmpChecksumFile = getTmpContainerChecksumFile(data);
+
Lock writeLock = getWriteLock(data.getContainerID());
writeLock.lock();
- try (FileOutputStream outStream = new
FileOutputStream(getContainerChecksumFile(data))) {
- captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(),
- () -> checksumInfo.writeTo(outStream));
+ try (FileOutputStream tmpOutputStream = new
FileOutputStream(tmpChecksumFile)) {
+ // Write to a tmp file and rename it into place.
+ captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), () -> {
+ checksumInfo.writeTo(tmpOutputStream);
+ Files.move(tmpChecksumFile.toPath(), checksumFile.toPath(),
REPLACE_EXISTING);
Review Comment:
```suggestion
Files.move(tmpChecksumFile.toPath(), checksumFile.toPath(),
ATOMIC_MOVE);
```
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -121,10 +149,16 @@ public ContainerDiff diff(KeyValueContainerData
thisContainer, ContainerProtos.C
/**
* Returns the container checksum tree file for the specified container
without deserializing it.
*/
+ @VisibleForTesting
public static File getContainerChecksumFile(ContainerData data) {
return new File(data.getMetadataPath(), data.getContainerID() + ".tree");
}
+ @VisibleForTesting
+ public static File getTmpContainerChecksumFile(ContainerData data) {
+ return new File(data.getMetadataPath(), data.getContainerID() +
".tree.tmp");
Review Comment:
The temporary file name should include a random component to avoid
conflicts with leftover files from previous runs and other spurious errors.
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -133,42 +167,47 @@ private Lock getWriteLock(long containerID) {
return fileLock.get(containerID).writeLock();
}
- private ContainerProtos.ContainerChecksumInfo read(ContainerData data)
throws IOException {
+ private Optional<ContainerProtos.ContainerChecksumInfo.Builder>
read(ContainerData data) throws IOException {
long containerID = data.getContainerID();
+ File checksumFile = getContainerChecksumFile(data);
Lock readLock = getReadLock(containerID);
readLock.lock();
try {
- File checksumFile = getContainerChecksumFile(data);
- // If the checksum file has not been created yet, return an empty
instance.
- // Since all writes happen as part of an atomic read-modify-write cycle
that requires a write lock, two empty
- // instances for the same container obtained only under the read lock
will not conflict.
if (!checksumFile.exists()) {
- LOG.debug("No checksum file currently exists for container {} at the
path {}. Returning an empty instance.",
- containerID, checksumFile);
- return ContainerProtos.ContainerChecksumInfo.newBuilder()
- .setContainerID(containerID)
- .build();
+ LOG.debug("No checksum file currently exists for container {} at the
path {}", containerID, checksumFile);
+ return Optional.empty();
}
try (FileInputStream inStream = new FileInputStream(checksumFile)) {
return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(),
- () -> ContainerProtos.ContainerChecksumInfo.parseFrom(inStream));
+ () ->
Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream).toBuilder()));
}
} catch (IOException ex) {
metrics.incrementMerkleTreeReadFailures();
throw new IOException("Error occurred when reading container merkle tree
for containerID "
- + data.getContainerID(), ex);
+ + data.getContainerID() + " at path " + checksumFile, ex);
} finally {
readLock.unlock();
}
}
private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo
checksumInfo) throws IOException {
+ // Make sure callers filled in required fields before writing.
+ Preconditions.assertTrue(checksumInfo.hasContainerID());
+
+ File checksumFile = getContainerChecksumFile(data);
+ File tmpChecksumFile = getTmpContainerChecksumFile(data);
+
Lock writeLock = getWriteLock(data.getContainerID());
writeLock.lock();
Review Comment:
The locks are reentrant, but acquiring the lock can be skipped here: this
is a private method, and the caller already holds the lock before invoking
it. Paying the cost of re-acquiring an already-held lock is wasteful.
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -121,10 +149,16 @@ public ContainerDiff diff(KeyValueContainerData
thisContainer, ContainerProtos.C
/**
* Returns the container checksum tree file for the specified container
without deserializing it.
*/
+ @VisibleForTesting
public static File getContainerChecksumFile(ContainerData data) {
return new File(data.getMetadataPath(), data.getContainerID() + ".tree");
}
+ @VisibleForTesting
+ public static File getTmpContainerChecksumFile(ContainerData data) {
Review Comment:
Nit: this would be a simpler signature than passing in a large object.
```suggestion
public static File getTmpContainerChecksumFile(String path, long
containerID) {
```
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -133,42 +167,47 @@ private Lock getWriteLock(long containerID) {
return fileLock.get(containerID).writeLock();
}
- private ContainerProtos.ContainerChecksumInfo read(ContainerData data)
throws IOException {
+ private Optional<ContainerProtos.ContainerChecksumInfo.Builder>
read(ContainerData data) throws IOException {
long containerID = data.getContainerID();
+ File checksumFile = getContainerChecksumFile(data);
Lock readLock = getReadLock(containerID);
readLock.lock();
Review Comment:
Since the code now writes via a rename, there should be no need for a read
lock. The only reason we need the write lock is that writes perform a
read-modify-write cycle, which needs protection between concurrent writers;
readers can rely on the atomic rename instead.
##########
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java:
##########
@@ -133,42 +167,47 @@ private Lock getWriteLock(long containerID) {
return fileLock.get(containerID).writeLock();
}
- private ContainerProtos.ContainerChecksumInfo read(ContainerData data)
throws IOException {
+ private Optional<ContainerProtos.ContainerChecksumInfo.Builder>
read(ContainerData data) throws IOException {
long containerID = data.getContainerID();
+ File checksumFile = getContainerChecksumFile(data);
Lock readLock = getReadLock(containerID);
readLock.lock();
try {
- File checksumFile = getContainerChecksumFile(data);
- // If the checksum file has not been created yet, return an empty
instance.
- // Since all writes happen as part of an atomic read-modify-write cycle
that requires a write lock, two empty
- // instances for the same container obtained only under the read lock
will not conflict.
if (!checksumFile.exists()) {
- LOG.debug("No checksum file currently exists for container {} at the
path {}. Returning an empty instance.",
- containerID, checksumFile);
- return ContainerProtos.ContainerChecksumInfo.newBuilder()
- .setContainerID(containerID)
- .build();
+ LOG.debug("No checksum file currently exists for container {} at the
path {}", containerID, checksumFile);
+ return Optional.empty();
}
try (FileInputStream inStream = new FileInputStream(checksumFile)) {
return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(),
- () -> ContainerProtos.ContainerChecksumInfo.parseFrom(inStream));
+ () ->
Optional.of(ContainerProtos.ContainerChecksumInfo.parseFrom(inStream).toBuilder()));
}
} catch (IOException ex) {
metrics.incrementMerkleTreeReadFailures();
throw new IOException("Error occurred when reading container merkle tree
for containerID "
- + data.getContainerID(), ex);
+ + data.getContainerID() + " at path " + checksumFile, ex);
} finally {
readLock.unlock();
}
}
private void write(ContainerData data, ContainerProtos.ContainerChecksumInfo
checksumInfo) throws IOException {
+ // Make sure callers filled in required fields before writing.
+ Preconditions.assertTrue(checksumInfo.hasContainerID());
+
+ File checksumFile = getContainerChecksumFile(data);
+ File tmpChecksumFile = getTmpContainerChecksumFile(data);
+
Lock writeLock = getWriteLock(data.getContainerID());
writeLock.lock();
- try (FileOutputStream outStream = new
FileOutputStream(getContainerChecksumFile(data))) {
- captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(),
- () -> checksumInfo.writeTo(outStream));
+ try (FileOutputStream tmpOutputStream = new
FileOutputStream(tmpChecksumFile)) {
+ // Write to a tmp file and rename it into place.
+ captureLatencyNs(metrics.getWriteContainerMerkleTreeLatencyNS(), () -> {
+ checksumInfo.writeTo(tmpOutputStream);
+ Files.move(tmpChecksumFile.toPath(), checksumFile.toPath(),
REPLACE_EXISTING);
Review Comment:
Ref: sun.nio.fs.UnixCopyFile#move
```
if (flags.atomicMove) {
try {
rename(source, target);
} catch (UnixException x) {
if (x.errno() == EXDEV) {
throw new AtomicMoveNotSupportedException(
source.getPathForExceptionMessage(),
target.getPathForExceptionMessage(),
x.errorString());
}
x.rethrowAsIOException(source, target);
}
return;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]