This is an automated email from the ASF dual-hosted git repository.

rong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new a0ac60e0bf2 [IOTDB-6262] Pipe: fix NPE while deserializing WAL (caused 
by non-atomic WAL rename operation during pipe read) (#11753)
a0ac60e0bf2 is described below

commit a0ac60e0bf2b942b6531a3f80abaed317a790efc
Author: yschengzi <[email protected]>
AuthorDate: Wed Dec 20 19:33:25 2023 +0800

    [IOTDB-6262] Pipe: fix NPE while deserializing WAL (caused by non-atomic 
WAL rename operation during pipe read) (#11753)
    
    Problem:
    - Due to the concurrent execution of the renaming of the wal file and the 
reading of the wal file by the pipe, the pipe may not be able to fetch the wal 
file when it reads the wal file, resulting in an NPE.
    
    Solution:
    1. change Files.renameTo in wal code to Files.move for better atomicity and 
visibility.
    2. add logging when wal fetches a file
    3. add logging when pipe fetches a wal value to print the memtable id and 
wal file version id that it failed to fetch.
---
 .../dataregion/wal/buffer/AbstractWALBuffer.java           | 14 +++++++++-----
 .../db/storageengine/dataregion/wal/node/WALNode.java      |  2 +-
 .../dataregion/wal/utils/WALEntryHandler.java              | 14 +++++++++++---
 .../storageengine/dataregion/wal/utils/WALFileUtils.java   |  8 ++++++--
 4 files changed, 27 insertions(+), 11 deletions(-)

diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/buffer/AbstractWALBuffer.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/buffer/AbstractWALBuffer.java
index d8620ca597d..08bca5f6649 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/buffer/AbstractWALBuffer.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/buffer/AbstractWALBuffer.java
@@ -31,6 +31,8 @@ import org.slf4j.LoggerFactory;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
 import java.util.Arrays;
 
 public abstract class AbstractWALBuffer implements IWALBuffer {
@@ -48,6 +50,7 @@ public abstract class AbstractWALBuffer implements IWALBuffer 
{
   protected volatile long currentWALFileVersion;
   // current search index
   protected volatile long currentSearchIndex;
+
   // current wal file log writer
   // it's safe to use volatile here to make this reference thread-safe.
   @SuppressWarnings("squid:S3077")
@@ -106,11 +109,12 @@ public abstract class AbstractWALBuffer implements 
IWALBuffer {
               WALFileUtils.parseStartSearchIndex(lastName),
               fileStatus);
       File targetFile = SystemFileFactory.INSTANCE.getFile(logDirectory, 
targetName);
-      if (lastFile.renameTo(targetFile)) {
-        lastFile = targetFile;
-      } else {
-        logger.error("Fail to rename file {} to {}", lastName, targetName);
-      }
+      Files.move(
+          lastFile.toPath(),
+          targetFile.toPath(),
+          StandardCopyOption.REPLACE_EXISTING,
+          StandardCopyOption.ATOMIC_MOVE);
+      lastFile = targetFile;
     }
     // roll file
     long nextFileVersion = currentWALFileVersion + 1;
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java
index c84db55510e..16e4cb5e2b4 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/node/WALNode.java
@@ -930,7 +930,7 @@ public class WALNode implements IWALNode {
   }
 
   /** Get the .wal file starts with the specified version id */
-  public File getWALFile(long versionId) {
+  public File getWALFile(long versionId) throws FileNotFoundException {
     return WALFileUtils.getWALFile(logDirectory, versionId);
   }
 
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALEntryHandler.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALEntryHandler.java
index c9e43746aff..821263c6858 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALEntryHandler.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALEntryHandler.java
@@ -41,13 +41,16 @@ public class WALEntryHandler {
   private static final Logger logger = 
LoggerFactory.getLogger(WALEntryHandler.class);
 
   private long memTableId = -1;
+
   // cached value, null after this value is flushed to wal successfully
   @SuppressWarnings("squid:S3077")
   private volatile WALEntryValue value;
+
   // wal entry's position in the wal, valid after the value is flushed to wal 
successfully
   // it's safe to use volatile here to make this reference thread-safe.
   @SuppressWarnings("squid:S3077")
   private final WALEntryPosition walEntryPosition = new WALEntryPosition();
+
   // wal node, null when wal is disabled
   private WALNode walNode = null;
 
@@ -84,9 +87,14 @@ public class WALEntryHandler {
   }
 
   public InsertNode getInsertNodeViaCacheIfPossible() {
-    return value instanceof InsertNode
-        ? (InsertNode) value
-        : 
walEntryPosition.readByteBufferOrInsertNodeViaCacheDirectly().getRight();
+    try {
+      return value instanceof InsertNode
+          ? (InsertNode) value
+          : 
walEntryPosition.readByteBufferOrInsertNodeViaCacheDirectly().getRight();
+    } catch (Exception e) {
+      logger.warn("Fail to get insert node via cache. {}", this, e);
+      throw e;
+    }
   }
 
   /**
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALFileUtils.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALFileUtils.java
index 984edd2effe..027d4b160a6 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALFileUtils.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/dataregion/wal/utils/WALFileUtils.java
@@ -20,6 +20,7 @@
 package org.apache.iotdb.db.storageengine.dataregion.wal.utils;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.regex.Matcher;
@@ -73,12 +74,15 @@ public class WALFileUtils {
   }
 
   /** Get the .wal file starts with the specified version id in the directory. 
*/
-  public static File getWALFile(File dir, long versionId) {
+  public static File getWALFile(File dir, long versionId) throws 
FileNotFoundException {
     String filePrefix = WAL_FILE_PREFIX + versionId + FILE_NAME_SEPARATOR;
     File[] files =
         dir.listFiles((d, name) -> walFilenameFilter(d, name) && 
name.startsWith(filePrefix));
     if (files == null || files.length != 1) {
-      return null;
+      throw new FileNotFoundException(
+          String.format(
+              "Fail to get wal file by versionId=%s and files=%s.",
+              versionId, Arrays.toString(files)));
     }
     return files[0];
   }

Reply via email to