This is an automated email from the ASF dual-hosted git repository.

marcuse pushed a commit to branch cassandra-4.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-4.0 by this push:
     new f2e3a0a4fc Handle sstable metadata stats file getting a new mtime 
after compaction has finished
f2e3a0a4fc is described below

commit f2e3a0a4fc0c70d02a01cc2968c382d8f17697c6
Author: Josh McKenzie <[email protected]>
AuthorDate: Tue Dec 13 14:58:23 2022 +0100

    Handle sstable metadata stats file getting a new mtime after compaction has 
finished
    
    Patch by Josh McKenzie and marcuse; reviewed by Josh McKenzie and marcuse 
for CASSANDRA-18119
---
 CHANGES.txt                                        |   1 +
 .../org/apache/cassandra/db/lifecycle/LogFile.java |  19 ++-
 .../apache/cassandra/db/lifecycle/LogRecord.java   |  69 +++++++-
 .../cassandra/db/lifecycle/LogTransaction.java     |   2 +
 .../cassandra/db/lifecycle/LogTransactionTest.java | 175 ++++++++++++++++++++-
 5 files changed, 256 insertions(+), 10 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 70761240f1..65e3cb4de0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0.18
+ * Handle sstable metadata stats file getting a new mtime after compaction has 
finished (CASSANDRA-18119)
  * Honor MAX_PARALLEL_TRANSFERS correctly (CASSANDRA-20532)
  * Updating a column with a new TTL but same expiration time is 
non-deterministic and causes repair mismatches. (CASSANDRA-20561)
  * Grant permission on keyspaces system_views and system_virtual_schema not 
possible (CASSANDRA-20171)
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java 
b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
index d67019008f..cd793d4831 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogFile.java
@@ -163,6 +163,10 @@ final class LogFile implements AutoCloseable
         this.id = id;
     }
 
+    /**
+     * Check a variety of the internals of the LogRecord as well as the state 
of the LogRecord vs. the files found on disk
+     * to ensure they remain correct and nothing was changed external to the 
process.
+     */
     boolean verify()
     {
         records.clear();
@@ -229,6 +233,9 @@ final class LogFile implements AutoCloseable
         return record;
     }
 
+    /**
+     * Sets the {@link LogRecord.Status#error} if something wrong is found 
with the record.
+     */
     static void verifyRecord(LogRecord record, List<File> existingFiles)
     {
         if (record.checksum != record.computeChecksum())
@@ -240,6 +247,7 @@ final class LogFile implements AutoCloseable
             return;
         }
 
+        // If it's not a removal we don't check it since we're not going to 
take action on it
         if (record.type != Type.REMOVE)
             return;
 
@@ -253,6 +261,16 @@ final class LogFile implements AutoCloseable
         // we can have transaction files with mismatching updateTime 
resolutions due to switching between jdk8 and jdk11, truncate both to be 
consistent:
         if (truncateMillis(record.updateTime) != 
truncateMillis(record.status.onDiskRecord.updateTime) && 
record.status.onDiskRecord.updateTime > 0)
         {
+            // handle the case where we have existing broken transaction file 
on disk, where the update time is
+            // based on the stats file. This is just for the first upgrade, 
patched versions never base the update
+            // time on the stats file.
+            LogRecord statsIncluded = LogRecord.make(record.type, 
existingFiles, existingFiles.size(), record.absolutePath(), true);
+            if (truncateMillis(statsIncluded.updateTime) == 
truncateMillis(record.updateTime))
+            {
+                logger.warn("Found a legacy log record {} with updateTime 
based on the stats file, ignoring to allow startup to continue", record);
+                return;
+            }
+
             record.setError(String.format("Unexpected files detected for 
sstable [%s]: " +
                                           "last update time [%tc] (%d) should 
have been [%tc] (%d)",
                                           record.fileName(),
@@ -260,7 +278,6 @@ final class LogFile implements AutoCloseable
                                           
record.status.onDiskRecord.updateTime,
                                           record.updateTime,
                                           record.updateTime));
-
         }
     }
 
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java 
b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
index 513ad87460..65ff470ac8 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogRecord.java
@@ -30,12 +30,19 @@ import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.zip.CRC32;
 
+import com.google.common.annotations.VisibleForTesting;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.cassandra.io.sstable.Component;
+import org.apache.cassandra.io.sstable.Descriptor;
 import org.apache.cassandra.io.sstable.SSTable;
 import org.apache.cassandra.io.sstable.format.SSTableReader;
 import org.apache.cassandra.io.util.FileUtils;
 import org.apache.cassandra.utils.FBUtilities;
 
+import static org.apache.cassandra.io.sstable.Descriptor.TMP_EXT;
+
 /**
  * A decoded line in a transaction log file replica.
  *
@@ -43,6 +50,10 @@ import org.apache.cassandra.utils.FBUtilities;
  */
 final class LogRecord
 {
+    private static final Logger logger = 
LoggerFactory.getLogger(LogRecord.class);
+    @VisibleForTesting
+    static boolean INCLUDE_STATS_FOR_TESTS = false;
+
     public enum Type
     {
         UNKNOWN, // a record that cannot be parsed
@@ -66,7 +77,10 @@ final class LogRecord
             return this == record.type;
         }
 
-        public boolean isFinal() { return this == Type.COMMIT || this == 
Type.ABORT; }
+        public boolean isFinal()
+        {
+            return this == Type.COMMIT || this == Type.ABORT;
+        }
     }
 
     /**
@@ -182,17 +196,66 @@ final class LogRecord
 
     public LogRecord withExistingFiles(List<File> existingFiles)
     {
+        if (!absolutePath.isPresent())
+            throw new IllegalStateException(String.format("Cannot create 
record from existing files for type %s - file is not present", type));
+
         return make(type, existingFiles, 0, absolutePath.get());
     }
 
+    /**
+     * We create a LogRecord based on the files on disk; there's some subtlety 
around how we handle stats files as the
+     * timestamp can be mutated by the async completion of compaction if 
things race with node shutdown. To work around this,
+     * we don't take the stats file timestamp into account when calculating 
nor using the timestamps for all the components
+     * as we build the LogRecord.
+     */
     public static LogRecord make(Type type, List<File> files, int minFiles, 
String absolutePath)
     {
+        return make(type, files, minFiles, absolutePath, 
INCLUDE_STATS_FOR_TESTS);
+    }
+
+    /**
+     * In most cases we skip including the stats file timestamp entirely as it 
can be mutated during anticompaction
+     * and thus "invalidate" the LogRecord. There is an edge case where we 
have a LogRecord that was written w/the wrong
+     * timestamp (i.e. included a mutated stats file) and we need the node to 
come up, so we need to expose the selective
+     * ability to either include the stats file timestamp or not.
+     *
+     * See {@link LogFile#verifyRecord}
+     */
+    static LogRecord make(Type type, List<File> files, int minFiles, String 
absolutePath, boolean includeStatsFile)
+    {
+        List<File> toVerify;
+        File statsFile = null;
+        if (!includeStatsFile && !files.isEmpty())
+        {
+            toVerify = new ArrayList<>(files.size() - 1);
+            for (File f : files)
+            {
+                if (!f.getName().endsWith(TMP_EXT))
+                {
+                    Component component = 
Descriptor.fromFilenameWithComponent(f).right;
+                    if (component == Component.STATS)
+                        statsFile = f;
+                    else
+                        toVerify.add(f);
+                }
+            }
+        }
+        else
+        {
+            toVerify = files;
+        }
         // CASSANDRA-11889: File.lastModified() returns a positive value only 
if the file exists, therefore
         // we filter by positive values to only consider the files that still 
exists right now, in case things
         // changed on disk since getExistingFiles() was called
-        List<Long> positiveModifiedTimes = 
files.stream().map(File::lastModified).filter(lm -> lm > 
0).collect(Collectors.toList());
+        List<Long> positiveModifiedTimes = 
toVerify.stream().map(File::lastModified).filter(lm -> lm > 
0).collect(Collectors.toList());
         long lastModified = positiveModifiedTimes.stream().reduce(0L, 
Long::max);
-        return new LogRecord(type, absolutePath, lastModified, 
Math.max(minFiles, positiveModifiedTimes.size()));
+
+        // We need to preserve the file count for the number of existing files 
found on disk even though we ignored the
+        // stats file during our timestamp calculation. If the stats file 
still exists, we add in the count of it as
+        // a separate validation assumption that it's one of the files 
considered valid in this LogRecord.
+        boolean addStatTS = statsFile != null && statsFile.exists();
+        int positiveTSCount = addStatTS ? positiveModifiedTimes.size() + 1 : 
positiveModifiedTimes.size();
+        return new LogRecord(type, absolutePath, lastModified, 
Math.max(minFiles, positiveTSCount));
     }
 
     private LogRecord(Type type, long updateTime)
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java 
b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
index a3c3837dc6..43b9e1f771 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
@@ -545,6 +545,8 @@ class LogTransaction extends 
Transactional.AbstractTransactional implements Tran
             try(LogFile txn = LogFile.make(entry.getKey(), entry.getValue()))
             {
                 logger.info("Verifying logfile transaction {}", txn);
+                // We don't check / include the stats file timestamp on 
LogRecord creation / verification as that might
+                // be modified by a race in compaction notification and then 
needlessly fail subsequent node starts.
                 if (txn.verify())
                 {
                     Throwable failure = txn.removeUnfinishedLeftovers(null);
diff --git 
a/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java 
b/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java
index 7d1cb39ae3..15578d14d5 100644
--- a/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java
+++ b/test/unit/org/apache/cassandra/db/lifecycle/LogTransactionTest.java
@@ -18,7 +18,6 @@
 package org.apache.cassandra.db.lifecycle;
 
 import java.io.File;
-import java.io.IOError;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.io.UncheckedIOException;
@@ -39,6 +38,7 @@ import java.util.stream.Stream;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Sets;
+import org.apache.cassandra.db.streaming.ComponentContext;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -71,6 +71,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -1263,10 +1264,172 @@ public class LogTransactionTest extends 
AbstractTransactionalTest
         logs.finish();
     }
 
+    @Test
+    public void testStatsTSMatchOnStart() throws Throwable
+    {
+        ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE);
+        File dataFolder = new 
Directories(cfs.metadata()).getDirectoryForNewSSTables();
+        SSTableReader sstable = sstable(dataFolder, cfs, 0, 128);
+
+        try(LogTransaction log = new LogTransaction(OperationType.COMPACTION))
+        {
+            assertNotNull(log);
+            log.trackNew(sstable);
+
+            // Confirm we can remove leftovers when they match
+            LogTransaction.removeUnfinishedLeftovers(cfs.metadata());
+        }
+
+        File sFile = new File(sstable.descriptor.filenameFor(Component.STATS));
+        assertFalse("Found STATS file but expected it to be cleaned up.", 
Files.exists(sFile.toPath()));
+        sstable.selfRef().release();
+    }
+
+    @Test
+    public void testStatsTSMatchDuringList() throws Throwable
+    {
+        ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE);
+        File dataFolder = new 
Directories(cfs.metadata()).getDirectoryForNewSSTables();
+        SSTableReader sstable = sstable(dataFolder, cfs, 0, 128);
+
+        try(LogTransaction log = new LogTransaction(OperationType.COMPACTION))
+        {
+            assertNotNull(log);
+            log.trackNew(sstable);
+
+            // Confirm we can successfully classify files when they match - 
this triggers the LogAwareFileLister verify
+            listFiles(dataFolder, Directories.OnTxnErr.THROW, 
Directories.FileType.FINAL);
+        }
+        sstable.selfRef().release();
+    }
+
+    @Test
+    public void testStatsTSMismatchDuringStart() throws Throwable
+    {
+        ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE);
+        File dataFolder = new 
Directories(cfs.metadata()).getDirectoryForNewSSTables();
+        SSTableReader sstable = sstable(dataFolder, cfs, 0, 128);
+
+        File sFile = new File(sstable.descriptor.filenameFor(Component.STATS));
+        assertTrue("STATS file not created successfully in test setup", 
Files.exists(sFile.toPath()));
+
+        // Confirm we can remove leftovers even if the STATS file doesn't match
+        try(LogTransaction log = new LogTransaction(OperationType.COMPACTION))
+        {
+            assertNotNull(log);
+
+            // Need to flag the transaction as having a REMOVE entry so it'll 
trigger the path to calculate stats on list
+            log.obsoleted(sstable);
+
+            // Need to sleep for long enough to bypass the millisecond 
truncation logic due to jdk8 and jdk11 change
+            Thread.sleep(2000);
+            assertTrue("Failed to set mtime for STATS file to 
currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis()));
+
+            // Confirm we have an mtime mismatch
+            File dFile = new 
File(sstable.descriptor.filenameFor(Component.DATA));
+            assertNotEquals(sFile.lastModified(), dFile.lastModified());
+
+            // We need to add another LogRecord as we allow partial or 
incorrect entries as the last record...
+            log.trackNew(sstable(dataFolder, cfs, 2, 128));
+
+            assertTrue("STATS file gone before removeUnfinished...", 
Files.exists(sFile.toPath()));
+            // Confirm we can remove leftovers when the STATS file mismatches
+            log.prepareToCommit(); // commit so that obsolete sstable 
components will be removed.
+            log.commit();
+            ComponentContext.create(sstable.descriptor);
+            
assertTrue(LogTransaction.removeUnfinishedLeftovers(cfs.metadata()));
+        }
+
+        sstable.selfRef().release();
+    }
+
+    @Test
+    public void testWrongTimestampInTxnFile() throws IOException, 
InterruptedException
+    {
+        ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE);
+        File dataFolder = new 
Directories(cfs.metadata()).getDirectoryForNewSSTables();
+        SSTableReader sstable = sstable(dataFolder, cfs, 0, 128);
+
+        File sFile = new File(sstable.descriptor.filenameFor(Component.STATS));
+        assertTrue("STATS file not created successfully in test setup", 
Files.exists(sFile.toPath()));
+
+        LogRecord.INCLUDE_STATS_FOR_TESTS = true;
+
+        Thread.sleep(2000);
+        assertTrue("Failed to set mtime for STATS file to 
currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis()));
+
+        // Confirm we can remove leftovers even if the STATS file doesn't match
+        try(LogTransaction log = new LogTransaction(OperationType.COMPACTION))
+        {
+            assertNotNull(log);
+            // Need to flag the transaction as having a REMOVE entry so it'll 
trigger the path to calculate stats on list
+            log.obsoleted(sstable);
+            // Need to sleep for long enough to bypass the millisecond 
truncation logic due to jdk8 and jdk11 change
+            // Confirm we have an mtime mismatch
+            File dFile = new 
File(sstable.descriptor.filenameFor(Component.DATA));
+            assertNotEquals(sFile.lastModified(), dFile.lastModified());
+
+            // We need to add another LogRecord as we allow partial or 
incorrect entries as the last record...
+            log.trackNew(sstable(dataFolder, cfs, 2, 128));
+
+            assertTrue("STATS file gone before removeUnfinished...", 
Files.exists(sFile.toPath()));
+            // Confirm we can remove leftovers when the STATS file mismatches
+            LogRecord.INCLUDE_STATS_FOR_TESTS = false;
+            
assertTrue(LogTransaction.removeUnfinishedLeftovers(cfs.metadata()));
+        }
+
+        sstable.selfRef().release();
+    }
+
+    /**
+     * We do not consider the stats file's ts for any cases at this point
+     */
+    @Test
+    public void testStatsTSMismatchDuringList() throws Throwable
+    {
+        SSTableReader sstable = null;
+        try
+        {
+            ColumnFamilyStore cfs = MockSchema.newCFS(KEYSPACE);
+            File dataFolder = new 
Directories(cfs.metadata()).getDirectoryForNewSSTables();
+            sstable = sstable(dataFolder, cfs, 0, 128);
+
+            File sFile = new 
File(sstable.descriptor.filenameFor(Component.STATS));
+            assertTrue("STATS file not created successfully in test setup", 
Files.exists(sFile.toPath()));
+
+            try(LogTransaction log = new 
LogTransaction(OperationType.COMPACTION))
+            {
+                assertNotNull(log);
+
+                // Need to flag the transaction as having a REMOVE entry so 
it'll trigger the path to calculate stats on list
+                log.obsoleted(sstable);
+
+                // Need to sleep for long enough to bypass the millisecond 
truncation logic due to jdk8 and jdk11 change
+                Thread.sleep(2000);
+                assertTrue("Failed to set mtime for STATS file to 
currentTimeMillis()", sFile.setLastModified(System.currentTimeMillis()));
+
+                // Confirm we have an mtime mismatch
+                File dFile = new 
File(sstable.descriptor.filenameFor(Component.DATA));
+                assertNotEquals(sFile.lastModified(), dFile.lastModified());
+
+                // We need to add another LogRecord as we allow partial or 
incorrect entries as the last record...
+                log.trackNew(sstable(dataFolder, cfs, 2, 128));
+
+                // Confirm we don't get a mismatch LogRecord error when the 
STATS file is different even on listFiles case
+                listFiles(dataFolder, Directories.OnTxnErr.THROW, 
Directories.FileType.FINAL);
+            }
+        }
+        finally
+        {
+            if (sstable != null)
+                sstable.selfRef().release();
+        }
+    }
+
     private static SSTableReader sstable(File dataFolder, ColumnFamilyStore 
cfs, int generation, int size) throws IOException
     {
         Descriptor descriptor = new Descriptor(dataFolder, 
cfs.keyspace.getName(), cfs.getTableName(), generation, SSTableFormat.Type.BIG);
-        Set<Component> components = ImmutableSet.of(Component.DATA, 
Component.PRIMARY_INDEX, Component.FILTER, Component.TOC);
+        Set<Component> components = ImmutableSet.of(Component.DATA, 
Component.PRIMARY_INDEX, Component.FILTER, Component.TOC, Component.STATS);
         for (Component component : components)
         {
             File file = new File(descriptor.filenameFor(component));
@@ -1360,12 +1523,12 @@ public class LogTransactionTest extends 
AbstractTransactionalTest
 
     static Set<File> getTemporaryFiles(File folder)
     {
-        return listFiles(folder, Directories.FileType.TEMPORARY);
+        return listFiles(folder, Directories.OnTxnErr.IGNORE, 
Directories.FileType.TEMPORARY);
     }
 
     static Set<File> getFinalFiles(File folder)
     {
-        return listFiles(folder, Directories.FileType.FINAL);
+        return listFiles(folder, Directories.OnTxnErr.IGNORE, 
Directories.FileType.FINAL);
     }
 
     // Used by listFiles - this test is deliberately racing with files being
@@ -1390,12 +1553,12 @@ public class LogTransactionTest extends 
AbstractTransactionalTest
         }
     }
 
-    static Set<File> listFiles(File folder, Directories.FileType... types)
+    static Set<File> listFiles(File folder, Directories.OnTxnErr err, 
Directories.FileType... types)
     {
         Collection<Directories.FileType> match = Arrays.asList(types);
         return new LogAwareFileLister(folder.toPath(),
                                       (file, type) -> match.contains(type),
-                                      Directories.OnTxnErr.IGNORE).list()
+                                      err).list()
                        .stream()
                        
.flatMap(LogTransactionTest::toCanonicalIgnoringNotFound)
                        .collect(Collectors.toSet());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to