[ https://issues.apache.org/jira/browse/CASSANDRA-9908?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14645687#comment-14645687 ]
Sam Tunnicliffe commented on CASSANDRA-9908: -------------------------------------------- The patch narrows the window for the race but doesn't close it completely. It remains possible for the async cleanup to remove the file between the existence check in {{TransactionData#getTemporaryFiles}} and actually reading its content in {{TransactionFile#getTrackedFiles}}. My test fails much less frequently now but I can still hit the error on occasion, so I added some extra logging to capture the race happening (extracted from the attached log): {noformat} INFO [main] 2015-07-29 09:13:50,782 TransactionLogs.java:228 Deleted txn log file build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_new.log INFO [NonPeriodicTasks:1] 2015-07-29 09:13:50,793 TransactionLogs.java:537 Removing files for transaction bea2b120-35c9-11e5-951a-ad83a3dc3e9c INFO [NonPeriodicTasks:1] 2015-07-29 09:13:50,823 TransactionLogs.java:228 Deleted txn log file build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_old.log INFO [main] 2015-07-29 09:13:50,823 TransactionLogs.java:351 Txn old log file build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_old.log exists INFO [main] 2015-07-29 09:13:50,831 TransactionLogs.java:195 Error reading from txn log file build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_old.log java.lang.RuntimeException: java.nio.file.NoSuchFileException: build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_old.log at org.apache.cassandra.io.util.FileUtils.readLines(FileUtils.java:620) ~[main/:na] at org.apache.cassandra.db.lifecycle.TransactionLogs$TransactionFile.getTrackedFiles(TransactionLogs.java:192) ~[main/:na] at org.apache.cassandra.db.lifecycle.TransactionLogs$TransactionData.getTemporaryFiles(TransactionLogs.java:352) [main/:na] at org.apache.cassandra.db.lifecycle.TransactionLogs.getTemporaryFiles(TransactionLogs.java:757) [main/:na] at org.apache.cassandra.db.lifecycle.LifecycleTransaction.getTemporaryFiles(LifecycleTransaction.java:541) [main/:na] at org.apache.cassandra.db.Directories$SSTableLister.getFilter(Directories.java:652) [main/:na] at org.apache.cassandra.db.Directories$SSTableLister.filter(Directories.java:641) [main/:na] at org.apache.cassandra.db.Directories$SSTableLister.list(Directories.java:606) [main/:na] at org.apache.cassandra.db.ColumnFamilyStore.<init>(ColumnFamilyStore.java:351) [main/:na] at org.apache.cassandra.db.ColumnFamilyStore.<init>(ColumnFamilyStore.java:313) [main/:na] at org.apache.cassandra.db.ColumnFamilyStore.createColumnFamilyStore(ColumnFamilyStore.java:511) [main/:na] at org.apache.cassandra.index.internal.CassandraIndexer.addIndexedColumn(CassandraIndexer.java:115) [main/:na] at org.apache.cassandra.index.SecondaryIndexManager.addIndexedColumn(SecondaryIndexManager.java:265) [main/:na] at org.apache.cassandra.db.SecondaryIndexTest.testIndexCreate(SecondaryIndexTest.java:467) [classes/:na] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[na:1.8.0_45] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[na:1.8.0_45] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[na:1.8.0_45] at java.lang.reflect.Method.invoke(Method.java:497) ~[na:1.8.0_45] at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:44) [junit-4.6.jar:na] at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15) [junit-4.6.jar:na] at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:41) [junit-4.6.jar:na] at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20) [junit-4.6.jar:na] at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28) [junit-4.6.jar:na] at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:31) [junit-4.6.jar:na] at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70) [junit-4.6.jar:na] at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:44) [junit-4.6.jar:na] at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:180) [junit-4.6.jar:na] at org.junit.runners.ParentRunner.access$000(ParentRunner.java:41) [junit-4.6.jar:na] at org.junit.runners.ParentRunner$1.evaluate(ParentRunner.java:173) [junit-4.6.jar:na] at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28) [junit-4.6.jar:na] at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:31) [junit-4.6.jar:na] at org.junit.runners.ParentRunner.run(ParentRunner.java:220) [junit-4.6.jar:na] at org.apache.tools.ant.taskdefs.optional.junit.JUnit4TestMethodAdapter.run(JUnit4TestMethodAdapter.java:105) [ant-junit4.jar:na] at org.apache.tools.ant.taskdefs.optional.junit.JUnitTestRunner.run(JUnitTestRunner.java:532) [ant-junit-1.9.4.jar:na] at org.apache.tools.ant.taskdefs.optional.junit.JUnitTestRunner.launch(JUnitTestRunner.java:1179) [ant-junit-1.9.4.jar:na] at org.apache.tools.ant.taskdefs.optional.junit.JUnitTestRunner.main(JUnitTestRunner.java:1030) [ant-junit-1.9.4.jar:na] Caused by: java.nio.file.NoSuchFileException: build/test/cassandra/data/SecondaryIndexTest1/CompositeIndexToBeAdded-bd86e0e035c911e5951aad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_bea2b120-35c9-11e5-951a-ad83a3dc3e9c_old.log at sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) ~[na:1.8.0_45] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) ~[na:1.8.0_45] at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) ~[na:1.8.0_45] at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214) ~[na:1.8.0_45] at java.nio.file.Files.newByteChannel(Files.java:361) ~[na:1.8.0_45] at java.nio.file.Files.newByteChannel(Files.java:407) ~[na:1.8.0_45] at java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384) ~[na:1.8.0_45] at java.nio.file.Files.newInputStream(Files.java:152) ~[na:1.8.0_45] at java.nio.file.Files.newBufferedReader(Files.java:2784) ~[na:1.8.0_45] at java.nio.file.Files.readAllLines(Files.java:3202) ~[na:1.8.0_45] at org.apache.cassandra.io.util.FileUtils.readLines(FileUtils.java:616) ~[main/:na] ... 35 common frames omitted {noformat} > Potential race caused by async cleanup of transaction log files > --------------------------------------------------------------- > > Key: CASSANDRA-9908 > URL: https://issues.apache.org/jira/browse/CASSANDRA-9908 > Project: Cassandra > Issue Type: Bug > Reporter: Sam Tunnicliffe > Assignee: Stefania > Fix For: 3.0 beta 1 > > Attachments: TEST-org.apache.cassandra.db.SecondaryIndexTest.log > > > There seems to be a potential race in the cleanup of transaction log files, > introduced in CASSANDRA-7066 > It's pretty hard to trigger on trunk, but it's possible to hit it via > {{o.a.c.db.SecondaryIndexTest#testCreateIndex}} > That test creates an index, then removes it to check that the removal is > correctly recorded, then adds the index again to assert that it gets rebuilt > from the existing data. > The removal causes the SSTables of the index CFS to be dropped, which is a > transactional operation and so writes a transaction log. When the drop is > completed and the last reference to an SSTable is released, the cleanup of > the transaction log is scheduled on the periodic tasks executor. The issue is > that re-creating the index re-creates the index CFS. When this happens, it's > possible for the cleanup of the txn log to have not yet happened. If so, the > initialization of the CFS attempts to read the log to identify any orphaned > temporary files. The cleanup can happen between the finding the log file and > reading it's contents, which results in a {{NoSuchFileException}} > {noformat} > [junit] java.nio.file.NoSuchFileException: > build/test/cassandra/data:1/SecondaryIndexTest1/CompositeIndexToBeAdded-d0885f60323211e5a5e8ad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_d4b69fc0-3232-11e5-a5e8-ad83a3dc3e9c_old.log > [junit] java.lang.RuntimeException: java.nio.file.NoSuchFileException: > build/test/cassandra/data:1/SecondaryIndexTest1/CompositeIndexToBeAdded-d0885f60323211e5a5e8ad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_d4b69fc0-3232-11e5-a5e8-ad83a3dc3e9c_old.log > [junit] at > org.apache.cassandra.io.util.FileUtils.readLines(FileUtils.java:620) > [junit] at > org.apache.cassandra.db.lifecycle.TransactionLogs$TransactionFile.getTrackedFiles(TransactionLogs.java:190) > [junit] at > org.apache.cassandra.db.lifecycle.TransactionLogs$TransactionData.getTemporaryFiles(TransactionLogs.java:338) > [junit] at > org.apache.cassandra.db.lifecycle.TransactionLogs.getTemporaryFiles(TransactionLogs.java:739) > [junit] at > org.apache.cassandra.db.lifecycle.LifecycleTransaction.getTemporaryFiles(LifecycleTransaction.java:541) > [junit] at > org.apache.cassandra.db.Directories$SSTableLister.getFilter(Directories.java:652) > [junit] at > org.apache.cassandra.db.Directories$SSTableLister.filter(Directories.java:641) > [junit] at > org.apache.cassandra.db.Directories$SSTableLister.list(Directories.java:606) > [junit] at > org.apache.cassandra.db.ColumnFamilyStore.<init>(ColumnFamilyStore.java:351) > [junit] at > org.apache.cassandra.db.ColumnFamilyStore.<init>(ColumnFamilyStore.java:313) > [junit] at > org.apache.cassandra.db.ColumnFamilyStore.createColumnFamilyStore(ColumnFamilyStore.java:511) > [junit] at > org.apache.cassandra.index.internal.CassandraIndexer.addIndexedColumn(CassandraIndexer.java:115) > [junit] at > org.apache.cassandra.index.SecondaryIndexManager.addIndexedColumn(SecondaryIndexManager.java:265) > [junit] at > org.apache.cassandra.db.SecondaryIndexTest.testIndexCreate(SecondaryIndexTest.java:467) > [junit] Caused by: java.nio.file.NoSuchFileException: > build/test/cassandra/data:1/SecondaryIndexTest1/CompositeIndexToBeAdded-d0885f60323211e5a5e8ad83a3dc3e9c/.birthdate_index/transactions/unknowncompactiontype_d4b69fc0-3232-11e5-a5e8-ad83a3dc3e9c_old.log > [junit] at > sun.nio.fs.UnixException.translateToIOException(UnixException.java:86) > [junit] at > sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) > [junit] at > sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) > [junit] at > sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214) > [junit] at java.nio.file.Files.newByteChannel(Files.java:361) > [junit] at java.nio.file.Files.newByteChannel(Files.java:407) > [junit] at > java.nio.file.spi.FileSystemProvider.newInputStream(FileSystemProvider.java:384) > [junit] at java.nio.file.Files.newInputStream(Files.java:152) > [junit] at java.nio.file.Files.newBufferedReader(Files.java:2784) > [junit] at java.nio.file.Files.readAllLines(Files.java:3202) > [junit] at > org.apache.cassandra.io.util.FileUtils.readLines(FileUtils.java:616) > [junit] > [junit] > [junit] Test org.apache.cassandra.db.SecondaryIndexTest FAILED > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332)