Repository: activemq Updated Branches: refs/heads/master 6df02555f -> a7178a46b
https://issues.apache.org/jira/browse/AMQ-5703 - fix and test. We now skip past known corruption on a journal replay Project: http://git-wip-us.apache.org/repos/asf/activemq/repo Commit: http://git-wip-us.apache.org/repos/asf/activemq/commit/a7178a46 Tree: http://git-wip-us.apache.org/repos/asf/activemq/tree/a7178a46 Diff: http://git-wip-us.apache.org/repos/asf/activemq/diff/a7178a46 Branch: refs/heads/master Commit: a7178a46b7b596d5ce425d51c6c2a4a387ca46ce Parents: 6df0255 Author: gtully <[email protected]> Authored: Wed Apr 1 14:50:12 2015 +0100 Committer: gtully <[email protected]> Committed: Wed Apr 1 14:59:15 2015 +0100 ---------------------------------------------------------------------- .../activemq/store/kahadb/MessageDatabase.java | 2 +- .../store/kahadb/disk/journal/Journal.java | 9 +- .../store/kahadb/disk/util/SequenceSet.java | 14 + .../JournalCorruptionIndexRecoveryTest.java | 292 +++++++++++++++++++ 4 files changed, 314 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/activemq/blob/a7178a46/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/MessageDatabase.java ---------------------------------------------------------------------- diff --git a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/MessageDatabase.java b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/MessageDatabase.java index 188192f..3cc6879 100644 --- a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/MessageDatabase.java +++ b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/MessageDatabase.java @@ -601,7 +601,7 @@ public abstract class MessageDatabase extends ServiceSupport implements BrokerSe if (recoveryPosition != null) { int redoCounter = 0; - LOG.info("Recovering from the journal ..."); + LOG.info("Recovering from the journal @" + recoveryPosition); while (recoveryPosition != null) { 
JournalCommand<?> message = load(recoveryPosition); metadata.lastUpdate = recoveryPosition; http://git-wip-us.apache.org/repos/asf/activemq/blob/a7178a46/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/journal/Journal.java ---------------------------------------------------------------------- diff --git a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/journal/Journal.java b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/journal/Journal.java index 358652d..394e3c0 100644 --- a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/journal/Journal.java +++ b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/journal/Journal.java @@ -26,6 +26,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.zip.Adler32; import java.util.zip.Checksum; import org.apache.activemq.store.kahadb.disk.util.LinkedNode; +import org.apache.activemq.store.kahadb.disk.util.SequenceSet; import org.apache.activemq.util.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -623,8 +624,12 @@ public class Journal { accessorPool.closeDataFileAccessor(reader); } - if (cur.getType() == 0) { - // invalid offset - jump to next datafile + Sequence corruptedRange = dataFile.corruptedBlocks.get(cur.getOffset()); + if (corruptedRange != null) { + // skip corruption + cur.setSize((int) corruptedRange.range()); + } else if (cur.getType() == 0) { + // eof - jump to next datafile cur.setOffset(maxFileLength); } else if (cur.getType() == USER_RECORD_TYPE) { // Only return user records. 
http://git-wip-us.apache.org/repos/asf/activemq/blob/a7178a46/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/SequenceSet.java ---------------------------------------------------------------------- diff --git a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/SequenceSet.java b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/SequenceSet.java index c0fc509..2946a22 100644 --- a/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/SequenceSet.java +++ b/activemq-kahadb-store/src/main/java/org/apache/activemq/store/kahadb/disk/util/SequenceSet.java @@ -352,6 +352,20 @@ public class SequenceSet extends LinkedNodeList<Sequence> implements Iterable<Lo return false; } + public Sequence get(int value) { + if (!isEmpty()) { + Sequence sequence = getHead(); + while (sequence != null) { + if (sequence.contains(value)) { + return sequence; + } + sequence = sequence.getNext(); + } + } + return null; + } + + /** * Computes the size of this Sequence by summing the values of all * the contained sequences. http://git-wip-us.apache.org/repos/asf/activemq/blob/a7178a46/activemq-kahadb-store/src/test/java/org/apache/activemq/store/kahadb/JournalCorruptionIndexRecoveryTest.java ---------------------------------------------------------------------- diff --git a/activemq-kahadb-store/src/test/java/org/apache/activemq/store/kahadb/JournalCorruptionIndexRecoveryTest.java b/activemq-kahadb-store/src/test/java/org/apache/activemq/store/kahadb/JournalCorruptionIndexRecoveryTest.java new file mode 100644 index 0000000..821dfd9 --- /dev/null +++ b/activemq-kahadb-store/src/test/java/org/apache/activemq/store/kahadb/JournalCorruptionIndexRecoveryTest.java @@ -0,0 +1,292 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.activemq.store.kahadb; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import javax.jms.Connection; +import javax.jms.Destination; +import javax.jms.Message; +import javax.jms.MessageConsumer; +import javax.jms.MessageProducer; +import javax.jms.Session; +import org.apache.activemq.ActiveMQConnectionFactory; +import org.apache.activemq.broker.BrokerService; +import org.apache.activemq.command.ActiveMQQueue; +import org.apache.activemq.store.kahadb.disk.journal.DataFile; +import org.apache.activemq.store.kahadb.disk.journal.Journal; +import org.apache.activemq.util.ByteSequence; +import org.apache.activemq.util.RecoverableRandomAccessFile; +import org.junit.After; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + + +@RunWith(Parameterized.class) +public class JournalCorruptionIndexRecoveryTest { + + private static final Logger LOG = LoggerFactory.getLogger(JournalCorruptionIndexRecoveryTest.class); + + ActiveMQConnectionFactory cf = null; + BrokerService broker = null; + private final Destination 
destination = new ActiveMQQueue("Test"); + private String connectionUri; + private KahaDBPersistenceAdapter adapter; + + @Parameterized.Parameter(0) + public byte fill = Byte.valueOf("3"); + + @Parameterized.Parameters(name = "fill=#{0}") + public static Iterable<Object[]> parameters() { + // corruption can be valid record type values + return Arrays.asList(new Object[][]{{Byte.valueOf("1")}, {Byte.valueOf("0")}, {Byte.valueOf("2")}, {Byte.valueOf("-1")} }); + } + + protected void startBroker() throws Exception { + doStartBroker(true); + } + + + protected void restartBroker() throws Exception { + File dataDir = broker.getPersistenceAdapter().getDirectory(); + + if (broker != null) { + broker.stop(); + broker.waitUntilStopped(); + } + + whackIndex(dataDir); + + doStartBroker(false); + } + + + private void doStartBroker(boolean delete) throws Exception { + broker = new BrokerService(); + broker.setDeleteAllMessagesOnStartup(delete); + broker.setPersistent(true); + broker.setUseJmx(true); + broker.addConnector("tcp://localhost:0"); + + configurePersistence(broker); + + connectionUri = "vm://localhost?create=false"; + cf = new ActiveMQConnectionFactory(connectionUri); + + broker.start(); + LOG.info("Starting broker.."); + } + + protected void configurePersistence(BrokerService brokerService) throws Exception { + adapter = (KahaDBPersistenceAdapter) brokerService.getPersistenceAdapter(); + + // ensure there are a bunch of data files but multiple entries in each + adapter.setJournalMaxFileLength(1024 * 20); + + // speed up the test case, checkpoint and cleanup early and often + adapter.setCheckpointInterval(5000); + adapter.setCleanupInterval(5000); + + adapter.setCheckForCorruptJournalFiles(true); + adapter.setIgnoreMissingJournalfiles(true); + + } + + @After + public void tearDown() throws Exception { + if (broker != null) { + broker.stop(); + broker.waitUntilStopped(); + } + } + + @Test + public void testRecoveryAfterCorruptionMiddle() throws Exception { 
startBroker(); + + produceMessagesToConsumeMultipleDataFiles(50); + + int numFiles = getNumberOfJournalFiles(); + + assertTrue("more than x files: " + numFiles, numFiles > 4); + + corruptBatchMiddle(3); + + restartBroker(); + + assertEquals("missing one message", 49, broker.getAdminView().getTotalMessageCount()); + + assertEquals("Drain", 49, drainQueue(49)); + } + + + @Test + public void testRecoveryAfterCorruptionEnd() throws Exception { + startBroker(); + + produceMessagesToConsumeMultipleDataFiles(50); + + int numFiles = getNumberOfJournalFiles(); + + assertTrue("more than x files: " + numFiles, numFiles > 4); + + corruptBatchEnd(4); + + restartBroker(); + + assertEquals("missing one message", 49, broker.getAdminView().getTotalMessageCount()); + + assertEquals("Drain", 49, drainQueue(49)); + + } + + @Test + public void testRecoveryAfterCorruption() throws Exception { + startBroker(); + + produceMessagesToConsumeMultipleDataFiles(50); + + int numFiles = getNumberOfJournalFiles(); + + assertTrue("more than x files: " + numFiles, numFiles > 4); + + corruptBatchMiddle(3); + corruptBatchEnd(4); + + restartBroker(); + + assertEquals("missing one message", 48, broker.getAdminView().getTotalMessageCount()); + assertEquals("Drain", 48, drainQueue(48)); + + } + + private void whackIndex(File dataDir) { + + File indexToDelete = new File(dataDir, "db.data"); + LOG.info("Whacking index: " + indexToDelete); + indexToDelete.delete(); + + } + + private void corruptBatchMiddle(int i) throws IOException { + corruptBatch(i, false); + } + + private void corruptBatchEnd(int i) throws IOException { + corruptBatch(i, true); + } + + private void corruptBatch(int id, boolean atEnd) throws IOException { + + Collection<DataFile> files = + ((KahaDBPersistenceAdapter) broker.getPersistenceAdapter()).getStore().getJournal().getFileMap().values(); + DataFile dataFile = (DataFile) files.toArray()[id]; + + RecoverableRandomAccessFile randomAccessFile = dataFile.openRandomAccessFile(); + + 
final ByteSequence header = new ByteSequence(Journal.BATCH_CONTROL_RECORD_HEADER); + byte data[] = new byte[1024 * 20]; + + ByteSequence bs = new ByteSequence(data, 0, randomAccessFile.read(data, 0, data.length)); + + int pos = 0; + int offset = 0; + int end = atEnd ? Integer.MAX_VALUE : 3; + for (int i = 0; i < end; i++) { + int found = bs.indexOf(header, pos); + if (found == -1) { + break; + } + offset = found; + pos++; + } + + LOG.info("Whacking batch record in file:" + id + ", at offset: " + offset + " with fill:" + fill); + // whack that record + byte[] bla = new byte[Journal.BATCH_CONTROL_RECORD_HEADER.length]; + Arrays.fill(bla, fill); + randomAccessFile.seek(offset); + randomAccessFile.write(bla, 0, bla.length); + } + + + private int getNumberOfJournalFiles() throws IOException { + + Collection<DataFile> files = + ((KahaDBPersistenceAdapter) broker.getPersistenceAdapter()).getStore().getJournal().getFileMap().values(); + int reality = 0; + for (DataFile file : files) { + if (file != null) { + reality++; + } + } + return reality; + } + + + private int produceMessages(Destination destination, int numToSend) throws Exception { + int sent = 0; + Connection connection = new ActiveMQConnectionFactory( + broker.getTransportConnectors().get(0).getConnectUri()).createConnection(); + connection.start(); + try { + Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE); + MessageProducer producer = session.createProducer(destination); + for (int i = 0; i < numToSend; i++) { + producer.send(createMessage(session, i)); + sent++; + } + } finally { + connection.close(); + } + + return sent; + } + + private int produceMessagesToConsumeMultipleDataFiles(int numToSend) throws Exception { + return produceMessages(destination, numToSend); + } + + final String payload = new String(new byte[1024]); + + private Message createMessage(Session session, int i) throws Exception { + return session.createTextMessage(payload + "::" + i); + } + + private int 
drainQueue(int max) throws Exception { + Connection connection = cf.createConnection(); + connection.start(); + Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE); + MessageConsumer consumer = session.createConsumer(destination); + int count = 0; + while (count < max && consumer.receive(5000) != null) { + count++; + } + consumer.close(); + connection.close(); + return count; + } +}
