This is an automated email from the ASF dual-hosted git repository. klund pushed a commit to branch develop in repository https://gitbox.apache.org/repos/asf/geode.git
commit 10eabfb3a89b8996aeeee2786a1f2c988bbfbf85 Author: Kirk Lund <[email protected]> AuthorDate: Wed Mar 21 16:38:25 2018 -0700 GEODE-1279: Rename Bug37500JUnitTest as DiskAccessExceptionDisablesServerRegressionTest --- .../geode/internal/cache/Bug37500JUnitTest.java | 163 -------------- ...ccessExceptionDisablesServerRegressionTest.java | 241 +++++++++++++++++++++ 2 files changed, 241 insertions(+), 163 deletions(-) diff --git a/geode-core/src/test/java/org/apache/geode/internal/cache/Bug37500JUnitTest.java b/geode-core/src/test/java/org/apache/geode/internal/cache/Bug37500JUnitTest.java deleted file mode 100644 index a65b30f..0000000 --- a/geode-core/src/test/java/org/apache/geode/internal/cache/Bug37500JUnitTest.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more contributor license - * agreements. See the NOTICE file distributed with this work for additional information regarding - * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. You may obtain a - * copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License - * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express - * or implied. See the License for the specific language governing permissions and limitations under - * the License. - */ -package org.apache.geode.internal.cache; - -import static org.junit.Assert.*; -import static org.junit.Assert.fail; - -import java.io.File; - -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.experimental.categories.Category; - -import org.apache.geode.cache.Scope; -import org.apache.geode.test.junit.categories.IntegrationTest; - -/** - * This is a bugtest for bug 37500. 
- * - * - */ -@Category(IntegrationTest.class) -public class Bug37500JUnitTest extends DiskRegionTestingBase { - - /** The disk region configuration object for the test */ - private DiskRegionProperties diskProps = new DiskRegionProperties(); - - /** The key for entry1 */ - static final String KEY1 = "KEY1"; - - /** The key for entry2 */ - static final String KEY2 = "KEY2"; - - /** Boolean to indicate the roller thread to proceed */ - static volatile boolean proceedForRolling = false; - - /** - * Boolean to decide whether we want to allow roller to run ( used via CacheObserver callback - */ - static volatile boolean notifyRoller = false; - - /** - * This test does the following: <br> - * 1. Create a disk-region with following configurations : - * <li>dirSize = 2000 bytes - * <li>maxOplogSize = 500 bytes - * <li>rolling = true - * <li>syncMode = true - * <li>approx size on disk for operations = 440 bytes<br> - * - * 2.Make Roller go into WAIT state via CacheObserverAdapter.beforeGoingToCompact callback<br> - * 3.Put 440 bytes , it will go in oplog1 <br> - * 4.Put another 440 bytes ,it will go in oplog1<br> - * 5.Put 440 bytes , switching will be caused, it will go in oplog2, Roller will remained blocked - * (step 2)<br> - * 6.Put 440 bytes , it will go in oplog2, oplog2 will now be full<br> - * 7.Notify the Roller and put 440 bytes , this will try further switching. The put will fail with - * exception due to bug 37500. The put thread takes an entry level lock for entry2 ( the one with - * KEY2) and tries to write to disk but there is no free space left, so it goes into wait, - * expecting Roller to free up the space. The roller, which has now been notified to run, tries to - * roll entry2 for which it seeks entry level lock which has been acquired by put-thread. 
So the - * put thread eventually comes out of the wait with DiskAccessException<br> - * - * Another scenario for this bug is, once the disk space was getting exhausted , the entry - * operation threads which had already taken a lock on Entry got stuck trying to seek the Oplog - * Lock. The switching thread had acquired the Oplog.lock & was waiting for the roller thread to - * free disk space. Since the roller needed to acquire Entry lock to roll, it was unable to do so - * because of entry operation threads. This would cause the entry operation threads to get - * DiskAccessException after completing the stipulated wait. The Roller was able to free space - * only when it has rolled all the relevant entries which could happen only when the entry - * operation threads released the entry lock after getting DiskAccessException. - * - * - * @throws Exception - */ - @Test - public void testBug37500() throws Exception { - final int MAX_OPLOG_SIZE = 1000; - diskProps.setMaxOplogSize(MAX_OPLOG_SIZE); - diskProps.setPersistBackup(true); - diskProps.setRolling(true); - diskProps.setSynchronous(false); - - File testdir = new File("bug37500-diskDir"); - testdir.mkdir(); - testdir.deleteOnExit(); - diskProps.setDiskDirsAndSizes(new File[] {testdir}, new int[] {2000}); - - LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER = true; - - region = DiskRegionHelperFactory.getSyncPersistOnlyRegion(cache, diskProps, Scope.LOCAL); - CacheObserver old = CacheObserverHolder.setInstance(new CacheObserverAdapter() { - public void beforeGoingToCompact() { - if (!proceedForRolling) { - synchronized (Bug37500JUnitTest.class) { - if (!proceedForRolling) { - try { - cache.getLogger().info("beforeGoingToCompact :: going into wait"); - Bug37500JUnitTest.class.wait(); - } catch (InterruptedException e) { - cache.getLogger().info("Roller interrupted"); - fail("interrupted"); - } - cache.getLogger().info("beforeGoingToCompact :: coming out of wait"); - } - } - } - } - - public void beforeSwitchingOplog() { 
- if (notifyRoller) { - cache.getLogger().info("beforeSwitchingOplog :: going to notify Roller"); - synchronized (Bug37500JUnitTest.class) { - proceedForRolling = true; - Bug37500JUnitTest.class.notify(); - cache.getLogger().info("beforeSwitchingOplog :: notified the Roller"); - } - } - - } - }); - - cache.getLogger().info("goin to put no. 1"); - // put 440 bytes , it will go in oplog1 - region.put(KEY1, new byte[420]); - - cache.getLogger().info("goin to put no. 2"); - // put another 440 bytes ,it will go in oplog1 - region.put(KEY2, new byte[420]); - - cache.getLogger().info("goin to put no. 3"); - // put 440 bytes , switching will be caused, it will go in oplog2 (value - // size increased to 432 as key wont be written to disk for UPDATE) - region.put(KEY1, new byte[432]); - - cache.getLogger().info("goin to put no. 4"); - // put 440 bytes , it will go in oplog2 - region.put(KEY1, new byte[432]); - - notifyRoller = true; - cache.getLogger().info("goin to put no. 5"); - // put 440 bytes , this will try further switching - region.put(KEY2, new byte[432]); - - LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER = false; - CacheObserverHolder.setInstance(old); - closeDown(); - } - -} diff --git a/geode-core/src/test/java/org/apache/geode/internal/cache/DiskAccessExceptionDisablesServerRegressionTest.java b/geode-core/src/test/java/org/apache/geode/internal/cache/DiskAccessExceptionDisablesServerRegressionTest.java new file mode 100644 index 0000000..6104204 --- /dev/null +++ b/geode-core/src/test/java/org/apache/geode/internal/cache/DiskAccessExceptionDisablesServerRegressionTest.java @@ -0,0 +1,241 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.geode.internal.cache; + +import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS; +import static org.apache.geode.distributed.ConfigurationProperties.MCAST_PORT; + +import java.io.File; + +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TemporaryFolder; +import org.junit.rules.TestName; + +import org.apache.geode.cache.Cache; +import org.apache.geode.cache.CacheFactory; +import org.apache.geode.cache.DiskStore; +import org.apache.geode.cache.DiskStoreFactory; +import org.apache.geode.cache.Region; +import org.apache.geode.cache.RegionFactory; +import org.apache.geode.cache.RegionShortcut; +import org.apache.geode.test.junit.categories.IntegrationTest; + +/** + * This is a bugtest for bug 37500. 
+ * + * <p> + * TRAC #37500: A DiskAccessException brings cache server to a stall + * + * <pre> + * com.gemstone.gemfire.cache.DiskAccessException: Unable to get free space for creating an oplog after waiting for 20 seconds + * at com.gemstone.gemfire.internal.cache.ComplexDiskRegion.getNextDir(ComplexDiskRegion.java:150) + * at com.gemstone.gemfire.internal.cache.Oplog.switchOpLog(Oplog.java:2020) + * at com.gemstone.gemfire.internal.cache.Oplog.basicModify(Oplog.java:2423) + * at com.gemstone.gemfire.internal.cache.Oplog.modify(Oplog.java:2339) + * at com.gemstone.gemfire.internal.cache.DiskRegion.put(DiskRegion.java:321) + * at com.gemstone.gemfire.internal.cache.DiskEntry$Helper.writeToDisk(DiskEntry.java:362) + * at com.gemstone.gemfire.internal.cache.DiskEntry$Helper.overflowToDisk(DiskEntry.java:532) + * at com.gemstone.gemfire.internal.cache.AbstractLRURegionMap.evictEntry(AbstractLRURegionMap.java:164) + * at com.gemstone.gemfire.internal.cache.AbstractLRURegionMap.lruUpdateCallback(AbstractLRURegionMap.java:240) + * at com.gemstone.gemfire.internal.cache.AbstractRegionMap.basicPut(AbstractRegionMap.java:928) + * at com.gemstone.gemfire.internal.cache.LocalRegion.virtualPut(LocalRegion.java:3605) + * at com.gemstone.gemfire.internal.cache.DistributedRegion.virtualPut(DistributedRegion.java:151) + * at com.gemstone.gemfire.internal.cache.LocalRegion.basicUpdate(LocalRegion.java:3591) + * at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation.doPutOrCreate(AbstractUpdateOperation.java:100) + * at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.basicOperateOnRegion(AbstractUpdateOperation.java:171) + * at com.gemstone.gemfire.internal.cache.AbstractUpdateOperation$AbstractUpdateMessage.operateOnRegion(AbstractUpdateOperation.java:154) + * at com.gemstone.gemfire.internal.cache.DistributedCacheOperation$CacheOperationMessage.basicProcess(DistributedCacheOperation.java:487) + * at 
com.gemstone.gemfire.internal.cache.DistributedCacheOperation$CacheOperationMessage.process(DistributedCacheOperation.java:404) + * at com.gemstone.gemfire.distributed.internal.DistributionMessage.scheduleAction(DistributionMessage.java:242) + * at com.gemstone.gemfire.distributed.internal.DistributionMessage.schedule(DistributionMessage.java:287) + * at com.gemstone.gemfire.distributed.internal.DistributionManager.scheduleIncomingMessage(DistributionManager.java:2732) + * at com.gemstone.gemfire.distributed.internal.DistributionManager.handleIncomingDMsg(DistributionManager.java:2424) + * at com.gemstone.gemfire.distributed.internal.DistributionManager$MyListener.messageReceived(DistributionManager.java:3585) + * at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager.processMessage(JGroupMembershipManager.java:1349) + * at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager.handleOrDeferMessage(JGroupMembershipManager.java:1289) + * at com.gemstone.gemfire.distributed.internal.membership.jgroup.JGroupMembershipManager$MyDCReceiver.messageReceived(JGroupMembershipManager.java:449) + * at com.gemstone.gemfire.distributed.internal.direct.DirectChannel.receive(DirectChannel.java:535) + * at com.gemstone.gemfire.internal.tcp.TCPConduit.messageReceived(TCPConduit.java:483) + * at com.gemstone.gemfire.internal.tcp.Connection.dispatchMessage(Connection.java:3026) + * at com.gemstone.gemfire.internal.tcp.Connection.processNIOBuffer(Connection.java:2861) + * at com.gemstone.gemfire.internal.tcp.Connection.runNioReader(Connection.java:1332) + * at com.gemstone.gemfire.internal.tcp.Connection.run(Connection.java:1257) + * at java.lang.Thread.run(Thread.java:595) + * </pre> + */ +@Category(IntegrationTest.class) +public class DiskAccessExceptionDisablesServerRegressionTest { + + private static final int MAX_OPLOG_SIZE = 1000; + private static final String KEY1 = "KEY1"; + private static final String KEY2 = "KEY2"; 
+ + private Cache cache; + private Region<String, byte[]> region; + private MyCacheObserver observer; + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Rule + public TestName testName = new TestName(); + + @Before + public void setUp() throws Exception { + String uniqueName = getClass().getSimpleName() + "_" + testName.getMethodName(); + File temporaryDirectory = temporaryFolder.newFolder(uniqueName); + + DirectoryHolder.SET_DIRECTORY_SIZE_IN_BYTES_FOR_TESTING_PURPOSES = true; + DiskStoreImpl.SET_IGNORE_PREALLOCATE = true; + LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER = true; + + observer = new MyCacheObserver(); + CacheObserverHolder.setInstance(observer); + + cache = new CacheFactory().set(LOCATORS, "").set(MCAST_PORT, "0").create(); + + DiskStoreFactory dsf = cache.createDiskStoreFactory(); + dsf.setDiskDirsAndSizes(new File[] {temporaryDirectory}, new int[] {2000}); + ((DiskStoreFactoryImpl) dsf).setMaxOplogSizeInBytes(MAX_OPLOG_SIZE); + + DiskStore diskStore = dsf.create(uniqueName); + + RegionFactory<String, byte[]> regionFactory = + cache.createRegionFactory(RegionShortcut.LOCAL_PERSISTENT); + regionFactory.setDiskStoreName(diskStore.getName()); + + region = regionFactory.create(uniqueName); + } + + @After + public void tearDown() { + CacheObserverHolder.setInstance(null); + + DirectoryHolder.SET_DIRECTORY_SIZE_IN_BYTES_FOR_TESTING_PURPOSES = false; + DiskStoreImpl.SET_IGNORE_PREALLOCATE = false; + LocalRegion.ISSUE_CALLBACKS_TO_CACHE_OBSERVER = false; + + cache.close(); + } + + /** + * This test does the following: <br> + * 1. 
Create a disk-region with the following configurations: + * <ul> + * <li>dirSize = 2000 bytes + * <li>maxOplogSize = 1000 bytes + * <li>rolling = true + * <li>syncMode = true + * <li>approx size on disk for operations = 440 bytes + * </ul> + * + * <p> + * 2. Make Roller go into WAIT state via the CacheObserverAdapter.beforeGoingToCompact callback + * + * <p> + * 3. Put 440 bytes, it will go in oplog1 + * + * <p> + * 4. Put another 440 bytes, it will go in oplog1 + * + * <p> + * 5. Put 440 bytes, switching will be caused, it will go in oplog2, Roller will remain blocked + * (step 2) + * + * <p> + * 6. Put 440 bytes, it will go in oplog2, oplog2 will now be full + * + * <p> + * 7. Notify the Roller and put 440 bytes, this will try further switching. The put will fail with + * exception due to bug 37500. The put thread takes an entry level lock for entry2 (the one with + * KEY2) and tries to write to disk but there is no free space left, so it goes into wait, + * expecting Roller to free up the space. The roller, which has now been notified to run, tries to + * roll entry2 for which it seeks entry level lock which has been acquired by put-thread. So the + * put thread eventually comes out of the wait with DiskAccessException + * + * <p> + * Another scenario for this bug is, once the disk space was getting exhausted, the entry + * operation threads which had already taken a lock on Entry got stuck trying to seek the Oplog + * Lock. The switching thread had acquired the Oplog.lock and was waiting for the roller thread to + * free disk space. Since the roller needed to acquire Entry lock to roll, it was unable to do so + * because of entry operation threads. This would cause the entry operation threads to get + * DiskAccessException after completing the stipulated wait. The Roller was able to free space + * only when it has rolled all the relevant entries which could happen only when the entry + * operation threads released the entry lock after getting DiskAccessException. 
+ */ + @Test + public void testBug37500() throws Exception { + // put 440 bytes , it will go in oplog1 + region.put(KEY1, new byte[420]); + + // put another 440 bytes ,it will go in oplog1 + region.put(KEY2, new byte[420]); + + // put 440 bytes , switching will be caused, it will go in oplog2 (value + // size increased to 432 as key wont be written to disk for UPDATE) + region.put(KEY1, new byte[432]); + + // put 440 bytes , it will go in oplog2 + region.put(KEY1, new byte[432]); + + observer.notifyRoller(); + + // put 440 bytes , this will try further switching + region.put(KEY2, new byte[432]); + } + + private static class MyCacheObserver extends CacheObserverAdapter { + + private final Object notification = new Object(); + + /** + * Flag to decide whether we want to allow roller to run + */ + private volatile boolean notifyRoller = false; + + private volatile boolean proceedForRolling = false; + + void notifyRoller() { + notifyRoller = true; + } + + @Override + public void beforeGoingToCompact() { + synchronized (notification) { + while (!proceedForRolling) { + try { + notification.wait(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + } + + @Override + public void beforeSwitchingOplog() { + if (notifyRoller) { + synchronized (notification) { + proceedForRolling = true; + notification.notifyAll(); + } + } + } + } +} -- To stop receiving notification emails like this one, please contact [email protected].
