alex-plekhanov commented on a change in pull request #6554: IGNITE-11073: Backup page store manager, initial URL: https://github.com/apache/ignite/pull/6554#discussion_r394378221
########## File path: modules/core/src/test/java/org/apache/ignite/internal/processors/cache/persistence/snapshot/IgniteSnapshotManagerSelfTest.java ########## @@ -0,0 +1,880 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.processors.cache.persistence.snapshot; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.nio.file.DirectoryStream; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteCheckedException; +import org.apache.ignite.IgniteDataStreamer; +import org.apache.ignite.IgniteException; +import 
org.apache.ignite.IgniteLogger; +import org.apache.ignite.binary.BinaryType; +import org.apache.ignite.cache.CacheAtomicityMode; +import org.apache.ignite.cache.CacheMode; +import org.apache.ignite.cache.CacheRebalanceMode; +import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.DataRegionConfiguration; +import org.apache.ignite.configuration.DataStorageConfiguration; +import org.apache.ignite.configuration.IgniteConfiguration; +import org.apache.ignite.configuration.WALMode; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.IgniteInternalFuture; +import org.apache.ignite.internal.IgniteInterruptedCheckedException; +import org.apache.ignite.internal.TestRecordingCommunicationSpi; +import org.apache.ignite.internal.cluster.ClusterTopologyCheckedException; +import org.apache.ignite.internal.processors.cache.GridCacheSharedContext; +import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage; +import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState; +import org.apache.ignite.internal.processors.cache.persistence.CheckpointProgress; +import org.apache.ignite.internal.processors.cache.persistence.CheckpointState; +import org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager; +import org.apache.ignite.internal.processors.cache.persistence.file.FileIO; +import org.apache.ignite.internal.processors.cache.persistence.file.FileIODecorator; +import org.apache.ignite.internal.processors.cache.persistence.file.FileIOFactory; +import org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager; +import org.apache.ignite.internal.processors.cache.persistence.file.RandomAccessFileIOFactory; +import org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId; +import 
org.apache.ignite.internal.processors.cache.persistence.wal.FileWriteAheadLogManager; +import org.apache.ignite.internal.processors.cache.persistence.wal.crc.FastCrc; +import org.apache.ignite.internal.processors.marshaller.MappedName; +import org.apache.ignite.internal.util.IgniteUtils; +import org.apache.ignite.internal.util.lang.GridAbsPredicate; +import org.apache.ignite.internal.util.typedef.F; +import org.apache.ignite.internal.util.typedef.internal.CU; +import org.apache.ignite.internal.util.typedef.internal.U; +import org.apache.ignite.testframework.GridTestUtils; +import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static java.nio.file.Files.newDirectoryStream; +import static org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.FILE_SUFFIX; +import static org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.PART_FILE_PREFIX; +import static org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.cacheDirName; +import static org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.relativeNodePath; + +/** + * + */ +public class IgniteSnapshotManagerSelfTest extends GridCommonAbstractTest { + /** */ + private static final FileIOFactory DFLT_IO_FACTORY = new RandomAccessFileIOFactory(); + + /** */ + private static final String SNAPSHOT_NAME = "testSnapshot"; + + /** */ + private static final String CP_TEST_SNAPSHOT_REASON = "Checkpoint started to enforce snapshot operation %s"; + + /** */ + private static final int CACHE_PARTS_COUNT = 8; + + /** */ + private static final int PAGE_SIZE = 1024; + + /** */ + private static final int CACHE_KEYS_RANGE = 1024; + + /** */ + private static final DataStorageConfiguration memCfg = new DataStorageConfiguration() + .setDefaultDataRegionConfiguration(new DataRegionConfiguration() + .setMaxSize(100L * 
1024 * 1024) + .setPersistenceEnabled(true)) + .setCheckpointFrequency(3000) + .setPageSize(PAGE_SIZE) + .setWalMode(WALMode.LOG_ONLY); + + /** */ + private CacheConfiguration<Integer, Integer> defaultCacheCfg = + new CacheConfiguration<Integer, Integer>(DEFAULT_CACHE_NAME) + .setCacheMode(CacheMode.PARTITIONED) + .setRebalanceMode(CacheRebalanceMode.ASYNC) + .setBackups(1) + .setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL) + .setAffinity(new RendezvousAffinityFunction(false) + .setPartitions(CACHE_PARTS_COUNT)); + + /** + * Calculate CRC for all partition files of specified cache. + * + * @param cacheDir Cache directory to iterate over partition files. + * @return The map of [fileName, checksum]. + * @throws IgniteCheckedException If fails. + */ + private static Map<String, Integer> calculateCRC32Partitions(File cacheDir) throws IgniteCheckedException { + assert cacheDir.isDirectory() : cacheDir.getAbsolutePath(); + + Map<String, Integer> result = new HashMap<>(); + + try { + try (DirectoryStream<Path> partFiles = newDirectoryStream(cacheDir.toPath(), + p -> p.toFile().getName().startsWith(PART_FILE_PREFIX) && p.toFile().getName().endsWith(FILE_SUFFIX)) + ) { + for (Path path : partFiles) + result.put(path.toFile().getName(), FastCrc.calcCrc(path.toFile())); + } + + return result; + } + catch (IOException e) { + throw new IgniteCheckedException(e); + } + } + + /** */ + @Before + public void beforeTestSnapshot() throws Exception { + cleanPersistenceDir(); + } + + /** */ + @After + public void afterTestSnapshot() throws Exception { + stopAllGrids(); + } + + /** {@inheritDoc} */ + @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception { + return super.getConfiguration(igniteInstanceName) + .setConsistentId(igniteInstanceName) + .setCommunicationSpi(new TestRecordingCommunicationSpi()) + .setDataStorageConfiguration(memCfg) + .setCacheConfiguration(defaultCacheCfg); + } + + /** + * + */ + @Test + public void 
testSnapshotLocalPartitions() throws Exception { + // Start grid node with data before each test. + IgniteEx ig = startGridWithCache(defaultCacheCfg, CACHE_KEYS_RANGE); + + for (int i = CACHE_KEYS_RANGE; i < 2048; i++) + ig.cache(DEFAULT_CACHE_NAME).put(i, i); + + try (IgniteDataStreamer<Integer, TestOrderItem> ds = ig.dataStreamer(DEFAULT_CACHE_NAME)) { + for (int i = 0; i < 2048; i++) + ds.addData(i, new TestOrderItem(i, i)); + } + + try (IgniteDataStreamer<Integer, TestOrderItem> ds = ig.dataStreamer(DEFAULT_CACHE_NAME)) { + for (int i = 0; i < 2048; i++) + ds.addData(i, new TestOrderItem(i, i) { + @Override public String toString() { + return "_" + super.toString(); + } + }); + } + GridCacheSharedContext<?, ?> cctx0 = ig.context().cache().context(); + + // Collection of pairs group and appropratate cache partition to be snapshotted. + IgniteInternalFuture<?> snpFut = startLocalSnapshotTask(cctx0.snapshotMgr(), + (GridCacheDatabaseSharedManager)cctx0.database(), + cctx0.localNodeId(), + SNAPSHOT_NAME, + F.asMap(CU.cacheId(DEFAULT_CACHE_NAME), null), + cctx0.snapshotMgr().localSnapshotSender(SNAPSHOT_NAME)); + + snpFut.get(); + + File cacheWorkDir = ((FilePageStoreManager)ig.context() + .cache() + .context() + .pageStore()) + .cacheWorkDir(defaultCacheCfg); + + stopGrid(ig.name()); + + // Calculate CRCs + final Map<String, Integer> origParts = calculateCRC32Partitions(cacheWorkDir); + + String nodePath = relativeNodePath(ig.context().pdsFolderResolver().resolveFolders()); + + final Map<String, Integer> bakcupCRCs = calculateCRC32Partitions( + Paths.get(cctx0.snapshotMgr().snapshotLocalDir(SNAPSHOT_NAME).getPath(), nodePath, cacheDirName(defaultCacheCfg)).toFile() + ); + + assertEquals("Partiton must have the same CRC after shapshot and after merge", origParts, bakcupCRCs); + + File snpWorkDir = cctx0.snapshotMgr().snapshotTempDir(); + + assertEquals("Snapshot working directory must be cleand after usage", 0, snpWorkDir.listFiles().length); + } + + /** + * + */ + 
@Test + public void testSnapshotLocalPartitionsNextCpStarted() throws Exception { + final int value_multiplier = 2; + CountDownLatch slowCopy = new CountDownLatch(1); + + IgniteEx ig = startGridWithCache(defaultCacheCfg.setAffinity(new ZeroPartitionAffinityFunction() + .setPartitions(CACHE_PARTS_COUNT)), CACHE_KEYS_RANGE); + + IgniteSnapshotManager mgr = ig.context() + .cache() + .context() + .snapshotMgr(); + + GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager)ig.context() + .cache() + .context() + .database(); + + File cpDir = dbMgr.checkpointDirectory(); + File walDir = ((FileWriteAheadLogManager) ig.context().cache().context().wal()).walWorkDir(); + + // Change data before backup + for (int i = 0; i < CACHE_KEYS_RANGE; i++) + ig.cache(DEFAULT_CACHE_NAME).put(i, value_multiplier * i); + + IgniteInternalFuture<?> snpFut = startLocalSnapshotTask(mgr, + dbMgr, + ig.context().cache().context().localNodeId(), + SNAPSHOT_NAME, + F.asMap(CU.cacheId(DEFAULT_CACHE_NAME), null), + new DeleagateSnapshotSender(log, mgr.snapshotExecutorService(), mgr.localSnapshotSender(SNAPSHOT_NAME)) { + @Override + public void sendPart0(File part, String cacheDirName, GroupPartitionId pair, Long length) { + try { + if (pair.getPartitionId() == 0) + U.await(slowCopy); + + delegate.sendPart0(part, cacheDirName, pair, length); + } + catch (IgniteInterruptedCheckedException e) { + throw new IgniteException(e); + } + } + }); + + + dbMgr.forceCheckpoint("snapshot is ready to be created") + .futureFor(CheckpointState.MARKER_STORED_TO_DISK) + .get(); + + // Change data after backup + for (int i = 0; i < CACHE_KEYS_RANGE; i++) + ig.cache(DEFAULT_CACHE_NAME).put(i, 3 * i); + + // Backup on the next checkpoint must copy page before write it to partition + CheckpointProgress cpFut = ig.context() + .cache() + .context() + .database() + .forceCheckpoint("second cp"); + + cpFut.futureFor(CheckpointState.FINISHED).get(); + + slowCopy.countDown(); + + snpFut.get(); + + // Now can stop 
the node and check created backups. + + stopGrid(0); + + IgniteUtils.delete(cpDir); + IgniteUtils.delete(walDir); Review comment: Why do you clean only these directories? I think the entire persistence dir should be cleaned up, except for the snapshot dir. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services