ibessonov commented on a change in pull request #7984:
URL: https://github.com/apache/ignite/pull/7984#discussion_r524997891



##########
File path: 
modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/defragmentation/CachePartitionDefragmentationManager.java
##########
@@ -0,0 +1,887 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package 
org.apache.ignite.internal.processors.cache.persistence.defragmentation;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.LongConsumer;
+import java.util.stream.Collectors;
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteException;
+import org.apache.ignite.IgniteLogger;
+import org.apache.ignite.configuration.DataPageEvictionMode;
+import org.apache.ignite.internal.IgniteInternalFuture;
+import org.apache.ignite.internal.metric.IoStatisticsHolderNoOp;
+import org.apache.ignite.internal.pagemem.PageIdAllocator;
+import org.apache.ignite.internal.pagemem.PageIdUtils;
+import org.apache.ignite.internal.pagemem.store.PageStore;
+import org.apache.ignite.internal.processors.cache.CacheGroupContext;
+import org.apache.ignite.internal.processors.cache.CacheType;
+import org.apache.ignite.internal.processors.cache.GridCacheSharedContext;
+import 
org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager.CacheDataStore;
+import org.apache.ignite.internal.processors.cache.persistence.CacheDataRow;
+import org.apache.ignite.internal.processors.cache.persistence.CheckpointState;
+import org.apache.ignite.internal.processors.cache.persistence.DataRegion;
+import 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager;
+import 
org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager;
+import 
org.apache.ignite.internal.processors.cache.persistence.GridCacheOffheapManager.GridCacheDataStore;
+import 
org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointManager;
+import 
org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointTimeoutLock;
+import 
org.apache.ignite.internal.processors.cache.persistence.checkpoint.LightweightCheckpointManager;
+import 
org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreFactory;
+import 
org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager;
+import 
org.apache.ignite.internal.processors.cache.persistence.freelist.AbstractFreeList;
+import 
org.apache.ignite.internal.processors.cache.persistence.freelist.SimpleDataRow;
+import 
org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryEx;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.PageIO;
+import 
org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIO;
+import 
org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIOV3;
+import org.apache.ignite.internal.processors.cache.tree.AbstractDataLeafIO;
+import org.apache.ignite.internal.processors.cache.tree.CacheDataTree;
+import org.apache.ignite.internal.processors.cache.tree.DataRow;
+import org.apache.ignite.internal.processors.cache.tree.PendingEntriesTree;
+import org.apache.ignite.internal.processors.cache.tree.PendingRow;
+import org.apache.ignite.internal.processors.query.GridQueryIndexing;
+import org.apache.ignite.internal.processors.query.GridQueryProcessor;
+import org.apache.ignite.internal.util.GridSpinBusyLock;
+import org.apache.ignite.internal.util.collection.IntHashMap;
+import org.apache.ignite.internal.util.collection.IntMap;
+import org.apache.ignite.internal.util.future.GridCompoundFuture;
+import org.apache.ignite.internal.util.future.GridFinishedFuture;
+import org.apache.ignite.internal.util.future.GridFutureAdapter;
+import org.apache.ignite.internal.util.typedef.internal.CU;
+import org.apache.ignite.internal.util.typedef.internal.S;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.lang.IgniteInClosure;
+import org.apache.ignite.maintenance.MaintenanceRegistry;
+
+import static java.util.stream.StreamSupport.stream;
+import static org.apache.ignite.internal.pagemem.PageIdAllocator.FLAG_DATA;
+import static org.apache.ignite.internal.pagemem.PageIdAllocator.FLAG_IDX;
+import static 
org.apache.ignite.internal.processors.cache.persistence.CheckpointState.FINISHED;
+import static 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.DEFRAGMENTATION_MAPPING_REGION_NAME;
+import static 
org.apache.ignite.internal.processors.cache.persistence.GridCacheDatabaseSharedManager.DEFRAGMENTATION_PART_REGION_NAME;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.CP_LOCK;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.INSERT_ROW;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.ITERATE;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.METADATA;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.READ_ROW;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.STORE_MAP;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.STORE_PENDING;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.CachePartitionDefragmentationManager.PartStages.STORE_PK;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.batchRenameDefragmentedCacheGroupPartitions;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.defragmentedIndexTmpFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.defragmentedPartFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.defragmentedPartMappingFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.defragmentedPartTmpFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.renameTempIndexFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.renameTempPartitionFile;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.skipAlreadyDefragmentedCacheGroup;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.skipAlreadyDefragmentedPartition;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.DefragmentationFileUtils.writeDefragmentationCompletionMarker;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.TreeIterator.PageAccessType.ACCESS_READ;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.TreeIterator.PageAccessType.ACCESS_WRITE;
+import static 
org.apache.ignite.internal.processors.cache.persistence.defragmentation.TreeIterator.access;
+
+/**
+ * Defragmentation manager is the core class that contains main 
defragmentation procedure.
+ */
+public class CachePartitionDefragmentationManager {
+    /** Name of the maintenance task that {@link #executeDefragmentation()} unregisters once it completes. */
+    public static final String DEFRAGMENTATION_MNTC_TASK_NAME = 
"defragmentationMaintenanceTask";
+
+    /**
+     * IDs of the cache groups selected for defragmentation. An empty set disables filtering,
+     * i.e. every user cache group is processed.
+     */
+    private final Set<Integer> cacheGroupsForDefragmentation;
+
+    /** Cache shared context. */
+    private final GridCacheSharedContext<?, ?> sharedCtx;
+
+    /** Maintenance registry. */
+    private final MaintenanceRegistry mntcReg;
+
+    /** Logger. */
+    private final IgniteLogger log;
+
+    /** Database shared manager. */
+    private final GridCacheDatabaseSharedManager dbMgr;
+
+    /** File page store manager. */
+    private final FilePageStoreManager filePageStoreMgr;
+
+    /**
+     * Checkpoint for specific defragmentation regions which would store the data to new partitions
+     * during the defragmentation.
+     */
+    private final LightweightCheckpointManager defragmentationCheckpoint;
+
+    /** Default checkpoint for current node. */
+    private final CheckpointManager nodeCheckpoint;
+
+    /** Page size. */
+    private final int pageSize;
+
+    /**
+     * Data region looked up by {@code DEFRAGMENTATION_PART_REGION_NAME}. It backs the new cache group
+     * contexts created during defragmentation.
+     */
+    private final DataRegion partDataRegion;
+
+    /**
+     * Data region looked up by {@code DEFRAGMENTATION_MAPPING_REGION_NAME}. It is handed to every
+     * partition context together with {@link #partDataRegion}.
+     */
+    private final DataRegion mappingDataRegion;
+
+    /**
+     * Creates the manager and resolves the two defragmentation data regions by name.
+     *
+     * @param cacheGrpIds IDs of the cache groups to defragment. The list is copied into a set; if it is
+     *      empty, {@link #executeDefragmentation()} does not filter cache groups by ID.
+     * @param sharedCtx Cache shared context.
+     * @param dbMgr Database manager.
+     * @param filePageStoreMgr File page store manager.
+     * @param nodeCheckpoint Default checkpoint for this node.
+     * @param defragmentationCheckpoint Specific checkpoint for defragmentation.
+     * @param pageSize Page size.
+     * @throws IgniteCheckedException Declared by the signature; presumably raised by the data region
+     *      lookups (TODO confirm against {@code GridCacheDatabaseSharedManager#dataRegion}).
+     */
+    public CachePartitionDefragmentationManager(
+        List<Integer> cacheGrpIds,
+        GridCacheSharedContext<?, ?> sharedCtx,
+        GridCacheDatabaseSharedManager dbMgr,
+        FilePageStoreManager filePageStoreMgr,
+        CheckpointManager nodeCheckpoint,
+        LightweightCheckpointManager defragmentationCheckpoint,
+        int pageSize
+    ) throws IgniteCheckedException {
+        // Defensive copy into a set: duplicates collapse and later membership checks use contains().
+        cacheGroupsForDefragmentation = new HashSet<>(cacheGrpIds);
+
+        this.dbMgr = dbMgr;
+        this.filePageStoreMgr = filePageStoreMgr;
+        this.pageSize = pageSize;
+        this.sharedCtx = sharedCtx;
+
+        // Registry and logger are derived from the shared context rather than passed in.
+        this.mntcReg = sharedCtx.kernalContext().maintenanceRegistry();
+        this.log = sharedCtx.logger(getClass());
+        this.defragmentationCheckpoint = defragmentationCheckpoint;
+        this.nodeCheckpoint = nodeCheckpoint;
+
+        // Both regions are looked up by their well-known names.
+        partDataRegion = dbMgr.dataRegion(DEFRAGMENTATION_PART_REGION_NAME);
+        mappingDataRegion = 
dbMgr.dataRegion(DEFRAGMENTATION_MAPPING_REGION_NAME);
+    }
+
+    /** */
+    //TODO How will we handle constant fail and restart scenario?
+    public void executeDefragmentation() throws IgniteCheckedException {
+        log.info("Defragmentation started.");
+
+        try {
+            // Checkpointer must be enabled so all pages on disk are in their 
latest valid state.
+            dbMgr.resumeWalLogging();
+
+            dbMgr.onStateRestored(null);
+
+            nodeCheckpoint.forceCheckpoint("beforeDefragmentation", 
null).futureFor(FINISHED).get();
+
+            sharedCtx.wal().onDeActivate(sharedCtx.kernalContext());
+
+            // Now the actual process starts.
+            TreeIterator treeIter = new TreeIterator(pageSize);
+
+            IgniteInternalFuture<?> idxDfrgFut = null;
+            DataPageEvictionMode prevPageEvictionMode = null;
+
+            for (CacheGroupContext oldGrpCtx : 
sharedCtx.cache().cacheGroups()) {
+                if (!oldGrpCtx.userCache())
+                    continue;
+
+                int grpId = oldGrpCtx.groupId();
+
+                if (!cacheGroupsForDefragmentation.isEmpty() && 
!cacheGroupsForDefragmentation.contains(grpId))
+                    continue;
+
+                File workDir = 
filePageStoreMgr.cacheWorkDir(oldGrpCtx.sharedGroup(), 
oldGrpCtx.cacheOrGroupName());
+
+                if (skipAlreadyDefragmentedCacheGroup(workDir, grpId, log))
+                    continue;
+
+                GridCacheOffheapManager offheap = 
(GridCacheOffheapManager)oldGrpCtx.offheap();
+
+                GridSpinBusyLock busyLock = offheap.busyLock();
+
+                List<CacheDataStore> oldCacheDataStores = 
stream(offheap.cacheDataStores().spliterator(), false)
+                    .filter(store -> {
+                        try {
+                            return filePageStoreMgr.exists(grpId, 
store.partId());
+                        }
+                        catch (IgniteCheckedException e) {
+                            throw new IgniteException(e);
+                        }
+                    })
+                    .collect(Collectors.toList());
+
+                if (workDir != null && !oldCacheDataStores.isEmpty()) {
+                    // We can't start defragmentation of new group on the 
region that has wrong eviction mode.
+                    // So waiting of the previous cache group defragmentation 
is inevitable.
+                    DataPageEvictionMode curPageEvictionMode = 
oldGrpCtx.dataRegion().config().getPageEvictionMode();
+
+                    if (prevPageEvictionMode == null || prevPageEvictionMode 
!= curPageEvictionMode) {
+                        prevPageEvictionMode = curPageEvictionMode;
+
+                        
partDataRegion.config().setPageEvictionMode(curPageEvictionMode);
+
+                        if (idxDfrgFut != null)
+                            idxDfrgFut.get();
+                    }
+
+                    IntMap<CacheDataStore> cacheDataStores = new 
IntHashMap<>();
+
+                    for (CacheDataStore store : offheap.cacheDataStores()) {
+                        // Tree can be null for not yet initialized partitions.
+                        // This would mean that these partitions are empty.
+                        assert store.tree() == null || store.tree().groupId() 
== grpId;
+
+                        if (store.tree() != null)
+                            cacheDataStores.put(store.partId(), store);
+                    }
+
+                    //TODO ensure that there are no races.
+                    
dbMgr.checkpointedDataRegions().remove(oldGrpCtx.dataRegion());
+
+                    // Another cheat. Ttl cleanup manager knows too much shit.
+                    oldGrpCtx.caches().stream()
+                        .filter(cacheCtx -> cacheCtx.groupId() == grpId)
+                        .forEach(cacheCtx -> cacheCtx.ttl().unregister());
+
+                    // Technically wal is already disabled, but 
"PageHandler.isWalDeltaRecordNeeded" doesn't care and
+                    // WAL records will be allocated anyway just to be ignored 
later if we don't disable WAL for
+                    // cache group explicitly.
+                    oldGrpCtx.localWalEnabled(false, false);
+
+                    boolean encrypted = 
oldGrpCtx.config().isEncryptionEnabled();
+
+                    FilePageStoreFactory pageStoreFactory = 
filePageStoreMgr.getPageStoreFactory(grpId, encrypted);
+
+                    createIndexPageStore(grpId, workDir, pageStoreFactory, 
partDataRegion, val -> {
+                    }); //TODO Allocated tracker.
+
+                    GridCompoundFuture<Object, Object> cmpFut = new 
GridCompoundFuture<>();
+
+                    PageMemoryEx oldPageMem = 
(PageMemoryEx)oldGrpCtx.dataRegion().pageMemory();
+
+                    CacheGroupContext newGrpCtx = new CacheGroupContext(
+                        sharedCtx,
+                        grpId,
+                        oldGrpCtx.receivedFrom(),
+                        CacheType.USER,
+                        oldGrpCtx.config(),
+                        oldGrpCtx.affinityNode(),
+                        partDataRegion,
+                        oldGrpCtx.cacheObjectContext(),
+                        null,
+                        null,
+                        oldGrpCtx.localStartVersion(),
+                        true,
+                        false,
+                        true
+                    );
+
+                    
defragmentationCheckpoint.checkpointTimeoutLock().checkpointReadLock();
+
+                    try {
+                        // This will initialize partition meta in index 
partition - meta tree and reuse list.
+                        newGrpCtx.start();
+                    }
+                    finally {
+                        
defragmentationCheckpoint.checkpointTimeoutLock().checkpointReadUnlock();
+                    }
+
+                    IntMap<LinkMap> linkMapByPart = new IntHashMap<>();
+
+                    for (CacheDataStore oldCacheDataStore : 
oldCacheDataStores) {
+                        int partId = oldCacheDataStore.partId();
+
+                        PartitionContext partCtx = new PartitionContext(
+                            workDir,
+                            grpId,
+                            partId,
+                            partDataRegion,
+                            mappingDataRegion,
+                            oldGrpCtx,
+                            newGrpCtx,
+                            cacheDataStores.get(partId),
+                            pageStoreFactory
+                        );
+
+                        if (skipAlreadyDefragmentedPartition(workDir, grpId, 
partId, log)) {
+                            partCtx.createMappingPageStore();
+
+                            linkMapByPart.put(partId, 
partCtx.createLinkMapTree(false));
+
+                            continue;
+                        }
+
+                        partCtx.createMappingPageStore();
+
+                        linkMapByPart.put(partId, 
partCtx.createLinkMapTree(true));
+
+                        partCtx.createPartPageStore();
+
+                        copyPartitionData(partCtx, treeIter, busyLock);
+
+                        //TODO Move inside of defragmentSinglePartition.
+                        IgniteInClosure<IgniteInternalFuture<?>> cpLsnr = fut 
-> {
+                            if (fut.error() == null) {
+                                PageStore oldPageStore = null;
+
+                                try {
+                                    oldPageStore = 
filePageStoreMgr.getStore(grpId, partId);
+                                }
+                                catch (IgniteCheckedException ignore) {
+                                }
+
+                                assert oldPageStore != null;
+
+                                log.info(S.toString(
+                                    "Partition defragmented",
+                                    "grpId", grpId, false,
+                                    "partId", partId, false,
+                                    "oldPages", oldPageStore.pages(), false,
+                                    "newPages", 
partCtx.partPagesAllocated.get(), false,
+                                    "bytesSaved", (oldPageStore.pages() - 
partCtx.partPagesAllocated.get()) * pageSize, false,
+                                    "mappingPages", 
partCtx.mappingPagesAllocated.get(), false,
+                                    "partFile", defragmentedPartFile(workDir, 
partId).getName(), false,
+                                    "workDir", workDir, false
+                                ));
+
+                                oldPageMem.invalidate(grpId, partId);
+
+                                partCtx.partPageMemory.invalidate(grpId, 
partId);
+
+                                DefragmentationPageReadWriteManager pageMgr = 
(DefragmentationPageReadWriteManager)partCtx.partPageMemory.pageManager();
+
+                                pageMgr.pageStoreMap().removePageStore(grpId, 
partId); // Yes, it'll be invalid in a second.
+
+                                try {
+                                    renameTempPartitionFile(workDir, partId);
+                                }
+                                catch (IgniteCheckedException e) {
+                                    throw new IgniteException(e);
+                                }
+                            }
+                        };
+
+                        GridFutureAdapter<?> cpFut = defragmentationCheckpoint
+                            .forceCheckpoint("partition defragmented", null)
+                            .futureFor(CheckpointState.FINISHED);
+
+                        cpFut.listen(cpLsnr);
+
+                        cmpFut.add((IgniteInternalFuture<Object>)cpFut);
+                    }
+
+                    // A bit too general for now, but I like it more then 
saving only the last checkpoint future.
+                    cmpFut.markInitialized().get();
+
+                    idxDfrgFut = new GridFinishedFuture<>();
+
+                    if (filePageStoreMgr.hasIndexStore(grpId)) {
+                        defragmentIndexPartition(oldGrpCtx, newGrpCtx, 
linkMapByPart);
+
+                        idxDfrgFut = defragmentationCheckpoint
+                            .forceCheckpoint("index defragmented", null)
+                            .futureFor(CheckpointState.FINISHED);
+                    }
+
+                    idxDfrgFut.listen(fut -> {
+                        oldPageMem.invalidate(grpId, 
PageIdAllocator.INDEX_PARTITION);
+
+                        PageMemoryEx partPageMem = 
(PageMemoryEx)partDataRegion.pageMemory();
+
+                        partPageMem.invalidate(grpId, 
PageIdAllocator.INDEX_PARTITION);
+
+                        DefragmentationPageReadWriteManager pageMgr = 
(DefragmentationPageReadWriteManager)partPageMem.pageManager();
+
+                        pageMgr.pageStoreMap().removePageStore(grpId, 
PageIdAllocator.INDEX_PARTITION);
+
+                        PageMemoryEx mappingPageMem = 
(PageMemoryEx)mappingDataRegion.pageMemory();
+
+                        pageMgr = 
(DefragmentationPageReadWriteManager)mappingPageMem.pageManager();
+
+                        pageMgr.pageStoreMap().clear(grpId);
+
+                        try {
+                            renameTempIndexFile(workDir);
+
+                            
writeDefragmentationCompletionMarker(filePageStoreMgr.getPageStoreFileIoFactory(),
 workDir, log);
+
+                            
batchRenameDefragmentedCacheGroupPartitions(workDir, log);
+                        }
+                        catch (IgniteCheckedException e) {
+                            throw new IgniteException(e);
+                        }
+                    });
+                }
+
+                // I guess we should wait for it?
+                if (idxDfrgFut != null)
+                    idxDfrgFut.get();
+            }
+
+            mntcReg.unregisterMaintenanceTask(DEFRAGMENTATION_MNTC_TASK_NAME);
+
+            log.info("Defragmentation completed. All partitions are 
defragmented.");

Review comment:
       I'd make this change in the issue where I implement the control.sh "status" 
command; it has some aggregated statistics implemented. The current log will be 
moved to debug level.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to