nizhikov commented on code in PR #10617:
URL: https://github.com/apache/ignite/pull/10617#discussion_r1159436706
########## modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/snapshot/IncrementalSnapshotVerificationTask.java:
##########
@@ -0,0 +1,421 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.persistence.snapshot;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.atomic.LongAdder;
+import java.util.function.BiConsumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteException;
+import org.apache.ignite.IgniteLogger;
+import org.apache.ignite.cache.CacheAtomicityMode;
+import org.apache.ignite.cluster.ClusterNode;
+import org.apache.ignite.compute.ComputeJob;
+import org.apache.ignite.compute.ComputeJobAdapter;
+import org.apache.ignite.compute.ComputeJobResult;
+import org.apache.ignite.internal.IgniteEx;
+import org.apache.ignite.internal.processors.cache.GridCacheOperation;
+import org.apache.ignite.internal.processors.cache.StoredCacheData;
+import org.apache.ignite.internal.processors.cache.verify.IdleVerifyResultV2;
+import org.apache.ignite.internal.processors.cache.verify.PartitionHashRecordV2;
+import org.apache.ignite.internal.processors.cache.verify.PartitionKeyV2;
+import org.apache.ignite.internal.processors.cache.verify.TransactionsHashRecord;
+import org.apache.ignite.internal.processors.cache.version.GridCacheVersion;
+import org.apache.ignite.internal.processors.cluster.BaselineTopology;
+import org.apache.ignite.internal.processors.task.GridInternal;
+import org.apache.ignite.internal.util.typedef.F;
+import org.apache.ignite.internal.util.typedef.internal.CU;
+import org.apache.ignite.resources.IgniteInstanceResource;
+import org.apache.ignite.resources.LoggerResource;
+import org.apache.ignite.transactions.TransactionState;
+import org.jetbrains.annotations.Nullable;
+
+import static org.apache.ignite.internal.managers.discovery.ConsistentIdMapper.ALL_NODES;
+import static org.apache.ignite.internal.processors.cache.persistence.file.FilePageStoreManager.CACHE_DATA_FILENAME;
+import static org.apache.ignite.internal.processors.cache.persistence.snapshot.IgniteSnapshotManager.databaseRelativePath;
+
+/** */
+@GridInternal
+public class IncrementalSnapshotVerificationTask extends AbstractSnapshotVerificationTask {
+    /** Serial version uid. */
+    private static final long serialVersionUID = 0L;
+
+    /** Ignite instance. */
+    @IgniteInstanceResource
+    private IgniteEx ignite;
+
+    /** Injected logger. */
+    @LoggerResource
+    private IgniteLogger log;
+
+    /** {@inheritDoc} */
+    @Override public SnapshotPartitionsVerifyTaskResult reduce(List<ComputeJobResult> results) throws IgniteException {
+        Map<Object, Map<Object, TransactionsHashRecord>> nodeTxHashMap = new HashMap<>();
+
+        List<List<TransactionsHashRecord>> txHashConflicts = new ArrayList<>();
+        Map<PartitionKeyV2, List<PartitionHashRecordV2>> partHashes = new HashMap<>();
+        Map<ClusterNode, Collection<GridCacheVersion>> partialCommits = new HashMap<>();
+
+        Map<ClusterNode, Exception> errors = new HashMap<>();
+
+        for (ComputeJobResult nodeRes: results) {
+            if (nodeRes.getException() != null) {
+                errors.put(nodeRes.getNode(), nodeRes.getException());
+
+                continue;
+            }
+
+            IncrementalSnapshotVerificationTaskResult res = nodeRes.getData();
+
+            if (!F.isEmpty(res.exceptions())) {
+                errors.put(nodeRes.getNode(), F.first(res.exceptions()));
+
+                continue;
+            }
+
+            if (!F.isEmpty(res.partialCommittedTxs()))
+                partialCommits.put(nodeRes.getNode(), res.partialCommittedTxs());
+
+            for (Map.Entry<PartitionKeyV2, PartitionHashRecordV2> entry: res.partHashRes().entrySet())
+                partHashes.computeIfAbsent(entry.getKey(), v -> new ArrayList<>()).add(entry.getValue());
+
+            if (log.isDebugEnabled())
+                log.debug("Handle VerifyIncrementalSnapshotJob result [node=" + nodeRes.getNode() + ", taskRes=" + res + ']');
+
+            nodeTxHashMap.put(nodeRes.getNode().consistentId(), res.txHashRes());
+
+            Iterator<Map.Entry<Object, TransactionsHashRecord>> resIt = res.txHashRes().entrySet().iterator();
+
+            while (resIt.hasNext()) {
+                Map.Entry<Object, TransactionsHashRecord> nodeTxHash = resIt.next();
+
+                Map<Object, TransactionsHashRecord> prevNodeTxHash = nodeTxHashMap.get(nodeTxHash.getKey());
+
+                if (prevNodeTxHash != null) {
+                    TransactionsHashRecord hash = nodeTxHash.getValue();
+                    TransactionsHashRecord prevHash = prevNodeTxHash.remove(hash.localConsistentId());
+
+                    if (prevHash == null || prevHash.transactionHash() != hash.transactionHash())
+                        txHashConflicts.add(F.asList(hash, prevHash));
+
+                    resIt.remove();
+                }
+            }
+        }
+
+        // Add all missed pairs to conflicts.
+        nodeTxHashMap.values().stream()
+            .flatMap(e -> e.values().stream())
+            .forEach(e -> txHashConflicts.add(F.asList(e, null)));
+
+        return new SnapshotPartitionsVerifyTaskResult(
+            metas,
+            errors.isEmpty() ? new IdleVerifyResultV2(partHashes, txHashConflicts, partialCommits) : new IdleVerifyResultV2(errors));
+    }
+
+    /** {@inheritDoc} */
+    @Override protected ComputeJob createJob(
+        String name,
+        @Nullable String path,
+        int incIdx,
+        String constId,
+        Collection<String> groups,
+        boolean check
+    ) {
+        return new VerifyIncrementalSnapshotJob(name, path, incIdx, constId);
+    }
+
+    /** */
+    private static class VerifyIncrementalSnapshotJob extends ComputeJobAdapter {
+        /** Serial version uid. */
+        private static final long serialVersionUID = 0L;
+
+        /** Ignite instance. */
+        @IgniteInstanceResource
+        private IgniteEx ignite;
+
+        /** Injected logger. */
+        @LoggerResource
+        private IgniteLogger log;
+
+        /** Snapshot name to validate. */
+        private final String snpName;
+
+        /** Snapshot directory path. */
+        private final String snpPath;
+
+        /** Incremental snapshot index. */
+        private final int incIdx;
+
+        /** Consistent ID. */
+        private final String consId;
+
+        /**
+         * @param snpName Snapshot name.
+         * @param snpPath Snapshot directory path.
+         * @param incIdx Incremental snapshot index.
+         * @param consId Consistent ID.
+         */
+        public VerifyIncrementalSnapshotJob(
+            String snpName,
+            @Nullable String snpPath,
+            int incIdx,
+            String consId
+        ) {
+            this.snpName = snpName;
+            this.snpPath = snpPath;
+            this.incIdx = incIdx;
+            this.consId = consId;
+        }
+
+        /**
+         * @return Map containing calculated transactions hash for every remote node in the cluster.
+         */
+        @Override public IncrementalSnapshotVerificationTaskResult execute() throws IgniteException {
+            try {
+                if (log.isInfoEnabled()) {
+                    log.info("Verify incremental snapshot procedure has been initiated " +
+                        "[snpName=" + snpName + ", incrementIndex=" + incIdx + ", consId=" + consId + ']');
+                }
+
+                if (incIdx <= 0)
+                    return new IncrementalSnapshotVerificationTaskResult();
+
+                BaselineTopology blt = ignite.context().state().clusterState().baselineTopology();
+
+                checkBaseline(blt);
+
+                Map<Integer, StoredCacheData> txCaches = readTxCachesData();
+
+                IncrementalSnapshotProcessor proc = new IncrementalSnapshotProcessor(
+                    ignite.context().cache().context(), snpName, snpPath, incIdx, txCaches.keySet()
+                ) {
+                    @Override void totalWalSegments(int segCnt) {
+                        // No-op.
+                    }
+
+                    @Override void processedWalSegments(int segCnt) {
+                        // No-op.
+                    }
+
+                    @Override void initWalEntries(LongAdder entriesCnt) {
+                        // No-op.
+                    }
+                };
+
+                short locShortId = blt.consistentIdMapping().get(consId);
+
+                Set<GridCacheVersion> activeDhtTxs = new HashSet<>();
+                Map<GridCacheVersion, Set<Short>> txPrimPartNodes = new HashMap<>();
+                Map<Short, Integer> nodesTxHash = new HashMap<>();
+
+                Set<GridCacheVersion> partialCommittedTxs = new HashSet<>();
+                Map<PartitionKeyV2, PartitionHashHolder> partMap = new HashMap<>();
+                List<Exception> exceptions = new ArrayList<>();
+
+                BiConsumer<GridCacheVersion, Set<Short>> calcTransactionHash = (xid, partNodes) -> {
+                    for (short shortId: partNodes) {
+                        if (shortId != locShortId)
+                            nodesTxHash.compute(shortId, (id, hash) -> xid.hashCode() + (hash == null ? 0 : hash));
+                    }
+                };
+
+                proc.process(dataEntry -> {
+                    if (dataEntry.op() == GridCacheOperation.READ)
+                        return;
+
+                    if (!activeDhtTxs.contains(dataEntry.writeVersion()))

Review Comment:
   Let's add logging of `dataEntry` at trace level.
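
   A minimal sketch of what that could look like inside the `proc.process(dataEntry -> { ... })` callback above, reusing the guarded-logging pattern already applied with `isDebugEnabled()` in `reduce()`; the message text is illustrative, not taken from the PR:

   ```java
   // Hypothetical trace-level logging of each processed entry; `log` and
   // `dataEntry` come from the enclosing job and callback shown in the diff.
   if (log.isTraceEnabled())
       log.trace("Processing data entry [entry=" + dataEntry + ']');
   ```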
