sergey-chugunov-1985 commented on code in PR #12178: URL: https://github.com/apache/ignite/pull/12178#discussion_r2235988570
########## modules/core/src/test/java/org/apache/ignite/cache/store/CacheStoreWithIgniteTxFailureTest.java: ########## @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.cache.store; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.IntFunction; +import javax.cache.Cache; +import org.apache.ignite.Ignite; +import org.apache.ignite.IgniteCache; +import org.apache.ignite.IgniteException; +import org.apache.ignite.cache.CacheInterceptor; +import org.apache.ignite.cache.CacheInterceptorAdapter; +import org.apache.ignite.configuration.CacheConfiguration; +import org.apache.ignite.configuration.NearCacheConfiguration; +import org.apache.ignite.failure.FailureHandler; +import org.apache.ignite.failure.StopNodeFailureHandler; +import org.apache.ignite.internal.IgniteEx; +import org.apache.ignite.internal.processors.cache.GridCacheAbstractSelfTest; +import org.apache.ignite.internal.util.typedef.G; +import org.apache.ignite.transactions.Transaction; +import org.jetbrains.annotations.Nullable; +import org.junit.Test; +import org.junit.runner.RunWith; +import 
org.junit.runners.Parameterized; + +/** + * Tests to check scenarios with system failures during transaction commit. Internal system failures are simulated by + * {@link CacheInterceptor} custom implementation throwing an exception during final commit phase. + */ +@RunWith(Parameterized.class) +public class CacheStoreWithIgniteTxFailureTest extends GridCacheAbstractSelfTest { + /** */ + private static final int GRID_COUNT = 3; + + /** */ + private static final int KEYS_NUMBER = 50; + + /** */ + private static final int FAULTY_NODE_IDX = 1; + + /** */ + private static final IntFunction<Integer> KEY_UPDATE_FUNCTION = key -> key + KEYS_NUMBER * 3; + + /** + * Type of node for keys involved into transaction: primary or backup. + */ + private enum FaultyNodeType { + /** */ + PRIMARY, + /** */ + BACKUP + } + + /** + * Role of faulty node in transaction management: tx coordinator or regular node. + */ + private enum FaultyNodeRole { + /** */ + REGULAR, + /** */ + TX_COORDINATOR + } + + /** */ + @Parameterized.Parameter + public FaultyNodeType faultyNodeType; + + /** */ + @Parameterized.Parameter(1) + public FaultyNodeRole faultyNodeRole; + + /** */ + @Parameterized.Parameter(2) + public boolean withFaulireHandler; + + /** */ + @Parameterized.Parameter(3) + public boolean withNearCacheConfiguration; + + /** */ + @Parameterized.Parameters(name = "faultyNodeType={0}, faultyNodeRole={1}, withFaulireHandler={2}, withNearCacheConfiguration={3}") + public static List<Object[]> parameters() { + List<Object[]> params = new ArrayList<>(); + + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.REGULAR, true, false}); + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.REGULAR, false, false}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.REGULAR, true, false}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.REGULAR, false, false}); + + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.TX_COORDINATOR, 
false, false}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.TX_COORDINATOR, false, false}); + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.TX_COORDINATOR, true, false}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.TX_COORDINATOR, true, false}); + + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.REGULAR, true, true}); + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.REGULAR, false, true}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.REGULAR, true, true}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.REGULAR, false, true}); + + params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.TX_COORDINATOR, false, true}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.TX_COORDINATOR, false, true}); + // TODO https://issues.apache.org/jira/browse/IGNITE-25924 + // params.add(new Object[] {FaultyNodeType.PRIMARY, FaultyNodeRole.TX_COORDINATOR, true, true}); + params.add(new Object[] {FaultyNodeType.BACKUP, FaultyNodeRole.TX_COORDINATOR, true, true}); + + return params; + } + + /** {@inheritDoc} */ + @Override protected void beforeTestsStarted() throws Exception { + initStoreStrategy(); + } + + /** {@inheritDoc} */ + @Override protected void beforeTest() throws Exception { + // No-op. + } + + /** {@inheritDoc} */ + @Override protected void afterTest() throws Exception { + stopAllGrids(); + + storeStgy.resetStore(); + } + + /** {@inheritDoc} */ + @Override protected int gridCount() { + return GRID_COUNT; + } + + /** {@inheritDoc} */ + @Override protected int backups() { + return 2; + } + + /** {@inheritDoc} */ + @Override protected FailureHandler getFailureHandler(String igniteInstanceName) { + return withFaulireHandler ? 
new StopNodeFailureHandler() : super.getFailureHandler(igniteInstanceName); + } + + /** {@inheritDoc} */ + @Override protected NearCacheConfiguration nearConfiguration() { + return withNearCacheConfiguration ? super.nearConfiguration() : null; + } + + /** {@inheritDoc} */ + @Override protected CacheConfiguration cacheConfiguration(String igniteInstanceName) throws Exception { + CacheConfiguration ccfg = super.cacheConfiguration(igniteInstanceName); + + ccfg.setInterceptor(new FaultyNodeInterceptor(igniteInstanceName, faultyNodeRole)); + + return ccfg; + } + + /** + * + */ + @Test + public void testSystemExceptionAfterCacheStoreCommit() throws Exception { + IgniteEx ig = startGrids(gridCount()); + IgniteCache<Integer, Integer> cache = ig.cache(DEFAULT_CACHE_NAME); + + fillCache(cache, KEYS_NUMBER); + + int keysType = faultyNodeType == FaultyNodeType.PRIMARY ? 0 : 1; + + List<Integer> keysOnFaultyNode = findKeys(grid(FAULTY_NODE_IDX).localNode(), cache, 5, 0, keysType); + + IgniteEx txCoordinator = faultyNodeRole == FaultyNodeRole.TX_COORDINATOR ? grid(FAULTY_NODE_IDX) : startClientGrid(); + if (faultyNodeType == FaultyNodeType.PRIMARY) + updateKeysInTxWithExceptionCatching(txCoordinator, keysOnFaultyNode); + else + updateKeysInTx(txCoordinator, keysOnFaultyNode); + + if (withFaulireHandler) { // FH doesn't fail tx coordinator + if (faultyNodeRole == FaultyNodeRole.TX_COORDINATOR) { + if (faultyNodeType == FaultyNodeType.BACKUP) { + waitForTopology(2); // two servers - tx coordinator hosting backup partition fails + } + else { + waitForTopology(3); // three servers - tx coordinator hosting primary partition doesn't fail Review Comment: This piece of code is rather entangled and hard to decipher, but the core distinction here is simple. When FH is not configured, no nodes fail. When FH is configured, nodes always fail *except* in the scenario where the tx coordinator is the same node as the primary or backup.
Only in that case do things get tricky, and personally I didn't quite get to the bottom of it. I can create a ticket for this scenario and title it something like "Errors during tx commit are not propagated to FH on Tx Coordinator". However, I intentionally left this case out of the scope of this PR, as fixing it the right way could require a lot of effort. Are you OK with that plan? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@ignite.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org