[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-04-17 Thread Nikolay Izhikov (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Nikolay Izhikov updated IGNITE-12774:
-
Fix Version/s: 2.8.1

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
> Fix For: 2.9, 2.8.1
>
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to remote 
> node: " + 

[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-03-19 Thread Alexey Goncharuk (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alexey Goncharuk updated IGNITE-12774:
--
Ignite Flags:   (was: Release Notes Required)

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
> Fix For: 2.9
>
>  Time Spent: 20m
>  Remaining Estimate: 0h
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to remote 

[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-03-19 Thread Alexey Goncharuk (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Alexey Goncharuk updated IGNITE-12774:
--
Ignite Flags: Release Notes Required

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
> Fix For: 2.9
>
>  Time Spent: 20m
>  Remaining Estimate: 0h
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to remote 
> node: 

[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-03-11 Thread Sergey Antonov (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sergey Antonov updated IGNITE-12774:

Fix Version/s: 2.9

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
> Fix For: 2.9
>
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to remote 
> node: " + node.id(), new 

[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-03-11 Thread Sergey Antonov (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sergey Antonov updated IGNITE-12774:

Ignite Flags: Release Notes Required  (was: Docs Required,Release Notes 
Required)

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to 

[jira] [Updated] (IGNITE-12774) Transaction hangs after too many open files NIO exception

2020-03-11 Thread Sergey Antonov (Jira)


 [ 
https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sergey Antonov updated IGNITE-12774:

Summary: Transaction hangs after too many open files NIO exception  (was: 
Transaction hungs after too many open files NIO exception)

> Transaction hangs after too many open files NIO exception
> -
>
> Key: IGNITE-12774
> URL: https://issues.apache.org/jira/browse/IGNITE-12774
> Project: Ignite
>  Issue Type: Bug
>Reporter: Sergey Antonov
>Assignee: Sergey Antonov
>Priority: Major
>
> The transaction hung after a “Too many open files” error and never finished.
> {code:java}
> import java.net.SocketException;
> import java.util.concurrent.atomic.AtomicBoolean;
> import org.apache.ignite.cluster.ClusterNode;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.lang.IgniteInClosure;
> import org.apache.ignite.plugin.extensions.communication.Message;
> import org.apache.ignite.spi.IgniteSpiException;
> import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.apache.ignite.transactions.Transaction;
> import org.apache.ignite.transactions.TransactionConcurrency;
> import org.apache.ignite.transactions.TransactionIsolation;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cache.CacheMode.PARTITIONED;
> public class TooManyOpenFilesTest extends GridCommonAbstractTest {
> @Override protected IgniteConfiguration getConfiguration(String 
> igniteInstanceName) throws Exception {
> return super.getConfiguration(igniteInstanceName)
> .setFailureHandler(new StopNodeOrHaltFailureHandler())
> .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi())
> .setConsistentId(igniteInstanceName);
> }
> @Override protected void beforeTest() throws Exception {
> super.beforeTest();
> stopAllGrids();
> cleanPersistenceDir();
> }
> @Override protected void afterTest() throws Exception {
> stopAllGrids();
> cleanPersistenceDir();
> super.afterTest();
> }
> public void test() throws Exception {
> IgniteEx crd = startGrids(3);
> crd.cluster().active(true);
> crd.getOrCreateCache(new 
> CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED));
> TooManyOpenFilesTcpCommunicationSpi spi = 
> (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi();
> try (Transaction tx = 
> grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, 
> TransactionIsolation.REPEATABLE_READ)) {
> IgniteCache cache = 
> grid(1).cache(DEFAULT_CACHE_NAME);
> cache.put(1, 1);
> spi.throwException.set(true);
> cache.put(2, 2);
> cache.put(3, 2);
> cache.put(4, 2);
> // hangs here.
> tx.commit();
> }
> for (int i=0; i < 3 ; i++) {
> assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1));
> assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2));
> }
> }
> private static class TooManyOpenFilesTcpCommunicationSpi extends 
> TcpCommunicationSpi {
> private final AtomicBoolean throwException = new AtomicBoolean();
> /** {@inheritDoc} */
> @Override public void sendMessage(ClusterNode node, Message msg) 
> throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg);
> }
> /** {@inheritDoc} */
> @Override public void sendMessage(
> ClusterNode node,
> Message msg,
> IgniteInClosure ackC
> ) throws IgniteSpiException {
> if (throwException.get())
> throw getException(node);
> super.sendMessage(node, msg, ackC);
> }
> private IgniteSpiException getException(ClusterNode node) {
> String checkedExceptionMsg =  "Failed to connect to node (is node 
> still alive?). " +
> "Make sure that each ComputeTask and cache Transaction has a 
> timeout set " +
> "in order to prevent parties from waiting forever in case of 
> network issues " +
> "[nodeId=" + node.id() + ", addrs=null]";
> return new IgniteSpiException("Failed to send message to