[jira] [Commented] (IGNITE-12774) Transaction hangs after too many open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17062511#comment-17062511 ] Alexey Goncharuk commented on IGNITE-12774: --- Good, will merge the ticket soon. > Transaction hangs after too many open files NIO exception > - > > Key: IGNITE-12774 > URL: https://issues.apache.org/jira/browse/IGNITE-12774 > Project: Ignite > Issue Type: Bug >Reporter: Sergey Antonov >Assignee: Sergey Antonov >Priority: Major > Fix For: 2.9 > > Time Spent: 20m > Remaining Estimate: 0h > > Transaction hung after “Open too many files” error and was never finished. > {code:java} > import java.net.SocketException; > import java.util.concurrent.atomic.AtomicBoolean; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.lang.IgniteInClosure; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.spi.IgniteSpiException; > import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.apache.ignite.transactions.Transaction; > import org.apache.ignite.transactions.TransactionConcurrency; > import org.apache.ignite.transactions.TransactionIsolation; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cache.CacheMode.PARTITIONED; > public class TooManyOpenFilesTest extends GridCommonAbstractTest { > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > return super.getConfiguration(igniteInstanceName) > .setFailureHandler(new StopNodeOrHaltFailureHandler()) > .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi()) > 
.setConsistentId(igniteInstanceName); > } > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > @Override protected void afterTest() throws Exception { > stopAllGrids(); > cleanPersistenceDir(); > super.afterTest(); > } > public void test() throws Exception { > IgniteEx crd = startGrids(3); > crd.cluster().active(true); > crd.getOrCreateCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED)); > TooManyOpenFilesTcpCommunicationSpi spi = > (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi(); > try (Transaction tx = > grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, > TransactionIsolation.REPEATABLE_READ)) { > IgniteCache cache = > grid(1).cache(DEFAULT_CACHE_NAME); > cache.put(1, 1); > spi.throwException.set(true); > cache.put(2, 2); > cache.put(3, 2); > cache.put(4, 2); > // hungs here. > tx.commit(); > } > for (int i=0; i < 3 ; i++) { > assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1)); > assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2)); > } > } > private static class TooManyOpenFilesTcpCommunicationSpi extends > TcpCommunicationSpi { > private final AtomicBoolean throwException = new AtomicBoolean(); > /** {@inheritDoc} */ > @Override public void sendMessage(ClusterNode node, Message msg) > throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg); > } > /** {@inheritDoc} */ > @Override public void sendMessage( > ClusterNode node, > Message msg, > IgniteInClosure ackC > ) throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg, ackC); > } > private IgniteSpiException getException(ClusterNode node) { > String checkedExceptionMsg = "Failed to connect to node (is node > still alive?). 
" + > "Make sure that each ComputeTask and cache Transaction has a > timeout set " + > "in order to prevent parties from waiting forever in case of > network issues " + > "[nodeId=" + node.id() + ", addrs=null]"; > return new IgniteSpiException("Failed
[jira] [Commented] (IGNITE-12774) Transaction hangs after too many open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17062452#comment-17062452 ] Sergey Antonov commented on IGNITE-12774: - [~agoncharuk] No, I don't. But I created ticket IGNITE-12803 for investigation. > Transaction hangs after too many open files NIO exception > - > > Key: IGNITE-12774 > URL: https://issues.apache.org/jira/browse/IGNITE-12774 > Project: Ignite > Issue Type: Bug >Reporter: Sergey Antonov >Assignee: Sergey Antonov >Priority: Major > Fix For: 2.9 > > Time Spent: 20m > Remaining Estimate: 0h > > Transaction hung after “Open too many files” error and was never finished. > {code:java} > import java.net.SocketException; > import java.util.concurrent.atomic.AtomicBoolean; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.lang.IgniteInClosure; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.spi.IgniteSpiException; > import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.apache.ignite.transactions.Transaction; > import org.apache.ignite.transactions.TransactionConcurrency; > import org.apache.ignite.transactions.TransactionIsolation; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cache.CacheMode.PARTITIONED; > public class TooManyOpenFilesTest extends GridCommonAbstractTest { > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > return super.getConfiguration(igniteInstanceName) > .setFailureHandler(new StopNodeOrHaltFailureHandler()) > .setCommunicationSpi(new 
TooManyOpenFilesTcpCommunicationSpi()) > .setConsistentId(igniteInstanceName); > } > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > @Override protected void afterTest() throws Exception { > stopAllGrids(); > cleanPersistenceDir(); > super.afterTest(); > } > public void test() throws Exception { > IgniteEx crd = startGrids(3); > crd.cluster().active(true); > crd.getOrCreateCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED)); > TooManyOpenFilesTcpCommunicationSpi spi = > (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi(); > try (Transaction tx = > grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, > TransactionIsolation.REPEATABLE_READ)) { > IgniteCache cache = > grid(1).cache(DEFAULT_CACHE_NAME); > cache.put(1, 1); > spi.throwException.set(true); > cache.put(2, 2); > cache.put(3, 2); > cache.put(4, 2); > // hungs here. > tx.commit(); > } > for (int i=0; i < 3 ; i++) { > assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1)); > assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2)); > } > } > private static class TooManyOpenFilesTcpCommunicationSpi extends > TcpCommunicationSpi { > private final AtomicBoolean throwException = new AtomicBoolean(); > /** {@inheritDoc} */ > @Override public void sendMessage(ClusterNode node, Message msg) > throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg); > } > /** {@inheritDoc} */ > @Override public void sendMessage( > ClusterNode node, > Message msg, > IgniteInClosure ackC > ) throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg, ackC); > } > private IgniteSpiException getException(ClusterNode node) { > String checkedExceptionMsg = "Failed to connect to node (is node > still alive?). 
" + > "Make sure that each ComputeTask and cache Transaction has a > timeout set " + > "in order to prevent parties from waiting forever in case of > network issues " + > "[nodeId=" + node.id() + ", addrs=null]"; >
[jira] [Commented] (IGNITE-12774) Transaction hangs after too many open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17062403#comment-17062403 ] Alexey Goncharuk commented on IGNITE-12774: --- [~antonovsergey93], do you know which platforms throw exactly this message? Should we add more error message cases for Windows or BSD/zOS, for example? > Transaction hangs after too many open files NIO exception > - > > Key: IGNITE-12774 > URL: https://issues.apache.org/jira/browse/IGNITE-12774 > Project: Ignite > Issue Type: Bug >Reporter: Sergey Antonov >Assignee: Sergey Antonov >Priority: Major > Fix For: 2.9 > > Time Spent: 20m > Remaining Estimate: 0h > > Transaction hung after “Open too many files” error and was never finished. > {code:java} > import java.net.SocketException; > import java.util.concurrent.atomic.AtomicBoolean; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.lang.IgniteInClosure; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.spi.IgniteSpiException; > import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.apache.ignite.transactions.Transaction; > import org.apache.ignite.transactions.TransactionConcurrency; > import org.apache.ignite.transactions.TransactionIsolation; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cache.CacheMode.PARTITIONED; > public class TooManyOpenFilesTest extends GridCommonAbstractTest { > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > return super.getConfiguration(igniteInstanceName) > 
.setFailureHandler(new StopNodeOrHaltFailureHandler()) > .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi()) > .setConsistentId(igniteInstanceName); > } > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > @Override protected void afterTest() throws Exception { > stopAllGrids(); > cleanPersistenceDir(); > super.afterTest(); > } > public void test() throws Exception { > IgniteEx crd = startGrids(3); > crd.cluster().active(true); > crd.getOrCreateCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED)); > TooManyOpenFilesTcpCommunicationSpi spi = > (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi(); > try (Transaction tx = > grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, > TransactionIsolation.REPEATABLE_READ)) { > IgniteCache cache = > grid(1).cache(DEFAULT_CACHE_NAME); > cache.put(1, 1); > spi.throwException.set(true); > cache.put(2, 2); > cache.put(3, 2); > cache.put(4, 2); > // hungs here. 
> tx.commit(); > } > for (int i=0; i < 3 ; i++) { > assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1)); > assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2)); > } > } > private static class TooManyOpenFilesTcpCommunicationSpi extends > TcpCommunicationSpi { > private final AtomicBoolean throwException = new AtomicBoolean(); > /** {@inheritDoc} */ > @Override public void sendMessage(ClusterNode node, Message msg) > throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg); > } > /** {@inheritDoc} */ > @Override public void sendMessage( > ClusterNode node, > Message msg, > IgniteInClosure ackC > ) throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg, ackC); > } > private IgniteSpiException getException(ClusterNode node) { > String checkedExceptionMsg = "Failed to connect to node (is node > still alive?). " + > "Make sure that each ComputeTask and cache Transaction has a > timeout set " + > "in order to prevent parties from waiting forever in case of > network
[jira] [Commented] (IGNITE-12774) Transaction hangs after too many open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17057570#comment-17057570 ] Sergey Antonov commented on IGNITE-12774: - [~akalashnikov] Could you review my changes please? > Transaction hangs after too many open files NIO exception > - > > Key: IGNITE-12774 > URL: https://issues.apache.org/jira/browse/IGNITE-12774 > Project: Ignite > Issue Type: Bug >Reporter: Sergey Antonov >Assignee: Sergey Antonov >Priority: Major > Fix For: 2.9 > > Time Spent: 10m > Remaining Estimate: 0h > > Transaction hung after “Open too many files” error and was never finished. > {code:java} > import java.net.SocketException; > import java.util.concurrent.atomic.AtomicBoolean; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.lang.IgniteInClosure; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.spi.IgniteSpiException; > import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.apache.ignite.transactions.Transaction; > import org.apache.ignite.transactions.TransactionConcurrency; > import org.apache.ignite.transactions.TransactionIsolation; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cache.CacheMode.PARTITIONED; > public class TooManyOpenFilesTest extends GridCommonAbstractTest { > @Override protected IgniteConfiguration getConfiguration(String > igniteInstanceName) throws Exception { > return super.getConfiguration(igniteInstanceName) > .setFailureHandler(new StopNodeOrHaltFailureHandler()) > .setCommunicationSpi(new 
TooManyOpenFilesTcpCommunicationSpi()) > .setConsistentId(igniteInstanceName); > } > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > @Override protected void afterTest() throws Exception { > stopAllGrids(); > cleanPersistenceDir(); > super.afterTest(); > } > public void test() throws Exception { > IgniteEx crd = startGrids(3); > crd.cluster().active(true); > crd.getOrCreateCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED)); > TooManyOpenFilesTcpCommunicationSpi spi = > (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi(); > try (Transaction tx = > grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, > TransactionIsolation.REPEATABLE_READ)) { > IgniteCache cache = > grid(1).cache(DEFAULT_CACHE_NAME); > cache.put(1, 1); > spi.throwException.set(true); > cache.put(2, 2); > cache.put(3, 2); > cache.put(4, 2); > // hungs here. > tx.commit(); > } > for (int i=0; i < 3 ; i++) { > assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1)); > assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2)); > } > } > private static class TooManyOpenFilesTcpCommunicationSpi extends > TcpCommunicationSpi { > private final AtomicBoolean throwException = new AtomicBoolean(); > /** {@inheritDoc} */ > @Override public void sendMessage(ClusterNode node, Message msg) > throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg); > } > /** {@inheritDoc} */ > @Override public void sendMessage( > ClusterNode node, > Message msg, > IgniteInClosure ackC > ) throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg, ackC); > } > private IgniteSpiException getException(ClusterNode node) { > String checkedExceptionMsg = "Failed to connect to node (is node > still alive?). 
" + > "Make sure that each ComputeTask and cache Transaction has a > timeout set " + > "in order to prevent parties from waiting forever in case of > network issues " + > "[nodeId=" + node.id() + ", addrs=null]"; > return new
[jira] [Commented] (IGNITE-12774) Transaction hangs after too many open files NIO exception
[ https://issues.apache.org/jira/browse/IGNITE-12774?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17057566#comment-17057566 ] Ignite TC Bot commented on IGNITE-12774: {panel:title=Branch: [pull/7516/head] Base: [master] : No blockers found!|borderStyle=dashed|borderColor=#ccc|titleBGColor=#D6F7C1}{panel} [TeamCity *--> Run :: All* Results|https://ci.ignite.apache.org/viewLog.html?buildId=5119807&buildTypeId=IgniteTests24Java8_RunAll] > Transaction hangs after too many open files NIO exception > - > > Key: IGNITE-12774 > URL: https://issues.apache.org/jira/browse/IGNITE-12774 > Project: Ignite > Issue Type: Bug >Reporter: Sergey Antonov >Assignee: Sergey Antonov >Priority: Major > Fix For: 2.9 > > Time Spent: 10m > Remaining Estimate: 0h > > Transaction hung after “Open too many files” error and was never finished. > {code:java} > import java.net.SocketException; > import java.util.concurrent.atomic.AtomicBoolean; > import org.apache.ignite.cluster.ClusterNode; > import org.apache.ignite.configuration.CacheConfiguration; > import org.apache.ignite.configuration.IgniteConfiguration; > import org.apache.ignite.failure.StopNodeOrHaltFailureHandler; > import org.apache.ignite.internal.IgniteEx; > import org.apache.ignite.lang.IgniteInClosure; > import org.apache.ignite.plugin.extensions.communication.Message; > import org.apache.ignite.spi.IgniteSpiException; > import org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi; > import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest; > import org.apache.ignite.transactions.Transaction; > import org.apache.ignite.transactions.TransactionConcurrency; > import org.apache.ignite.transactions.TransactionIsolation; > import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL; > import static org.apache.ignite.cache.CacheMode.PARTITIONED; > public class TooManyOpenFilesTest extends GridCommonAbstractTest { > @Override protected IgniteConfiguration 
getConfiguration(String > igniteInstanceName) throws Exception { > return super.getConfiguration(igniteInstanceName) > .setFailureHandler(new StopNodeOrHaltFailureHandler()) > .setCommunicationSpi(new TooManyOpenFilesTcpCommunicationSpi()) > .setConsistentId(igniteInstanceName); > } > @Override protected void beforeTest() throws Exception { > super.beforeTest(); > stopAllGrids(); > cleanPersistenceDir(); > } > @Override protected void afterTest() throws Exception { > stopAllGrids(); > cleanPersistenceDir(); > super.afterTest(); > } > public void test() throws Exception { > IgniteEx crd = startGrids(3); > crd.cluster().active(true); > crd.getOrCreateCache(new > CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL).setBackups(1).setCacheMode(PARTITIONED)); > TooManyOpenFilesTcpCommunicationSpi spi = > (TooManyOpenFilesTcpCommunicationSpi)grid(2).context().config().getCommunicationSpi(); > try (Transaction tx = > grid(1).transactions().txStart(TransactionConcurrency.PESSIMISTIC, > TransactionIsolation.REPEATABLE_READ)) { > IgniteCache cache = > grid(1).cache(DEFAULT_CACHE_NAME); > cache.put(1, 1); > spi.throwException.set(true); > cache.put(2, 2); > cache.put(3, 2); > cache.put(4, 2); > // hungs here. 
> tx.commit(); > } > for (int i=0; i < 3 ; i++) { > assertEquals(1, grid(i).cache(DEFAULT_CACHE_NAME).get(1)); > assertEquals(2, grid(i).cache(DEFAULT_CACHE_NAME).get(2)); > } > } > private static class TooManyOpenFilesTcpCommunicationSpi extends > TcpCommunicationSpi { > private final AtomicBoolean throwException = new AtomicBoolean(); > /** {@inheritDoc} */ > @Override public void sendMessage(ClusterNode node, Message msg) > throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg); > } > /** {@inheritDoc} */ > @Override public void sendMessage( > ClusterNode node, > Message msg, > IgniteInClosure ackC > ) throws IgniteSpiException { > if (throwException.get()) > throw getException(node); > super.sendMessage(node, msg, ackC); > } > private IgniteSpiException getException(ClusterNode node) { > String checkedExceptionMsg = "Failed to connect to node (is node > still alive?). " + > "Make sure that each ComputeTask and cache Transaction has a >