[
https://issues.apache.org/jira/browse/IGNITE-23958?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Mikhail Petrov updated IGNITE-23958:
------------------------------------
Description:
Reproducer:
{code:java}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite;
import java.security.Permissions;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.ignite.client.IgniteClient;
import org.apache.ignite.client.IgniteClientFuture;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.ClientConfiguration;
import org.apache.ignite.configuration.ClientConnectorConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.ThinClientConfiguration;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.events.DiscoveryCustomEvent;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
import
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
import
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
import
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
import
org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.visor.VisorTaskArgument;
import org.apache.ignite.internal.visor.VisorTaskResult;
import org.apache.ignite.plugin.security.SecurityPermissionSet;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;
import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cluster.ClusterState.ACTIVE;
import static
org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
import static
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
import static
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
import static
org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
import static
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
import static
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;
/** */
public class TransactionsHangingTest extends GridCommonAbstractTest {
/** {@inheritDoc} */
@Override protected IgniteConfiguration getConfiguration(String
igniteInstanceName) throws Exception {
IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
cfg.setDataStorageConfiguration(new DataStorageConfiguration()
.setDefaultDataRegionConfiguration(new DataRegionConfiguration()
.setPersistenceEnabled(true)));
cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
.setThreadPoolSize(2) // first thread is occupied by snapshot
operation, the second is designated to perform cache operations.
.setThinClientConfiguration(new ThinClientConfiguration()
.setMaxActiveComputeTasksPerConnection(1)));
cfg.setPluginProviders(new TestSecurityPluginProvider(
igniteInstanceName,
"",
create()
.defaultAllowAll(false)
.appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
.appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
.build(),
null,
false,
userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)),
userData("client", create()
.defaultAllowAll(false)
.appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ,
CACHE_PUT)
.build())
));
return cfg;
}
/** {@inheritDoc} */
@Override protected void beforeTest() throws Exception {
super.beforeTest();
cleanPersistenceDir();
}
/** {@inheritDoc} */
@Override protected void afterTest() throws Exception {
super.afterTest();
stopAllGrids();
cleanPersistenceDir();
}
/** */
@Test
public void test() throws Exception {
IgniteEx ignite = startGrid(0);
ignite.cluster().state(ACTIVE);
ignite.createCache(new
CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));
CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);
ignite.context().cache().context().exchange().registerExchangeAwareComponent(new
PartitionsExchangeAware() {
/** {@inheritDoc} */
@Override public void
onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
return;
DiscoveryCustomMessage msg =
((DiscoveryCustomEvent)fut.firstEvent()).customMessage();
assertNotNull(msg);
if (msg instanceof SnapshotDiscoveryMessage) {
snapshotBlockedLatch.countDown();
try {
snapshotUnblockedLatch.await(getTestTimeout(),
TimeUnit.MILLISECONDS);
}
catch (InterruptedException e) {
throw new IgniteException(e);
}
}
}
});
try (
IgniteClient snpAdmin = Ignition.startClient(new
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword(""));
IgniteClient cli = Ignition.startClient(new
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword(""))
) {
SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();
arg.snapshotName("test_snapshot");
arg.sync(true);
IgniteClientFuture<VisorTaskResult<String>> snpFut =
snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2(
SnapshotCreateTask.class.getName(),
new VisorTaskArgument<>(
grid(0).localNode().id(),
arg,
false)
);
assertTrue(snapshotBlockedLatch.await(getTestTimeout(),
TimeUnit.MILLISECONDS));
IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() ->
cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() ->
cli.cache(DEFAULT_CACHE_NAME).put(1, 1));
U.sleep(1000); // Wait for tx requests chaining.
snapshotUnblockedLatch.countDown();
snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
putFut0.get(getTestTimeout());
putFut1.get(getTestTimeout()); // Will hang.
}
}
/** */
private TestSecurityData userData(String login, SecurityPermissionSet
perms) {
return new TestSecurityData(
login,
"",
perms,
new Permissions()
);
}
}
{code}
Code execution steps that lead to the hanging:
1. Start of the snapshot operation initiated by "administrator user account"
causes PME to start.
2. Thin client sends tx-1, which is blocked until PME is completed.
3. Let's assume that thread-1 was used to handle the tx-1 request. After
https://issues.apache.org/jira/browse/IGNITE-21183, threads of the thin client
thread pool are no longer blocked until the end of transactions. Transaction
requests are handled asynchronously (see suspend/resume of transactions). This
allows thread-1 to handle another tx request.
4. Thin client sends tx-2 which is handled by the thread-1.
5. Since this thread has not completed the previous tx-1, tx-2 chains itself
to the tx-1 future and starts waiting for its completion. See
org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856
6. The PME caused by the snapshot finishes. The tx-1 future is notified to proceed by
the PME thread. The crucial thing here is that tx-1 continues its execution in
the PME thread, which is associated with the user that started the snapshot. When
tx-1 completes, tx-2 starts its execution, also in the thread that is associated with
the user that started the snapshot.
7. If the snapshot administrator user was not granted permissions for cache
operations, tx-2 fails, and the future chain described in step 5 becomes broken.
As a result, every new transaction handled by thread-1 chains itself
to the previous transaction executed by this thread, but these transactions will
never complete because the future chain is broken.
We must fix exception handling during tx operation chaining and manually
restore the security context when executing a transactional operation from the
future listener.
was:
Reproducer:
{code:java}
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite;
import java.security.Permissions;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.ignite.client.IgniteClient;
import org.apache.ignite.client.IgniteClientFuture;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.ClientConfiguration;
import org.apache.ignite.configuration.ClientConnectorConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.ThinClientConfiguration;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.events.DiscoveryCustomEvent;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
import
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
import
org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
import
org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
import
org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.internal.visor.VisorTaskArgument;
import org.apache.ignite.internal.visor.VisorTaskResult;
import org.apache.ignite.plugin.security.SecurityPermissionSet;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;
import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cluster.ClusterState.ACTIVE;
import static
org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
import static
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
import static
org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
import static
org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
import static
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
import static
org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;
/** */
public class TransactionsHangingTest extends GridCommonAbstractTest {
/** {@inheritDoc} */
@Override protected IgniteConfiguration getConfiguration(String
igniteInstanceName) throws Exception {
IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
cfg.setDataStorageConfiguration(new DataStorageConfiguration()
.setDefaultDataRegionConfiguration(new DataRegionConfiguration()
.setPersistenceEnabled(true)));
cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
.setThreadPoolSize(2) // first thread is occupied by snapshot
operation, the second is designated to perform cache operations.
.setThinClientConfiguration(new ThinClientConfiguration()
.setMaxActiveComputeTasksPerConnection(1)));
cfg.setPluginProviders(new TestSecurityPluginProvider(
igniteInstanceName,
"",
create()
.defaultAllowAll(false)
.appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
.appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
.build(),
null,
false,
userData("admin-snapshot", systemPermissions(ADMIN_SNAPSHOT)),
userData("client", create()
.defaultAllowAll(false)
.appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ,
CACHE_PUT)
.build())
));
return cfg;
}
/** {@inheritDoc} */
@Override protected void beforeTest() throws Exception {
super.beforeTest();
cleanPersistenceDir();
}
/** {@inheritDoc} */
@Override protected void afterTest() throws Exception {
super.afterTest();
stopAllGrids();
cleanPersistenceDir();
}
/** */
@Test
public void test() throws Exception {
IgniteEx ignite = startGrid(0);
ignite.cluster().state(ACTIVE);
ignite.createCache(new
CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));
CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);
ignite.context().cache().context().exchange().registerExchangeAwareComponent(new
PartitionsExchangeAware() {
/** {@inheritDoc} */
@Override public void
onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
return;
DiscoveryCustomMessage msg =
((DiscoveryCustomEvent)fut.firstEvent()).customMessage();
assertNotNull(msg);
if (msg instanceof SnapshotDiscoveryMessage) {
snapshotBlockedLatch.countDown();
try {
snapshotUnblockedLatch.await(getTestTimeout(),
TimeUnit.MILLISECONDS);
}
catch (InterruptedException e) {
throw new IgniteException(e);
}
}
}
});
try (
IgniteClient snpAdmin = Ignition.startClient(new
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("admin-snapshot").setUserPassword(""));
IgniteClient cli = Ignition.startClient(new
ClientConfiguration().setAddresses("127.0.0.1:10800").setUserName("client").setUserPassword(""))
) {
SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();
arg.snapshotName("test_snapshot");
arg.sync(true);
IgniteClientFuture<VisorTaskResult<String>> snpFut =
snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2(
SnapshotCreateTask.class.getName(),
new VisorTaskArgument<>(
grid(0).localNode().id(),
arg,
false)
);
assertTrue(snapshotBlockedLatch.await(getTestTimeout(),
TimeUnit.MILLISECONDS));
IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() ->
cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() ->
cli.cache(DEFAULT_CACHE_NAME).put(1, 1));
U.sleep(1000); // Wait for tx requests chaining.
snapshotUnblockedLatch.countDown();
snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
putFut0.get(getTestTimeout());
putFut1.get(getTestTimeout()); // Will hang.
}
}
/** */
private TestSecurityData userData(String login, SecurityPermissionSet
perms) {
return new TestSecurityData(
login,
"",
perms,
new Permissions()
);
}
}
{code}
> Fix security context propagation for async transactional operations
> -------------------------------------------------------------------
>
> Key: IGNITE-23958
> URL: https://issues.apache.org/jira/browse/IGNITE-23958
> Project: Ignite
> Issue Type: Bug
> Reporter: Mikhail Petrov
> Assignee: Mikhail Petrov
> Priority: Blocker
> Labels: ise
> Fix For: 2.17
>
> Time Spent: 40m
> Remaining Estimate: 0h
>
> Reproducer:
> {code:java}
> /*
> * Licensed to the Apache Software Foundation (ASF) under one or more
> * contributor license agreements. See the NOTICE file distributed with
> * this work for additional information regarding copyright ownership.
> * The ASF licenses this file to You under the Apache License, Version 2.0
> * (the "License"); you may not use this file except in compliance with
> * the License. You may obtain a copy of the License at
> *
> * http://www.apache.org/licenses/LICENSE-2.0
> *
> * Unless required by applicable law or agreed to in writing, software
> * distributed under the License is distributed on an "AS IS" BASIS,
> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> * See the License for the specific language governing permissions and
> * limitations under the License.
> */
> package org.apache.ignite;
> import java.security.Permissions;
> import java.util.concurrent.CountDownLatch;
> import java.util.concurrent.TimeUnit;
> import org.apache.ignite.client.IgniteClient;
> import org.apache.ignite.client.IgniteClientFuture;
> import org.apache.ignite.configuration.CacheConfiguration;
> import org.apache.ignite.configuration.ClientConfiguration;
> import org.apache.ignite.configuration.ClientConnectorConfiguration;
> import org.apache.ignite.configuration.DataRegionConfiguration;
> import org.apache.ignite.configuration.DataStorageConfiguration;
> import org.apache.ignite.configuration.IgniteConfiguration;
> import org.apache.ignite.configuration.ThinClientConfiguration;
> import org.apache.ignite.internal.IgniteEx;
> import org.apache.ignite.internal.IgniteInternalFuture;
> import org.apache.ignite.internal.events.DiscoveryCustomEvent;
> import
> org.apache.ignite.internal.management.snapshot.SnapshotCreateCommandArg;
> import org.apache.ignite.internal.management.snapshot.SnapshotCreateTask;
> import org.apache.ignite.internal.managers.discovery.DiscoveryCustomMessage;
> import
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsExchangeFuture;
> import
> org.apache.ignite.internal.processors.cache.distributed.dht.preloader.PartitionsExchangeAware;
> import
> org.apache.ignite.internal.processors.cache.persistence.snapshot.SnapshotDiscoveryMessage;
> import org.apache.ignite.internal.processors.security.impl.TestSecurityData;
> import
> org.apache.ignite.internal.processors.security.impl.TestSecurityPluginProvider;
> import org.apache.ignite.internal.util.typedef.internal.U;
> import org.apache.ignite.internal.visor.VisorTaskArgument;
> import org.apache.ignite.internal.visor.VisorTaskResult;
> import org.apache.ignite.plugin.security.SecurityPermissionSet;
> import org.apache.ignite.testframework.GridTestUtils;
> import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
> import org.junit.Test;
> import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
> import static org.apache.ignite.cluster.ClusterState.ACTIVE;
> import static
> org.apache.ignite.internal.events.DiscoveryCustomEvent.EVT_DISCOVERY_CUSTOM_EVT;
> import static
> org.apache.ignite.plugin.security.SecurityPermission.ADMIN_CLUSTER_STATE;
> import static
> org.apache.ignite.plugin.security.SecurityPermission.ADMIN_SNAPSHOT;
> import static
> org.apache.ignite.plugin.security.SecurityPermission.CACHE_CREATE;
> import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_PUT;
> import static org.apache.ignite.plugin.security.SecurityPermission.CACHE_READ;
> import static
> org.apache.ignite.plugin.security.SecurityPermission.JOIN_AS_SERVER;
> import static
> org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.create;
> import static
> org.apache.ignite.plugin.security.SecurityPermissionSetBuilder.systemPermissions;
> /**
>  * Reproducer for IGNITE-23958: the second of two thin client cache puts, sent while the exchange triggered by
>  * a snapshot creation request is blocked, never completes after the exchange is released.
>  */
> public class TransactionsHangingTest extends GridCommonAbstractTest {
>     /** Address the thin clients started by the test connect to. */
>     private static final String CLIENT_CONNECTOR_ADDR = "127.0.0.1:10800";
>
>     /** Login of the user that is created with the {@code ADMIN_SNAPSHOT} system permission. */
>     private static final String SNAPSHOT_ADMIN_LOGIN = "admin-snapshot";
>
>     /** Login of the user that is created with read/put permissions for the default cache. */
>     private static final String CACHE_CLIENT_LOGIN = "client";
>
>     /** {@inheritDoc} */
>     @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
>         IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
>
>         cfg.setDataStorageConfiguration(new DataStorageConfiguration()
>             .setDefaultDataRegionConfiguration(new DataRegionConfiguration()
>                 .setPersistenceEnabled(true)));
>
>         // Two threads: the first one is occupied by the snapshot operation,
>         // the second one is designated to perform cache operations.
>         cfg.setClientConnectorConfiguration(new ClientConnectorConfiguration()
>             .setThreadPoolSize(2)
>             .setThinClientConfiguration(new ThinClientConfiguration()
>                 .setMaxActiveComputeTasksPerConnection(1)));
>
>         cfg.setPluginProviders(new TestSecurityPluginProvider(
>             igniteInstanceName,
>             "",
>             create()
>                 .defaultAllowAll(false)
>                 .appendSystemPermissions(JOIN_AS_SERVER, ADMIN_CLUSTER_STATE)
>                 .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_CREATE)
>                 .build(),
>             null,
>             false,
>             userData(SNAPSHOT_ADMIN_LOGIN, systemPermissions(ADMIN_SNAPSHOT)),
>             userData(CACHE_CLIENT_LOGIN, create()
>                 .defaultAllowAll(false)
>                 .appendCachePermissions(DEFAULT_CACHE_NAME, CACHE_READ, CACHE_PUT)
>                 .build())
>         ));
>
>         return cfg;
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void beforeTest() throws Exception {
>         super.beforeTest();
>
>         cleanPersistenceDir();
>     }
>
>     /** {@inheritDoc} */
>     @Override protected void afterTest() throws Exception {
>         super.afterTest();
>
>         stopAllGrids();
>
>         cleanPersistenceDir();
>     }
>
>     /**
>      * Blocks the exchange triggered by a snapshot creation request, sends two cache puts from a thin client
>      * while the exchange is blocked, then releases the exchange and waits for all operations to complete.
>      *
>      * @throws Exception If failed.
>      */
>     @Test
>     public void test() throws Exception {
>         IgniteEx ignite = startGrid(0);
>
>         ignite.cluster().state(ACTIVE);
>
>         ignite.createCache(new CacheConfiguration<>().setName(DEFAULT_CACHE_NAME).setAtomicityMode(TRANSACTIONAL));
>
>         CountDownLatch snapshotBlockedLatch = new CountDownLatch(1);
>         CountDownLatch snapshotUnblockedLatch = new CountDownLatch(1);
>
>         ignite.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() {
>             /** {@inheritDoc} */
>             @Override public void onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
>                 if (fut.firstEvent().type() != EVT_DISCOVERY_CUSTOM_EVT)
>                     return;
>
>                 DiscoveryCustomMessage msg = ((DiscoveryCustomEvent)fut.firstEvent()).customMessage();
>
>                 assertNotNull(msg);
>
>                 if (msg instanceof SnapshotDiscoveryMessage) {
>                     // Tell the test thread that the exchange is blocked, then hold it until the puts are sent.
>                     snapshotBlockedLatch.countDown();
>
>                     try {
>                         // The result is intentionally ignored: on timeout the exchange simply proceeds.
>                         snapshotUnblockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS);
>                     }
>                     catch (InterruptedException e) {
>                         // Preserve the interrupt status for the code further up the stack.
>                         Thread.currentThread().interrupt();
>
>                         throw new IgniteException(e);
>                     }
>                 }
>             }
>         });
>
>         try (
>             IgniteClient snpAdmin = startClient(SNAPSHOT_ADMIN_LOGIN);
>             IgniteClient cli = startClient(CACHE_CLIENT_LOGIN)
>         ) {
>             SnapshotCreateCommandArg arg = new SnapshotCreateCommandArg();
>
>             arg.snapshotName("test_snapshot");
>             arg.sync(true);
>
>             IgniteClientFuture<VisorTaskResult<String>> snpFut =
>                 snpAdmin.compute().<VisorTaskArgument<?>, VisorTaskResult<String>>executeAsync2(
>                     SnapshotCreateTask.class.getName(),
>                     new VisorTaskArgument<>(
>                         grid(0).localNode().id(),
>                         arg,
>                         false)
>                 );
>
>             assertTrue(snapshotBlockedLatch.await(getTestTimeout(), TimeUnit.MILLISECONDS));
>
>             IgniteInternalFuture<Object> putFut0 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(0, 0));
>             IgniteInternalFuture<Object> putFut1 = GridTestUtils.runAsync(() -> cli.cache(DEFAULT_CACHE_NAME).put(1, 1));
>
>             U.sleep(1000); // Wait for tx requests chaining.
>
>             snapshotUnblockedLatch.countDown();
>
>             snpFut.get(getTestTimeout(), TimeUnit.MILLISECONDS);
>
>             putFut0.get(getTestTimeout());
>             putFut1.get(getTestTimeout()); // Will hang.
>         }
>     }
>
>     /**
>      * Starts a thin client authenticated as the specified user with an empty password.
>      *
>      * @param login User login.
>      * @return Started thin client.
>      */
>     private static IgniteClient startClient(String login) {
>         return Ignition.startClient(new ClientConfiguration()
>             .setAddresses(CLIENT_CONNECTOR_ADDR)
>             .setUserName(login)
>             .setUserPassword(""));
>     }
>
>     /**
>      * Creates security data for a user with an empty password and an empty sandbox permission set.
>      *
>      * @param login User login.
>      * @param perms Ignite permissions of the user.
>      * @return Security data describing the user.
>      */
>     private TestSecurityData userData(String login, SecurityPermissionSet perms) {
>         return new TestSecurityData(
>             login,
>             "",
>             perms,
>             new Permissions()
>         );
>     }
> }
> {code}
> Code execution steps that lead to the hanging:
> 1. Start of the snapshot operation initiated by "administrator user account"
> causes PME to start.
> 2. Thin client sends tx-1, which is blocked until PME is completed.
> 3. Let's assume that thread-1 was used to handle the tx-1 request. After
> https://issues.apache.org/jira/browse/IGNITE-21183, threads of the thin client
> thread pool are no longer blocked until the end of transactions.
> Transaction requests are handled asynchronously (see suspend/resume of
> transactions). This allows thread-1 to handle another tx request.
> 4. Thin client sends tx-2 which is handled by the thread-1.
> 5. Since this thread has not completed the previous tx-1, tx-2 chains itself
> to the tx-1 future and starts waiting for its completion. See
> org/apache/ignite/internal/processors/cache/GridCacheAdapter.java:3856
> 6. The PME caused by the snapshot finishes. The tx-1 future is notified to proceed by
> the PME thread. The crucial thing here is that tx-1 continues its execution in
> the PME thread, which is associated with the user that started the snapshot. When
> tx-1 completes, tx-2 starts its execution, also in the thread that is associated with
> the user that started the snapshot.
> 7. If the snapshot administrator user was not granted permissions for cache
> operations, tx-2 fails, and the future chain described in step 5 becomes
> broken.
> As a result, every new transaction handled by thread-1 chains itself
> to the previous transaction executed by this thread, but these transactions will
> never complete because the future chain is broken.
> We must fix exception handling during tx operation chaining and manually
> restore the security context when executing a transactional operation from the
> future listener.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)