valepakh commented on code in PR #3043:
URL: https://github.com/apache/ignite-3/pull/3043#discussion_r1464767379
##########
modules/compute/src/main/java/org/apache/ignite/internal/compute/IgniteComputeImpl.java:
##########
@@ -128,23 +154,27 @@ private <R> JobExecution<R> executeOnOneNodeWithFailover(
return computeComponent.executeLocally(units, jobClassName, args);
} else {
return new ComputeJobFailover<R>(
- computeComponent, nodeLeftEventsSource,
- targetNode, failoverCandidates, units,
+ computeComponent, logicalTopologyService, topologyService,
+ targetNode, nextWorkerSelector, units,
jobClassName, args
).failSafeExecute();
}
}
- private <R> JobExecution<R> executeOnOneNode(
- ClusterNode targetNode,
- List<DeploymentUnit> units,
- String jobClassName,
- Object[] args
- ) {
- if (isLocal(targetNode)) {
- return computeComponent.executeLocally(units, jobClassName, args);
- } else {
- return computeComponent.executeRemotely(targetNode, units,
jobClassName, args);
+ private static class DeqNexWorkerSelector implements NextWorkerSelector {
Review Comment:
```suggestion
private static class DeqNextWorkerSelector implements NextWorkerSelector {
```
##########
modules/compute/src/main/java/org/apache/ignite/internal/compute/ComputeJobFailover.java:
##########
@@ -18,52 +18,65 @@
package org.apache.ignite.internal.compute;
import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.concurrent.ConcurrentLinkedDeque;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Consumer;
import org.apache.ignite.compute.DeploymentUnit;
import org.apache.ignite.compute.JobExecution;
+import org.apache.ignite.internal.cluster.management.topology.api.LogicalNode;
+import
org.apache.ignite.internal.cluster.management.topology.api.LogicalTopologyEventListener;
+import
org.apache.ignite.internal.cluster.management.topology.api.LogicalTopologyService;
+import
org.apache.ignite.internal.cluster.management.topology.api.LogicalTopologySnapshot;
import org.apache.ignite.internal.lang.IgniteInternalException;
import org.apache.ignite.internal.logger.IgniteLogger;
import org.apache.ignite.internal.logger.Loggers;
import org.apache.ignite.lang.ErrorGroups.Compute;
import org.apache.ignite.network.ClusterNode;
-import org.jetbrains.annotations.Nullable;
+import org.apache.ignite.network.TopologyService;
/**
* This is a helper class for {@link ComputeComponent} to handle job failures.
You can think about this class as a "retryable compute job
* with captured context". Retry logic is applied ONLY if the worker node
leaves the cluster. If the job itself is failing, then the
* exception is propagated to the caller and this class does not handle it.
*
* <p>If you want to execute a job on node1 and use node2 and node3 as
failover candidates,
- * then you should create an instance of this class with workerNode = node1,
failoverCandidates = [node2, node3] as arguments and
- * call {@link #failSafeExecute()}.
+ * then you should create an instance of this class with workerNode = node1,
failoverCandidates = [node2, node3] as arguments and call
+ * {@link #failSafeExecute()}.
*
* @param <T> the type of the result of the job.
*/
class ComputeJobFailover<T> {
private static final IgniteLogger LOG =
Loggers.forClass(ComputeJobFailover.class);
+ /**
+ * Thread to run failover logic. We can not perform time-consuming
operations in the same thread where we discover topology changes (it
+ * is network id thread).
+ */
+ private static final Executor executor =
Executors.newSingleThreadExecutor();
Review Comment:
Could you please create this executor with a thread factory obtained from
`NamedThreadFactory.create`, passing the node name and a proper prefix?
##########
modules/compute/src/main/java/org/apache/ignite/internal/compute/NextColocatedWorkerSelector.java:
##########
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.compute;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import org.apache.ignite.internal.hlc.HybridClock;
+import org.apache.ignite.internal.logger.IgniteLogger;
+import org.apache.ignite.internal.logger.Loggers;
+import org.apache.ignite.internal.placementdriver.PlacementDriver;
+import org.apache.ignite.internal.placementdriver.ReplicaMeta;
+import org.apache.ignite.internal.replicator.TablePartitionId;
+import org.apache.ignite.internal.table.IgniteTablesInternal;
+import org.apache.ignite.internal.table.TableViewInternal;
+import org.apache.ignite.lang.TableNotFoundException;
+import org.apache.ignite.lang.util.IgniteNameUtils;
+import org.apache.ignite.network.ClusterNode;
+import org.apache.ignite.network.TopologyService;
+import org.apache.ignite.table.Tuple;
+import org.apache.ignite.table.mapper.Mapper;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Next worker selector that returns primary replica node for next worker. If
there is no such node (we lost the majority, for example) the
+ * {@code CompletableFuture.completedFuture(null)} will be returned.
+ *
+ * @param <K> type of the key for the colocated table.
+ */
+public class NextColocatedWorkerSelector<K> implements NextWorkerSelector {
+ private static final IgniteLogger LOG =
Loggers.forClass(NextColocatedWorkerSelector.class);
+
+ private static final int PRIMARY_REPLICA_ASK_CLOCK_ADDITION_MILLIS =
10_000;
+
+ private static final int AWAIT_FOR_PRIMARY_REPLICA_SECONDS = 15;
+
+ private static final String DEFAULT_SCHEMA_NAME = "PUBLIC";
+
+ private final IgniteTablesInternal tables;
+
+ private final PlacementDriver placementDriver;
+
+ private final TopologyService topologyService;
+
+ private final HybridClock clock;
+
+ @Nullable
+ private final K key;
+
+ @Nullable
+ private final Mapper<K> keyMapper;
+
+ private final Tuple tuple;
+
+ private final TableViewInternal table;
+
+ NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ @Nullable K key,
+ @Nullable Mapper<K> keyMapper) {
+ this(tables, placementDriver, topologyService, clock, tableName, key,
keyMapper, null);
+ }
+
+ NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ Tuple tuple) {
+ this(tables, placementDriver, topologyService, clock, tableName, null,
null, tuple);
+ }
+
+ private NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ @Nullable K key,
+ @Nullable Mapper<K> keyMapper,
+ @Nullable Tuple tuple) {
+ this.tables = tables;
+ this.placementDriver = placementDriver;
+ this.topologyService = topologyService;
+ this.table = getTableViewInternal(tableName);
+ this.clock = clock;
+ this.key = key;
+ this.keyMapper = keyMapper;
+ this.tuple = tuple;
+ }
+
+ private TableViewInternal getTableViewInternal(String tableName) {
+ TableViewInternal table;
+ try {
+ table = requiredTable(tableName).get();
+ } catch (InterruptedException | ExecutionException e) {
+ throw new RuntimeException(e);
+ }
+ return table;
+ }
+
+ private CompletableFuture<ClusterNode>
tryToFindPrimaryReplica(TablePartitionId tablePartitionId)
+ throws ExecutionException, InterruptedException {
Review Comment:
It seems that this method doesn't throw these exceptions anymore, so the
exception handling in `next()` could be simplified as well.
##########
modules/compute/src/main/java/org/apache/ignite/internal/compute/NextColocatedWorkerSelector.java:
##########
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.compute;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import org.apache.ignite.internal.hlc.HybridClock;
+import org.apache.ignite.internal.logger.IgniteLogger;
+import org.apache.ignite.internal.logger.Loggers;
+import org.apache.ignite.internal.placementdriver.PlacementDriver;
+import org.apache.ignite.internal.placementdriver.ReplicaMeta;
+import org.apache.ignite.internal.replicator.TablePartitionId;
+import org.apache.ignite.internal.table.IgniteTablesInternal;
+import org.apache.ignite.internal.table.TableViewInternal;
+import org.apache.ignite.lang.TableNotFoundException;
+import org.apache.ignite.lang.util.IgniteNameUtils;
+import org.apache.ignite.network.ClusterNode;
+import org.apache.ignite.network.TopologyService;
+import org.apache.ignite.table.Tuple;
+import org.apache.ignite.table.mapper.Mapper;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Next worker selector that returns primary replica node for next worker. If
there is no such node (we lost the majority, for example) the
+ * {@code CompletableFuture.completedFuture(null)} will be returned.
+ *
+ * @param <K> type of the key for the colocated table.
+ */
+public class NextColocatedWorkerSelector<K> implements NextWorkerSelector {
+ private static final IgniteLogger LOG =
Loggers.forClass(NextColocatedWorkerSelector.class);
+
+ private static final int PRIMARY_REPLICA_ASK_CLOCK_ADDITION_MILLIS =
10_000;
+
+ private static final int AWAIT_FOR_PRIMARY_REPLICA_SECONDS = 15;
+
+ private static final String DEFAULT_SCHEMA_NAME = "PUBLIC";
+
+ private final IgniteTablesInternal tables;
+
+ private final PlacementDriver placementDriver;
+
+ private final TopologyService topologyService;
+
+ private final HybridClock clock;
+
+ @Nullable
+ private final K key;
+
+ @Nullable
+ private final Mapper<K> keyMapper;
+
+ private final Tuple tuple;
+
+ private final TableViewInternal table;
+
+ NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ @Nullable K key,
+ @Nullable Mapper<K> keyMapper) {
+ this(tables, placementDriver, topologyService, clock, tableName, key,
keyMapper, null);
+ }
+
+ NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ Tuple tuple) {
+ this(tables, placementDriver, topologyService, clock, tableName, null,
null, tuple);
+ }
+
+ private NextColocatedWorkerSelector(
+ IgniteTablesInternal tables,
+ PlacementDriver placementDriver,
+ TopologyService topologyService,
+ HybridClock clock,
+ String tableName,
+ @Nullable K key,
+ @Nullable Mapper<K> keyMapper,
+ @Nullable Tuple tuple) {
+ this.tables = tables;
+ this.placementDriver = placementDriver;
+ this.topologyService = topologyService;
+ this.table = getTableViewInternal(tableName);
Review Comment:
Why do we need to do this here? It seems that we can pass the table directly
from the call site.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]