[ https://issues.apache.org/jira/browse/FLINK-20364?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17239059#comment-17239059 ]

Guruh Fajar Samudra commented on FLINK-20364:
---------------------------------------------

Github user GJL commented on a diff in the pull request:

https://github.com/apache/flink/pull/5091#discussion_r155754738

— Diff: flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotPool.java —
@@ -266,104 +279,367 @@ public void disconnectResourceManager() {
// ------------------------------------------------------------------------

	@Override
-	public CompletableFuture<SimpleSlot> allocateSlot(
-			SlotRequestID requestId,
-			ScheduledUnit task,
-			ResourceProfile resources,
-			Iterable<TaskManagerLocation> locationPreferences,
+	public CompletableFuture<LogicalSlot> allocateSlot(
+			SlotRequestId slotRequestId,
+			ScheduledUnit scheduledUnit,
+			ResourceProfile resourceProfile,
+			Collection<TaskManagerLocation> locationPreferences,
+			boolean allowQueuedScheduling,
			Time timeout) {
-		return internalAllocateSlot(requestId, task, resources, locationPreferences);
+		return internalAllocateSlot(
+			slotRequestId,
+			scheduledUnit,
+			resourceProfile,
+			locationPreferences,
+			allowQueuedScheduling);
	}

	@Override
	public void returnAllocatedSlot(Slot slot) {
		internalReturnAllocatedSlot(slot);
	}

+	private CompletableFuture<LogicalSlot> internalAllocateSlot(
+			SlotRequestId slotRequestId,
+			ScheduledUnit task,
+			ResourceProfile resourceProfile,
+			Collection<TaskManagerLocation> locationPreferences,
+			boolean allowQueuedScheduling) {
+
+		final SlotSharingGroupId slotSharingGroupId = task.getSlotSharingGroupId();
+
+		if (slotSharingGroupId != null) {
+			// allocate slot with slot sharing
+			final SlotSharingManager multiTaskSlotManager = slotSharingManagers.computeIfAbsent(
+				slotSharingGroupId,
+				id -> new SlotSharingManager(
+					id,
+					this,
+					providerAndOwner));
+
+			final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotFuture;
+
+			try {
+				if (task.getCoLocationConstraint() != null) {
+					multiTaskSlotFuture = allocateCoLocatedMultiTaskSlot(
+						task.getCoLocationConstraint(),
+						multiTaskSlotManager,
+						resourceProfile,
+						locationPreferences,
+						allowQueuedScheduling);
+				} else {
+					multiTaskSlotFuture = allocateMultiTaskSlot(
+						task.getJobVertexId(),
+						multiTaskSlotManager,
+						resourceProfile,
+						locationPreferences,
+						allowQueuedScheduling);
+				}
+			} catch (NoResourceAvailableException noResourceException) {
+				return FutureUtils.completedExceptionally(noResourceException);
+			}
+
+			// sanity check
+			Preconditions.checkState(!multiTaskSlotFuture.getMultiTaskSlot().contains(task.getJobVertexId()));
+
+			final SlotSharingManager.SingleTaskSlot leave = multiTaskSlotFuture.getMultiTaskSlot().allocateSingleTaskSlot(
+				slotRequestId,
+				task.getJobVertexId(),
+				multiTaskSlotFuture.getLocality());
+
+			return leave.getLogicalSlotFuture();
+		} else {
+			// request an allocated slot to assign a single logical slot to
+			CompletableFuture<SlotAndLocality> slotAndLocalityFuture = requestAllocatedSlot(
+				slotRequestId,
+				resourceProfile,
+				locationPreferences,
+				allowQueuedScheduling);
+
+			return slotAndLocalityFuture.thenApply(
+				(SlotAndLocality slotAndLocality) -> {
+					final AllocatedSlot allocatedSlot = slotAndLocality.getSlot();
+
+					final SingleLogicalSlot singleTaskSlot = new SingleLogicalSlot(
+						slotRequestId,
+						allocatedSlot,
+						null,
+						slotAndLocality.getLocality(),
+						providerAndOwner);
+
+					if (allocatedSlot.tryAssignPayload(singleTaskSlot)) {
+						return singleTaskSlot;
+					} else {
+						final FlinkException flinkException = new FlinkException("Could not assign payload to allocated slot " + allocatedSlot.getAllocationId() + '.');
+						releaseSlot(slotRequestId, null, flinkException);
+						throw new CompletionException(flinkException);
+					}
+				});
+		}
+	}

	@Override
	public CompletableFuture<Acknowledge> cancelSlotAllocation(SlotRequestID requestId) {
		final PendingRequest pendingRequest = removePendingRequest(requestId);

+	/**
+	 * Allocates a co-located {@link SlotSharingManager.MultiTaskSlot} for the given {@link CoLocationConstraint}.
+	 *
+	 * <p>If allowQueuedScheduling is true, then the returned {@link SlotSharingManager.MultiTaskSlot} can be
+	 * uncompleted.
+	 *
+	 * @param coLocationConstraint for which to allocate a {@link SlotSharingManager.MultiTaskSlot}
+	 * @param multiTaskSlotManager responsible for the slot sharing group for which to allocate the slot
+	 * @param resourceProfile specifying the requirements for the requested slot
+	 * @param locationPreferences containing preferred TaskExecutors on which to allocate the slot
+	 * @param allowQueuedScheduling true if queued scheduling (the returned task slot must not be completed yet) is allowed, otherwise false
+	 * @return A {@link SlotSharingManager.MultiTaskSlotLocality} which contains the allocated {@link SlotSharingManager.MultiTaskSlot}
+	 *         and its locality wrt the given location preferences
+	 * @throws NoResourceAvailableException if no task slot could be allocated
+	 */
+	private SlotSharingManager.MultiTaskSlotLocality allocateCoLocatedMultiTaskSlot(
+			CoLocationConstraint coLocationConstraint,
+			SlotSharingManager multiTaskSlotManager,
+			ResourceProfile resourceProfile,
+			Collection<TaskManagerLocation> locationPreferences,
+			boolean allowQueuedScheduling) throws NoResourceAvailableException {
+		final SlotRequestId coLocationSlotRequestId = coLocationConstraint.getSlotRequestId();
+
+		if (coLocationSlotRequestId != null) {
+			// we have a slot assigned --> try to retrieve it
+			final SlotSharingManager.TaskSlot taskSlot = multiTaskSlotManager.getTaskSlot(coLocationSlotRequestId);
+
+			if (taskSlot != null) {
+				Preconditions.checkState(taskSlot instanceof SlotSharingManager.MultiTaskSlot);
+				return SlotSharingManager.MultiTaskSlotLocality.of(((SlotSharingManager.MultiTaskSlot) taskSlot), Locality.LOCAL);
+			} else {
+				// the slot may have been cancelled in the mean time
+				coLocationConstraint.setSlotRequestId(null);
+			}
+		}
		if (pendingRequest != null) {
			failPendingRequest(pendingRequest, new CancellationException("Allocation with request id" + requestId + " cancelled."));

+		final Collection<TaskManagerLocation> actualLocationPreferences;
+
+		if (coLocationConstraint.isAssigned()) {
+			actualLocationPreferences = Collections.singleton(coLocationConstraint.getLocation());
+		} else {
-			final Slot slot = allocatedSlots.get(requestId);
+			actualLocationPreferences = locationPreferences;
+		}
+
+		// get a new multi task slot
+		final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = allocateMultiTaskSlot(
+			coLocationConstraint.getGroupId(),
+			multiTaskSlotManager,
+			resourceProfile,
+			actualLocationPreferences,
+			allowQueuedScheduling);
+
+		// check whether we fulfill the co-location constraint
+		if (coLocationConstraint.isAssigned() && multiTaskSlotLocality.getLocality() != Locality.LOCAL) {
+			multiTaskSlotLocality.getMultiTaskSlot().release(
+				new FlinkException("Multi task slot is not local and, thus, does not fulfill the co-location constraint."));

		if (slot != null) {
			LOG.info("Returning allocated slot {} because the corresponding allocation request {} was cancelled.", slot, requestId);
			if (slot.markCancelled()) {
-				internalReturnAllocatedSlot(slot);
+			throw new NoResourceAvailableException("Could not allocate a local multi task slot for the " +
+				"co location constraint " + coLocationConstraint + '.');
+		}
+
+		final SlotRequestId slotRequestId = new SlotRequestId();
+		final SlotSharingManager.MultiTaskSlot coLocationSlot = multiTaskSlotLocality.getMultiTaskSlot().allocateMultiTaskSlot(
+			slotRequestId,
+			coLocationConstraint.getGroupId());
+
+		// mark the requested slot as co-located slot for other co-located tasks
+		coLocationConstraint.setSlotRequestId(slotRequestId);
+
+		// lock the co-location constraint once we have obtained the allocated slot
+		coLocationSlot.getSlotContextFuture().whenComplete(
+			(SlotContext slotContext, Throwable throwable) -> {
+				if (throwable == null) {
+					// check whether we are still assigned to the co-location constraint
+					if (Objects.equals(coLocationConstraint.getSlotRequestId(), slotRequestId)) {
+						coLocationConstraint.lockLocation(slotContext.getTaskManagerLocation());
+					}
+				}
+			});
+
+		return SlotSharingManager.MultiTaskSlotLocality.of(coLocationSlot, multiTaskSlotLocality.getLocality());
+	}
+
+	/**
+	 * Allocates a {@link SlotSharingManager.MultiTaskSlot} for the given groupId which is in the
+	 * slot sharing group for which the given {@link SlotSharingManager} is responsible.
+	 *
+	 * <p>If allowQueuedScheduling is true, then the method can return an uncompleted {@link SlotSharingManager.MultiTaskSlot}.
+	 *
+	 * @param groupId for which to allocate a new {@link SlotSharingManager.MultiTaskSlot}
+	 * @param slotSharingManager responsible for the slot sharing group for which to allocate the slot
+	 * @param resourceProfile specifying the requirements for the requested slot
+	 * @param locationPreferences containing preferred TaskExecutors on which to allocate the slot
+	 * @param allowQueuedScheduling true if queued scheduling (the returned task slot must not be completed yet) is allowed, otherwise false
+	 * @return A {@link SlotSharingManager.MultiTaskSlotLocality} which contains the allocated {@link SlotSharingManager.MultiTaskSlot}
+	 *         and its locality wrt the given location preferences
+	 * @throws NoResourceAvailableException if no task slot could be allocated
+	 */
+	private SlotSharingManager.MultiTaskSlotLocality allocateMultiTaskSlot(
+			AbstractID groupId,
+			SlotSharingManager slotSharingManager,
+			ResourceProfile resourceProfile,
+			Collection<TaskManagerLocation> locationPreferences,
+			boolean allowQueuedScheduling) throws NoResourceAvailableException {
+
+		// check first whether we have a resolved root slot which we can use
+		SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = slotSharingManager.getResolvedRootSlot(
+			groupId,
+			locationPreferences);
+
+		if (multiTaskSlotLocality != null && multiTaskSlotLocality.getLocality() == Locality.LOCAL) {
+			return multiTaskSlotLocality;
+		}
+
+		final SlotRequestId allocatedSlotRequestId = new SlotRequestId();
+		final SlotRequestId multiTaskSlotRequestId = new SlotRequestId();
+
+		// check whether we have an allocated slot available which we can use to create a new multi task slot in
+		final SlotAndLocality slotAndLocality = pollAndAllocateSlot(allocatedSlotRequestId, resourceProfile, locationPreferences);
+
+		if (slotAndLocality != null && (slotAndLocality.getLocality() == Locality.LOCAL || multiTaskSlotLocality == null)) {
+
+			final AllocatedSlot allocatedSlot = slotAndLocality.getSlot();
+			final SlotSharingManager.MultiTaskSlot multiTaskSlot = slotSharingManager.createRootSlot(
+				multiTaskSlotRequestId,
+				CompletableFuture.completedFuture(slotAndLocality.getSlot()),
+				allocatedSlotRequestId);
+
+			if (allocatedSlot.tryAssignPayload(multiTaskSlot)) {
+				return SlotSharingManager.MultiTaskSlotLocality.of(multiTaskSlot, slotAndLocality.getLocality());
+			} else {
			LOG.debug("There was no slot allocation with {} to be cancelled.", requestId);
+				multiTaskSlot.release(new FlinkException("Could not assign payload to allocated slot " +
+					allocatedSlot.getAllocationId() + '.'));
+			}
+		}
		return CompletableFuture.completedFuture(Acknowledge.get());
	}
+		if (multiTaskSlotLocality != null) {
+			// prefer slot sharing group slots over unused slots
+			if (slotAndLocality != null) {
+				releaseSlot(
+					allocatedSlotRequestId,
+					null,
+					new FlinkException("Locality constraint is not better fulfilled by allocated slot."));
+			}
+			return multiTaskSlotLocality;
+		}
-	CompletableFuture<SimpleSlot> internalAllocateSlot(
-			SlotRequestID requestId,
-			ScheduledUnit task,
-			ResourceProfile resources,
-			Iterable<TaskManagerLocation> locationPreferences) {
+		if (allowQueuedScheduling) {
+			// there is no slot immediately available --> check first for uncompleted slots at the slot sharing group
+			SlotSharingManager.MultiTaskSlot multiTaskSlotFuture = slotSharingManager.getUnresolvedRootSlot(groupId);
+
+			if (multiTaskSlotFuture == null) {
+				// it seems as if we have to request a new slot from the resource manager, this is always the last resort!!!
+				final CompletableFuture<AllocatedSlot> futureSlot = requestNewAllocatedSlot(allocatedSlotRequestId, resourceProfile);
+
+				multiTaskSlotFuture = slotSharingManager.createRootSlot(
+					multiTaskSlotRequestId,
+					futureSlot.thenApply(Function.identity()),
— End diff –
nit: If you change the signature to
```
public MultiTaskSlot createRootSlot(
		SlotRequestId slotRequestId,
		CompletableFuture<? extends SlotContext> slotContextFuture,
		SlotRequestId allocatedSlotRequestId)
```
you won't need this trick to satisfy the compiler (PECS rule). Unfortunately 
more changes in the code would be needed, e.g., in `MultiTaskSlot`.
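
To make the nit concrete, here is a minimal, compilable sketch of the variance issue; `PecsSketch` and its toy types are hypothetical stand-ins, not Flink classes:

```
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;

public class PecsSketch {

	// Toy stand-ins for the real Flink types.
	interface SlotContext {}
	static class AllocatedSlot implements SlotContext {}

	// Invariant parameter, like the current createRootSlot signature:
	// accepts exactly CompletableFuture<SlotContext> and nothing else.
	static void invariant(CompletableFuture<SlotContext> future) {}

	// "Producer extends" (PECS): accepts a future of any SlotContext subtype.
	static void covariant(CompletableFuture<? extends SlotContext> future) {}

	public static void main(String[] args) {
		CompletableFuture<AllocatedSlot> futureSlot =
			CompletableFuture.completedFuture(new AllocatedSlot());

		// invariant(futureSlot);  // does not compile: Java generics are invariant

		// The identity-function trick from the diff: target typing re-types the
		// future as CompletableFuture<SlotContext> at the cost of an extra stage.
		CompletableFuture<SlotContext> upcast = futureSlot.thenApply(Function.identity());
		invariant(upcast);

		// With the wildcard signature, no conversion is needed.
		covariant(futureSlot);
	}
}
```

Declaring the parameter with `? extends` moves the flexibility into the callee's signature, which is why matching changes would ripple into `MultiTaskSlot` and other call sites.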

ASF GitHub Bot added a comment - 08/Dec/17 11:33
Github user GJL commented on a diff in the pull request:

https://github.com/apache/flink/pull/5091#discussion_r155758219

— Diff: flink-runtime/src/main/java/org/apache/flink/runtime/jobmaster/slotpool/SlotSharingManager.java —
@@ -0,0 +1,722 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.jobmaster.slotpool;
+
+import org.apache.flink.annotation.VisibleForTesting;
+import org.apache.flink.runtime.jobmaster.LogicalSlot;
+import org.apache.flink.runtime.jobmaster.SlotContext;
+import org.apache.flink.runtime.jobmaster.SlotOwner;
+import org.apache.flink.runtime.jobmaster.SlotRequestId;
+import org.apache.flink.runtime.instance.SlotSharingGroupId;
+import org.apache.flink.runtime.jobmanager.scheduler.Locality;
+import org.apache.flink.runtime.taskmanager.TaskManagerLocation;
+import org.apache.flink.util.AbstractID;
+import org.apache.flink.util.FlinkException;
+import org.apache.flink.util.Preconditions;
+
+import javax.annotation.Nullable;
+
+import java.util.AbstractCollection;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Manager which is responsible for slot sharing. Slot sharing allows to run different
+ * tasks in the same slot and to realize co-location constraints.
+ *
+ * <p>The SlotSharingManager allows to create a hierarchy of {@link TaskSlot} such that
+ * every {@link TaskSlot} is uniquely identified by a {@link SlotRequestId} identifying
+ * the request for the TaskSlot and a {@link AbstractID} identifying the task or the
+ * co-location constraint running in this slot.
+ *
+ * <p>The {@link TaskSlot} hierarchy is implemented by {@link MultiTaskSlot} and
+ * {@link SingleTaskSlot}. The former class represents inner nodes which can contain
+ * a number of other {@link TaskSlot} and the latter class represents the leave nodes.
— End diff –
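
As a reading aid, here is a minimal toy model of the TaskSlot hierarchy that the Javadoc above describes. It is a hypothetical sketch, not Flink's real classes: String ids stand in for SlotRequestId and AbstractID, and slot contexts, payloads, and futures are omitted:

```
import java.util.HashMap;
import java.util.Map;

public class TaskSlotHierarchySketch {

	// Base node: every slot is identified by the request that created it and
	// by the group (job vertex or co-location group) it hosts.
	abstract static class TaskSlot {
		final String slotRequestId; // stands in for SlotRequestId
		final String groupId;       // stands in for AbstractID

		TaskSlot(String slotRequestId, String groupId) {
			this.slotRequestId = slotRequestId;
			this.groupId = groupId;
		}
	}

	// Inner node: may contain children, at most one per group id.
	static class MultiTaskSlot extends TaskSlot {
		final Map<String, TaskSlot> children = new HashMap<>();

		MultiTaskSlot(String slotRequestId, String groupId) {
			super(slotRequestId, groupId);
		}

		// Mirrors the SlotPool's sanity check that a job vertex must not
		// occur twice in the same sharing slot.
		SingleTaskSlot allocateSingleTaskSlot(String slotRequestId, String groupId) {
			if (children.containsKey(groupId)) {
				throw new IllegalStateException("Group " + groupId + " already present.");
			}
			SingleTaskSlot leaf = new SingleTaskSlot(slotRequestId, groupId);
			children.put(groupId, leaf);
			return leaf;
		}
	}

	// Leaf node: hosts a single task.
	static class SingleTaskSlot extends TaskSlot {
		SingleTaskSlot(String slotRequestId, String groupId) {
			super(slotRequestId, groupId);
		}
	}

	public static void main(String[] args) {
		MultiTaskSlot root = new MultiTaskSlot("root-request", "sharing-group");
		root.allocateSingleTaskSlot("request-1", "sourceVertex");
		root.allocateSingleTaskSlot("request-2", "sinkVertex");
		// root.allocateSingleTaskSlot("request-3", "sourceVertex"); // would throw
	}
}
```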

> Add support for scheduling with slot sharing
> --------------------------------------------
>
>                 Key: FLINK-20364
>                 URL: https://issues.apache.org/jira/browse/FLINK-20364
>             Project: Flink
>          Issue Type: Test
>          Components: Runtime / Coordination
>    Affects Versions: statefun-2.2.1
>            Reporter: Guruh Fajar Samudra
>            Priority: Major
>             Fix For: statefun-2.2.2
>
>
> In order to reach feature equivalence with the old code base, we should add 
> support for scheduling with slot sharing to the SlotPool. This will also 
> allow us to run all the IT cases based on the {{AbstractTestBase}} on the 
> Flip-6 {{MiniCluster}}.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
