chenyulin0719 commented on code in PR #918:
URL: https://github.com/apache/yunikorn-k8shim/pull/918#discussion_r1798116560
##########
pkg/cache/context.go:
##########
@@ -370,15 +370,19 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
// 2. pod is now assigned
// 3. pod is not in terminated state
// 4. pod references a known node
- if oldPod == nil && utils.IsAssignedPod(pod) &&
!utils.IsPodTerminated(pod) {
+ if utils.IsAssignedPod(pod) && !utils.IsPodTerminated(pod) {
if ctx.schedulerCache.UpdatePod(pod) {
// pod was accepted by a real node
log.Log(log.ShimContext).Debug("pod is assigned to a
node, trigger occupied resource update",
zap.String("namespace", pod.Namespace),
zap.String("podName", pod.Name),
zap.String("podStatusBefore", podStatusBefore),
zap.String("podStatusCurrent",
string(pod.Status.Phase)))
- ctx.updateNodeOccupiedResources(pod.Spec.NodeName,
pod.Namespace, pod.Name, common.GetPodResource(pod),
schedulercache.AddOccupiedResource)
+ allocReq := common.CreateAllocationForForeignPod(pod)
+ if err :=
ctx.apiProvider.GetAPIs().SchedulerAPI.UpdateAllocation(allocReq); err != nil {
+ log.Log(log.ShimContext).Error("failed to add
foreign allocation to the core",
+ zap.Error(err))
+ }
} else {
// pod is orphaned (references an unknown node)
log.Log(log.ShimContext).Info("skipping occupied
resource update for assigned orphaned pod",
Review Comment:
```suggestion
log.Log(log.ShimContext).Info("skipping updating
foreign allocation for assigned orphaned pod",
```
Change `skipping the occupided resource update` to `skipping updating
foreign allocation` could be more accuracy.
##########
pkg/cache/context_test.go:
##########
@@ -2004,20 +1923,11 @@ func TestInitializeState(t *testing.T) {
assert.Equal(t, *pc.PreemptionPolicy, policy, "wrong preemption policy")
assert.Equal(t, pc.Annotations[constants.AnnotationAllowPreemption],
constants.True, "wrong allow-preemption value")
- // verify occupied / capacity on node
- capacity, occupied, ok :=
context.schedulerCache.SnapshotResources(nodeName1)
- assert.Assert(t, ok, "Unable to retrieve node resources")
- expectedCapacity := common.ParseResource("4", "10G")
- assert.Equal(t, expectedCapacity.Resources["vcore"].Value,
capacity.Resources["vcore"].Value, "wrong capacity vcore")
- assert.Equal(t, expectedCapacity.Resources["memory"].Value,
capacity.Resources["memory"].Value, "wrong capacity memory")
- expectedOccupied := common.ParseResource("1500m", "2G")
- assert.Equal(t, expectedOccupied.Resources["vcore"].Value,
occupied.Resources["vcore"].Value, "wrong occupied vcore")
- assert.Equal(t, expectedOccupied.Resources["memory"].Value,
occupied.Resources["memory"].Value, "wrong occupied memory")
-
Review Comment:
In the test, the following pods were added to podLister. However, after
removing above assertions, there is no other assertion for the first two
foreign pods. Could we add the related assertions?
- "foreignPending” -> **no assertion in the test**
- "foreignRunning” -> **no assertion in the test**
- podName1 (have assertions)
- podName2 (have assertions)
- podName3 (have assertions)
- podForeignUID (have assertions)
##########
pkg/cache/context_test.go:
##########
@@ -569,33 +543,28 @@ func TestAddUpdatePodForeign(t *testing.T) {
})
}
}
- return validatorFunc(request)
+ return nil
})
- host1 := nodeForTest(Host1, "10G", "10")
+ host1 := nodeForTest(Host1, "10G", "10") // add existing foreign pod
context.updateNode(nil, host1)
// pod is not assigned to any node
pod1 := foreignPod(podName1, "1G", "500m")
pod1.Status.Phase = v1.PodPending
pod1.Spec.NodeName = ""
- // validate add
- tc = "add-pod1"
- executed = false
- expectAdd = false
- expectRemove = false
+ // validate add (pending, no node assigned)
+ allocRequest = nil
context.AddPod(pod1)
- assert.Assert(t, !executed, "unexpected update")
+ assert.Assert(t, allocRequest == nil, "unexpected update")
pod := context.schedulerCache.GetPod(string(pod1.UID))
assert.Assert(t, pod == nil, "unassigned pod found in cache")
- // validate update
- tc = "update-pod1"
- executed = false
- expectRemove = false
+ // validate update (no change)
+ allocRequest = nil
context.UpdatePod(nil, pod1)
- assert.Assert(t, !executed, "unexpected update")
+ assert.Assert(t, allocRequest == nil, "unexpected update")
assert.Assert(t, pod == nil, "unassigned pod found in cache")
Review Comment:
```suggestion
pod = context.schedulerCache.GetPod(string(pod1.UID))
assert.Assert(t, pod == nil, "unassigned pod found in cache")
```
Missing one step before asserting the pod.
##########
pkg/cache/context_test.go:
##########
@@ -604,155 +573,101 @@ func TestAddUpdatePodForeign(t *testing.T) {
pod2.Spec.NodeName = Host1
// validate add
- tc = "add-pod2"
- executed = false
- expectAdd = true
- expectRemove = false
context.AddPod(pod2)
- assert.Assert(t, executed, "updated expected")
+ assert.Assert(t, allocRequest != nil, "update expected")
+ assertAddForeignPod(t, podName2, Host1, allocRequest)
pod = context.schedulerCache.GetPod(string(pod2.UID))
assert.Assert(t, pod != nil, "pod not found in cache")
- // validate update
- tc = "update-pod2"
- executed = false
- expectAdd = false
- expectRemove = false
+ // validate update (no change)
+ allocRequest = nil
context.UpdatePod(nil, pod2)
- assert.Assert(t, !executed, "unexpected update")
+ assert.Assert(t, allocRequest != nil, "update expected")
pod = context.schedulerCache.GetPod(string(pod2.UID))
assert.Assert(t, pod != nil, "pod not found in cache")
// validate update when not already in cache
- tc = "update-pod2-nocache-pre"
- executed = false
- expectAdd = false
- expectRemove = true
+ allocRequest = nil
context.DeletePod(pod2)
- assert.Assert(t, executed, "expected update")
- tc = "update-pod2-nocache"
- executed = false
- expectAdd = true
- expectRemove = false
+ assertReleaseForeignPod(t, podName2, allocRequest)
+
+ allocRequest = nil
context.UpdatePod(nil, pod2)
- assert.Assert(t, executed, "expected update")
+ assert.Assert(t, allocRequest != nil, "expected update")
pod = context.schedulerCache.GetPod(string(pod2.UID))
assert.Assert(t, pod != nil, "pod not found in cache")
+ assertAddForeignPod(t, podName2, Host1, allocRequest)
// pod is failed, should trigger update if already in cache
pod3 := pod2.DeepCopy()
pod3.Status.Phase = v1.PodFailed
// validate add
- tc = "add-pod3"
- executed = false
- expectAdd = false
- expectRemove = true
+ allocRequest = nil
context.AddPod(pod3)
- assert.Assert(t, executed, "expected update")
+ assert.Assert(t, allocRequest != nil, "expected update")
pod = context.schedulerCache.GetPod(string(pod3.UID))
assert.Assert(t, pod == nil, "failed pod found in cache")
+ assert.Assert(t, allocRequest.Releases != nil) // expecting a release
due to pod status
+ assertReleaseForeignPod(t, podName2, allocRequest)
// validate update when not already in cache
- tc = "update-pod3-pre"
- executed = false
- expectAdd = true
- expectRemove = false
+ allocRequest = nil
context.AddPod(pod2)
- tc = "update-pod3"
- executed = false
- expectAdd = false
- expectRemove = true
+ assert.Assert(t, allocRequest != nil, "expected update")
+ allocRequest = nil
context.UpdatePod(nil, pod3)
- assert.Assert(t, executed, "expected update")
+ assert.Assert(t, allocRequest != nil, "expected update")
pod = context.schedulerCache.GetPod(string(pod3.UID))
assert.Assert(t, pod == nil, "failed pod found in cache")
Review Comment:
The second test case of `// validate update when not already in cache` seems
redundant.
It's already be done at line 595.
(pod2 and pod3 have the same pod.UID, so it actually tested AddPod() and
Update the pod to failed.)
##########
pkg/cache/context.go:
##########
@@ -370,15 +370,19 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
// 2. pod is now assigned
// 3. pod is not in terminated state
// 4. pod references a known node
- if oldPod == nil && utils.IsAssignedPod(pod) &&
!utils.IsPodTerminated(pod) {
+ if utils.IsAssignedPod(pod) && !utils.IsPodTerminated(pod) {
Review Comment:
Could we remove the first condition after removing the check for `oldPod ==
nil`?
```
// conditions for allocate:
// 1. pod was previously assigned
// 2. pod is now assigned
```
##########
pkg/cache/context.go:
##########
@@ -402,8 +406,12 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
zap.String("podStatusCurrent",
string(pod.Status.Phase)))
// this means pod is terminated
// we need sub the occupied resource and re-sync with
the scheduler-core
Review Comment:
```suggestion
// we need to remove foreign pod in cache and create
release request to remove foreign allocation from the core
```
Maybe we could rephrase it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]