chenyulin0719 commented on code in PR #918:
URL: https://github.com/apache/yunikorn-k8shim/pull/918#discussion_r1798116560


##########
pkg/cache/context.go:
##########
@@ -370,15 +370,19 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
        //   2. pod is now assigned
        //   3. pod is not in terminated state
        //   4. pod references a known node
-       if oldPod == nil && utils.IsAssignedPod(pod) && 
!utils.IsPodTerminated(pod) {
+       if utils.IsAssignedPod(pod) && !utils.IsPodTerminated(pod) {
                if ctx.schedulerCache.UpdatePod(pod) {
                        // pod was accepted by a real node
                        log.Log(log.ShimContext).Debug("pod is assigned to a 
node, trigger occupied resource update",
                                zap.String("namespace", pod.Namespace),
                                zap.String("podName", pod.Name),
                                zap.String("podStatusBefore", podStatusBefore),
                                zap.String("podStatusCurrent", 
string(pod.Status.Phase)))
-                       ctx.updateNodeOccupiedResources(pod.Spec.NodeName, 
pod.Namespace, pod.Name, common.GetPodResource(pod), 
schedulercache.AddOccupiedResource)
+                       allocReq := common.CreateAllocationForForeignPod(pod)
+                       if err := 
ctx.apiProvider.GetAPIs().SchedulerAPI.UpdateAllocation(allocReq); err != nil {
+                               log.Log(log.ShimContext).Error("failed to add 
foreign allocation to the core",
+                                       zap.Error(err))
+                       }
                } else {
                        // pod is orphaned (references an unknown node)
                        log.Log(log.ShimContext).Info("skipping occupied 
resource update for assigned orphaned pod",

Review Comment:
   ```suggestion
                        log.Log(log.ShimContext).Info("skipping updating 
foreign allocation for assigned orphaned pod",
   ```
   
   Change `skipping the occupided resource update` to `skipping updating 
foreign allocation` could be more accuracy.



##########
pkg/cache/context_test.go:
##########
@@ -2004,20 +1923,11 @@ func TestInitializeState(t *testing.T) {
        assert.Equal(t, *pc.PreemptionPolicy, policy, "wrong preemption policy")
        assert.Equal(t, pc.Annotations[constants.AnnotationAllowPreemption], 
constants.True, "wrong allow-preemption value")
 
-       // verify occupied / capacity on node
-       capacity, occupied, ok := 
context.schedulerCache.SnapshotResources(nodeName1)
-       assert.Assert(t, ok, "Unable to retrieve node resources")
-       expectedCapacity := common.ParseResource("4", "10G")
-       assert.Equal(t, expectedCapacity.Resources["vcore"].Value, 
capacity.Resources["vcore"].Value, "wrong capacity vcore")
-       assert.Equal(t, expectedCapacity.Resources["memory"].Value, 
capacity.Resources["memory"].Value, "wrong capacity memory")
-       expectedOccupied := common.ParseResource("1500m", "2G")
-       assert.Equal(t, expectedOccupied.Resources["vcore"].Value, 
occupied.Resources["vcore"].Value, "wrong occupied vcore")
-       assert.Equal(t, expectedOccupied.Resources["memory"].Value, 
occupied.Resources["memory"].Value, "wrong occupied memory")
-

Review Comment:
   In the test, the following pods were added to podLister. However, after 
removing above assertions, there is no other assertion for the first two 
foreign pods. Could we add the related assertions?
   
   - "foreignPending” -> **no assertion in the test**
   - "foreignRunning” -> **no assertion in the test**
   - podName1 (have assertions)
   - podName2 (have assertions)
   - podName3 (have assertions)
   - podForeignUID (have assertions)



##########
pkg/cache/context_test.go:
##########
@@ -569,33 +543,28 @@ func TestAddUpdatePodForeign(t *testing.T) {
                                })
                        }
                }
-               return validatorFunc(request)
+               return nil
        })
 
-       host1 := nodeForTest(Host1, "10G", "10")
+       host1 := nodeForTest(Host1, "10G", "10") // add existing foreign pod
        context.updateNode(nil, host1)
 
        // pod is not assigned to any node
        pod1 := foreignPod(podName1, "1G", "500m")
        pod1.Status.Phase = v1.PodPending
        pod1.Spec.NodeName = ""
 
-       // validate add
-       tc = "add-pod1"
-       executed = false
-       expectAdd = false
-       expectRemove = false
+       // validate add (pending, no node assigned)
+       allocRequest = nil
        context.AddPod(pod1)
-       assert.Assert(t, !executed, "unexpected update")
+       assert.Assert(t, allocRequest == nil, "unexpected update")
        pod := context.schedulerCache.GetPod(string(pod1.UID))
        assert.Assert(t, pod == nil, "unassigned pod found in cache")
 
-       // validate update
-       tc = "update-pod1"
-       executed = false
-       expectRemove = false
+       // validate update (no change)
+       allocRequest = nil
        context.UpdatePod(nil, pod1)
-       assert.Assert(t, !executed, "unexpected update")
+       assert.Assert(t, allocRequest == nil, "unexpected update")
        assert.Assert(t, pod == nil, "unassigned pod found in cache")

Review Comment:
   ```suggestion   
        pod = context.schedulerCache.GetPod(string(pod1.UID))
        assert.Assert(t, pod == nil, "unassigned pod found in cache")
   ```
   Missing one step before asserting the pod.
   



##########
pkg/cache/context_test.go:
##########
@@ -604,155 +573,101 @@ func TestAddUpdatePodForeign(t *testing.T) {
        pod2.Spec.NodeName = Host1
 
        // validate add
-       tc = "add-pod2"
-       executed = false
-       expectAdd = true
-       expectRemove = false
        context.AddPod(pod2)
-       assert.Assert(t, executed, "updated expected")
+       assert.Assert(t, allocRequest != nil, "update expected")
+       assertAddForeignPod(t, podName2, Host1, allocRequest)
        pod = context.schedulerCache.GetPod(string(pod2.UID))
        assert.Assert(t, pod != nil, "pod not found in cache")
 
-       // validate update
-       tc = "update-pod2"
-       executed = false
-       expectAdd = false
-       expectRemove = false
+       // validate update (no change)
+       allocRequest = nil
        context.UpdatePod(nil, pod2)
-       assert.Assert(t, !executed, "unexpected update")
+       assert.Assert(t, allocRequest != nil, "update expected")
        pod = context.schedulerCache.GetPod(string(pod2.UID))
        assert.Assert(t, pod != nil, "pod not found in cache")
 
        // validate update when not already in cache
-       tc = "update-pod2-nocache-pre"
-       executed = false
-       expectAdd = false
-       expectRemove = true
+       allocRequest = nil
        context.DeletePod(pod2)
-       assert.Assert(t, executed, "expected update")
-       tc = "update-pod2-nocache"
-       executed = false
-       expectAdd = true
-       expectRemove = false
+       assertReleaseForeignPod(t, podName2, allocRequest)
+
+       allocRequest = nil
        context.UpdatePod(nil, pod2)
-       assert.Assert(t, executed, "expected update")
+       assert.Assert(t, allocRequest != nil, "expected update")
        pod = context.schedulerCache.GetPod(string(pod2.UID))
        assert.Assert(t, pod != nil, "pod not found in cache")
+       assertAddForeignPod(t, podName2, Host1, allocRequest)
 
        // pod is failed, should trigger update if already in cache
        pod3 := pod2.DeepCopy()
        pod3.Status.Phase = v1.PodFailed
 
        // validate add
-       tc = "add-pod3"
-       executed = false
-       expectAdd = false
-       expectRemove = true
+       allocRequest = nil
        context.AddPod(pod3)
-       assert.Assert(t, executed, "expected update")
+       assert.Assert(t, allocRequest != nil, "expected update")
        pod = context.schedulerCache.GetPod(string(pod3.UID))
        assert.Assert(t, pod == nil, "failed pod found in cache")
+       assert.Assert(t, allocRequest.Releases != nil) // expecting a release 
due to pod status
+       assertReleaseForeignPod(t, podName2, allocRequest)
 
        // validate update when not already in cache
-       tc = "update-pod3-pre"
-       executed = false
-       expectAdd = true
-       expectRemove = false
+       allocRequest = nil
        context.AddPod(pod2)
-       tc = "update-pod3"
-       executed = false
-       expectAdd = false
-       expectRemove = true
+       assert.Assert(t, allocRequest != nil, "expected update")
+       allocRequest = nil
        context.UpdatePod(nil, pod3)
-       assert.Assert(t, executed, "expected update")
+       assert.Assert(t, allocRequest != nil, "expected update")
        pod = context.schedulerCache.GetPod(string(pod3.UID))
        assert.Assert(t, pod == nil, "failed pod found in cache")

Review Comment:
   The second test case of `// validate update when not already in cache` seems 
redundant.
   It's already be done at line 595.
   
   (pod2 and pod3 have the same pod.UID, so it actually tested AddPod() and 
Update the pod to failed.)



##########
pkg/cache/context.go:
##########
@@ -370,15 +370,19 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
        //   2. pod is now assigned
        //   3. pod is not in terminated state
        //   4. pod references a known node
-       if oldPod == nil && utils.IsAssignedPod(pod) && 
!utils.IsPodTerminated(pod) {
+       if utils.IsAssignedPod(pod) && !utils.IsPodTerminated(pod) {

Review Comment:
   Could we remove the first condition after removing the check for `oldPod == 
nil`?
   ```
        // conditions for allocate:
        //   1. pod was previously assigned
        //   2. pod is now assigned
   ```



##########
pkg/cache/context.go:
##########
@@ -402,8 +406,12 @@ func (ctx *Context) updateForeignPod(pod *v1.Pod) {
                                zap.String("podStatusCurrent", 
string(pod.Status.Phase)))
                        // this means pod is terminated
                        // we need sub the occupied resource and re-sync with 
the scheduler-core

Review Comment:
   ```suggestion
                        // we need to remove foreign pod in cache and create 
release request to remove foreign allocation from the core
   ```
   Maybe we could rephrase it?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to