This is an automated email from the ASF dual-hosted git repository.

chenyulin0719 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-k8shim.git


The following commit(s) were added to refs/heads/master by this push:
     new 810b9de0 [YUNIKORN-2667] E2E test for Gang app originator pod changes after restart (#865)
810b9de0 is described below

commit 810b9de0c61e7f0cbdc65dd7100258b3e283211b
Author: Tzu-Hua Lan <[email protected]>
AuthorDate: Mon Jul 15 16:14:01 2024 +0000

    [YUNIKORN-2667] E2E test for Gang app originator pod changes after restart (#865)
    
    Closes: #865
    
    Signed-off-by: Yu-Lin Chen <[email protected]>
---
 .../framework/helpers/yunikorn/rest_api_utils.go   | 20 ++++++
 test/e2e/gang_scheduling/gang_scheduling_test.go   | 76 ++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
index f33c506b..a27dae2a 100644
--- a/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
+++ b/test/e2e/framework/helpers/yunikorn/rest_api_utils.go
@@ -356,6 +356,26 @@ func (c *RClient) WaitForCompletedAppStateTransition(partition string, appID str
        return wait.PollUntilContextTimeout(context.TODO(), time.Second, 
time.Duration(timeout)*time.Second, false, 
c.isAppInDesiredCompletedState(partition, appID, state).WithContext())
 }
 
+func (c *RClient) WaitForAllExecPodsAllocated(partition string, queueName 
string, appID string, execPodCount int, timeout int) error {
+       return wait.PollUntilContextTimeout(context.TODO(), time.Second, 
time.Duration(timeout)*time.Second, false, c.areAllExecPodsAllocated(partition, 
queueName, appID, execPodCount).WithContext())
+}
+
+func (c *RClient) areAllExecPodsAllocated(partition string, queueName string, 
appID string, execPodCount int) wait.ConditionFunc {
+       return func() (bool, error) {
+               appInfo, err := c.GetAppInfo(partition, queueName, appID)
+               if err != nil {
+                       return false, nil // returning nil here for wait & loop
+               }
+               if appInfo.Allocations == nil {
+                       return false, nil
+               }
+               if len(appInfo.Allocations) >= execPodCount {
+                       return true, nil
+               }
+               return false, nil
+       }
+}
+
 func (c *RClient) AreAllExecPodsAllotted(partition string, queueName string, 
appID string, execPodCount int) wait.ConditionFunc {
        return func() (bool, error) {
                appInfo, err := c.GetAppInfo(partition, queueName, appID)
diff --git a/test/e2e/gang_scheduling/gang_scheduling_test.go b/test/e2e/gang_scheduling/gang_scheduling_test.go
index a4aa85bb..13bd32bb 100644
--- a/test/e2e/gang_scheduling/gang_scheduling_test.go
+++ b/test/e2e/gang_scheduling/gang_scheduling_test.go
@@ -571,6 +571,82 @@ var _ = Describe("", func() {
                
Ω(appDaoInfo.UsedResource[hugepageKey]).To(Equal(int64(314572800)), "Used huge 
page resource is not correct")
        })
 
+       // Test to verify that the gang app originator pod does not change after a restart
+       // 1. Create an originator pod
+       // 2. Verify the originator pod is not a placeholder pod
+       // 3. Restart YuniKorn
+       // 4. Verify the originator pod is not changed after restart
+       It("Verify_Gang_App_Originator_Pod_Does_Not_Change_After_Restart", 
func() {
+               placeholderCount := 5
+
+               By("Create an originator pod")
+               podConf := k8s.TestPodConfig{
+                       Name: "gang-driver-pod-" + common.RandSeq(5),
+                       Labels: map[string]string{
+                               "app":           "sleep-" + common.RandSeq(5),
+                               "applicationId": appID,
+                       },
+                       Annotations: &k8s.PodAnnotation{
+                               TaskGroups: []cache.TaskGroup{
+                                       {Name: groupA, MinMember: 
int32(placeholderCount), MinResource: minResource},
+                               },
+                       },
+                       Resources: &v1.ResourceRequirements{
+                               Requests: v1.ResourceList{"cpu": 
minResource["cpu"], "memory": minResource["memory"]},
+                       },
+               }
+               podTest, err := k8s.InitTestPod(podConf)
+               Ω(err).NotTo(HaveOccurred())
+               originator, err := kClient.CreatePod(podTest, ns)
+               Ω(err).NotTo(HaveOccurred())
+
+               // Wait for the app to be created
+               checkAppStatus(appID, yunikorn.States().Application.Running)
+
+               By("Ensure all pods are allocated")
+               err = 
restClient.WaitForAllExecPodsAllocated(configmanager.DefaultPartition, nsQueue, 
appID, 1+placeholderCount, 30)
+               Ω(err).NotTo(HaveOccurred())
+
+               By("Verify the originator pod is not a placeholder pod")
+               appDaoInfo, appDaoInfoErr := 
restClient.GetAppInfo(configmanager.DefaultPartition, nsQueue, appID)
+               Ω(appDaoInfoErr).NotTo(HaveOccurred())
+               for _, alloc := range appDaoInfo.Allocations {
+                       podName := 
alloc.AllocationTags["kubernetes.io/meta/podName"]
+                       if podName == originator.Name {
+                               Ω(alloc.Originator).To(Equal(true), "Originator 
pod should be a originator pod")
+                               Ω(alloc.Placeholder).To(Equal(false), 
"Originator pod should not be a placeholder pod")
+                       } else {
+                               Ω(alloc.Originator).To(Equal(false), 
"Placeholder pod should not be a originator pod")
+                               Ω(alloc.Placeholder).To(Equal(true), 
"Placeholder pod should be a placeholder pod")
+                       }
+               }
+
+               By("Restart the scheduler pod")
+               yunikorn.RestartYunikorn(&kClient)
+               yunikorn.RestorePortForwarding(&kClient)
+
+               // Wait for the app to be created
+               checkAppStatus(appID, yunikorn.States().Application.Running)
+
+               By("Ensure all pods are allocated")
+               err = 
restClient.WaitForAllExecPodsAllocated(configmanager.DefaultPartition, nsQueue, 
appID, 1+placeholderCount, 30)
+               Ω(err).NotTo(HaveOccurred())
+
+               By("Verify the originator pod is not changed after restart")
+               appDaoInfo, appDaoInfoErr = 
restClient.GetAppInfo(configmanager.DefaultPartition, nsQueue, appID)
+               Ω(appDaoInfoErr).NotTo(HaveOccurred())
+               for _, alloc := range appDaoInfo.Allocations {
+                       podName := 
alloc.AllocationTags["kubernetes.io/meta/podName"]
+                       if podName == originator.Name {
+                               Ω(alloc.Originator).To(Equal(true), "Originator 
pod should be a originator pod")
+                               Ω(alloc.Placeholder).To(Equal(false), 
"Originator pod should not be a placeholder pod")
+                       } else {
+                               Ω(alloc.Originator).To(Equal(false), 
"Placeholder pod should not be a originator pod")
+                               Ω(alloc.Placeholder).To(Equal(true), 
"Placeholder pod should be a placeholder pod")
+                       }
+               }
+       })
+
        AfterEach(func() {
                tests.DumpClusterInfoIfSpecFailed(suiteName, []string{ns})
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to