(yunikorn-core) branch master updated: [YUNIKORN-2495] Remove Starting application state (#824)

ccondit Wed, 20 Mar 2024 07:34:48 -0700

This is an automated email from the ASF dual-hosted git repository.

ccondit pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-core.git



The following commit(s) were added to refs/heads/master by this push:
     new 09e5d741 [YUNIKORN-2495] Remove Starting application state (#824)
09e5d741 is described below

commit 09e5d741b67c397330a6c0d630131a25ae75ef4d
Author: Craig Condit <[email protected]>
AuthorDate: Wed Mar 20 09:28:32 2024 -0500

    [YUNIKORN-2495] Remove Starting application state (#824)
    
    Closes: #824
---
 go.mod                                           |  4 +-
 go.sum                                           |  4 +-
 pkg/scheduler/objects/application.go             | 20 ++----
 pkg/scheduler/objects/application_state.go       | 46 ++++----------
 pkg/scheduler/objects/application_state_test.go  | 80 ++++++++----------------
 pkg/scheduler/objects/application_test.go        | 59 ++++-------------
 pkg/scheduler/objects/queue_test.go              |  2 +-
 pkg/scheduler/partition_test.go                  |  2 +-
 pkg/scheduler/tests/application_tracking_test.go |  4 +-
 pkg/webservice/handlers.go                       |  1 -
 pkg/webservice/handlers_test.go                  | 33 +++++-----
 11 files changed, 77 insertions(+), 178 deletions(-)

diff --git a/go.mod b/go.mod
index 0c527eeb..23a22da2 100644
--- a/go.mod
+++ b/go.mod
@@ -22,7 +22,7 @@ module github.com/apache/yunikorn-core
 go 1.21
 
 require (
-       github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240222205935-94c25b6d2579
+       github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240320010951-d392f3c7272d
        github.com/google/btree v1.1.2
        github.com/google/go-cmp v0.6.0
        github.com/google/uuid v1.6.0
@@ -50,7 +50,7 @@ require (
        golang.org/x/sys v0.17.0 // indirect
        golang.org/x/text v0.14.0 // indirect
        google.golang.org/genproto/googleapis/rpc 
v0.0.0-20230711160842-782d3b101e98 // indirect
-       google.golang.org/protobuf v1.31.0 // indirect
+       google.golang.org/protobuf v1.33.0 // indirect
 )
 
 replace (
diff --git a/go.sum b/go.sum
index cc5fd398..7831f636 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,5 @@
-github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240222205935-94c25b6d2579 
h1:uXGmiGX3sfrUtPM8sb4Nco5pgzPKrDJ0CtiBizhgbRM=
-github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240222205935-94c25b6d2579/go.mod 
h1:3jCo/Ash4yEmw05ozK3BihJDEEAMOZEN7rmxNfb0gO0=
+github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240320010951-d392f3c7272d 
h1:ywKv8csdOkrUcQqHit60/PuHXuELpHcZ4ftm7/b3c6Q=
+github.com/apache/yunikorn-scheduler-interface 
v0.0.0-20240320010951-d392f3c7272d/go.mod 
h1:0f4l3ManMROX60xU7GbhejCEYYyMksH275oY2xIVkbM=
 github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
 github.com/beorn7/perks v1.0.1/go.mod 
h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
 github.com/cespare/xxhash/v2 v2.2.0 
h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
diff --git a/pkg/scheduler/objects/application.go 
b/pkg/scheduler/objects/application.go
index 1461a497..7606840f 100644
--- a/pkg/scheduler/objects/application.go
+++ b/pkg/scheduler/objects/application.go
@@ -44,7 +44,6 @@ import (
 
 var (
        reservationDelay          = 2 * time.Second
-       startingTimeout           = 5 * time.Minute
        completingTimeout         = 30 * time.Second
        terminatedTimeout         = 3 * 24 * time.Hour
        defaultPlaceholderTimeout = 15 * time.Minute
@@ -100,7 +99,6 @@ type Application struct {
        placeholderAsk       *resources.Resource         // total placeholder 
request for the app (all task groups)
        stateMachine         *fsm.FSM                    // application state 
machine
        stateTimer           *time.Timer                 // timer for state time
-       startTimeout         time.Duration               // timeout for the 
application starting state
        execTimeout          time.Duration               // execTimeout for the 
application run
        placeholderTimer     *time.Timer                 // placeholder replace 
timer
        gangSchedulingStyle  string                      // gang scheduling 
style can be hard (after timeout we fail the application), or soft (after 
timeout we schedule it as a normal application)
@@ -184,7 +182,6 @@ func NewApplication(siApp *si.AddApplicationRequest, ugi 
security.UserGroup, eve
        }
        app.gangSchedulingStyle = gangSchedStyle
        app.execTimeout = placeholderTimeout
-       app.startTimeout = startingTimeout
        app.user = ugi
        app.rmEventHandler = eventHandler
        app.rmID = rmID
@@ -233,10 +230,6 @@ func (sa *Application) CurrentState() string {
        return sa.stateMachine.Current()
 }
 
-func (sa *Application) IsStarting() bool {
-       return sa.stateMachine.Is(Starting.String())
-}
-
 func (sa *Application) IsAccepted() bool {
        return sa.stateMachine.Is(Accepted.String())
 }
@@ -327,9 +320,9 @@ func (sa *Application) OnStateChange(event *fsm.Event, 
eventInfo string) {
                })
 }
 
-// Set the starting timer to make sure the application will not get stuck in a 
starting state too long.
-// This prevents an app from not progressing to Running when it only has 1 
allocation.
-// Called when entering the Starting state by the state machine.
+// Set the state timer to make sure the application will not get stuck in a 
time-sensitive state too long.
+// This prevents an app from not progressing to the next state if a timeout is 
required.
+// Used for placeholder timeout and completion handling.
 func (sa *Application) setStateTimer(timeout time.Duration, currentState 
string, event applicationEvent) {
        log.Log(log.SchedApplication).Debug("Application state timer initiated",
                zap.String("appID", sa.ApplicationID),
@@ -371,9 +364,8 @@ func (sa *Application) timeoutStateTimer(expectedState 
string, event application
        }
 }
 
-// Clear the starting timer. If the application has progressed out of the 
starting state we need to stop the
-// timer and clean up.
-// Called when leaving the Starting state by the state machine.
+// Clear the state timer. If the application has progressed out of a 
time-sensitive state we need to stop the timer and
+// clean up. Called when transitioning from Completing to Completed or when 
expiring an application.
 func (sa *Application) clearStateTimer() {
        if sa == nil || sa.stateTimer == nil {
                return
@@ -416,7 +408,7 @@ func (sa *Application) timeoutPlaceholderProcessing() {
        defer sa.Unlock()
        switch {
        // Case 1: if all app's placeholders are allocated, only part of them 
gets replaced, just delete the remaining placeholders
-       case (sa.IsRunning() || sa.IsStarting() || sa.IsCompleting()) && 
!resources.IsZero(sa.allocatedPlaceholder):
+       case (sa.IsRunning() || sa.IsCompleting()) && 
!resources.IsZero(sa.allocatedPlaceholder):
                var toRelease []*Allocation
                replacing := 0
                for _, alloc := range sa.getPlaceholderAllocations() {
diff --git a/pkg/scheduler/objects/application_state.go 
b/pkg/scheduler/objects/application_state.go
index d07a72cc..a23f61da 100644
--- a/pkg/scheduler/objects/application_state.go
+++ b/pkg/scheduler/objects/application_state.go
@@ -64,7 +64,6 @@ type applicationState int
 const (
        New applicationState = iota
        Accepted
-       Starting
        Running
        Rejected
        Completing
@@ -77,7 +76,6 @@ const (
 
 var stateEvents = map[string]si.EventRecord_ChangeDetail{
        Accepted.String():   si.EventRecord_APP_ACCEPTED,
-       Starting.String():   si.EventRecord_APP_STARTING,
        Running.String():    si.EventRecord_APP_RUNNING,
        Rejected.String():   si.EventRecord_APP_REJECT,
        Completing.String(): si.EventRecord_APP_COMPLETING,
@@ -89,7 +87,7 @@ var stateEvents = map[string]si.EventRecord_ChangeDetail{
 }
 
 func (as applicationState) String() string {
-       return [...]string{"New", "Accepted", "Starting", "Running", 
"Rejected", "Completing", "Completed", "Failing", "Failed", "Expired", 
"Resuming"}[as]
+       return [...]string{"New", "Accepted", "Running", "Rejected", 
"Completing", "Completed", "Failing", "Failed", "Expired", "Resuming"}[as]
 }
 
 func NewAppState() *fsm.FSM {
@@ -105,15 +103,11 @@ func NewAppState() *fsm.FSM {
                                Dst:  Accepted.String(),
                        }, {
                                Name: RunApplication.String(),
-                               Src:  []string{Accepted.String()},
-                               Dst:  Starting.String(),
-                       }, {
-                               Name: RunApplication.String(),
-                               Src:  []string{Running.String(), 
Starting.String(), Completing.String()},
+                               Src:  []string{Accepted.String(), 
Running.String(), Completing.String()},
                                Dst:  Running.String(),
                        }, {
                                Name: CompleteApplication.String(),
-                               Src:  []string{Accepted.String(), 
Running.String(), Starting.String()},
+                               Src:  []string{Accepted.String(), 
Running.String()},
                                Dst:  Completing.String(),
                        }, {
                                Name: CompleteApplication.String(),
@@ -121,7 +115,7 @@ func NewAppState() *fsm.FSM {
                                Dst:  Completed.String(),
                        }, {
                                Name: FailApplication.String(),
-                               Src:  []string{New.String(), Accepted.String(), 
Starting.String(), Running.String()},
+                               Src:  []string{New.String(), Accepted.String(), 
Running.String()},
                                Dst:  Failing.String(),
                        }, {
                                Name: FailApplication.String(),
@@ -169,22 +163,6 @@ func NewAppState() *fsm.FSM {
                        "leave_state": func(_ context.Context, event 
*fsm.Event) {
                                event.Args[0].(*Application).clearStateTimer() 
//nolint:errcheck
                        },
-                       fmt.Sprintf("enter_%s", Starting.String()): func(_ 
context.Context, event *fsm.Event) {
-                               app := event.Args[0].(*Application) 
//nolint:errcheck
-                               app.startTime = time.Now()
-                               app.setStateTimer(app.startTimeout, 
app.stateMachine.Current(), RunApplication)
-                               app.queue.incRunningApps(app.ApplicationID)
-                               
metrics.GetQueueMetrics(app.queuePath).IncQueueApplicationsRunning()
-                               
metrics.GetSchedulerMetrics().IncTotalApplicationsRunning()
-                       },
-                       fmt.Sprintf("leave_%s", Starting.String()): func(_ 
context.Context, event *fsm.Event) {
-                               if event.Dst != Running.String() {
-                                       app := event.Args[0].(*Application) 
//nolint:errcheck
-                                       app.queue.decRunningApps()
-                                       
metrics.GetQueueMetrics(app.queuePath).DecQueueApplicationsRunning()
-                                       
metrics.GetSchedulerMetrics().DecTotalApplicationsRunning()
-                               }
-                       },
                        fmt.Sprintf("enter_%s", Completing.String()): func(_ 
context.Context, event *fsm.Event) {
                                app := event.Args[0].(*Application) 
//nolint:errcheck
                                app.setStateTimer(completingTimeout, 
app.stateMachine.Current(), CompleteApplication)
@@ -209,19 +187,21 @@ func NewAppState() *fsm.FSM {
                                }
                        },
                        fmt.Sprintf("enter_%s", Running.String()): func(_ 
context.Context, event *fsm.Event) {
-                               app := event.Args[0].(*Application) 
//nolint:errcheck
-                               // account for going back into running state
-                               if event.Src == Completing.String() {
+                               if event.Src != Running.String() {
+                                       app := event.Args[0].(*Application) 
//nolint:errcheck
+                                       app.startTime = time.Now()
                                        
app.queue.incRunningApps(app.ApplicationID)
                                        
metrics.GetQueueMetrics(app.queuePath).IncQueueApplicationsRunning()
                                        
metrics.GetSchedulerMetrics().IncTotalApplicationsRunning()
                                }
                        },
                        fmt.Sprintf("leave_%s", Running.String()): func(_ 
context.Context, event *fsm.Event) {
-                               app := event.Args[0].(*Application) 
//nolint:errcheck
-                               app.queue.decRunningApps()
-                               
metrics.GetQueueMetrics(app.queuePath).DecQueueApplicationsRunning()
-                               
metrics.GetSchedulerMetrics().DecTotalApplicationsRunning()
+                               if event.Dst != Running.String() {
+                                       app := event.Args[0].(*Application) 
//nolint:errcheck
+                                       app.queue.decRunningApps()
+                                       
metrics.GetQueueMetrics(app.queuePath).DecQueueApplicationsRunning()
+                                       
metrics.GetSchedulerMetrics().DecTotalApplicationsRunning()
+                               }
                        },
                        fmt.Sprintf("enter_%s", Completed.String()): func(_ 
context.Context, event *fsm.Event) {
                                app := event.Args[0].(*Application) 
//nolint:errcheck
diff --git a/pkg/scheduler/objects/application_state_test.go 
b/pkg/scheduler/objects/application_state_test.go
index 7a07f808..7a41fb36 100644
--- a/pkg/scheduler/objects/application_state_test.go
+++ b/pkg/scheduler/objects/application_state_test.go
@@ -74,44 +74,14 @@ func TestRejectStateTransition(t *testing.T) {
        assert.Equal(t, app.CurrentState(), Rejected.String())
 }
 
-func TestStartStateTransition(t *testing.T) {
-       // starting only from accepted
-       appInfo := newApplication("app-00001", "default", "root.a")
-       assert.Equal(t, appInfo.CurrentState(), New.String())
-       err := appInfo.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected new to accepted (start 
test)")
-       assert.Equal(t, appInfo.CurrentState(), Accepted.String())
-
-       // start app
-       err = appInfo.HandleApplicationEvent(RunApplication)
-       assert.Assert(t, err, "no error expected new to starting")
-       assert.Equal(t, appInfo.CurrentState(), Starting.String())
-
-       // starting to rejected: error expected
-       err = appInfo.HandleApplicationEvent(RejectApplication)
-       assert.Assert(t, err != nil, "error expected starting to rejected")
-       assert.Equal(t, appInfo.CurrentState(), Starting.String())
-
-       // start to failing
-       err = appInfo.HandleApplicationEvent(FailApplication)
-       assert.NilError(t, err, "no error expected starting to failing")
-       err = common.WaitFor(10*time.Microsecond, time.Millisecond*100, 
appInfo.IsFailing)
-       assert.NilError(t, err, "App should be in Failing state")
-}
-
 func TestRunStateTransition(t *testing.T) {
-       // run only from starting
+       // run from new
        appInfo := newApplication("app-00001", "default", "root.a")
        assert.Equal(t, appInfo.CurrentState(), New.String())
        err := appInfo.HandleApplicationEvent(RunApplication)
        assert.NilError(t, err, "no error expected new to accepted (run test)")
        err = appInfo.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected accepted to starting (run 
test)")
-       assert.Equal(t, appInfo.CurrentState(), Starting.String())
-
-       // run app
-       err = appInfo.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected starting to running")
+       assert.NilError(t, err, "no error expected accepted to running (run 
test)")
        assert.Equal(t, appInfo.CurrentState(), Running.String())
 
        // run app: same state is allowed for running
@@ -143,9 +113,9 @@ func TestCompletedStateTransition(t *testing.T) {
        err := appInfo1.HandleApplicationEvent(RunApplication)
        assert.NilError(t, err, "no error expected new to accepted (completed 
test)")
        err = appInfo1.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected accepted to starting 
(completed test)")
+       assert.NilError(t, err, "no error expected accepted to running 
(completed test)")
        err = appInfo1.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected starting to running 
(completed test)")
+       assert.NilError(t, err, "no error expected running to running 
(completed test)")
        assert.Equal(t, appInfo1.CurrentState(), Running.String())
        // completed from run through completing
        err = appInfo1.HandleApplicationEvent(CompleteApplication)
@@ -197,21 +167,21 @@ func TestCompletingStateTransition(t *testing.T) {
        assert.NilError(t, err, "no error expected accepted to completing")
        assert.Equal(t, appInfo1.CurrentState(), Completing.String())
 
-       // starting to completing
+       // running to completing
        appInfo2 := newApplication("app-00002", "default", "root.a")
        assert.Equal(t, appInfo2.CurrentState(), New.String())
        err = appInfo2.HandleApplicationEvent(RunApplication)
        assert.NilError(t, err, "no error expected new to accepted (completing 
test)")
        err = appInfo2.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected accepted to starting")
-       assert.Equal(t, appInfo2.CurrentState(), Starting.String())
+       assert.NilError(t, err, "no error expected accepted to running")
+       assert.Equal(t, appInfo2.CurrentState(), Running.String())
        err = appInfo2.HandleApplicationEvent(CompleteApplication)
-       assert.NilError(t, err, "no error expected starting to completing")
+       assert.NilError(t, err, "no error expected running to completing")
        assert.Equal(t, appInfo2.CurrentState(), Completing.String())
 
        // completing to run and back again
        err = appInfo2.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected starting to running 
(completing test)")
+       assert.NilError(t, err, "no error expected completing to running 
(completing test)")
        err = appInfo2.HandleApplicationEvent(CompleteApplication)
        assert.NilError(t, err, "no error expected running to completing")
        assert.Equal(t, appInfo2.CurrentState(), Completing.String())
@@ -260,7 +230,7 @@ func TestAppStateTransitionEvents(t *testing.T) {
 
        // completing to run
        err = appInfo.HandleApplicationEvent(RunApplication)
-       assert.NilError(t, err, "no error expected starting to running 
(completing test)")
+       assert.NilError(t, err, "no error expected completing to running 
(completing test)")
 
        // run to failing
        err = appInfo.HandleApplicationEvent(FailApplication)
@@ -306,15 +276,15 @@ func TestAppStateTransitionEvents(t *testing.T) {
 }
 
 // Test to verify metrics after applications state transition
-// app-00001: New -> Resuming -> Accepted -> Starting -> Running -> 
Completing-> Completed
-// app-00002: New -> Accepted -> Starting -> Completing -> Running -> 
Failing-> Failed
-// app-00003: New -> Accepted -> Starting -> Failing -> Failed
+// app-00001: New -> Resuming -> Accepted -> Running -> Completing-> Completed
+// app-00002: New -> Accepted -> Running -> Completing -> Running -> Failing-> 
Failed
+// app-00003: New -> Accepted -> Running -> Failing -> Failed
 // app-00004: New -> Rejected
 // Final metrics will be: 0 running, 3 accepted, 1 completed, 2 failed and 1 
rejected applications
 func TestAppStateTransitionMetrics(t *testing.T) { //nolint:funlen
        queue := createQueue(t, "root.metrics")
        metrics.GetSchedulerMetrics().Reset()
-       // app-00001: New -> Resuming -> Accepted --> Starting -> Running -> 
Completing-> Completed
+       // app-00001: New -> Resuming -> Accepted --> Running -> Completing-> 
Completed
        app := newApplication("app-00001", "default", "root.metrics")
        app.SetQueue(queue)
        assertState(t, app, nil, New.String())
@@ -345,9 +315,9 @@ func TestAppStateTransitionMetrics(t *testing.T) { 
//nolint:funlen
        assertQueueApplicationsRejectedMetrics(t, app, 0)
        assertQueueApplicationsFailedMetrics(t, app, 0)
        assertQueueApplicationsCompletedMetrics(t, app, 0)
-       // Accepted -> Starting
+       // Accepted -> Running
        err = app.HandleApplicationEvent(RunApplication)
-       assertState(t, app, err, Starting.String())
+       assertState(t, app, err, Running.String())
        assertTotalAppsRunningMetrics(t, 1)
        assertTotalAppsCompletedMetrics(t, 0)
        assertTotalAppsRejectedMetrics(t, 0)
@@ -357,7 +327,7 @@ func TestAppStateTransitionMetrics(t *testing.T) { 
//nolint:funlen
        assertQueueApplicationsRejectedMetrics(t, app, 0)
        assertQueueApplicationsFailedMetrics(t, app, 0)
        assertQueueApplicationsCompletedMetrics(t, app, 0)
-       // Starting -> Running
+       // Running -> Running
        err = app.HandleApplicationEvent(RunApplication)
        assertState(t, app, err, Running.String())
        assertTotalAppsRunningMetrics(t, 1)
@@ -394,17 +364,17 @@ func TestAppStateTransitionMetrics(t *testing.T) { 
//nolint:funlen
        assertQueueApplicationsFailedMetrics(t, app, 0)
        assertQueueApplicationsCompletedMetrics(t, app, 1)
 
-       // app-00002: New -> Accepted -> Starting -> Completing -> Running -> 
Failing-> Failed
+       // app-00002: New -> Accepted -> Running -> Completing -> Running -> 
Failing-> Failed
        app = newApplication("app-00002", "default", "root.metrics")
        app.SetQueue(queue)
        assertState(t, app, nil, New.String())
        // New -> Accepted
        err = app.HandleApplicationEvent(RunApplication)
        assertState(t, app, err, Accepted.String())
-       // Accepted -> Starting
+       // Accepted -> Running
        err = app.HandleApplicationEvent(RunApplication)
-       assertState(t, app, err, Starting.String())
-       // Starting -> Completing
+       assertState(t, app, err, Running.String())
+       // Running -> Completing
        err = app.HandleApplicationEvent(CompleteApplication)
        assertState(t, app, err, Completing.String())
        // Completing -> Running
@@ -426,17 +396,17 @@ func TestAppStateTransitionMetrics(t *testing.T) { 
//nolint:funlen
        assertQueueApplicationsFailedMetrics(t, app, 1)
        assertQueueApplicationsCompletedMetrics(t, app, 1)
 
-       // app-00003: New -> Accepted -> Starting -> Failing -> Failed
+       // app-00003: New -> Accepted -> Running -> Failing -> Failed
        app = newApplication("app-00003", "default", "root.metrics")
        app.SetQueue(queue)
        assertState(t, app, nil, New.String())
        // New -> Accepted
        err = app.HandleApplicationEvent(RunApplication)
        assertState(t, app, err, Accepted.String())
-       // Accepted -> Starting
+       // Accepted -> Running
        err = app.HandleApplicationEvent(RunApplication)
-       assertState(t, app, err, Starting.String())
-       // Starting -> Failing
+       assertState(t, app, err, Running.String())
+       // Running -> Failing
        err = app.HandleApplicationEvent(FailApplication)
        assertState(t, app, err, Failing.String())
        // Failing -> Failed
diff --git a/pkg/scheduler/objects/application_test.go 
b/pkg/scheduler/objects/application_test.go
index 4ddafa0b..034da766 100644
--- a/pkg/scheduler/objects/application_test.go
+++ b/pkg/scheduler/objects/application_test.go
@@ -750,14 +750,14 @@ func TestStateChangeOnUpdate(t *testing.T) {
        // add an alloc
        allocInfo := NewAllocation(nodeID1, ask)
        app.AddAllocation(allocInfo)
-       // app should be starting
-       assert.Assert(t, app.IsStarting(), "Application did not return starting 
state after alloc: %s", app.CurrentState())
+       // app should be running
+       assert.Assert(t, app.IsRunning(), "Application did not return running 
state after alloc: %s", app.CurrentState())
        assertUserGroupResource(t, getTestUserGroup(), res)
 
        // removing the ask should not move anywhere as there is an allocation
        released = app.RemoveAllocationAsk(askID)
        assert.Equal(t, released, 0, "allocation ask should not have been 
reserved")
-       assert.Assert(t, app.IsStarting(), "Application should have stayed 
same, changed unexpectedly: %s", app.CurrentState())
+       assert.Assert(t, app.IsRunning(), "Application should have stayed same, 
changed unexpectedly: %s", app.CurrentState())
 
        // remove the allocation, ask has been removed so nothing left
        app.RemoveAllocation(askID+"-0", 
si.TerminationType_UNKNOWN_TERMINATION_TYPE)
@@ -767,7 +767,7 @@ func TestStateChangeOnUpdate(t *testing.T) {
        log := app.GetStateLog()
        assert.Equal(t, len(log), 3, "wrong number of app events")
        assert.Equal(t, log[0].ApplicationState, Accepted.String())
-       assert.Equal(t, log[1].ApplicationState, Starting.String())
+       assert.Equal(t, log[1].ApplicationState, Running.String())
        assert.Equal(t, log[2].ApplicationState, Completing.String())
 }
 
@@ -918,7 +918,7 @@ func TestGangAllocChange(t *testing.T) {
        app.AddAllocation(alloc)
        assert.Assert(t, resources.Equals(app.allocatedPlaceholder, totalPH), 
"allocated placeholders resources is not updated correctly: %s", 
app.allocatedPlaceholder.String())
        assert.Equal(t, len(app.GetAllAllocations()), 2)
-       assert.Assert(t, app.IsStarting(), "app should have changed to starting 
state")
+       assert.Assert(t, app.IsRunning(), "app should have changed to running 
state")
        assertUserGroupResource(t, getTestUserGroup(), resources.Multiply(res, 
2))
 
        // add a real alloc this should NOT trigger state update
@@ -926,15 +926,15 @@ func TestGangAllocChange(t *testing.T) {
        alloc.SetResult(Replaced)
        app.AddAllocation(alloc)
        assert.Equal(t, len(app.GetAllAllocations()), 3)
-       assert.Assert(t, app.IsStarting(), "app should still be in starting 
state")
+       assert.Assert(t, app.IsRunning(), "app should still be in running 
state")
        assertUserGroupResource(t, getTestUserGroup(), resources.Multiply(res, 
3))
 
-       // add a second real alloc this should trigger state update
+       // add a second real alloc this should NOT trigger state update
        alloc = newAllocation(appID1, nodeID1, res)
        alloc.SetResult(Replaced)
        app.AddAllocation(alloc)
        assert.Equal(t, len(app.GetAllAllocations()), 4)
-       assert.Assert(t, app.IsRunning(), "app should be in running state")
+       assert.Assert(t, app.IsRunning(), "app should still be in running 
state")
        assertUserGroupResource(t, getTestUserGroup(), resources.Multiply(res, 
4))
 }
 
@@ -956,7 +956,7 @@ func TestAllocChange(t *testing.T) {
        app.AddAllocation(alloc)
        assert.Assert(t, resources.Equals(app.allocatedResource, res), 
"allocated resources is not updated correctly: %s", 
app.allocatedResource.String())
        assert.Equal(t, len(app.GetAllAllocations()), 1)
-       assert.Assert(t, app.IsStarting(), "app should be in starting state")
+       assert.Assert(t, app.IsRunning(), "app should be in running state")
        assertUserGroupResource(t, getTestUserGroup(), res)
 
        // add a second real alloc this should trigger state update
@@ -978,43 +978,6 @@ func TestQueueUpdate(t *testing.T) {
        assert.Equal(t, app.GetQueuePath(), "root.test")
 }
 
-func TestStateTimeOut(t *testing.T) {
-       startingTimeout = time.Microsecond * 100
-       defer func() { startingTimeout = time.Minute * 5 }()
-       app := newApplication(appID1, "default", "root.a")
-       err := app.handleApplicationEventWithLocking(RunApplication)
-       assert.NilError(t, err, "no error expected new to accepted (timeout 
test)")
-       err = app.handleApplicationEventWithLocking(RunApplication)
-       assert.NilError(t, err, "no error expected accepted to starting 
(timeout test)")
-       // give it some time to run and progress
-       time.Sleep(time.Millisecond * 100)
-       if app.IsStarting() {
-               t.Fatal("Starting state should have timed out")
-       }
-       if app.stateTimer != nil {
-               t.Fatalf("Startup timer has not be cleared on time out as 
expected, %v", app.stateTimer)
-       }
-
-       startingTimeout = time.Millisecond * 100
-       app = newApplication(appID1, "default", "root.a")
-       err = app.handleApplicationEventWithLocking(RunApplication)
-       assert.NilError(t, err, "no error expected new to accepted (timeout 
test2)")
-       err = app.handleApplicationEventWithLocking(RunApplication)
-       assert.NilError(t, err, "no error expected accepted to starting 
(timeout test2)")
-       // give it some time to run and progress
-       time.Sleep(time.Microsecond * 100)
-       if !app.IsStarting() || app.stateTimer == nil {
-               t.Fatalf("Starting state and timer should not have timed out 
yet, state: %s", app.stateMachine.Current())
-       }
-       err = app.handleApplicationEventWithLocking(RunApplication)
-       assert.NilError(t, err, "no error expected starting to run (timeout 
test2)")
-       // give it some time to run and progress
-       time.Sleep(time.Microsecond * 100)
-       if !app.stateMachine.Is(Running.String()) || app.stateTimer != nil {
-               t.Fatalf("State is not running or timer was not cleared, state: 
%s, timer %v", app.stateMachine.Current(), app.stateTimer)
-       }
-}
-
 func TestCompleted(t *testing.T) {
        completingTimeout = time.Millisecond * 100
        terminatedTimeout = time.Millisecond * 100
@@ -1498,12 +1461,12 @@ func TestTimeoutPlaceholderAllocReleased(t *testing.T) {
 
        alloc := newAllocation(appID1, nodeID1, res)
        app.AddAllocation(alloc)
-       assert.Assert(t, app.IsStarting(), "App should be in starting state 
after the first allocation")
+       assert.Assert(t, app.IsRunning(), "App should be in running state after 
the first allocation")
        err = common.WaitFor(10*time.Millisecond, 1*time.Second, func() bool {
                return app.getPlaceholderTimer() == nil
        })
        assert.NilError(t, err, "Placeholder timeout cleanup did not trigger 
unexpectedly")
-       assert.Assert(t, app.IsStarting(), "App should be in starting state 
after the first allocation")
+       assert.Assert(t, app.IsRunning(), "App should be in running state after 
the first allocation")
        assertUserGroupResource(t, getTestUserGroup(), resources.Multiply(res, 
3))
        // two state updates and 1 release event
        events := testHandler.GetEvents()
diff --git a/pkg/scheduler/objects/queue_test.go 
b/pkg/scheduler/objects/queue_test.go
index 6947b358..aacb809c 100644
--- a/pkg/scheduler/objects/queue_test.go
+++ b/pkg/scheduler/objects/queue_test.go
@@ -2590,7 +2590,7 @@ func TestQueueRunningAppsForSingleAllocationApp(t *testing.T) {
 
        alloc := NewAllocation(nodeID1, ask)
        app.AddAllocation(alloc)
-       assert.Equal(t, app.CurrentState(), Starting.String(), "app state should be starting")
+       assert.Equal(t, app.CurrentState(), Running.String(), "app state should be running")
        assert.Equal(t, leaf.runningApps, uint64(1), "leaf should have 1 app running")
 
        _, err = app.updateAskRepeatInternal(ask, -1)
diff --git a/pkg/scheduler/partition_test.go b/pkg/scheduler/partition_test.go
index f1f0de89..90159e79 100644
--- a/pkg/scheduler/partition_test.go
+++ b/pkg/scheduler/partition_test.go
@@ -3689,7 +3689,7 @@ func TestTryAllocateMaxRunning(t *testing.T) {
        assert.Equal(t, alloc.GetReleaseCount(), 0, "released allocations should have been 0")
        assert.Equal(t, alloc.GetApplicationID(), appID1, "expected application app-1 to be allocated")
        assert.Equal(t, alloc.GetAllocationKey(), "alloc-2", "expected ask alloc-2 to be allocated")
-       assert.Equal(t, app.CurrentState(), objects.Starting.String(), "application should have moved to starting state")
+       assert.Equal(t, app.CurrentState(), objects.Running.String(), "application should have moved to running state")
 
        // allocation should still fail: max running apps on parent reached
        alloc = partition.tryAllocate()
diff --git a/pkg/scheduler/tests/application_tracking_test.go 
b/pkg/scheduler/tests/application_tracking_test.go
index 6bfb76bf..d14df297 100644
--- a/pkg/scheduler/tests/application_tracking_test.go
+++ b/pkg/scheduler/tests/application_tracking_test.go
@@ -327,13 +327,13 @@ func verifyAllocationAskAddedEvents(t *testing.T, events []*si.EventRecord) {
        assert.Equal(t, si.EventRecord_ADD, events[2].EventChangeType)
        assert.Equal(t, si.EventRecord_NODE_ALLOC, events[2].EventChangeDetail)
 
-       // state change to Starting
+       // state change to Running
        assert.Equal(t, "app-1", events[3].ObjectID)
        assert.Equal(t, "", events[3].Message)
        assert.Equal(t, "", events[3].ReferenceID)
        assert.Equal(t, si.EventRecord_APP, events[3].Type)
        assert.Equal(t, si.EventRecord_SET, events[3].EventChangeType)
-       assert.Equal(t, si.EventRecord_APP_STARTING, events[3].EventChangeDetail)
+       assert.Equal(t, si.EventRecord_APP_RUNNING, events[3].EventChangeDetail)
 
        // Track resource usage for the user - increment
        assert.Equal(t, "testuser", events[4].ObjectID)
diff --git a/pkg/webservice/handlers.go b/pkg/webservice/handlers.go
index ffea7844..e8d644c3 100644
--- a/pkg/webservice/handlers.go
+++ b/pkg/webservice/handlers.go
@@ -70,7 +70,6 @@ func init() {
 
        allowedAppActiveStatuses["new"] = true
        allowedAppActiveStatuses["accepted"] = true
-       allowedAppActiveStatuses["starting"] = true
        allowedAppActiveStatuses["running"] = true
        allowedAppActiveStatuses["completing"] = true
        allowedAppActiveStatuses["failing"] = true
diff --git a/pkg/webservice/handlers_test.go b/pkg/webservice/handlers_test.go
index 6b0f8a90..16ac0cc8 100644
--- a/pkg/webservice/handlers_test.go
+++ b/pkg/webservice/handlers_test.go
@@ -948,29 +948,25 @@ func TestPartitions(t *testing.T) {
        app1 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-1")
        app1.SetState(objects.Accepted.String())
 
-       // add a new app2 - starting
+       // add a new app2 - running
        app2 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-2")
-       app2.SetState(objects.Starting.String())
+       app2.SetState(objects.Running.String())
 
-       // add a new app3 - running
+       // add a new app3 - completing
        app3 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-3")
-       app3.SetState(objects.Running.String())
+       app3.SetState(objects.Completing.String())
 
-       // add a new app4 - completing
+       // add a new app4 - rejected
        app4 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-4")
-       app4.SetState(objects.Completing.String())
+       app4.SetState(objects.Rejected.String())
 
-       // add a new app5 - rejected
+       // add a new app5 - completed
        app5 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-5")
-       app5.SetState(objects.Rejected.String())
-
-       // add a new app6 - completed
-       app6 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-6")
-       app6.SetState(objects.Completed.String())
+       app5.SetState(objects.Completed.String())
 
        // add a new app7 - failed
-       app7 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-7")
-       app7.SetState(objects.Failed.String())
+       app6 := addAndConfirmApplicationExists(t, partitionName, defaultPartition, "app-6")
+       app6.SetState(objects.Failed.String())
 
        NewWebApp(schedulerContext, nil)
 
@@ -984,8 +980,8 @@ func TestPartitions(t *testing.T) {
        // create test allocations
        resAlloc1 := resources.NewResourceFromMap(map[string]resources.Quantity{siCommon.Memory: 100, siCommon.CPU: 400})
        resAlloc2 := resources.NewResourceFromMap(map[string]resources.Quantity{siCommon.Memory: 200, siCommon.CPU: 300})
-       ask1 := objects.NewAllocationAsk("alloc-1", app6.ApplicationID, resAlloc1)
-       ask2 := objects.NewAllocationAsk("alloc-2", app3.ApplicationID, resAlloc2)
+       ask1 := objects.NewAllocationAsk("alloc-1", app5.ApplicationID, resAlloc1)
+       ask2 := objects.NewAllocationAsk("alloc-2", app2.ApplicationID, resAlloc2)
        allocs := []*objects.Allocation{objects.NewAllocation(node1ID, ask1)}
        err = defaultPartition.AddNode(node1, allocs)
        assert.NilError(t, err, "add node to partition should not have failed")
@@ -1011,10 +1007,9 @@ func TestPartitions(t *testing.T) {
        assert.Equal(t, cs["default"].NodeSortingPolicy.Type, "fair")
        assert.Equal(t, cs["default"].NodeSortingPolicy.ResourceWeights["vcore"], 1.0)
        assert.Equal(t, cs["default"].NodeSortingPolicy.ResourceWeights["memory"], 1.0)
-       assert.Equal(t, cs["default"].Applications["total"], 8)
+       assert.Equal(t, cs["default"].Applications["total"], 7)
        assert.Equal(t, cs["default"].Applications[objects.New.String()], 1)
        assert.Equal(t, cs["default"].Applications[objects.Accepted.String()], 1)
-       assert.Equal(t, cs["default"].Applications[objects.Starting.String()], 1)
        assert.Equal(t, cs["default"].Applications[objects.Running.String()], 1)
        assert.Equal(t, cs["default"].Applications[objects.Completing.String()], 1)
        assert.Equal(t, cs["default"].Applications[objects.Rejected.String()], 1)
@@ -2481,7 +2476,7 @@ func prepareUserAndGroupContext(t *testing.T, config string) {
        // add an alloc
        allocInfo := objects.NewAllocation("node-1", ask)
        app.AddAllocation(allocInfo)
-       assert.Assert(t, app.IsStarting(), "Application did not return starting state after alloc: %s", app.CurrentState())
+       assert.Assert(t, app.IsRunning(), "Application did not return running state after alloc: %s", app.CurrentState())
 
        NewWebApp(schedulerContext, nil)
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(yunikorn-core) branch master updated: [YUNIKORN-2495] Remove Starting application state (#824)

Reply via email to