kingamarton commented on a change in pull request #336:
URL: 
https://github.com/apache/incubator-yunikorn-core/pull/336#discussion_r748076690



##########
File path: pkg/webservice/handlers.go
##########
@@ -150,20 +141,7 @@ func getNodesInfo(w http.ResponseWriter, r *http.Request) {
        writeHeaders(w)
 
        lists := schedulerContext.GetPartitionMapClone()
-       result := make([]*dao.NodesDAOInfo, 0, len(lists))
-       for _, partition := range lists {
-               ns := partition.GetNodes()
-               nodesDao := make([]*dao.NodeDAOInfo, 0, len(ns))
-               for _, node := range ns {
-                       nodeDao := getNodeJSON(node)
-                       nodesDao = append(nodesDao, nodeDao)
-               }
-               result = append(result, &dao.NodesDAOInfo{
-                       PartitionName: 
common.GetPartitionNameWithoutClusterID(partition.Name),
-                       Nodes:         nodesDao,
-               })
-       }
-
+       result := getNodesDao(lists)

Review comment:
       nit: please try to be consistent in the naming. Use everywhere DAO, or 
Dao, but use it in the same way (getClustersUtilDAOInfo, vs getNodesDao)

##########
File path: pkg/webservice/handlers.go
##########
@@ -744,3 +665,151 @@ func getLogLevel(w http.ResponseWriter, r *http.Request) {
                buildJSONErrorResponse(w, err.Error(), 
http.StatusInternalServerError)
        }
 }
+
+func getPartitionInfoDao(lists map[string]*scheduler.PartitionContext) 
[]*dao.PartitionInfo {
+       result := make([]*dao.PartitionInfo, 0, len(lists))
+
+       for _, partitionContext := range lists {
+               partitionInfo := &dao.PartitionInfo{}
+               partitionInfo.ClusterID = partitionContext.RmID
+               partitionInfo.Name = 
common.GetPartitionNameWithoutClusterID(partitionContext.Name)
+               partitionInfo.State = partitionContext.GetCurrentState()
+               partitionInfo.LastStateTransitionTime = 
partitionContext.GetStateTime().String()
+
+               capacityInfo := dao.PartitionCapacity{}
+               capacityInfo.Capacity = 
partitionContext.GetTotalPartitionResource().DAOString()
+               capacityInfo.UsedCapacity = 
partitionContext.GetAllocatedResource().DAOString()
+               partitionInfo.Capacity = capacityInfo
+               partitionInfo.NodeSortingPolicy = dao.NodeSortingPolicy{
+                       Type:            
partitionContext.GetNodeSortingPolicyType().String(),
+                       ResourceWeights: 
partitionContext.GetNodeSortingResourceWeights(),
+               }
+
+               appList := partitionContext.GetApplications()
+               appList = append(appList, 
partitionContext.GetCompletedApplications()...)
+               applicationsState := make(map[string]int)
+               totalApplications := 0
+               for _, app := range appList {
+                       applicationsState[app.CurrentState()]++
+                       totalApplications++
+               }
+               applicationsState["total"] = totalApplications
+               partitionInfo.Applications = applicationsState
+               result = append(result, partitionInfo)
+       }
+
+       return result
+}
+
+func getAppHistoryDao(records []*history.MetricsRecord) 
[]*dao.ApplicationHistoryDAOInfo {
+       result := make([]*dao.ApplicationHistoryDAOInfo, 0, len(records))
+       for _, record := range records {
+               if record == nil {
+                       continue
+               }
+               element := &dao.ApplicationHistoryDAOInfo{
+                       Timestamp:         record.Timestamp.UnixNano(),
+                       TotalApplications: 
strconv.Itoa(record.TotalApplications),
+               }
+               result = append(result, element)
+       }
+
+       return result
+}
+
+func getNodesDao(lists map[string]*scheduler.PartitionContext) 
[]*dao.NodesDAOInfo {
+       result := make([]*dao.NodesDAOInfo, 0, len(lists))
+       for _, partition := range lists {
+               ns := partition.GetNodes()
+               nodesDao := make([]*dao.NodeDAOInfo, 0, len(ns))
+               for _, node := range ns {
+                       nodeDao := getNodeJSON(node)
+                       nodesDao = append(nodesDao, nodeDao)
+               }
+               result = append(result, &dao.NodesDAOInfo{
+                       PartitionName: 
common.GetPartitionNameWithoutClusterID(partition.Name),
+                       Nodes:         nodesDao,
+               })
+       }
+
+       return result
+}
+
+func getContainerHistoryDao(records []*history.MetricsRecord) 
[]*dao.ContainerHistoryDAOInfo {
+       result := make([]*dao.ContainerHistoryDAOInfo, 0, len(records))
+       for _, record := range records {
+               if record == nil {
+                       continue
+               }
+               element := &dao.ContainerHistoryDAOInfo{
+                       Timestamp:       record.Timestamp.UnixNano(),
+                       TotalContainers: strconv.Itoa(record.TotalContainers),
+               }
+               result = append(result, element)
+       }
+
+       return result
+}
+
+func getClustersUtilDAOInfo(lists map[string]*scheduler.PartitionContext) 
[]*dao.ClustersUtilDAOInfo {
+       result := make([]*dao.ClustersUtilDAOInfo, 0, len(lists))
+       for _, partition := range lists {
+               result = append(result, &dao.ClustersUtilDAOInfo{
+                       PartitionName: 
common.GetPartitionNameWithoutClusterID(partition.Name),
+                       ClustersUtil:  getClusterUtilJSON(partition),
+               })
+       }
+
+       return result
+}
+func getNodesUtilizationDao(lists map[string]*scheduler.PartitionContext) 
[]*dao.NodesUtilDAOInfo {
+       var result []*dao.NodesUtilDAOInfo
+
+       for _, partition := range lists {
+               partitionResource := partition.GetTotalPartitionResource()
+               // partitionResource can be null if the partition has no node
+               if partitionResource != nil {
+                       for name := range partitionResource.Resources {
+                               result = append(result, 
getNodesUtilJSON(partition, name))
+                       }
+               }
+       }
+
+       return result
+}
+
+func getApplicationsDao(lists map[string]*scheduler.PartitionContext) 
[]*dao.ApplicationDAOInfo {
+       result := make([]*dao.ApplicationDAOInfo, 0, 32)
+
+       for _, partition := range lists {
+               size := partition.GetTotalCompletedApplicationCount() + 
partition.GetTotalApplicationCount()
+               appList := make([]*objects.Application, size)
+               appList = append(appList, partition.GetApplications()...)
+               appList = append(appList, 
partition.GetCompletedApplications()...)
+
+               for _, app := range appList {
+                       result = append(result, getApplicationJSON(app))
+               }
+       }
+
+       return result
+}
+
+func getPartitionDAOInfo(lists map[string]*scheduler.PartitionContext) 
[]*dao.PartitionDAOInfo {
+       queues := make([]*dao.PartitionDAOInfo, len(lists))

Review comment:
       please rename the variable here, since it is storing partitiondata, not 
queues.

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }

Review comment:
       Please use assert.NilError 

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryEnableStateDumpTwice(t *testing.T) {
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       enablePeriodicStateDump(resp, req)
+       enablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryDisableNotRunningStateDump(t *testing.T) {
+       req, err := http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       disablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")
+}
+
+func prepareSchedulerContext(t *testing.T) *scheduler.ClusterContext {
+       configs.MockSchedulerConfigByData([]byte(configDefault))
+       var err error
+       schedulerContext, err = scheduler.NewClusterContext(rmID, policyGroup)
+       assert.NilError(t, err, "Error when load clusterInfo from config")
+       assert.Equal(t, 1, len(schedulerContext.GetPartitionMapClone()))
+
+       return schedulerContext
+}
+
+func waitForStateDumpFile(t *testing.T) {
+       for {
+               var attempts int
+
+               info, err := os.Stat(stateDumpFilePath)
+
+               // tolerate only "file not found" errors
+               if err != nil && !os.IsNotExist(err) {
+                       t.Fatal(err)
+               }
+
+               if info != nil && info.Size() > 0 {
+                       break
+               }
+
+               if attempts++; attempts > 10 {
+                       t.Fatal("state dump file has not been created")
+               }
+               time.Sleep(1 * time.Second)
+       }
+}
+
+func deleteStateDumpFile() {
+       _ = os.Remove(stateDumpFilePath)

Review comment:
       I think we should keep and return the error here instead of suppressing 
it. Then, it can be checked if no error was thrown

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryEnableStateDumpTwice(t *testing.T) {
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       enablePeriodicStateDump(resp, req)
+       enablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")

Review comment:
       IN this case you can use assert.Equal as well, maybe this one the more 
appropiate in this case.

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryEnableStateDumpTwice(t *testing.T) {
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       enablePeriodicStateDump(resp, req)
+       enablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryDisableNotRunningStateDump(t *testing.T) {
+       req, err := http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       disablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")

Review comment:
       nit: maybe assert.Equals would be better

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")

Review comment:
       How this assert.Check works? Why is it better to use this one hete than 
assert.Assert?

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)

Review comment:
       Do we need this variable here? It is not used in this test case.

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }

Review comment:
       Please use assert.NilError

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryEnableStateDumpTwice(t *testing.T) {
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       enablePeriodicStateDump(resp, req)

Review comment:
       You can add an assert here as well, to check if the first call is 
successfull

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")

Review comment:
       instead of asserting to a code what should not be the status code, 
better check if the code is the expected one.

##########
File path: pkg/webservice/handlers_test.go
##########
@@ -1279,3 +1282,159 @@ func TestGetNodesUtilization(t *testing.T) {
        assert.NilError(t, err)
        assert.Equal(t, len(nodesDao), 0)
 }
+
+func TestGetFullStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/getfullstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       getFullStateDump(resp, req)
+       receivedBytes := resp.outputBytes
+       statusCode := resp.statusCode
+       assert.Assert(t, len(receivedBytes) > 0, "json response is empty")
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+       var aggregated AggregatedStateInfo
+       if err := json.Unmarshal(receivedBytes, &aggregated); err != nil {
+               t.Fatal(err)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+}
+
+func TestEnableDisablePeriodicStateDump(t *testing.T) {
+       schedulerContext = prepareSchedulerContext(t)
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       imHistory = history.NewInternalMetricsHistory(5)
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       vars := map[string]string{
+               "period": "3",
+       }
+       req = mux.SetURLVars(req, vars)
+
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       // enable state dump, check file contents
+       enablePeriodicStateDump(resp, req)
+       statusCode := resp.statusCode
+       assert.Check(t, statusCode != http.StatusInternalServerError, "response 
status code")
+
+       waitForStateDumpFile(t)
+       fileContents, err2 := ioutil.ReadFile(stateDumpFilePath)
+       if err2 != nil {
+               t.Fatal(err2)
+       }
+       var aggregated AggregatedStateInfo
+       if err3 := json.Unmarshal(fileContents, &aggregated); err3 != nil {
+               t.Fatal(err3)
+       }
+       verifyStateDumpJSON(t, &aggregated)
+
+       // disable
+       req, err = http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp = &MockResponseWriter{}
+       disablePeriodicStateDump(resp, req)
+       statusCode = resp.statusCode
+       assert.Assert(t, statusCode != http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryEnableStateDumpTwice(t *testing.T) {
+       defer deleteStateDumpFile()
+       defer terminateGoroutine()
+
+       req, err := http.NewRequest("GET", "/ws/v1/enableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       enablePeriodicStateDump(resp, req)
+       enablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")
+}
+
+func TestTryDisableNotRunningStateDump(t *testing.T) {
+       req, err := http.NewRequest("GET", "/ws/v1/disableperiodicstatedump", 
strings.NewReader(""))
+       req = mux.SetURLVars(req, make(map[string]string))
+       assert.NilError(t, err)
+       resp := &MockResponseWriter{}
+
+       disablePeriodicStateDump(resp, req)
+
+       statusCode := resp.statusCode
+       assert.Assert(t, statusCode == http.StatusInternalServerError, 
"response status code")
+}
+
+func prepareSchedulerContext(t *testing.T) *scheduler.ClusterContext {
+       configs.MockSchedulerConfigByData([]byte(configDefault))
+       var err error
+       schedulerContext, err = scheduler.NewClusterContext(rmID, policyGroup)
+       assert.NilError(t, err, "Error when load clusterInfo from config")
+       assert.Equal(t, 1, len(schedulerContext.GetPartitionMapClone()))
+
+       return schedulerContext
+}
+
+func waitForStateDumpFile(t *testing.T) {
+       for {
+               var attempts int
+
+               info, err := os.Stat(stateDumpFilePath)
+
+               // tolerate only "file not found" errors
+               if err != nil && !os.IsNotExist(err) {

Review comment:
       Why do we need to handle this error case differently?

##########
File path: pkg/webservice/state_dump.go
##########
@@ -0,0 +1,207 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package webservice
+
+import (
+       "encoding/json"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "strconv"
+       "sync"
+       "time"
+
+       "github.com/apache/incubator-yunikorn-core/pkg/log"
+       metrics2 "github.com/apache/incubator-yunikorn-core/pkg/metrics"
+       "github.com/apache/incubator-yunikorn-core/pkg/scheduler"
+       "github.com/apache/incubator-yunikorn-core/pkg/webservice/dao"
+
+       "github.com/gorilla/mux"
+       "go.uber.org/zap"
+)
+
+const (
+       defaultStateDumpPeriod = 60

Review comment:
       Creating the state dump by default every minute seems a little too 
aggressive for me. It will make huge files. I think we should print the dumps 
less freqvently. What is the default value in yarn for the state dump creeation?

##########
File path: pkg/webservice/state_dump.go
##########
@@ -0,0 +1,207 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package webservice
+
+import (
+       "encoding/json"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "strconv"
+       "sync"
+       "time"
+
+       "github.com/apache/incubator-yunikorn-core/pkg/log"
+       metrics2 "github.com/apache/incubator-yunikorn-core/pkg/metrics"
+       "github.com/apache/incubator-yunikorn-core/pkg/scheduler"
+       "github.com/apache/incubator-yunikorn-core/pkg/webservice/dao"
+
+       "github.com/gorilla/mux"
+       "go.uber.org/zap"
+)
+
+const (
+       defaultStateDumpPeriod = 60

Review comment:
       From this line we cannot know what this 60 is? mins? seconds? Since this 
is a duration, please use Duration type instead of int, or if it is not 
possible please rename the variable to mirror the exact value.

##########
File path: pkg/webservice/state_dump.go
##########
@@ -0,0 +1,207 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package webservice
+
+import (
+       "encoding/json"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "strconv"
+       "sync"
+       "time"
+
+       "github.com/apache/incubator-yunikorn-core/pkg/log"
+       metrics2 "github.com/apache/incubator-yunikorn-core/pkg/metrics"
+       "github.com/apache/incubator-yunikorn-core/pkg/scheduler"
+       "github.com/apache/incubator-yunikorn-core/pkg/webservice/dao"
+
+       "github.com/gorilla/mux"
+       "go.uber.org/zap"
+)
+
+const (
+       defaultStateDumpPeriod = 60
+       stateDumpFilePath      = "yunikorn-state.txt"
+)
+
+var (
+       periodicStateDump bool
+       abort             chan struct{}
+       startStop         sync.Mutex
+       stateDump         sync.Mutex // guards against simultaneous periodic vs 
web request
+)
+
+type AggregatedStateInfo struct {
+       Timestamp          string
+       Partitions         []*dao.PartitionInfo
+       Applications       []*dao.ApplicationDAOInfo
+       AppHistory         []*dao.ApplicationHistoryDAOInfo
+       Nodes              []*dao.NodesDAOInfo
+       NodesUtilization   []*dao.NodesUtilDAOInfo
+       ClusterInfo        []*dao.ClusterDAOInfo
+       ClusterUtilization []*dao.ClustersUtilDAOInfo
+       ContainerHistory   []*dao.ContainerHistoryDAOInfo
+       Queues             []*dao.PartitionDAOInfo
+       SchedulerHealth    *dao.SchedulerHealthDAOInfo

Review comment:
       I am not sure if the health status should be included or not. Now it is 
set as liveness probe, so when the scheduler becomes unhealthy, it is logged 
and also it will be restarted.

##########
File path: pkg/webservice/routes.go
##########
@@ -170,6 +170,24 @@ var webRoutes = routes{
                "/ws/v1/partition/{partition}/queue/{queue}/applications",
                getQueueApplications,
        },
+       route{
+               "Scheduler",
+               "GET",
+               "/ws/v1/fullstatedump",
+               getFullStateDump,
+       },
+       route{
+               "Scheduler",
+               "PUT",
+               "/ws/v1/enableperiodicstatedump/{period}",
+               enablePeriodicStateDump,
+       },
+       route{
+               "Scheduler",
+               "PUT",
+               "/ws/v1/disableperiodicstatedump",
+               disablePeriodicStateDump,
+       },

Review comment:
       Can we merge this 2 into one? I am thinking about using a parameter for 
enabling/disabling it, and one for the period.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to