This is an automated email from the ASF dual-hosted git repository.

wwei pushed a commit to branch soak-test
in repository https://gitbox.apache.org/repos/asf/yunikorn-release.git


The following commit(s) were added to refs/heads/soak-test by this push:
     new 7c3a70b  [Yunikorn-3052] Integrate with cluster-autoscaler 
(kwok-provider) to set Node desiredCount and maxCount per Config (#195)
7c3a70b is described below

commit 7c3a70b031cee7872b4b6be823a9eb5ee3124977
Author: junyan-ling <[email protected]>
AuthorDate: Thu Mar 13 10:20:19 2025 -0700

    [Yunikorn-3052] Integrate with cluster-autoscaler (kwok-provider) to set 
Node desiredCount and maxCount per Config (#195)
    
    Integrate with cluster-autoscaler (kwok-provider) to set Node desiredCount 
and maxCount per Config through annotations:
    - "cluster-autoscaler.kwok.nodegroup/min-count"
    - "cluster-autoscaler.kwok.nodegroup/desired-count"
    - "cluster-autoscaler.kwok.nodegroup/max-count"
    
    This way, we can use the autoscaler config as a centralized place to set both 
the initial node count and the max node count with the help of the autoscaler, without 
having to initialize nodes with Kwok node configurations.
---
 soak/pkg/setup/setup.go                            | 164 +++++++++++++++------
 soak/pkg/setup/setup_test.go                       |  41 ++++++
 .../initial_setup.sh => pkg/setup/test_conf.yaml}  |  29 ++--
 soak/scripts/README.md                             |   1 +
 soak/scripts/initial_setup.sh                      |  12 +-
 .../autoscaler-configmap.yaml}                     |  21 +--
 soak/templates/kwok-node-template.yaml             |  19 +--
 .../test-workload.yaml}                            |  49 ++++--
 8 files changed, 227 insertions(+), 109 deletions(-)

diff --git a/soak/pkg/setup/setup.go b/soak/pkg/setup/setup.go
index 307d170..000375c 100644
--- a/soak/pkg/setup/setup.go
+++ b/soak/pkg/setup/setup.go
@@ -19,9 +19,10 @@ package setup
 import (
        "fmt"
        "github.com/apache/yunikorn-core/pkg/log"
+       "github.com/apache/yunikorn-release/soak/framework"
        "github.com/apache/yunikorn-release/soak/pkg/constants"
-       "github.com/apache/yunikorn-release/soak/pkg/framework"
        "go.uber.org/zap"
+       "gopkg.in/yaml.v3"
        "os"
        "os/exec"
        "path/filepath"
@@ -55,18 +56,18 @@ func setK8sContext() error {
        return nil
 }
 
-func upgradeSchedulerPerConfig(scheduler framework.TemplateFields) error {
+func upgradeSchedulerPerConfig(scheduler framework.SchedulerFields) error {
        if err := setK8sContext(); err != nil {
                logger.Fatal("failed to set kubernetes context", zap.Error(err))
                return err
        }
 
        logger.Info("Scheduler details",
-               zap.String("VcoreRequests", *scheduler.VcoreRequests),
-               zap.String("MemoryRequests", *scheduler.MemoryRequests),
-               zap.String("VcoreLimits", *scheduler.VcoreLimits),
-               zap.String("MemoryLimits", *scheduler.MemoryLimits),
-               zap.String("path", *scheduler.Path))
+               zap.String("VcoreRequests", scheduler.VcoreRequests),
+               zap.String("MemoryRequests", scheduler.MemoryRequests),
+               zap.String("VcoreLimits", scheduler.VcoreLimits),
+               zap.String("MemoryLimits", scheduler.MemoryLimits),
+               zap.String("path", scheduler.Path))
 
        args := []string{
                "upgrade",
@@ -77,17 +78,17 @@ func upgradeSchedulerPerConfig(scheduler 
framework.TemplateFields) error {
 
        var moreArgs []string
 
-       if scheduler.VcoreRequests != nil {
-               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.requests.cpu=%s", *scheduler.VcoreRequests))
+       if scheduler.VcoreRequests != "" {
+               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.requests.cpu=%s", scheduler.VcoreRequests))
        }
-       if scheduler.MemoryRequests != nil {
-               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.requests.memory=%s", *scheduler.MemoryRequests))
+       if scheduler.MemoryRequests != "" {
+               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.requests.memory=%s", scheduler.MemoryRequests))
        }
-       if scheduler.VcoreLimits != nil {
-               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.limits.cpu=%s", *scheduler.VcoreLimits))
+       if scheduler.VcoreLimits != "" {
+               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.limits.cpu=%s", scheduler.VcoreLimits))
        }
-       if scheduler.MemoryLimits != nil {
-               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.limits.memory=%s", *scheduler.MemoryLimits))
+       if scheduler.MemoryLimits != "" {
+               moreArgs = append(moreArgs, "--set", 
fmt.Sprintf("resources.limits.memory=%s", scheduler.MemoryLimits))
        }
 
        if len(moreArgs) > 0 {
@@ -108,9 +109,9 @@ func upgradeSchedulerPerConfig(scheduler 
framework.TemplateFields) error {
                        zap.String("output", string(output)))
        }
 
-       if scheduler.Path != nil {
+       if scheduler.Path != "" {
                kubectlArgs := []string{"apply"}
-               kubectlArgs = append(kubectlArgs, "-f", *scheduler.Path, "-n", 
"yunikorn")
+               kubectlArgs = append(kubectlArgs, "-f", scheduler.Path, "-n", 
"yunikorn")
                kubectlCmd := exec.Command("kubectl", kubectlArgs...)
                logger.Info("Kubectl command to be executed",
                        zap.String("command", fmt.Sprintf("kubectl %s", 
strings.Join(kubectlArgs, " "))))
@@ -125,50 +126,121 @@ func upgradeSchedulerPerConfig(scheduler 
framework.TemplateFields) error {
        return nil
 }
 
-func setNodeScalePerConfig(node framework.TemplateFields) error {
+func setAutoscalerPerConfig(node framework.NodeFields) error {
        if err := setK8sContext(); err != nil {
                logger.Fatal("failed to set kubernetes context", zap.Error(err))
                return err
        }
 
        logger.Info("Node details",
-               zap.String("path", *node.Path),
-               zap.Int("NodesDesiredCount", *node.DesiredCount),
-               zap.Int("maxCount", *node.MaxCount))
+               zap.String("path", node.Path),
+               zap.String("NodesDesiredCount", node.DesiredCount),
+               zap.String("maxCount", node.MaxCount))
 
-       templateContent, err := 
os.ReadFile("soak/templates/kwok-node-template.yaml")
+       templateContent, err := os.ReadFile(node.Path)
        if err != nil {
-               return fmt.Errorf("failed to read template file: %v", err)
+               logger.Error("failed to read template file", zap.Error(err))
+               return err
        }
-       desiredCount := *node.DesiredCount
 
-       for i := 0; i < desiredCount; i++ {
-               currentNodeName := fmt.Sprintf("kwok-node-%d", i)
-               nodeContent := strings.ReplaceAll(string(templateContent), 
"kwok-node-i", currentNodeName)
+       var nodeTemplate map[string]interface{}
+       err = yaml.Unmarshal(templateContent, &nodeTemplate)
+       if err != nil {
+               logger.Error("failed to parse template YAML", zap.Error(err))
+               return err
+       }
 
-               tmpfile, err := os.CreateTemp("", "node-*.yaml")
-               if err != nil {
-                       return fmt.Errorf("failed to create temp file: %v", err)
-               }
-               defer os.Remove(tmpfile.Name()) // Clean up
+       metadata, ok := nodeTemplate["metadata"].(map[string]interface{})
+       if !ok {
+               logger.Error("invalid metadata format in node template")
+               return fmt.Errorf("invalid metadata format in node template")
+       }
 
-               if _, err := tmpfile.WriteString(nodeContent); err != nil {
-                       return fmt.Errorf("failed to write to temp file: %v", 
err)
-               }
-               if err := tmpfile.Close(); err != nil {
-                       return fmt.Errorf("failed to close temp file: %v", err)
-               }
+       annotations, ok := metadata["annotations"].(map[string]interface{})
+       if !ok {
+               logger.Error("invalid annotations format in node template")
+               return fmt.Errorf("invalid annotations format in node template")
+       }
 
-               cmd := exec.Command("kubectl", "apply", "-f", tmpfile.Name())
-               output, err := cmd.CombinedOutput()
-               if err != nil {
-                       return fmt.Errorf("failed to apply node configuration: 
%v", err)
-               }
+       annotations["cluster-autoscaler.kwok.nodegroup/max-count"] = 
node.MaxCount
+       annotations["cluster-autoscaler.kwok.nodegroup/min-count"] = 
node.DesiredCount
+       annotations["cluster-autoscaler.kwok.nodegroup/desired-count"] = 
node.DesiredCount
 
-               logger.Info("Applied node configuration",
-                       zap.String("nodeName", currentNodeName),
-                       zap.String("output", string(output)))
+       autoscalerConfigmapPath := "../../templates/autoscaler-configmap.yaml"
+
+       autoscalerConfigmap, err := os.ReadFile(autoscalerConfigmapPath)
+       if err != nil {
+               logger.Error("failed to read autoscaler configmap template", 
zap.Error(err))
+               return err
+       }
+
+       var autoscalerNodeList map[string]interface{}
+       err = yaml.Unmarshal(autoscalerConfigmap, &autoscalerNodeList)
+       if err != nil {
+               logger.Error("failed to parse autoscalerConfigmap YAML", 
zap.Error(err))
+               return err
        }
+       logger.Info("Autoscaler Node List", zap.Any("autoscalerNodeList", 
autoscalerNodeList))
+
+       var itemsSlice []interface{}
+       itemsSlice = append(itemsSlice, nodeTemplate)
+       autoscalerNodeList["items"] = itemsSlice
+
+       autoscalerNodeListYaml, err := yaml.Marshal(autoscalerNodeList)
+       if err != nil {
+               logger.Error("failed to convert updated autoscalerNodeList to 
YAML", zap.Error(err))
+               return err
+       }
+       logger.Info("Encoded autoscalerNodeListYaml", 
zap.Any("autoscalerNodeListYaml", autoscalerNodeListYaml))
+
+       updatedAcCmTempFile, err := os.CreateTemp("", 
"updated-autoscaler-configmap-temp.yaml")
+       if err != nil {
+               logger.Error("failed to create 
updated-autoscaler-configmap-temp file", zap.Error(err))
+               return err
+       }
+
+       updatedAcCmTempFilePath := updatedAcCmTempFile.Name()
+       defer os.Remove(updatedAcCmTempFilePath)
+
+       if _, err = updatedAcCmTempFile.Write(autoscalerNodeListYaml); err != 
nil {
+               updatedAcCmTempFile.Close()
+               logger.Error("failed to write to 
updated-autoscaler-configmap-temp file", zap.Error(err))
+               return err
+       }
+       if err = updatedAcCmTempFile.Close(); err != nil {
+               logger.Error("failed to close updated-autoscaler-configmap-temp 
file", zap.Error(err))
+               return err
+       }
+
+       // Delete the default autoscaler configMap
+       deleteConfigMapCmd := exec.Command("kubectl", "delete", "cm", 
"kwok-provider-templates")
+       deleteConfigMapCmdOutput, err := deleteConfigMapCmd.CombinedOutput()
+       if err != nil {
+               logger.Error("fail to delete configmap", zap.Error(err))
+               return err
+       }
+       logger.Info(string(deleteConfigMapCmdOutput))
+
+       // Create a new autoscaler configMap
+       createConfigMapCmd := exec.Command("kubectl", "create", "cm", 
"kwok-provider-templates",
+               "--from-file=templates="+updatedAcCmTempFilePath)
+       createConfigMapCmdOutput, err := createConfigMapCmd.CombinedOutput()
+       if err != nil {
+               logger.Error("fail to create new configmap", zap.Error(err))
+               return err
+       }
+       logger.Info(string(createConfigMapCmdOutput))
+
+       // Restart the autoscaler pod after updating the configmap
+       restartAutoscalerPodCmd := exec.Command("kubectl", "rollout", 
"restart", "deployment", "autoscaler-kwok-cluster-autoscaler")
+       restartAutoscalerPodCmdOutput, err := 
restartAutoscalerPodCmd.CombinedOutput()
+       if err != nil {
+               logger.Error("failed to restart autoscaler deployment", 
zap.Error(err))
+               return err
+       }
+       logger.Info("Restarted autoscaler deployment", zap.String("output", 
string(restartAutoscalerPodCmdOutput)))
+
+       logger.Info("Successfully set up kwok provider cluster autoscaler for 
desiredNodeCount and MaxNodeCount")
 
        return nil
 }
diff --git a/soak/pkg/setup/setup_test.go b/soak/pkg/setup/setup_test.go
new file mode 100644
index 0000000..eb64e57
--- /dev/null
+++ b/soak/pkg/setup/setup_test.go
@@ -0,0 +1,41 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+     http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package setup
+
+import (
+       "github.com/apache/yunikorn-release/soak/framework"
+       "github.com/stretchr/testify/assert"
+       "go.uber.org/zap"
+       "testing"
+)
+
+func TestSetAutoScalerPerConfig(t *testing.T) {
+       conf, err := framework.InitConfig("test_conf.yaml")
+       if err != nil {
+               logger.Fatal("failed to parse config", zap.Error(err))
+       }
+       logger.Info("config successfully loaded", zap.Any("conf", conf))
+
+       for _, test := range conf.Tests {
+               if len(test.Template.Node) > 0 {
+                       for _, nodeTemplate := range test.Template.Node {
+                               err := setAutoscalerPerConfig(nodeTemplate)
+                               assert.NoError(t, err)
+                       }
+               }
+       }
+}
diff --git a/soak/scripts/initial_setup.sh b/soak/pkg/setup/test_conf.yaml
similarity index 57%
copy from soak/scripts/initial_setup.sh
copy to soak/pkg/setup/test_conf.yaml
index 4b4bc33..99a2a97 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/pkg/setup/test_conf.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,17 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+tests:
+  - name: autoscaling
+    template:
+      node:
+        - path: ../../templates/kwok-node-template.yaml
+          maxCount: "10"
+          desiredCount: "5"
+      scheduler:
+        - path: ../../templates/autoscaling-queues.yaml
+          vcoreRequests: 2
+          vcoreLimits: 2
+          memoryRequests: 16Gi
+          memoryLimits: 16Gi
\ No newline at end of file
diff --git a/soak/scripts/README.md b/soak/scripts/README.md
index 96f58f7..b64a5e4 100644
--- a/soak/scripts/README.md
+++ b/soak/scripts/README.md
@@ -21,6 +21,7 @@ limitations under the License.
 - [Docker](https://docs.docker.com/get-docker/)
 - [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)
 - [Kwok](https://kwok.sigs.k8s.io/docs/user/installation/)
+- [autoscaler](https://kubernetes.github.io/autoscaler)
 
 # set up initial soak test environment
 ```
diff --git a/soak/scripts/initial_setup.sh b/soak/scripts/initial_setup.sh
index 4b4bc33..87ddb9e 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/scripts/initial_setup.sh
@@ -16,8 +16,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Constants
+SOAK_TEST_CLUSTER='soak-test-cluster'
+
 # create a kind cluster
-kind create cluster --name soak-test-cluster
+kind create cluster --name $SOAK_TEST_CLUSTER
 
 # install YuniKorn scheduler on kind Cluster
 helm repo add yunikorn https://apache.github.io/yunikorn-release
@@ -26,7 +29,12 @@ kubectl create namespace yunikorn
 # TODO: allow to install a customized YuniKorn version to run the soak test
 helm install yunikorn yunikorn/yunikorn --namespace yunikorn
 
-## Deploy kwok in a Cluster
+# Deploy kwok in a Cluster
 helm repo add kwok https://kwok.sigs.k8s.io/charts/
 helm upgrade --namespace kube-system --install kwok kwok/kwok
 helm upgrade --install kwok kwok/stage-fast
+
+# Install Helm chart for autoscaler with Kwok provider
+helm repo add autoscaler https://kubernetes.github.io/autoscaler
+helm repo update
+helm upgrade --install autoscaler autoscaler/cluster-autoscaler --set 
cloudProvider=kwok --set 
"autoDiscovery.clusterName"="kind-${SOAK_TEST_CLUSTER}" --set 
"extraArgs.enforce-node-group-min-size"=true
diff --git a/soak/scripts/initial_setup.sh 
b/soak/templates/autoscaler-configmap.yaml
similarity index 57%
copy from soak/scripts/initial_setup.sh
copy to soak/templates/autoscaler-configmap.yaml
index 4b4bc33..3ca6613 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/templates/autoscaler-configmap.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,17 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+apiVersion: v1
+items:
+kind: List
+metadata:
+  resourceVersion: ""
diff --git a/soak/templates/kwok-node-template.yaml 
b/soak/templates/kwok-node-template.yaml
index 5f1de38..eea8399 100644
--- a/soak/templates/kwok-node-template.yaml
+++ b/soak/templates/kwok-node-template.yaml
@@ -18,23 +18,17 @@ apiVersion: v1
 kind: Node
 metadata:
   annotations:
-    node.alpha.kubernetes.io/ttl: "0"
-    kwok.x-k8s.io/node: fake
+    cluster-autoscaler.kwok.nodegroup/max-count:
+    cluster-autoscaler.kwok.nodegroup/min-count:
+    cluster-autoscaler.kwok.nodegroup/desired-count:
   labels:
     beta.kubernetes.io/arch: amd64
     beta.kubernetes.io/os: linux
     kubernetes.io/arch: amd64
-    kubernetes.io/hostname: kwok-node-i
+    kubernetes.io/hostname: kwok-node
+    kwok-nodegroup: kwok-node
     kubernetes.io/os: linux
-    kubernetes.io/role: agent
-    node-role.kubernetes.io/agent: ""
-    type: kwok
-  name: kwok-node-i
-spec:
-  taints: # Avoid scheduling actual running pods to fake Node
-    - effect: NoSchedule
-      key: kwok.x-k8s.io/node
-      value: fake
+  name: kwok-node
 status:
   allocatable:
     cpu: 32
@@ -55,4 +49,3 @@ status:
     operatingSystem: linux
     osImage: ""
     systemUUID: ""
-  phase: Running
diff --git a/soak/scripts/initial_setup.sh b/soak/templates/test-workload.yaml
similarity index 50%
copy from soak/scripts/initial_setup.sh
copy to soak/templates/test-workload.yaml
index 4b4bc33..2d6b66d 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/templates/test-workload.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -16,17 +14,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: test-workload
+spec:
+  replicas: 15
+  selector:
+    matchLabels:
+      app: fake-pod
+  template:
+    metadata:
+      labels:
+        app: fake-pod
+        kwok-nodegroup: "kwok-node"
+        applicationId: "test-workload-queue-a"
+        queue: root.a
+    spec:
+      schedulerName: yunikorn
+      tolerations:
+        - effect: NoSchedule
+          key: kwok-provider
+          value: "true"
+      containers:
+        - name: fake-container
+          image: docker-upstream.apple.com/alpine:latest
+          command: ["sleep", "300"]
+          resources:
+            requests:
+              cpu: "30"
+              memory: "1Gi"
+            limits:
+              cpu: "30"
+              memory: "1Gi"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to