This is an automated email from the ASF dual-hosted git repository.
wwei pushed a commit to branch soak-test
in repository https://gitbox.apache.org/repos/asf/yunikorn-release.git
The following commit(s) were added to refs/heads/soak-test by this push:
new 7c3a70b [Yunikorn-3052] Integrate with cluster-autoscaler
(kwok-provider) to set Node desiredCount and maxCount per Config (#195)
7c3a70b is described below
commit 7c3a70b031cee7872b4b6be823a9eb5ee3124977
Author: junyan-ling <[email protected]>
AuthorDate: Thu Mar 13 10:20:19 2025 -0700
[Yunikorn-3052] Integrate with cluster-autoscaler (kwok-provider) to set
Node desiredCount and maxCount per Config (#195)
Integrate with cluster-autoscaler (kwok-provider) to set Node desiredCount
and maxCount per Config through the following annotations (min-count is set
to the same value as desired-count):
- "cluster-autoscaler.kwok.nodegroup/min-count"
- "cluster-autoscaler.kwok.nodegroup/desired-count"
- "cluster-autoscaler.kwok.nodegroup/max-count"
This way, the autoscaler config becomes the single, centralized place to set
both the initial (desired) node count and the max node count; the autoscaler
creates the nodes itself, so we no longer have to initialize nodes from Kwok
node configurations.
---
soak/pkg/setup/setup.go | 164 +++++++++++++++------
soak/pkg/setup/setup_test.go | 41 ++++++
.../initial_setup.sh => pkg/setup/test_conf.yaml} | 29 ++--
soak/scripts/README.md | 1 +
soak/scripts/initial_setup.sh | 12 +-
.../autoscaler-configmap.yaml} | 21 +--
soak/templates/kwok-node-template.yaml | 19 +--
.../test-workload.yaml} | 49 ++++--
8 files changed, 227 insertions(+), 109 deletions(-)
diff --git a/soak/pkg/setup/setup.go b/soak/pkg/setup/setup.go
index 307d170..000375c 100644
--- a/soak/pkg/setup/setup.go
+++ b/soak/pkg/setup/setup.go
@@ -19,9 +19,10 @@ package setup
import (
"fmt"
"github.com/apache/yunikorn-core/pkg/log"
+ "github.com/apache/yunikorn-release/soak/framework"
"github.com/apache/yunikorn-release/soak/pkg/constants"
- "github.com/apache/yunikorn-release/soak/pkg/framework"
"go.uber.org/zap"
+ "gopkg.in/yaml.v3"
"os"
"os/exec"
"path/filepath"
@@ -55,18 +56,18 @@ func setK8sContext() error {
return nil
}
-func upgradeSchedulerPerConfig(scheduler framework.TemplateFields) error {
+func upgradeSchedulerPerConfig(scheduler framework.SchedulerFields) error {
if err := setK8sContext(); err != nil {
logger.Fatal("failed to set kubernetes context", zap.Error(err))
return err
}
logger.Info("Scheduler details",
- zap.String("VcoreRequests", *scheduler.VcoreRequests),
- zap.String("MemoryRequests", *scheduler.MemoryRequests),
- zap.String("VcoreLimits", *scheduler.VcoreLimits),
- zap.String("MemoryLimits", *scheduler.MemoryLimits),
- zap.String("path", *scheduler.Path))
+ zap.String("VcoreRequests", scheduler.VcoreRequests),
+ zap.String("MemoryRequests", scheduler.MemoryRequests),
+ zap.String("VcoreLimits", scheduler.VcoreLimits),
+ zap.String("MemoryLimits", scheduler.MemoryLimits),
+ zap.String("path", scheduler.Path))
args := []string{
"upgrade",
@@ -77,17 +78,17 @@ func upgradeSchedulerPerConfig(scheduler
framework.TemplateFields) error {
var moreArgs []string
- if scheduler.VcoreRequests != nil {
- moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.requests.cpu=%s", *scheduler.VcoreRequests))
+ if scheduler.VcoreRequests != "" {
+ moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.requests.cpu=%s", scheduler.VcoreRequests))
}
- if scheduler.MemoryRequests != nil {
- moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.requests.memory=%s", *scheduler.MemoryRequests))
+ if scheduler.MemoryRequests != "" {
+ moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.requests.memory=%s", scheduler.MemoryRequests))
}
- if scheduler.VcoreLimits != nil {
- moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.limits.cpu=%s", *scheduler.VcoreLimits))
+ if scheduler.VcoreLimits != "" {
+ moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.limits.cpu=%s", scheduler.VcoreLimits))
}
- if scheduler.MemoryLimits != nil {
- moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.limits.memory=%s", *scheduler.MemoryLimits))
+ if scheduler.MemoryLimits != "" {
+ moreArgs = append(moreArgs, "--set",
fmt.Sprintf("resources.limits.memory=%s", scheduler.MemoryLimits))
}
if len(moreArgs) > 0 {
@@ -108,9 +109,9 @@ func upgradeSchedulerPerConfig(scheduler
framework.TemplateFields) error {
zap.String("output", string(output)))
}
- if scheduler.Path != nil {
+ if scheduler.Path != "" {
kubectlArgs := []string{"apply"}
- kubectlArgs = append(kubectlArgs, "-f", *scheduler.Path, "-n",
"yunikorn")
+ kubectlArgs = append(kubectlArgs, "-f", scheduler.Path, "-n",
"yunikorn")
kubectlCmd := exec.Command("kubectl", kubectlArgs...)
logger.Info("Kubectl command to be executed",
zap.String("command", fmt.Sprintf("kubectl %s",
strings.Join(kubectlArgs, " "))))
@@ -125,50 +126,121 @@ func upgradeSchedulerPerConfig(scheduler
framework.TemplateFields) error {
return nil
}
-func setNodeScalePerConfig(node framework.TemplateFields) error {
+func setAutoscalerPerConfig(node framework.NodeFields) error {
if err := setK8sContext(); err != nil {
logger.Fatal("failed to set kubernetes context", zap.Error(err))
return err
}
logger.Info("Node details",
- zap.String("path", *node.Path),
- zap.Int("NodesDesiredCount", *node.DesiredCount),
- zap.Int("maxCount", *node.MaxCount))
+ zap.String("path", node.Path),
+ zap.String("NodesDesiredCount", node.DesiredCount),
+ zap.String("maxCount", node.MaxCount))
- templateContent, err :=
os.ReadFile("soak/templates/kwok-node-template.yaml")
+ templateContent, err := os.ReadFile(node.Path)
if err != nil {
- return fmt.Errorf("failed to read template file: %v", err)
+ logger.Error("failed to read template file", zap.Error(err))
+ return err
}
- desiredCount := *node.DesiredCount
- for i := 0; i < desiredCount; i++ {
- currentNodeName := fmt.Sprintf("kwok-node-%d", i)
- nodeContent := strings.ReplaceAll(string(templateContent),
"kwok-node-i", currentNodeName)
+ var nodeTemplate map[string]interface{}
+ err = yaml.Unmarshal(templateContent, &nodeTemplate)
+ if err != nil {
+ logger.Error("failed to parse template YAML", zap.Error(err))
+ return err
+ }
- tmpfile, err := os.CreateTemp("", "node-*.yaml")
- if err != nil {
- return fmt.Errorf("failed to create temp file: %v", err)
- }
- defer os.Remove(tmpfile.Name()) // Clean up
+ metadata, ok := nodeTemplate["metadata"].(map[string]interface{})
+ if !ok {
+ logger.Error("invalid metadata format in node template")
+ return fmt.Errorf("invalid metadata format in node template")
+ }
- if _, err := tmpfile.WriteString(nodeContent); err != nil {
- return fmt.Errorf("failed to write to temp file: %v",
err)
- }
- if err := tmpfile.Close(); err != nil {
- return fmt.Errorf("failed to close temp file: %v", err)
- }
+ annotations, ok := metadata["annotations"].(map[string]interface{})
+ if !ok {
+ logger.Error("invalid annotations format in node template")
+ return fmt.Errorf("invalid annotations format in node template")
+ }
- cmd := exec.Command("kubectl", "apply", "-f", tmpfile.Name())
- output, err := cmd.CombinedOutput()
- if err != nil {
- return fmt.Errorf("failed to apply node configuration:
%v", err)
- }
+ annotations["cluster-autoscaler.kwok.nodegroup/max-count"] =
node.MaxCount
+ annotations["cluster-autoscaler.kwok.nodegroup/min-count"] =
node.DesiredCount
+ annotations["cluster-autoscaler.kwok.nodegroup/desired-count"] =
node.DesiredCount
- logger.Info("Applied node configuration",
- zap.String("nodeName", currentNodeName),
- zap.String("output", string(output)))
+ autoscalerConfigmapPath := "../../templates/autoscaler-configmap.yaml"
+
+ autoscalerConfigmap, err := os.ReadFile(autoscalerConfigmapPath)
+ if err != nil {
+ logger.Error("failed to read autoscaler configmap template",
zap.Error(err))
+ return err
+ }
+
+ var autoscalerNodeList map[string]interface{}
+ err = yaml.Unmarshal(autoscalerConfigmap, &autoscalerNodeList)
+ if err != nil {
+ logger.Error("failed to parse autoscalerConfigmap YAML",
zap.Error(err))
+ return err
}
+ logger.Info("Autoscaler Node List", zap.Any("autoscalerNodeList",
autoscalerNodeList))
+
+ var itemsSlice []interface{}
+ itemsSlice = append(itemsSlice, nodeTemplate)
+ autoscalerNodeList["items"] = itemsSlice
+
+ autoscalerNodeListYaml, err := yaml.Marshal(autoscalerNodeList)
+ if err != nil {
+ logger.Error("failed to convert updated autoscalerNodeList to
YAML", zap.Error(err))
+ return err
+ }
+ logger.Info("Encoded autoscalerNodeListYaml",
zap.Any("autoscalerNodeListYaml", autoscalerNodeListYaml))
+
+ updatedAcCmTempFile, err := os.CreateTemp("",
"updated-autoscaler-configmap-temp.yaml")
+ if err != nil {
+ logger.Error("failed to create
updated-autoscaler-configmap-temp file", zap.Error(err))
+ return err
+ }
+
+ updatedAcCmTempFilePath := updatedAcCmTempFile.Name()
+ defer os.Remove(updatedAcCmTempFilePath)
+
+ if _, err = updatedAcCmTempFile.Write(autoscalerNodeListYaml); err !=
nil {
+ updatedAcCmTempFile.Close()
+ logger.Error("failed to write to
updated-autoscaler-configmap-temp file", zap.Error(err))
+ return err
+ }
+ if err = updatedAcCmTempFile.Close(); err != nil {
+ logger.Error("failed to close updated-autoscaler-configmap-temp
file", zap.Error(err))
+ return err
+ }
+
+ // Delete the default autoscaler configMap
+ deleteConfigMapCmd := exec.Command("kubectl", "delete", "cm",
"kwok-provider-templates")
+ deleteConfigMapCmdOutput, err := deleteConfigMapCmd.CombinedOutput()
+ if err != nil {
+ logger.Error("fail to delete configmap", zap.Error(err))
+ return err
+ }
+ logger.Info(string(deleteConfigMapCmdOutput))
+
+ // Create a new autoscaler configMap
+ createConfigMapCmd := exec.Command("kubectl", "create", "cm",
"kwok-provider-templates",
+ "--from-file=templates="+updatedAcCmTempFilePath)
+ createConfigMapCmdOutput, err := createConfigMapCmd.CombinedOutput()
+ if err != nil {
+ logger.Error("fail to create new configmap", zap.Error(err))
+ return err
+ }
+ logger.Info(string(createConfigMapCmdOutput))
+
+ // Restart the autoscaler pod after updating the configmap
+ restartAutoscalerPodCmd := exec.Command("kubectl", "rollout",
"restart", "deployment", "autoscaler-kwok-cluster-autoscaler")
+ restartAutoscalerPodCmdOutput, err :=
restartAutoscalerPodCmd.CombinedOutput()
+ if err != nil {
+ logger.Error("failed to restart autoscaler deployment",
zap.Error(err))
+ return err
+ }
+ logger.Info("Restarted autoscaler deployment", zap.String("output",
string(restartAutoscalerPodCmdOutput)))
+
+ logger.Info("Successfully set up kwok provider cluster autoscaler for
desiredNodeCount and MaxNodeCount")
return nil
}
diff --git a/soak/pkg/setup/setup_test.go b/soak/pkg/setup/setup_test.go
new file mode 100644
index 0000000..eb64e57
--- /dev/null
+++ b/soak/pkg/setup/setup_test.go
@@ -0,0 +1,41 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package setup
+
+import (
+ "github.com/apache/yunikorn-release/soak/framework"
+ "github.com/stretchr/testify/assert"
+ "go.uber.org/zap"
+ "testing"
+)
+
+func TestSetAutoScalerPerConfig(t *testing.T) {
+ conf, err := framework.InitConfig("test_conf.yaml")
+ if err != nil {
+ logger.Fatal("failed to parse config", zap.Error(err))
+ }
+ logger.Info("config successfully loaded", zap.Any("conf", conf))
+
+ for _, test := range conf.Tests {
+ if len(test.Template.Node) > 0 {
+ for _, nodeTemplate := range test.Template.Node {
+ err := setAutoscalerPerConfig(nodeTemplate)
+ assert.NoError(t, err)
+ }
+ }
+ }
+}
diff --git a/soak/scripts/initial_setup.sh b/soak/pkg/setup/test_conf.yaml
similarity index 57%
copy from soak/scripts/initial_setup.sh
copy to soak/pkg/setup/test_conf.yaml
index 4b4bc33..99a2a97 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/pkg/setup/test_conf.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,17 +14,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+tests:
+ - name: autoscaling
+ template:
+ node:
+ - path: ../../templates/kwok-node-template.yaml
+ maxCount: "10"
+ desiredCount: "5"
+ scheduler:
+ - path: ../../templates/autoscaling-queues.yaml
+ vcoreRequests: 2
+ vcoreLimits: 2
+ memoryRequests: 16Gi
+ memoryLimits: 16Gi
\ No newline at end of file
diff --git a/soak/scripts/README.md b/soak/scripts/README.md
index 96f58f7..b64a5e4 100644
--- a/soak/scripts/README.md
+++ b/soak/scripts/README.md
@@ -21,6 +21,7 @@ limitations under the License.
- [Docker](https://docs.docker.com/get-docker/)
- [kind](https://kind.sigs.k8s.io/docs/user/quick-start/#installation)
- [Kwok](https://kwok.sigs.k8s.io/docs/user/installation/)
+- [autoscaler](https://kubernetes.github.io/autoscaler)
# set up initial soak test environment
```
diff --git a/soak/scripts/initial_setup.sh b/soak/scripts/initial_setup.sh
index 4b4bc33..87ddb9e 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/scripts/initial_setup.sh
@@ -16,8 +16,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# Constants
+SOAK_TEST_CLUSTER='soak-test-cluster'
+
# create a kind cluster
-kind create cluster --name soak-test-cluster
+kind create cluster --name $SOAK_TEST_CLUSTER
# install YuniKorn scheduler on kind Cluster
helm repo add yunikorn https://apache.github.io/yunikorn-release
@@ -26,7 +29,12 @@ kubectl create namespace yunikorn
# TODO: allow to install a customized YuniKorn version to run the soak test
helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-## Deploy kwok in a Cluster
+# Deploy kwok in a Cluster
helm repo add kwok https://kwok.sigs.k8s.io/charts/
helm upgrade --namespace kube-system --install kwok kwok/kwok
helm upgrade --install kwok kwok/stage-fast
+
+# Install Helm chart for autoscaler with Kwok provider
+helm repo add autoscaler https://kubernetes.github.io/autoscaler
+helm repo update
+helm upgrade --install autoscaler autoscaler/cluster-autoscaler --set
cloudProvider=kwok --set
"autoDiscovery.clusterName"="kind-${SOAK_TEST_CLUSTER}" --set
"extraArgs.enforce-node-group-min-size"=true
diff --git a/soak/scripts/initial_setup.sh
b/soak/templates/autoscaler-configmap.yaml
similarity index 57%
copy from soak/scripts/initial_setup.sh
copy to soak/templates/autoscaler-configmap.yaml
index 4b4bc33..3ca6613 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/templates/autoscaler-configmap.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,17 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+apiVersion: v1
+items:
+kind: List
+metadata:
+ resourceVersion: ""
diff --git a/soak/templates/kwok-node-template.yaml
b/soak/templates/kwok-node-template.yaml
index 5f1de38..eea8399 100644
--- a/soak/templates/kwok-node-template.yaml
+++ b/soak/templates/kwok-node-template.yaml
@@ -18,23 +18,17 @@ apiVersion: v1
kind: Node
metadata:
annotations:
- node.alpha.kubernetes.io/ttl: "0"
- kwok.x-k8s.io/node: fake
+ cluster-autoscaler.kwok.nodegroup/max-count:
+ cluster-autoscaler.kwok.nodegroup/min-count:
+ cluster-autoscaler.kwok.nodegroup/desired-count:
labels:
beta.kubernetes.io/arch: amd64
beta.kubernetes.io/os: linux
kubernetes.io/arch: amd64
- kubernetes.io/hostname: kwok-node-i
+ kubernetes.io/hostname: kwok-node
+ kwok-nodegroup: kwok-node
kubernetes.io/os: linux
- kubernetes.io/role: agent
- node-role.kubernetes.io/agent: ""
- type: kwok
- name: kwok-node-i
-spec:
- taints: # Avoid scheduling actual running pods to fake Node
- - effect: NoSchedule
- key: kwok.x-k8s.io/node
- value: fake
+ name: kwok-node
status:
allocatable:
cpu: 32
@@ -55,4 +49,3 @@ status:
operatingSystem: linux
osImage: ""
systemUUID: ""
- phase: Running
diff --git a/soak/scripts/initial_setup.sh b/soak/templates/test-workload.yaml
similarity index 50%
copy from soak/scripts/initial_setup.sh
copy to soak/templates/test-workload.yaml
index 4b4bc33..2d6b66d 100644
--- a/soak/scripts/initial_setup.sh
+++ b/soak/templates/test-workload.yaml
@@ -1,5 +1,3 @@
-#!/usr/bin/env bash
-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -16,17 +14,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# create a kind cluster
-kind create cluster --name soak-test-cluster
-
-# install YuniKorn scheduler on kind Cluster
-helm repo add yunikorn https://apache.github.io/yunikorn-release
-helm repo update
-kubectl create namespace yunikorn
-# TODO: allow to install a customized YuniKorn version to run the soak test
-helm install yunikorn yunikorn/yunikorn --namespace yunikorn
-
-## Deploy kwok in a Cluster
-helm repo add kwok https://kwok.sigs.k8s.io/charts/
-helm upgrade --namespace kube-system --install kwok kwok/kwok
-helm upgrade --install kwok kwok/stage-fast
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ name: test-workload
+spec:
+ replicas: 15
+ selector:
+ matchLabels:
+ app: fake-pod
+ template:
+ metadata:
+ labels:
+ app: fake-pod
+ kwok-nodegroup: "kwok-node"
+ applicationId: "test-workload-queue-a"
+ queue: root.a
+ spec:
+ schedulerName: yunikorn
+ tolerations:
+ - effect: NoSchedule
+ key: kwok-provider
+ value: "true"
+ containers:
+ - name: fake-container
+ image: docker-upstream.apple.com/alpine:latest
+ command: ["sleep", "300"]
+ resources:
+ requests:
+ cpu: "30"
+ memory: "1Gi"
+ limits:
+ cpu: "30"
+ memory: "1Gi"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]