This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/spark-kubernetes-operator.git
The following commit(s) were added to refs/heads/main by this push:
new b2cee84 [SPARK-49790] Support `HPA` template for `SparkCluster`
b2cee84 is described below
commit b2cee8443e7760b82e63bc9b343a5b9279c0ae6a
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Sep 25 14:58:30 2024 -0700
[SPARK-49790] Support `HPA` template for `SparkCluster`
### What changes were proposed in this pull request?
This PR aims to support `HPA` template for `SparkCluster`.
### Why are the changes needed?
`SparkCluster` needs generated values for the following `HPA`
fields.
```
maxReplicas:
minReplicas:
scaleTargetRef:
apiVersion: apps/v1
kind: StatefulSet
name:
```
We still allow users to tune the HPA for their cluster usage pattern
like the following.
```yaml
horizontalPodAutoscalerSpec:
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 10
behavior:
scaleUp:
policies:
- type: Pods
value: 1
periodSeconds: 10
scaleDown:
policies:
- type: Pods
value: 1
periodSeconds: 1200
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Pass the CIs and do a manual review.
- Delete the existing CRD because it's changed.
```
$ kubectl delete crd sparkclusters.spark.apache.org
```
- Build and Install
```
$ gradle build buildDockerImage spark-operator-api:relocateGeneratedCRD
$ helm install spark-kubernetes-operator -f
build-tools/helm/spark-kubernetes-operator/values.yaml
build-tools/helm/spark-kubernetes-operator/
```
- Create a `SparkCluster` with HPA template via the given example.
```
$ kubectl apply -f examples/cluster-with-hpa-template.yaml
$ kubectl get hpa cluster-with-hpa-template-worker-hpa -oyaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
creationTimestamp: "2024-09-25T21:11:40Z"
labels:
spark.operator/name: spark-kubernetes-operator
spark.operator/spark-cluster-name: cluster-with-hpa-template
name: cluster-with-hpa-template-worker-hpa
namespace: default
...
spec:
behavior:
scaleDown:
policies:
- periodSeconds: 1200
type: Pods
value: 1
selectPolicy: Max
scaleUp:
policies:
- periodSeconds: 10
type: Pods
value: 1
selectPolicy: Max
stabilizationWindowSeconds: 0
maxReplicas: 2
metrics:
- resource:
name: cpu
target:
averageUtilization: 10
type: Utilization
type: Resource
minReplicas: 1
...
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #137 from dongjoon-hyun/SPARK-49790.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
examples/cluster-with-hpa-template.yaml | 64 +++++++++++++++++++++
.../apache/spark/k8s/operator/spec/WorkerSpec.java | 2 +
.../k8s/operator/SparkClusterResourceSpec.java | 66 +++++++++++++---------
3 files changed, 104 insertions(+), 28 deletions(-)
diff --git a/examples/cluster-with-hpa-template.yaml
b/examples/cluster-with-hpa-template.yaml
new file mode 100644
index 0000000..cee5b18
--- /dev/null
+++ b/examples/cluster-with-hpa-template.yaml
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: spark.apache.org/v1alpha1
+kind: SparkCluster
+metadata:
+ name: cluster-with-hpa-template
+spec:
+ runtimeVersions:
+ sparkVersion: "4.0.0-preview2"
+ clusterTolerations:
+ instanceConfig:
+ initWorkers: 1
+ minWorkers: 1
+ maxWorkers: 2
+ workerSpec:
+ statefulSetSpec:
+ template:
+ spec:
+ containers:
+ - name: worker
+ resources:
+ requests:
+ cpu: "3"
+ memory: "3Gi"
+ limits:
+ cpu: "3"
+ memory: "3Gi"
+ horizontalPodAutoscalerSpec:
+ metrics:
+ - type: Resource
+ resource:
+ name: cpu
+ target:
+ type: Utilization
+ averageUtilization: 10
+ behavior:
+ scaleUp:
+ policies:
+ - type: Pods
+ value: 1
+ periodSeconds: 10
+ scaleDown:
+ policies:
+ - type: Pods
+ value: 1
+ periodSeconds: 1200
+ sparkConf:
+ spark.kubernetes.container.image: "apache/spark:4.0.0-preview2-java21"
+ spark.master.ui.title: "Cluster with HorizontalPodAutoscaler Template"
+ spark.master.rest.enabled: "true"
+ spark.master.rest.host: "0.0.0.0"
+ spark.ui.reverseProxy: "true"
diff --git
a/spark-operator-api/src/main/java/org/apache/spark/k8s/operator/spec/WorkerSpec.java
b/spark-operator-api/src/main/java/org/apache/spark/k8s/operator/spec/WorkerSpec.java
index 04f5abe..78b86e3 100644
---
a/spark-operator-api/src/main/java/org/apache/spark/k8s/operator/spec/WorkerSpec.java
+++
b/spark-operator-api/src/main/java/org/apache/spark/k8s/operator/spec/WorkerSpec.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonInclude;
import io.fabric8.kubernetes.api.model.ObjectMeta;
import io.fabric8.kubernetes.api.model.ServiceSpec;
import io.fabric8.kubernetes.api.model.apps.StatefulSetSpec;
+import
io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerSpec;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
@@ -38,4 +39,5 @@ public class WorkerSpec {
protected ObjectMeta statefulSetMetadata;
protected ServiceSpec serviceSpec;
protected ObjectMeta serviceMetadata;
+ protected HorizontalPodAutoscalerSpec horizontalPodAutoscalerSpec;
}
diff --git
a/spark-submission-worker/src/main/java/org/apache/spark/k8s/operator/SparkClusterResourceSpec.java
b/spark-submission-worker/src/main/java/org/apache/spark/k8s/operator/SparkClusterResourceSpec.java
index 301c049..e7bd8d4 100644
---
a/spark-submission-worker/src/main/java/org/apache/spark/k8s/operator/SparkClusterResourceSpec.java
+++
b/spark-submission-worker/src/main/java/org/apache/spark/k8s/operator/SparkClusterResourceSpec.java
@@ -35,6 +35,8 @@ import
io.fabric8.kubernetes.api.model.apps.StatefulSetBuilder;
import io.fabric8.kubernetes.api.model.apps.StatefulSetSpec;
import io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscaler;
import
io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerBuilder;
+import
io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerSpec;
+import
io.fabric8.kubernetes.api.model.autoscaling.v2.HorizontalPodAutoscalerSpecBuilder;
import io.fabric8.kubernetes.api.model.autoscaling.v2.MetricSpecBuilder;
import lombok.Getter;
@@ -278,13 +280,48 @@ public class SparkClusterResourceSpec {
if (instanceConfig.getMinWorkers() >= instanceConfig.getMaxWorkers()) {
return Optional.empty();
}
+ HorizontalPodAutoscalerSpec horizontalPodAutoscalerSpec;
+ if (spec.getWorkerSpec().getHorizontalPodAutoscalerSpec() != null) {
+ horizontalPodAutoscalerSpec =
spec.getWorkerSpec().getHorizontalPodAutoscalerSpec();
+ } else {
+ horizontalPodAutoscalerSpec =
+ new HorizontalPodAutoscalerSpecBuilder()
+ .addToMetrics(
+ new MetricSpecBuilder()
+ .withType("Resource")
+ .withNewResource()
+ .withName("cpu")
+ .withNewTarget()
+ .withType("Utilization")
+ .withAverageUtilization(30)
+ .endTarget()
+ .endResource()
+ .build())
+ .withNewBehavior()
+ .withNewScaleUp()
+ .addNewPolicy()
+ .withType("Pods")
+ .withValue(1)
+ .withPeriodSeconds(60)
+ .endPolicy()
+ .endScaleUp()
+ .withNewScaleDown()
+ .addNewPolicy()
+ .withType("Pods")
+ .withValue(1)
+ .withPeriodSeconds(600)
+ .endPolicy()
+ .endScaleDown()
+ .endBehavior()
+ .build();
+ }
return Optional.of(
new HorizontalPodAutoscalerBuilder()
.withNewMetadata()
.withNamespace(namespace)
.withName(clusterName + "-worker-hpa")
.endMetadata()
- .withNewSpec()
+ .withNewSpecLike(horizontalPodAutoscalerSpec)
.withNewScaleTargetRef()
.withApiVersion("apps/v1")
.withKind("StatefulSet")
@@ -292,33 +329,6 @@ public class SparkClusterResourceSpec {
.endScaleTargetRef()
.withMinReplicas(instanceConfig.getMinWorkers())
.withMaxReplicas(instanceConfig.getMaxWorkers())
- .addToMetrics(
- new MetricSpecBuilder()
- .withType("Resource")
- .withNewResource()
- .withName("cpu")
- .withNewTarget()
- .withType("Utilization")
- .withAverageUtilization(30)
- .endTarget()
- .endResource()
- .build())
- .withNewBehavior()
- .withNewScaleUp()
- .addNewPolicy()
- .withType("Pods")
- .withValue(1)
- .withPeriodSeconds(60)
- .endPolicy()
- .endScaleUp()
- .withNewScaleDown()
- .addNewPolicy()
- .withType("Pods")
- .withValue(1)
- .withPeriodSeconds(600)
- .endPolicy()
- .endScaleDown()
- .endBehavior()
.endSpec()
.build());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]