This is an automated email from the ASF dual-hosted git repository.
chenyulin0719 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/yunikorn-k8shim.git
The following commit(s) were added to refs/heads/master by this push:
new 7b89664a [YUNIKORN-2811] Warn if a pod has inconsistent metadata in
the shim (#964)
7b89664a is described below
commit 7b89664ad52b1bf606509cfef5f1121d076e5887
Author: Yu-Lin Chen <[email protected]>
AuthorDate: Sun Apr 20 05:53:08 2025 +0000
[YUNIKORN-2811] Warn if a pod has inconsistent metadata in the shim (#964)
Closes: #964
Signed-off-by: Yu-Lin Chen <[email protected]>
---
pkg/cache/application.go | 3 +
pkg/cache/task.go | 49 ++++++----
pkg/common/utils/utils.go | 40 ++------
pkg/common/utils/utils_test.go | 201 +++++++++--------------------------------
4 files changed, 82 insertions(+), 211 deletions(-)
diff --git a/pkg/cache/application.go b/pkg/cache/application.go
index 8438957b..d92a4d00 100644
--- a/pkg/cache/application.go
+++ b/pkg/cache/application.go
@@ -392,6 +392,9 @@ func (app *Application) scheduleTasks(taskScheduleCondition
func(t *Task) bool)
if taskScheduleCondition(task) {
// for each new task, we do a sanity check before
moving the state to Pending_Schedule
if err := task.sanityCheckBeforeScheduling(); err ==
nil {
+ // check inconsistent pod metadata before
submitting the task
+ task.checkPodMetadataBeforeScheduling()
+
// note, if we directly trigger submit task
event, it may spawn too many duplicate
// events, because a task might be submitted
multiple times before its state transits to PENDING.
if handleErr := task.handle(
diff --git a/pkg/cache/task.go b/pkg/cache/task.go
index 5a3a3dc3..4dbe69e9 100644
--- a/pkg/cache/task.go
+++ b/pkg/cache/task.go
@@ -21,6 +21,7 @@ package cache
import (
"context"
"fmt"
+ "strings"
"time"
"github.com/looplab/fsm"
@@ -533,28 +534,38 @@ func (task *Task) releaseAllocation() {
// this reduces the scheduling overhead by blocking such
// request away from the core scheduler.
func (task *Task) sanityCheckBeforeScheduling() error {
- // After version 1.7.0, we should reject the task whose pod is unbound
and has conflicting metadata.
- if !utils.PodAlreadyBound(task.pod) {
- if err := utils.CheckAppIdInPod(task.pod); err != nil {
- log.Log(log.ShimCacheTask).Warn("Pod has inconsistent
application metadata and may be rejected in a future YuniKorn release",
- zap.String("appID", task.applicationID),
- zap.String("podName", task.pod.Name),
- zap.String("error", err.Error()))
+ return task.checkPodPVCs()
+}
- events.GetRecorder().Eventf(task.pod.DeepCopy(),
- nil, v1.EventTypeWarning, "Scheduling",
"Scheduling", fmt.Sprintf("Pod has inconsistent application metadata and may be
rejected in a future YuniKorn release: %s", err.Error()))
- }
- if err := utils.CheckQueueNameInPod(task.pod); err != nil {
- log.Log(log.ShimCacheTask).Warn("Pod has inconsistent
queue metadata and may be rejected in a future YuniKorn release",
- zap.String("appID", task.applicationID),
- zap.String("podName", task.pod.Name),
- zap.String("error", err.Error()))
+// throw a warning if the pod has inconsistent metadata
+func (task *Task) checkPodMetadataBeforeScheduling() {
+ appID := utils.GetApplicationIDFromPod(task.pod)
+ ignoredAppIDLabels, ignoredAppIDAnnotation :=
utils.GetIgnoredLabelAnnotationInPod(task.pod, appID, constants.AppIdLabelKeys,
constants.AppIdAnnotationKeys)
+ if len(ignoredAppIDLabels) > 0 || len(ignoredAppIDAnnotation) > 0 {
+ task.logIgnoredPodMetadata("app-id", appID, ignoredAppIDLabels,
ignoredAppIDAnnotation)
+ }
- events.GetRecorder().Eventf(task.pod.DeepCopy(),
- nil, v1.EventTypeWarning, "Scheduling",
"Scheduling", fmt.Sprintf("Pod has inconsistent queue metadata and may be
rejected in a future YuniKorn release: %s", err.Error()))
- }
+ queueName := utils.GetQueueNameFromPod(task.pod)
+ ignoredQueueLabels, ignoredQueueAnnotation :=
utils.GetIgnoredLabelAnnotationInPod(task.pod, queueName,
constants.QueueLabelKeys, constants.QueueAnnotationKeys)
+ if len(ignoredQueueLabels) > 0 || len(ignoredQueueAnnotation) > 0 {
+ task.logIgnoredPodMetadata("queue", queueName,
ignoredQueueLabels, ignoredQueueAnnotation)
}
- return task.checkPodPVCs()
+}
+
+func (task *Task) logIgnoredPodMetadata(metadataType string, fianlValue
string, ignoredLabel map[string]string, ignoredAnnotation map[string]string) {
+ ignoredItems := make([]string, 0)
+ for key, value := range ignoredLabel {
+ ignoredItems = append(ignoredItems, fmt.Sprintf("(Label) %s:
%s", key, value))
+ }
+ for key, value := range ignoredAnnotation {
+ ignoredItems = append(ignoredItems, fmt.Sprintf("(Annotation)
%s: %s", key, value))
+ }
+ logMessage := fmt.Sprintf("Found multiple '%s' value in pod. { podName:
%s, fianlValue: %s, ignored: [%s] }",
+ metadataType, task.pod.Name, fianlValue,
strings.Join(ignoredItems, ", "))
+
+ log.Log(log.ShimCacheTask).Warn(logMessage)
+ events.GetRecorder().Eventf(task.pod.DeepCopy(),
+ nil, v1.EventTypeWarning, "Scheduling", "Scheduling",
logMessage)
}
func (task *Task) checkPodPVCs() error {
diff --git a/pkg/common/utils/utils.go b/pkg/common/utils/utils.go
index f58c96e5..748d4e2e 100644
--- a/pkg/common/utils/utils.go
+++ b/pkg/common/utils/utils.go
@@ -213,51 +213,27 @@ func GetApplicationIDFromPod(pod *v1.Pod) string {
return GenerateApplicationID(pod.Namespace,
conf.GetSchedulerConf().GenerateUniqueAppIds, string(pod.UID))
}
-func CheckAppIdInPod(pod *v1.Pod) error {
- return ValidatePodLabelAnnotation(pod, constants.AppIdLabelKeys,
constants.AppIdAnnotationKeys)
-}
-
-func CheckQueueNameInPod(pod *v1.Pod) error {
- return ValidatePodLabelAnnotation(pod, constants.QueueLabelKeys,
constants.QueueAnnotationKeys)
-}
-
-// return true if all non-empty values are same across all provided
label/annotation
-func ValidatePodLabelAnnotation(pod *v1.Pod, labelKeys []string,
annotationKeys []string) error {
- var referenceKey string
- var referenceValue string
- var referenceType string
+func GetIgnoredLabelAnnotationInPod(pod *v1.Pod, currentValue string,
labelKeys []string, annotationKeys []string) (map[string]string,
map[string]string) {
+ ignoredLabel := make(map[string]string, 0)
+ ignoredAnnotation := make(map[string]string, 0)
- checkingType := constants.Label
for _, key := range labelKeys {
value := GetPodLabelValue(pod, key)
- if value == "" {
+ if value == "" || value == currentValue {
continue
}
- if referenceValue == "" {
- referenceKey = key
- referenceValue = value
- referenceType = checkingType
- } else if referenceValue != value {
- return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s:
\"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue)
- }
+ ignoredLabel[key] = value
}
- checkingType = constants.Annotation
for _, key := range annotationKeys {
value := GetPodAnnotationValue(pod, key)
- if value == "" {
+ if value == "" || value == currentValue {
continue
}
- if referenceValue == "" {
- referenceKey = key
- referenceValue = value
- referenceType = checkingType
- } else if referenceValue != value {
- return fmt.Errorf("%s %s: \"%s\" doesn't match %s %s:
\"%s\"", checkingType, key, value, referenceType, referenceKey, referenceValue)
- }
+ ignoredAnnotation[key] = value
}
- return nil
+ return ignoredLabel, ignoredAnnotation
}
// compare the existing pod condition with the given one, return true if the
pod condition remains not changed.
diff --git a/pkg/common/utils/utils_test.go b/pkg/common/utils/utils_test.go
index 60dd1d48..282205f7 100644
--- a/pkg/common/utils/utils_test.go
+++ b/pkg/common/utils/utils_test.go
@@ -21,7 +21,6 @@ package utils
import (
"bytes"
"compress/gzip"
- "errors"
"fmt"
"reflect"
"strings"
@@ -724,205 +723,87 @@ func TestGetApplicationIDFromPod(t *testing.T) {
}
}
-func TestCheckAppIdInPod(t *testing.T) {
- testCases := []struct {
- name string
- pod *v1.Pod
- expected error
- }{
- {
- name: "consistent app ID",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelApplicationID: "app-123",
- constants.SparkLabelAppID:
"app-123",
- constants.LabelApplicationID:
"app-123",
- },
- Annotations: map[string]string{
-
constants.AnnotationApplicationID: "app-123",
- },
- },
- },
- expected: nil,
- },
- {
- name: "inconsistent app ID in labels",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelApplicationID: "app-123",
- constants.SparkLabelAppID:
"app-456",
- },
- },
- },
- expected: errors.New("label spark-app-selector:
\"app-456\" doesn't match label yunikorn.apache.org/app-id: \"app-123\""),
- },
- {
- name: "inconsistent app ID between label and
annotation",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelApplicationID: "app-123",
- },
- Annotations: map[string]string{
-
constants.AnnotationApplicationID: "app-456",
- },
- },
- },
- expected: errors.New("annotation
yunikorn.apache.org/app-id: \"app-456\" doesn't match label
yunikorn.apache.org/app-id: \"app-123\""),
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- err := CheckAppIdInPod(tc.pod)
- if tc.expected != nil {
- assert.ErrorContains(t, err,
tc.expected.Error())
- } else {
- assert.NilError(t, err)
- }
- })
- }
-}
-
-func TestCheckQueueNameInPod(t *testing.T) {
- testCases := []struct {
- name string
- pod *v1.Pod
- expected error
- }{
- {
- name: "consistent queue name",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelQueueName: "root.a",
- constants.LabelQueueName:
"root.a",
- },
- Annotations: map[string]string{
- constants.AnnotationQueueName:
"root.a",
- },
- },
- },
- expected: nil,
- },
- {
- name: "inconsistent app ID in labels",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelQueueName: "root.a",
- constants.LabelQueueName:
"root.b",
- },
- },
- },
- expected: errors.New("label queue: \"root.b\" doesn't
match label yunikorn.apache.org/queue: \"root.a\""),
- },
- {
- name: "inconsistent app ID between label and
annotation",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
-
constants.CanonicalLabelQueueName: "root.a",
- },
- Annotations: map[string]string{
- constants.AnnotationQueueName:
"root.b",
- },
- },
- },
- expected: errors.New("annotation
yunikorn.apache.org/queue: \"root.b\" doesn't match label
yunikorn.apache.org/queue: \"root.a\""),
- },
- }
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- err := CheckQueueNameInPod(tc.pod)
- if tc.expected != nil {
- assert.ErrorContains(t, err,
tc.expected.Error())
- } else {
- assert.NilError(t, err)
- }
- })
- }
-}
-
-func TestValidatePodLabelAnnotation(t *testing.T) {
+func TestGetIgnoredLabelAnnotationInPod(t *testing.T) {
labelKeys := []string{"labelKey1", "labelKey2"}
annotationKeys := []string{"annotationKey1", "annotationKey2"}
testCases := []struct {
- name string
- pod *v1.Pod
- expected error
+ name string
+ pod *v1.Pod
+ currentValue string
+ labelKeys []string
+ annotationKeys []string
+ expectedIgnoredLabel map[string]string
+ expectedIgnoredAnnotation map[string]string
}{
{
- name: "empty pod",
- pod: &v1.Pod{},
- expected: nil,
+ name: "empty pod",
+ pod: &v1.Pod{},
+ currentValue: "",
+ labelKeys: labelKeys,
+ annotationKeys: annotationKeys,
+ expectedIgnoredLabel: map[string]string{},
+ expectedIgnoredAnnotation: map[string]string{},
},
{
- name: "pod with all values are consistent",
+ name: "have ignored label",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"labelKey1": "value1",
- "labelKey2": "value1",
- },
- Annotations: map[string]string{
- "annotationKey1": "value1",
- "annotationKey2": "value1",
+ "labelKey2": "value2",
},
},
},
- expected: nil,
+ currentValue: "value1",
+ labelKeys: labelKeys,
+ annotationKeys: annotationKeys,
+ expectedIgnoredLabel:
map[string]string{"labelKey2": "value2"},
+ expectedIgnoredAnnotation: map[string]string{},
},
{
- name: "pod with inconsistent value in labels",
+ name: "have ignored annotation",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
- Labels: map[string]string{
- "labelKey1": "value1",
- "labelKey2": "value2",
+ Annotations: map[string]string{
+ "annotationKey1": "value1",
+ "annotationKey2": "value2",
},
},
},
- expected: errors.New("label labelKey2: \"value2\"
doesn't match label labelKey1: \"value1\""),
+ currentValue: "value1",
+ labelKeys: labelKeys,
+ annotationKeys: annotationKeys,
+ expectedIgnoredLabel: map[string]string{},
+ expectedIgnoredAnnotation:
map[string]string{"annotationKey2": "value2"},
},
{
- name: "pod with inconsistent value between label and
annotation",
+ name: "have both ignored label and annotation",
pod: &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"labelKey1": "value1",
+ "labelKey2": "value2",
},
- Annotations: map[string]string{
- "annotationKey1": "value2",
- },
- },
- },
- expected: errors.New("annotation annotationKey1:
\"value2\" doesn't match label labelKey1: \"value1\""),
- },
- {
- name: "pod with inconsistent value in annotations",
- pod: &v1.Pod{
- ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
"annotationKey1": "value1",
"annotationKey2": "value2",
},
},
},
- expected: errors.New("annotation annotationKey2:
\"value2\" doesn't match annotation annotationKey1: \"value1\""),
+ currentValue: "value1",
+ labelKeys: labelKeys,
+ annotationKeys: annotationKeys,
+ expectedIgnoredLabel:
map[string]string{"labelKey2": "value2"},
+ expectedIgnoredAnnotation:
map[string]string{"annotationKey2": "value2"},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
- err := ValidatePodLabelAnnotation(tc.pod, labelKeys,
annotationKeys)
- if tc.expected != nil {
- assert.ErrorContains(t, err,
tc.expected.Error())
- } else {
- assert.NilError(t, err)
- }
+ ignoredLabel, ignoredAnnotation :=
GetIgnoredLabelAnnotationInPod(tc.pod, tc.currentValue, tc.labelKeys,
tc.annotationKeys)
+ assert.DeepEqual(t, ignoredLabel,
tc.expectedIgnoredLabel)
+ assert.DeepEqual(t, ignoredAnnotation,
tc.expectedIgnoredAnnotation)
})
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]