This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/superset-kubernetes-operator.git


The following commit(s) were added to refs/heads/main by this push:
     new 22d8688  fix(lifecycle): validate cron schedules and unstick maintenance on webServer removal (#48)
22d8688 is described below

commit 22d868809a46f11cd503131c4733aaf36709e5ab
Author: Ville Brofeldt <[email protected]>
AuthorDate: Tue May 12 10:27:18 2026 -0700

    fix(lifecycle): validate cron schedules and unstick maintenance on webServer removal (#48)
---
 docs/architecture/internals.md     |  9 ++++++++-
 docs/index.md                      | 13 ++++++-------
 internal/controller/lifecycle.go   |  3 +++
 internal/controller/maintenance.go |  8 ++++++++
 internal/controller/schedule.go    | 24 ++++++++++++++++++++++++
 5 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/docs/architecture/internals.md b/docs/architecture/internals.md
index 2796218..2a67b5a 100644
--- a/docs/architecture/internals.md
+++ b/docs/architecture/internals.md
@@ -336,12 +336,19 @@ During lifecycle drain, the parent:
 
 - Service selector changes propagate in ~1 second via the endpoints controller,
   giving instant traffic switchover regardless of ingress implementation
-- Works for all access patterns: Ingress, direct Service, port-forward
+- Works for all access patterns: Ingress, Gateway API, direct Service
 - No orphan deletion complexity — the Service is always owned by the parent,
   so GC of child CRs never affects it
 - The child `SupersetWebServer` reconciler skips Service management (the parent
   handles it), keeping the child controller simple
 
+> **Note for developers using `kubectl port-forward`:** port-forward establishes a
+> tunnel to a specific pod, not through the Service selector. When that pod is
+> deleted during drain, the tunnel breaks with a "lost connection to pod" error.
+> This does not affect Ingress/Gateway users — they route through EndpointSlices
+> and see seamless transitions. Restart port-forward to reconnect to the
+> maintenance pod.
+
 ### Alternatives Considered
 
 **Orphan deletion + selector patch** (previous design): Used `propagationPolicy:
diff --git a/docs/index.md b/docs/index.md
index 84a7a87..72862a7 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -29,14 +29,13 @@ The operator manages the full Superset lifecycle: database migrations, configura
 ## Features
 
 - **Sane defaults** — production-ready settings out of the box that adapt automatically to your workload
-- **Painless management** — structured configuration fields with per-component config generated automatically
-- **Full control** — every default is overridable, from high-level presets down to individual fields, with a raw Python escape hatch for anything not covered
-- **Flat configuration** — shared top-level defaults inherited by all components, with per-component overrides (primitives replace, collections merge)
+- **Automatic config rendering** — structured fields for metastore, Valkey, Gunicorn, and Celery generate correct `superset_config.py` per component; config changes trigger rolling restarts
+- **Full control** — every default is overridable, from high-level presets down to individual container fields, with a raw Python escape hatch for anything not covered
 - **Component toggle** — enable CeleryWorker, CeleryBeat, CeleryFlower, WebsocketServer, or McpServer by setting their spec; omit to disable
-- **Lifecycle management** — database cloning, migration, and initialization run as managed Pods before components deploy
-- **Checksum-driven rollouts** — config changes automatically trigger rolling restarts of affected components
-- **Networking** — Gateway API (HTTPRoute) and Ingress support
-- **HPA with custom metrics**, PodDisruptionBudgets, NetworkPolicies, Prometheus ServiceMonitor
+- **Zero-downtime upgrades** — maintenance page serves users during database migrations; the operator drains components gracefully, runs lifecycle tasks, and restores traffic only after the new version is healthy
+- **Database cloning** — snapshot a production database into staging or QA environments on demand or on a cron schedule, with automatic migration and init afterward
+- **Networking** — Gateway API (HTTPRoute) and Ingress support with per-component routing
+- **Production hardening** — HPA with custom metrics, PodDisruptionBudgets, NetworkPolicies, Prometheus ServiceMonitor
 
 ## What it looks like
 
diff --git a/internal/controller/lifecycle.go b/internal/controller/lifecycle.go
index c7598b8..fd76dd0 100644
--- a/internal/controller/lifecycle.go
+++ b/internal/controller/lifecycle.go
@@ -105,6 +105,9 @@ func (r *SupersetReconciler) reconcileLifecycle(
                superset.Status.Lifecycle = &supersetv1alpha1.LifecycleStatus{}
        }
 
+       // Validate cron schedules early so invalid expressions are surfaced immediately.
+       r.validateSchedules(superset)
+
        // Resolve the current lifecycle image.
        var imageOverride *supersetv1alpha1.ImageOverrideSpec
        if superset.Spec.Lifecycle != nil {
diff --git a/internal/controller/maintenance.go b/internal/controller/maintenance.go
index 79412b2..8b16366 100644
--- a/internal/controller/maintenance.go
+++ b/internal/controller/maintenance.go
@@ -128,6 +128,14 @@ func (r *SupersetReconciler) reconcileMaintenanceReturn(
        }
        log := logf.FromContext(ctx)
 
+       // If webServer was removed while maintenance is active, clear immediately
+       // rather than waiting forever for a Deployment that won't come.
+       if superset.Spec.WebServer == nil {
+               superset.Status.Lifecycle.MaintenanceActive = false
+               log.Info("WebServer removed while maintenance active, clearing maintenance")
+               return true, nil
+       }
+
        // Check web-server Deployment readiness before switching traffic.
        webDeployName := naming.ResourceBaseName(superset.Name, naming.ComponentWebServer)
        deploy := &appsv1.Deployment{}
diff --git a/internal/controller/schedule.go b/internal/controller/schedule.go
index 231ba3e..2e23b02 100644
--- a/internal/controller/schedule.go
+++ b/internal/controller/schedule.go
@@ -21,6 +21,7 @@ package controller
 import (
        "time"
 
+       corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 
        supersetv1alpha1 "github.com/apache/superset-kubernetes-operator/api/v1alpha1"
@@ -105,3 +106,26 @@ func (r *SupersetReconciler) projectScheduleStatus(superset *supersetv1alpha1.Su
                taskRef.NextScheduleAt = &t
        }
 }
+
+// validateSchedules checks all active cron expressions for validity and sets
+// a warning condition + event if any are invalid.
+func (r *SupersetReconciler) validateSchedules(superset *supersetv1alpha1.Superset) {
+       if superset.Spec.Lifecycle == nil {
+               return
+       }
+       if superset.Spec.Lifecycle.Clone != nil && superset.Spec.Lifecycle.Clone.CronSchedule != nil &&
+               !isDisabled(superset.Spec.Lifecycle.Clone.Disabled) {
+               expr := *superset.Spec.Lifecycle.Clone.CronSchedule
+               if err := schedule.Validate(expr); err != nil {
+                       setCondition(&superset.Status.Conditions, conditionTypeScheduleValid,
+                               metav1.ConditionFalse, "InvalidCronSchedule", err.Error(), superset.Generation)
+                       r.Recorder.Eventf(superset, nil, corev1.EventTypeWarning, "InvalidCronSchedule", "Lifecycle",
+                               "Clone cron schedule is invalid: %v", err)
+                       return
+               }
+       }
+       setCondition(&superset.Status.Conditions, conditionTypeScheduleValid,
+               metav1.ConditionTrue, "SchedulesValid", "All cron schedules are valid", superset.Generation)
+}
+
+const conditionTypeScheduleValid = "ScheduleValid"

Reply via email to