This is an automated email from the ASF dual-hosted git repository.

kaihsun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/submarine.git


The following commit(s) were added to refs/heads/master by this push:
     new 72edff6  SUBMARINE-825. Handle non-namespaced resources in delete 
event handler
72edff6 is described below

commit 72edff68224540937b31917fdc035542d727b26a
Author: Kai-Hsun Chen <[email protected]>
AuthorDate: Tue May 11 11:13:36 2021 +0800

    SUBMARINE-825. Handle non-namespaced resources in delete event handler
    
    ### What is this PR for?
    #### Issue 1
    
    Resources can be categorized into four categories
    
    Type 1: namespaced resource created by client-go API
    
    Type 2: non-namespaced resource created by client-go API, ex: persistent 
volume
    
    Type 3: namespaced resource created by Helm API
    
    Type 4: non-namespaced resource created by Helm API, ex: clusterrole 
(traefik)
    
    Currently, the delete event resource handler can handle type 1, 2, 3, but 
this is not enough. Thus, this Jira aims to handle Type 4. If we do not handle 
Type 4, the error message will be shown as follows.
    
    ```
    2021/05/11 09:58:50 rendered manifests contain a resource that already 
exists. Unable to continue with install: ClusterRole "traefik" in namespace "" 
exists and cannot be imported into the current release: invalid ownership 
metadata; annotation validation error: key "meta.helm.sh/release-namespace" 
must equal "submarine-operator-test": current value is 
"submarine-operator-test-2"
    ```
    
    #### Issue 2
    Originally, we have two workers to handle the WorkQueueItems in the 
WorkQueue. However, it will cause some race conditions. To elaborate, worker A 
and worker B call `helm install traefik` at the same time, and thus one of the 
workers will report the error `release exist `.
    
    To solve this problem, we change the number of workers to 1 temporarily. 
However, this solution is not good enough, and thus we need use concurrency 
techniques, such as Lock and Channel, to avoid these race conditions.
    
    ### What type of PR is it?
    [Bug Fix]
    
    ### Todos
    * Handle race conditions caused by multi workers
    * Maintain `charts []helm.HelmUninstallInfo` for multi-tenant
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/browse/SUBMARINE-825
    
    ### How should this be tested?
    ```
    # Step1: Out-of-cluster
    go build -o submarine-operator
    ./submarine-operator
    
    # Step2: Create submarine in namespace "submarine-operator-test"
    kubectl create ns submarine-operator-test
    kubectl apply -n submarine-operator-test -f 
artifacts/examples/example-submarine.yaml
    
    # Step2: Delete Custom Resource
    kubectl delete submarine example-submarine -n submarine-operator-test
    
    # Step3: Check the result (non-namespaced resources)
    kubectl get ns
    kubectl get pv
    kubectl get clusterrole
    
    # Step4: Create submarine in namespace "submarine-operator-test-2"
    kubectl create ns submarine-operator-test-2
    kubectl apply -n submarine-operator-test-2 -f 
artifacts/examples/example-submarine.yaml
    
    # Step5: Delete Custom Resource
    kubectl delete submarine example-submarine -n submarine-operator-test-2
    
    # Step6: Check the result (non-namespaced resources)
    kubectl get ns
    kubectl get pv
    kubectl get clusterrole
    ```
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need updating? No
    * Are there breaking changes for older versions? No
    * Does this need new documentation? No
    
    Author: Kai-Hsun Chen <[email protected]>
    
    Signed-off-by: Kai-Hsun Chen <[email protected]>
    
    Closes #584 from kevin85421/SUBMARINE-825 and squashes the following 
commits:
    
    260f182d [Kai-Hsun Chen] Update README.md
    1d79985f [Kai-Hsun Chen] Avoid race condition
    874ca6fd [Kai-Hsun Chen] SUBMARINE-825. Delete Event Handler needs to 
handle non-namespaced resources
---
 submarine-cloud-v2/README.md        |  5 ++++-
 submarine-cloud-v2/controller.go    | 35 ++++++++++++++++++++++-------------
 submarine-cloud-v2/main.go          |  2 +-
 submarine-cloud-v2/pkg/helm/helm.go | 20 +++++++++++++-------
 4 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/submarine-cloud-v2/README.md b/submarine-cloud-v2/README.md
index 2bc6b28..c3f95d6 100644
--- a/submarine-cloud-v2/README.md
+++ b/submarine-cloud-v2/README.md
@@ -62,10 +62,13 @@ make image
 kubectl apply -f artifacts/examples/submarine-operator-service-account.yaml
 
 # Step3: Deploy a submarine-operator
+kubectl apply -f artifacts/examples/submarine-operator.yaml
+
+# Step4: Deploy a submarine
 kubectl create ns submarine-operator-test
 kubectl apply -n submarine-operator-test -f 
artifacts/examples/example-submarine.yaml
 
-# Step4: Inspect submarine-operator POD logs 
+# Step5: Inspect submarine-operator POD logs 
 kubectl logs ${submarine-operator POD}
 ```
 
diff --git a/submarine-cloud-v2/controller.go b/submarine-cloud-v2/controller.go
index bd64f10..bf54e36 100644
--- a/submarine-cloud-v2/controller.go
+++ b/submarine-cloud-v2/controller.go
@@ -93,6 +93,10 @@ type Controller struct {
        // recorder is an event recorder for recording Event resources to the
        // Kubernetes API.
        recorder record.EventRecorder
+
+       // TODO: Need to be modified to implement multi-tenant
+       // Store charts
+       charts []helm.HelmUninstallInfo
 }
 
 const (
@@ -185,7 +189,7 @@ func (c *Controller) Run(threadiness int, stopCh <-chan 
struct{}) error {
        }
 
        klog.Info("Starting workers")
-       // Launch two workers to process Submarine resources
+       // Launch $threadiness workers to process Submarine resources
        for i := 0; i < threadiness; i++ {
                go wait.Until(c.runWorker, time.Second, stopCh)
        }
@@ -746,46 +750,46 @@ func (c *Controller) newSubCharts(namespace string) error 
{
 
        if !helm.CheckRelease("traefik", namespace) {
                klog.Info("[Helm] Install Traefik")
-               helm.HelmInstallLocalChart(
+               c.charts = append(c.charts, helm.HelmInstallLocalChart(
                        "traefik",
                        "charts/traefik",
                        "traefik",
                        namespace,
                        map[string]string{},
-               )
+               ))
        }
 
        if !helm.CheckRelease("notebook-controller", namespace) {
                klog.Info("[Helm] Install Notebook-Controller")
-               helm.HelmInstallLocalChart(
+               c.charts = append(c.charts, helm.HelmInstallLocalChart(
                        "notebook-controller",
                        "charts/notebook-controller",
                        "notebook-controller",
                        namespace,
                        map[string]string{},
-               )
+               ))
        }
 
        if !helm.CheckRelease("tfjob", namespace) {
                klog.Info("[Helm] Install TFjob")
-               helm.HelmInstallLocalChart(
+               c.charts = append(c.charts, helm.HelmInstallLocalChart(
                        "tfjob",
                        "charts/tfjob",
                        "tfjob",
                        namespace,
                        map[string]string{},
-               )
+               ))
        }
 
        if !helm.CheckRelease("pytorchjob", namespace) {
                klog.Info("[Helm] Install pytorchjob")
-               helm.HelmInstallLocalChart(
+               c.charts = append(c.charts, helm.HelmInstallLocalChart(
                        "pytorchjob",
                        "charts/pytorchjob",
                        "pytorchjob",
                        namespace,
                        map[string]string{},
-               )
+               ))
        }
 
        // TODO: maintain "error"
@@ -1122,13 +1126,20 @@ func (c *Controller) syncHandler(workqueueItem 
WorkQueueItem) error {
                if err != nil {
                        return err
                }
-       } else {
-               // DELETE
+       } else { // Case: DELETE
+               // Uninstall Helm charts
+               for _, chart := range c.charts {
+                       helm.HelmUninstall(chart)
+               }
+               c.charts = nil
+
+               // Delete namespace: Delete all namespaced resources, ex: POD, 
Deployment, ... etc.
                err = 
c.kubeclientset.CoreV1().Namespaces().Delete(context.TODO(), namespace, 
metav1.DeleteOptions{})
                if err != nil {
                        return err
                }
 
+               // Delete non-namespaced resources (ex: PersistentVolume)
                err = 
c.kubeclientset.CoreV1().PersistentVolumes().Delete(context.TODO(), 
"submarine-database-pv--"+namespace, metav1.DeleteOptions{})
                if err != nil {
                        return err
@@ -1138,8 +1149,6 @@ func (c *Controller) syncHandler(workqueueItem 
WorkQueueItem) error {
                if err != nil {
                        return err
                }
-
-               klog.Info("Delete Namespace: ", namespace)
        }
 
        return nil
diff --git a/submarine-cloud-v2/main.go b/submarine-cloud-v2/main.go
index c2e6125..47236e9 100644
--- a/submarine-cloud-v2/main.go
+++ b/submarine-cloud-v2/main.go
@@ -102,7 +102,7 @@ func main() {
        traefikInformerFactory.Start(stopCh)
 
        // Run controller
-       if err = controller.Run(2, stopCh); err != nil {
+       if err = controller.Run(1, stopCh); err != nil {
                klog.Fatalf("Error running controller: %s", err.Error())
        }
 }
diff --git a/submarine-cloud-v2/pkg/helm/helm.go 
b/submarine-cloud-v2/pkg/helm/helm.go
index 3961e00..c1cb150 100644
--- a/submarine-cloud-v2/pkg/helm/helm.go
+++ b/submarine-cloud-v2/pkg/helm/helm.go
@@ -45,7 +45,12 @@ import (
        "helm.sh/helm/v3/pkg/strvals"
 )
 
-func HelmInstall(url string, repoName string, chartName string, releaseName 
string, namespace string, args map[string]string) *action.Configuration {
+type HelmUninstallInfo struct {
+       config      *action.Configuration
+       releaseName string
+}
+
+func HelmInstall(url string, repoName string, chartName string, releaseName 
string, namespace string, args map[string]string) HelmUninstallInfo {
        var settings *cli.EnvSettings
        os.Setenv("HELM_NAMESPACE", namespace)
        settings = cli.New()
@@ -55,21 +60,22 @@ func HelmInstall(url string, repoName string, chartName 
string, releaseName stri
        RepoUpdate(settings)
        // Install charts
        cfg := InstallChart(releaseName, repoName, chartName, args, settings)
-       return cfg
+       return HelmUninstallInfo{config: cfg, releaseName: releaseName}
 }
 
-func HelmInstallLocalChart(chartName string, chartPath string, releaseName 
string, namespace string, args map[string]string) *action.Configuration {
+func HelmInstallLocalChart(chartName string, chartPath string, releaseName 
string, namespace string, args map[string]string) HelmUninstallInfo {
        var settings *cli.EnvSettings
        os.Setenv("HELM_NAMESPACE", namespace)
        settings = cli.New()
        // Install charts
        cfg := InstallLocalChart(releaseName, chartName, chartPath, args, 
settings)
-       return cfg
+       return HelmUninstallInfo{config: cfg, releaseName: releaseName}
 }
 
-func HelmUninstall(releaseName string, cfg *action.Configuration) error {
-       uninstall := action.NewUninstall(cfg)
-       _, err := uninstall.Run(releaseName)
+func HelmUninstall(info HelmUninstallInfo) error {
+       debug("HelmUninstall: release %s", info.releaseName)
+       uninstall := action.NewUninstall(info.config)
+       _, err := uninstall.Run(info.releaseName)
        return err
 }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to