This is an automated email from the ASF dual-hosted git repository.

pearl11594 pushed a commit to branch ghi12249-cleanupImplementingNetworks
in repository https://gitbox.apache.org/repos/asf/cloudstack.git

commit f59ff96998efb269e1d4fb069f64ea37fb86fac2
Author: Pearl Dsilva <[email protected]>
AuthorDate: Mon Jan 12 11:46:50 2026 -0500

    Allow cleaning up of networks stuck in Implementing state
---
 .../framework/jobs/impl/AsyncJobManagerImpl.java   | 76 ++++++++++++++++++++++
 tools/marvin/setup.py                              |  2 +-
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
index 80140b0d950..d11ecbdc124 100644
--- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
+++ b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java
@@ -969,6 +969,13 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
                     }
 
                     logger.trace("End cleanup expired async-jobs");
+
+                    // 3) Cleanup orphaned networks stuck in Implementing state without async jobs
+                    try {
+                        cleanupOrphanedNetworks();
+                    } catch (Throwable e) {
+                        logger.error("Unexpected exception when trying to 
cleanup orphaned networks", e);
+                    }
                 } catch (Throwable e) {
                     logger.error("Unexpected exception when trying to execute 
queue item, ", e);
                 }
@@ -1284,6 +1291,75 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
         }
     }
 
+    /**
+     * Cleanup networks that are stuck in Implementing state without associated async jobs.
+     * Similar to how expired unfinished jobs are cleaned up, this only processes networks
+     * that have been stuck for longer than the job expiration threshold.
+     */
+    private void cleanupOrphanedNetworks() {
+        try {
+            SearchCriteria<NetworkVO> sc = networkDao.createSearchCriteria();
+            sc.addAnd("state", SearchCriteria.Op.EQ, 
Network.State.Implementing);
+            sc.addAnd("removed", SearchCriteria.Op.NULL);
+            List<NetworkVO> implementingNetworks = networkDao.search(sc, null);
+
+            if (implementingNetworks == null || 
implementingNetworks.isEmpty()) {
+                return;
+            }
+
+            logger.debug("Found {} networks in Implementing state, checking 
for orphaned networks", implementingNetworks.size());
+
+            final long expireMinutes = JobExpireMinutes.value();
+            final Date cutoffTime = new Date(System.currentTimeMillis() - 
(expireMinutes * 60 * 1000));
+
+            for (NetworkVO network : implementingNetworks) {
+                if (network.getCreated().after(cutoffTime)) {
+                    logger.trace("Network {} in Implementing state is only {} 
minutes old (threshold: {} minutes), skipping cleanup",
+                               network.getId(),
+                               (System.currentTimeMillis() - 
network.getCreated().getTime()) / 60000,
+                               expireMinutes);
+                    continue;
+                }
+
+                List<AsyncJobVO> jobs = 
_jobDao.findInstancePendingAsyncJobs("Network", network.getAccountId());
+                boolean hasActiveJob = false;
+                for (AsyncJobVO job : jobs) {
+                    if (job.getInstanceId() != null && 
job.getInstanceId().equals(network.getId())) {
+                        hasActiveJob = true;
+                        break;
+                    }
+                }
+
+                if (hasActiveJob) {
+                    logger.debug("Network {} in Implementing state has active 
async job, skipping cleanup", network.getId());
+                    continue;
+                }
+
+                logger.warn("Found orphaned network {} in Implementing state 
without async job. " +
+                           "Network created: {}, age: {} minutes, expiration 
threshold: {} minutes. Transitioning to Shutdown state.",
+                           network.getId(), network.getCreated(),
+                           (System.currentTimeMillis() - 
network.getCreated().getTime()) / 60000,
+                           expireMinutes);
+                updateNetworkState(network);
+
+            }
+        } catch (Exception e) {
+            logger.error("Error while cleaning up orphaned networks", e);
+        }
+    }
+
+    private void updateNetworkState(NetworkVO network) {
+        try {
+            networkOrchestrationService.stateTransitTo(network, 
Network.Event.OperationFailed);
+            logger.info("Successfully transitioned orphaned network {} to 
Shutdown state using state machine", network.getId());
+        } catch (final NoTransitionException e) {
+            logger.debug("State transition failed for orphaned network {}, 
forcing state update", network.getId());
+            network.setState(Network.State.Shutdown);
+            networkDao.update(network.getId(), network);
+            logger.info("Successfully forced orphaned network {} to Shutdown 
state", network.getId());
+        }
+    }
+
     @Override
     public void onManagementNodeJoined(List<? extends ManagementServerHost> 
nodeList, long selfNodeId) {
     }
diff --git a/tools/marvin/setup.py b/tools/marvin/setup.py
index f01863d6989..1ceff3dc204 100644
--- a/tools/marvin/setup.py
+++ b/tools/marvin/setup.py
@@ -27,7 +27,7 @@ except ImportError:
         raise RuntimeError("python setuptools is required to build Marvin")
 
 
-VERSION = "4.20.3.0-SNAPSHOT"
+VERSION = "4.20.3.0"
 
 setup(name="Marvin",
       version=VERSION,

Reply via email to