From: Wilson Peng <[email protected]>

    While deploying Tanzu Kubernetes(Antrea based solution) in Broadcom 
customer,
Sometimes it is found that the kernel thread OvsConntrackEntryCleaner is not 
started
After the Windows node is rebooted on unexpected condition.  It could be also
observed a similar issue in local Antrea setup via Clean-AntreaNetwork.ps1 
which will
Remove-VMSwitch  and re-create it on Windows node.

    After checking the local conntrack dump, OVS doesn’t remove the connection 
entries
Even though the time is overdue, we could find the connection entries created 
several
Hours ago in the dump , within a state (TIME_WAIT) that was supposed to be 
deleted earlier.
At that time, the count of the existing entries in the OVS conntrack zone is 
far from the
Up limit, the actual number is 19k. Then we tried to flush the conntrack with 
CMD
"ovs-dpctl.exe flush-conntrack" and all the conntrack entries could be removed.

In this patch, it is adding the complete sanity check when starting 
OvsConntrackEntryCleaner
Thread. If anything abnormal is happening it will rollback the thread creating.

With the fix, Antrea team does help do the regression test and it could PASS. 
And it is
not find the Connectract not timeout issue with same reproducing condition.

It is good to backport the fix to main and backported until 2.17

Signed-off-by: Wilson Peng <[email protected]>
---
 datapath-windows/ovsext/Conntrack.c | 86 ++++++++++++++++++++---------
 1 file changed, 61 insertions(+), 25 deletions(-)

diff --git a/datapath-windows/ovsext/Conntrack.c 
b/datapath-windows/ovsext/Conntrack.c
index 39ba5cc10..fbd036418 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -36,6 +36,9 @@ static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
 static NDIS_SPIN_LOCK ovsCtZoneLock;
+static BOOLEAN ovsCtZoneLock_release = FALSE;
+static BOOLEAN OvsNatInitDone = FALSE;
+static BOOLEAN OvsConntrackCleanThreadExist = FALSE;
 static POVS_CT_ZONE_INFO zoneInfo = NULL;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 static ULONG ctTotalEntries;
@@ -95,32 +98,44 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context)
         goto freeBucketLock;
     }
 
-    ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
-                              &ctThreadCtx.threadObject, NULL);
+    OvsConntrackCleanThreadExist = FALSE;
+    ctThreadCtx.exit = 0;
+    status = ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, 
KernelMode,
+                                       &ctThreadCtx.threadObject, NULL);
     ZwClose(threadHandle);
     threadHandle = NULL;
+    if (!NT_SUCCESS(status)) {
+        ctThreadCtx.exit = 1;
+        KeSetEvent(&ctThreadCtx.event, 0, FALSE);
+        KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
+                               KernelMode, FALSE, NULL);
+        goto cleanupConntrack;
+    }
 
     zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) *
                                         CT_MAX_ZONE, OVS_CT_POOL_TAG);
     if (zoneInfo == NULL) {
         status = STATUS_INSUFFICIENT_RESOURCES;
-        goto freeBucketLock;
+        goto cleanupConntrack;
     }
 
     NdisAllocateSpinLock(&ovsCtZoneLock);
+    ovsCtZoneLock_release = FALSE;
     defaultCtLimit = CT_MAX_ENTRIES;
     for (UINT32 i = 0; i < CT_MAX_ZONE; i++) {
         zoneInfo[i].entries = 0;
         zoneInfo[i].limit = defaultCtLimit;
     }
 
-    status = OvsNatInit();
-
-    if (status != STATUS_SUCCESS) {
-        OvsCleanupConntrack();
+    if (OvsNatInitDone == FALSE) {
+        status = OvsNatInit();
+        if (status != STATUS_SUCCESS) {
+            goto cleanupConntrack;
+        }
+        OvsNatInitDone = TRUE;
     }
+    OvsConntrackCleanThreadExist = TRUE;
     return STATUS_SUCCESS;
-
 freeBucketLock:
     for (UINT32 i = 0; i < numBucketLocks; i++) {
         if (ovsCtBucketLock[i] != NULL) {
@@ -132,6 +147,9 @@ freeBucketLock:
 freeTable:
     OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
     ovsConntrackTable = NULL;
+cleanupConntrack:
+    OvsCleanupConntrack();
+
     return status;
 }
 
@@ -144,35 +162,51 @@ freeTable:
 VOID
 OvsCleanupConntrack(VOID)
 {
-    ctThreadCtx.exit = 1;
-    KeSetEvent(&ctThreadCtx.event, 0, FALSE);
-    KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
-                          KernelMode, FALSE, NULL);
-    ObDereferenceObject(ctThreadCtx.threadObject);
-
+    if (OvsConntrackCleanThreadExist) {
+        ctThreadCtx.exit = 1;
+        KeSetEvent(&ctThreadCtx.event, 0, FALSE);
+        KeWaitForSingleObject(ctThreadCtx.threadObject, Executive,
+                              KernelMode, FALSE, NULL);
+        ObDereferenceObject(ctThreadCtx.threadObject);
+    }
     /* Force flush all entries before removing */
-    OvsCtFlush(0, NULL);
+    if (ovsConntrackTable) {
+       OvsCtFlush(0, NULL);
+    }
 
     if (ovsConntrackTable) {
         OvsFreeMemoryWithTag(ovsConntrackTable, OVS_CT_POOL_TAG);
         ovsConntrackTable = NULL;
     }
 
-    for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
-        /* Disabling the uninitialized memory warning because it should
-         * always be initialized during OvsInitConntrack */
-#pragma warning(suppress: 6001)
-        if (ovsCtBucketLock[i] != NULL) {
-            NdisFreeRWLock(ovsCtBucketLock[i]);
+    if (ovsCtBucketLock) {
+        for (UINT32 i = 0; i < CT_HASH_TABLE_SIZE; i++) {
+            /* Disabling the uninitialized memory warning because it should
+             * always be initialized during OvsInitConntrack */
+        #pragma warning(suppress: 6001)
+            if (ovsCtBucketLock[i] != NULL) {
+               NdisFreeRWLock(ovsCtBucketLock[i]);
+            }
         }
     }
-    OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
-    ovsCtBucketLock = NULL;
-    OvsNatCleanup();
-    NdisFreeSpinLock(&ovsCtZoneLock);
+
+    if (ovsCtBucketLock) {
+        OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
+        ovsCtBucketLock = NULL;
+    }
+    if (OvsNatInitDone) {
+        OvsNatCleanup();
+        OvsNatInitDone = FALSE;
+    }
+    if (ovsCtZoneLock_release == FALSE) {
+       NdisFreeSpinLock(&ovsCtZoneLock);
+       ovsCtZoneLock_release = TRUE;
+    }
     if (zoneInfo) {
         OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG);
+        zoneInfo = NULL;
     }
+    OvsConntrackCleanThreadExist = FALSE;
 }
 
 VOID
@@ -1520,6 +1554,7 @@ OvsConntrackEntryCleaner(PVOID data)
     LOCK_STATE_EX lockState;
     BOOLEAN success = TRUE;
 
+    OVS_LOG_INFO("Start the ConntrackEntry Cleaner Thread, context: %p", 
context);
     while (success) {
         if (context->exit) {
             break;
@@ -1541,6 +1576,7 @@ OvsConntrackEntryCleaner(PVOID data)
         KeWaitForSingleObject(&context->event, Executive, KernelMode,
                               FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
     }
+    OVS_LOG_INFO("Terminating the OVS ConntrackEntry Cleaner system thread");
 
     PsTerminateSystemThread(STATUS_SUCCESS);
 }
-- 
2.39.2 (Apple Git-143)

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to