Hi Sai,

You are right. Locking is a little bit tricky here than in Conntrack.c
because we cannot grab the lock during the entire Conntrack transaction. I
will have to address thread safety in another patch.

Best regards,
Yin Lin

On Thu, Mar 23, 2017 at 11:58 PM, Sairam Venugopal <[email protected]>
wrote:

> Hi Yin,
>
> Thanks for sending over the patch. While it looks good for the most part,
> there are certain indentations that need fixing. I will send those out
> separately.
>
> I see that you have allocated - ovsNatLock and have added in stubs for
> flush and cleanup similar to Conntrack.c.
>
> However, I don’t see you taking out the lock for accessing the NAT tables.
> How are you verifying thread-safety here?
>
> Thanks,
> Sairam
>
>
>
>
> On 3/22/17, 3:11 PM, "[email protected] on behalf of Yin
> Lin" <[email protected] on behalf of [email protected]>
> wrote:
>
> >Signed-off-by: Yin Lin <[email protected]>
> >
> >Issue: #
> >Change-Id: I6f37360c36525548b343f0016304015fec8aba7d
> >---
> > datapath-windows/automake.mk            |   2 +
> > datapath-windows/ovsext/Conntrack-nat.c | 437
> ++++++++++++++++++++++++++++++++
> > datapath-windows/ovsext/Conntrack-nat.h |  39 +++
> > 3 files changed, 478 insertions(+)
> > create mode 100644 datapath-windows/ovsext/Conntrack-nat.c
> > create mode 100644 datapath-windows/ovsext/Conntrack-nat.h
> >
> >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
> >index 53983ae..296e785 100644
> >--- a/datapath-windows/automake.mk
> >+++ b/datapath-windows/automake.mk
> >@@ -16,7 +16,9 @@ EXTRA_DIST += \
> >       datapath-windows/ovsext/Conntrack-icmp.c \
> >       datapath-windows/ovsext/Conntrack-other.c \
> >       datapath-windows/ovsext/Conntrack-related.c \
> >+    datapath-windows/ovsext/Conntrack-nat.c \
> >       datapath-windows/ovsext/Conntrack-tcp.c \
> >+    datapath-windows/ovsext/Conntrack-nat.h \
> >       datapath-windows/ovsext/Conntrack.c \
> >       datapath-windows/ovsext/Conntrack.h \
> >       datapath-windows/ovsext/Datapath.c \
> >diff --git a/datapath-windows/ovsext/Conntrack-nat.c
> b/datapath-windows/ovsext/Conntrack-nat.c
> >new file mode 100644
> >index 0000000..4930694
> >--- /dev/null
> >+++ b/datapath-windows/ovsext/Conntrack-nat.c
> >@@ -0,0 +1,437 @@
> >+#include "Conntrack-nat.h"
> >+#include "Jhash.h"
> >+
> >+PLIST_ENTRY ovsNatTable = NULL;
> >+PLIST_ENTRY ovsUnNatTable = NULL;
> >+static PNDIS_RW_LOCK_EX ovsNatLock;
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsHashNatKey
> >+ *     Hash NAT related fields in a Conntrack key.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline UINT32
> >+OvsHashNatKey(const OVS_CT_KEY *key)
> >+{
> >+    UINT32 hash = 0;
> >+#define HASH_ADD(field) \
> >+    hash = OvsJhashBytes(&key->field, sizeof(key->field), hash)
> >+
> >+    HASH_ADD(src.addr.ipv4_aligned);
> >+    HASH_ADD(dst.addr.ipv4_aligned);
> >+    HASH_ADD(src.port);
> >+    HASH_ADD(dst.port);
> >+    HASH_ADD(zone);
> >+#undef HASH_ADD
> >+    return hash;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNatKeyAreSame
> >+ *     Compare NAT related fields in a Conntrack key.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline BOOLEAN
> >+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2)
> >+{
> >+    // XXX: Compare IPv6 key as well
> >+#define FIELD_COMPARE(field) \
> >+    if (key1->field != key2->field) return FALSE
> >+
> >+    FIELD_COMPARE(src.addr.ipv4_aligned);
> >+    FIELD_COMPARE(dst.addr.ipv4_aligned);
> >+    FIELD_COMPARE(src.port);
> >+    FIELD_COMPARE(dst.port);
> >+    FIELD_COMPARE(zone);
> >+    return TRUE;
> >+#undef FIELD_COMPARE
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNaGetBucket
> >+ *     Returns the row of NAT table that has the same hash as the given
> NAT
> >+ *     hash key. If isReverse is TRUE, returns the row of reverse NAT
> table
> >+ *     instead.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+static __inline PLIST_ENTRY
> >+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse)
> >+{
> >+    uint32_t hash = OvsHashNatKey(key);
> >+    if (isReverse) {
> >+        return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK];
> >+    } else {
> >+        return &ovsNatTable[hash & NAT_HASH_TABLE_MASK];
> >+    }
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> ----------------
> >+ * OvsNatInit
> >+ *     Initialize NAT related resources.
> >+ *-----------------------------------------------------------
> ----------------
> >+ */
> >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT context)
> >+{
> >+    ASSERT(ovsNatTable == NULL);
> >+
> >+    /* Init the sync-lock */
> >+    ovsNatLock = NdisAllocateRWLock(context->NdisFilterHandle);
> >+    if (ovsNatLock == NULL) {
> >+        goto failNoMem;
> >+    }
> >+
> >+    /* Init the Hash Buffer */
> >+    ovsNatTable = OvsAllocateMemoryWithTag(
> >+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
> >+        OVS_CT_POOL_TAG);
> >+    if (ovsNatTable == NULL) {
> >+        goto freeNatLock;
> >+    }
> >+
> >+    ovsUnNatTable = OvsAllocateMemoryWithTag(
> >+        sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE,
> >+        OVS_CT_POOL_TAG);
> >+    if (ovsUnNatTable == NULL) {
> >+        goto freeNatTable;
> >+    }
> >+
> >+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
> >+        InitializeListHead(&ovsNatTable[i]);
> >+        InitializeListHead(&ovsUnNatTable[i]);
> >+    }
> >+    return STATUS_SUCCESS;
> >+
> >+freeNatTable:
> >+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
> >+freeNatLock:
> >+    NdisFreeRWLock(ovsNatLock);
> >+failNoMem:
> >+    return STATUS_INSUFFICIENT_RESOURCES;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatFlush
> >+ *     Flushes out all NAT entries that match the given zone.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID OvsNatFlush(UINT16 zone)
> >+{
> >+    LOCK_STATE_EX lockState;
> >+    PLIST_ENTRY link, next;
> >+    NdisAcquireRWLockWrite(ovsNatLock, &lockState, 0);
> >+    for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) {
> >+        LIST_FORALL_SAFE(&ovsNatTable[i], link, next) {
> >+            POVS_NAT_ENTRY entry =
> >+                CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
> >+            /* zone is a non-zero value */
> >+            if (!zone || zone == entry->key.zone) {
> >+                OvsNatDeleteEntry(entry);
> >+            }
> >+        }
> >+    }
> >+    NdisReleaseRWLock(ovsNatLock, &lockState);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatCleanup
> >+ *     Releases all NAT related resources.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID OvsNatCleanup()
> >+{
> >+    if (ovsNatTable == NULL) return;
> >+    OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG);
> >+    OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG);
> >+    NdisFreeRWLock(ovsNatLock);
> >+    ovsNatTable = NULL;
> >+    ovsUnNatTable = NULL;
> >+    ovsNatLock = NULL;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatPacket
> >+ *     Performs NAT operation on the packet by replacing the
> source/destinaton
> >+ *     address/port based on natAction. If reverse is TRUE, perform unNAT
> >+ *     instead.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatPacket(OvsForwardingContext *ovsFwdCtx,
> >+             const OVS_CT_ENTRY *entry,
> >+             UINT16 natAction,
> >+             OvsFlowKey *key,
> >+             BOOLEAN reverse)
> >+{
> >+    UINT32 natFlag;
> >+    const struct ct_endpoint* endpoint;
> >+    /* When it is NAT, only entry->rev_key contains NATTED address;
> >+       When it is unNAT, only entry->key contains the UNNATTED address;*/
> >+    const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key;
> >+    BOOLEAN isSrcNat;
> >+
> >+    if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) {
> >+        return;
> >+    }
> >+    isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) ||
> >+                ((natAction & NAT_ACTION_DST) && reverse));
> >+
> >+    if (isSrcNat) {
> >+        /* Flag is set to SNAT for SNAT case and the reverse DNAT case */
> >+        natFlag = OVS_CS_F_SRC_NAT;
> >+        /* Note that ctKey is the key in the other direction, so
> >+           endpoint has to be reverted, i.e. ctKey->dst for SNAT
> >+           and ctKey->src for DNAT */
> >+        endpoint = &ctKey->dst;
> >+    } else {
> >+        natFlag = OVS_CS_F_DST_NAT;
> >+        endpoint = &ctKey->src;
> >+    }
> >+    key->ct.state |= natFlag;
> >+    if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) {
> >+        OvsUpdateAddressAndPort(ovsFwdCtx,
> >+                                endpoint->addr.ipv4_aligned,
> >+                                endpoint->port, isSrcNat);
> >+        if (isSrcNat) {
> >+            key->ipKey.nwSrc = endpoint->addr.ipv4_aligned;
> >+        } else {
> >+            key->ipKey.nwDst = endpoint->addr.ipv4_aligned;
> >+        }
> >+    } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){
> >+        // XXX: IPv6 packet not supported yet.
> >+        return;
> >+    }
> >+    if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) {
> >+        if (isSrcNat) {
> >+            if (key->ipKey.l4.tpSrc != 0) {
> >+                key->ipKey.l4.tpSrc = endpoint->port;
> >+            }
> >+        } else {
> >+            if (key->ipKey.l4.tpDst != 0) {
> >+                key->ipKey.l4.tpDst = endpoint->port;
> >+            }
> >+        }
> >+    }
> >+}
> >+
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatHashRange
> >+ *     Compute hash for a range of addresses specified in natInfo.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis)
> >+{
> >+    UINT32 hash = basis;
> >+#define HASH_ADD(field) \
> >+    hash = OvsJhashBytes(&field, sizeof(field), hash)
> >+
> >+    HASH_ADD(entry->natInfo.minAddr);
> >+    HASH_ADD(entry->natInfo.maxAddr);
> >+    HASH_ADD(entry->key.dl_type);
> >+    HASH_ADD(entry->key.nw_proto);
> >+    HASH_ADD(entry->key.zone);
> >+#undef HASH_ADD
> >+    return hash;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatAddEntry
> >+ *     Add an entry to the NAT table. Also updates the reverse NAT lookup
> >+ *     table.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatAddEntry(OVS_NAT_ENTRY* entry)
> >+{
> >+    InsertHeadList(OvsNatGetBucket(&entry->key, FALSE),
> >+                   &entry->link);
> >+    InsertHeadList(OvsNatGetBucket(&entry->value, TRUE),
> >+                   &entry->reverseLink);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatCtEntry
> >+ *     Update an Conntrack entry with NAT information. Translated
> address and
> >+ *     port will be generated and write back to the conntrack entry as a
> >+ *     result.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+BOOLEAN
> >+OvsNatCtEntry(OVS_CT_ENTRY *entry)
> >+{
> >+    const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024;
> >+    const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535;
> >+
> >+    uint16_t minPort;
> >+    uint16_t maxPort;
> >+    uint16_t firstPort;
> >+
> >+    uint32_t hash = OvsNatHashRange(entry, 0);
> >+
> >+    if ((entry->natInfo.natAction & NAT_ACTION_SRC) &&
> >+        (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) {
> >+        firstPort = minPort = maxPort = ntohs(entry->key.src.port);
> >+    } else if ((entry->natInfo.natAction & NAT_ACTION_DST) &&
> >+               (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) {
> >+        firstPort = minPort = maxPort = ntohs(entry->key.dst.port);
> >+    } else {
> >+        uint16_t portDelta = entry->natInfo.maxPort -
> entry->natInfo.minPort;
> >+        uint16_t portIndex = (uint16_t) hash % (portDelta + 1);
> >+        firstPort = entry->natInfo.minPort + portIndex;
> >+        minPort = entry->natInfo.minPort;
> >+        maxPort = entry->natInfo.maxPort;
> >+    }
> >+
> >+    uint32_t addrDelta = 0;
> >+    uint32_t addrIndex;
> >+    struct ct_addr ctAddr, maxCtAddr;
> >+    memset(&ctAddr, 0, sizeof ctAddr);
> >+    memset(&maxCtAddr, 0, sizeof maxCtAddr);
> >+    maxCtAddr = entry->natInfo.maxAddr;
> >+
> >+    if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
> >+        addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) -
> >+                    ntohl(entry->natInfo.minAddr.ipv4_aligned);
> >+        addrIndex = hash % (addrDelta + 1);
> >+        ctAddr.ipv4_aligned = htonl(
> >+            ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex);
> >+    } else {
> >+        // XXX: IPv6 not supported
> >+        return FALSE;
> >+    }
> >+
> >+    uint16_t port = firstPort;
> >+    BOOLEAN allPortsTried = FALSE;
> >+    BOOLEAN originalPortsTried = FALSE;
> >+    struct ct_addr firstAddr = ctAddr;
> >+    for (;;) {
> >+        if (entry->natInfo.natAction & NAT_ACTION_SRC) {
> >+            entry->rev_key.dst.addr = ctAddr;
> >+            entry->rev_key.dst.port = htons(port);
> >+        } else {
> >+            entry->rev_key.src.addr = ctAddr;
> >+            entry->rev_key.src.port = htons(port);
> >+        }
> >+
> >+        OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE);
> >+
> >+        if (!natEntry) {
> >+            natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry),
> >+                                                OVS_CT_POOL_TAG);
> >+            memcpy(&natEntry->key, &entry->key,
> >+                   sizeof natEntry->key);
> >+            memcpy(&natEntry->value, &entry->rev_key,
> >+                   sizeof natEntry->value);
> >+            natEntry->ctEntry = entry;
> >+            OvsNatAddEntry(natEntry);
> >+            return TRUE;
> >+        } else if (!allPortsTried) {
> >+            if (minPort == maxPort) {
> >+                allPortsTried = TRUE;
> >+            } else if (port == maxPort) {
> >+                port = minPort;
> >+            } else {
> >+                port++;
> >+            }
> >+            if (port == firstPort) {
> >+                allPortsTried = TRUE;
> >+            }
> >+        } else {
> >+            if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) {
> >+                if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) {
> >+                    ctAddr.ipv4_aligned = htonl(
> >+                        ntohl(ctAddr.ipv4_aligned) + 1);
> >+                } else {
> >+                    // XXX: IPv6 not supported
> >+                    return FALSE;
> >+                }
> >+            } else {
> >+                ctAddr = entry->natInfo.minAddr;
> >+            }
> >+            if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) {
> >+                if (!originalPortsTried) {
> >+                    originalPortsTried = TRUE;
> >+                    ctAddr = entry->natInfo.minAddr;
> >+                    minPort = MIN_NAT_EPHEMERAL_PORT;
> >+                    maxPort = MAX_NAT_EPHEMERAL_PORT;
> >+                } else {
> >+                    break;
> >+                }
> >+            }
> >+            firstPort = minPort;
> >+            port = firstPort;
> >+            allPortsTried = FALSE;
> >+        }
> >+    }
> >+    return FALSE;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatLookup
> >+ *     Look up a NAT entry with the given key in the NAT table.
> >+ *     If reverse is TRUE, look up a NAT entry with the given value
> instead.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+POVS_NAT_ENTRY
> >+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse)
> >+{
> >+    PLIST_ENTRY link;
> >+    POVS_NAT_ENTRY entry;
> >+
> >+    LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) {
> >+        if (reverse) {
> >+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink);
> >+
> >+            if (OvsNatKeyAreSame(ctKey, &entry->value)) {
> >+                return entry;
> >+            }
> >+        } else {
> >+            entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link);
> >+
> >+            if (OvsNatKeyAreSame(ctKey, &entry->key)) {
> >+                return entry;
> >+            }
> >+        }
> >+    }
> >+    return NULL;
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatDeleteEntry
> >+ *     Delete a NAT entry.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatDeleteEntry(POVS_NAT_ENTRY entry)
> >+{
> >+    if (entry == NULL) {
> >+        return;
> >+    }
> >+    RemoveEntryList(&entry->link);
> >+    RemoveEntryList(&entry->reverseLink);
> >+    OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
> >+}
> >+
> >+/*
> >+ *-----------------------------------------------------------
> -----------------
> >+ * OvsNatDeleteKey
> >+ *     Delete a NAT entry with the given key.
> >+ *-----------------------------------------------------------
> -----------------
> >+ */
> >+VOID
> >+OvsNatDeleteKey(const OVS_CT_KEY *key)
> >+{
> >+    OvsNatDeleteEntry(OvsNatLookup(key, FALSE));
> >+}
> >diff --git a/datapath-windows/ovsext/Conntrack-nat.h
> b/datapath-windows/ovsext/Conntrack-nat.h
> >new file mode 100644
> >index 0000000..99f2b67
> >--- /dev/null
> >+++ b/datapath-windows/ovsext/Conntrack-nat.h
> >@@ -0,0 +1,39 @@
> >+#ifndef _CONNTRACK_NAT_H
> >+#define _CONNTRACK_NAT_H
> >+
> >+#include "precomp.h"
> >+#include "Flow.h"
> >+#include "Debug.h"
> >+#include <stddef.h>
> >+#include "Conntrack.h"
> >+
> >+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10)
> >+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1)
> >+
> >+typedef struct OVS_NAT_ENTRY {
> >+    LIST_ENTRY link;
> >+    LIST_ENTRY reverseLink;
> >+    OVS_CT_KEY key;
> >+    OVS_CT_KEY value;
> >+    POVS_CT_ENTRY  ctEntry;
> >+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY;
> >+
> >+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) {
> >+    return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST));
> >+}
> >+
> >+NTSTATUS OvsNatInit(POVS_SWITCH_CONTEXT);
> >+VOID OvsNatFlush(UINT16 zone);
> >+
> >+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry);
> >+
> >+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry);
> >+VOID OvsNatDeleteKey(const OVS_CT_KEY *key);
> >+VOID OvsNatCleanup();
> >+
> >+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse);
> >+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry);
> >+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY
> *entry,
> >+                  UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse);
> >+
> >+#endif
> >\ No newline at end of file
> >--
> >2.10.2.windows.1
> >
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to