Hi Yin, Thanks for the patch. Please find my comments inline.
Thanks, Sairam On 5/9/17, 3:59 PM, "[email protected] on behalf of Yin Lin" <[email protected] on behalf of [email protected]> wrote: >Signed-off-by: Yin Lin <[email protected]> >--- > datapath-windows/automake.mk | 2 + > datapath-windows/ovsext/Conntrack-nat.c | 424 ++++++++++++++++++++++++++++++++ > datapath-windows/ovsext/Conntrack-nat.h | 39 +++ > 3 files changed, 465 insertions(+) > create mode 100644 datapath-windows/ovsext/Conntrack-nat.c > create mode 100644 datapath-windows/ovsext/Conntrack-nat.h > >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk >index 4f7b55a..7177630 100644 >--- a/datapath-windows/automake.mk >+++ b/datapath-windows/automake.mk >@@ -16,7 +16,9 @@ EXTRA_DIST += \ > datapath-windows/ovsext/Conntrack-icmp.c \ > datapath-windows/ovsext/Conntrack-other.c \ > datapath-windows/ovsext/Conntrack-related.c \ >+ datapath-windows/ovsext/Conntrack-nat.c \ > datapath-windows/ovsext/Conntrack-tcp.c \ >+ datapath-windows/ovsext/Conntrack-nat.h \ > datapath-windows/ovsext/Conntrack.c \ > datapath-windows/ovsext/Conntrack.h \ > datapath-windows/ovsext/Datapath.c \ >diff --git a/datapath-windows/ovsext/Conntrack-nat.c >b/datapath-windows/ovsext/Conntrack-nat.c >new file mode 100644 >index 0000000..edf5f8f >--- /dev/null >+++ b/datapath-windows/ovsext/Conntrack-nat.c >@@ -0,0 +1,424 @@ >+#include "Conntrack-nat.h" >+#include "Jhash.h" >+ >+PLIST_ENTRY ovsNatTable = NULL; >+PLIST_ENTRY ovsUnNatTable = NULL; >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsHashNatKey >+ * Hash NAT related fields in a Conntrack key. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline UINT32 >+OvsHashNatKey(const OVS_CT_KEY *key) >+{ >+ UINT32 hash = 0; >+#define HASH_ADD(field) \ >+ hash = OvsJhashBytes(&key->field, sizeof(key->field), hash) >+ >+ HASH_ADD(src.addr.ipv4_aligned); >+ HASH_ADD(dst.addr.ipv4_aligned); >+ HASH_ADD(src.port); >+ HASH_ADD(dst.port); >+ HASH_ADD(zone); >+#undef HASH_ADD >+ return hash; >+} >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsNatKeyAreSame >+ * Compare NAT related fields in a Conntrack key. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline BOOLEAN >+OvsNatKeyAreSame(const OVS_CT_KEY *key1, const OVS_CT_KEY *key2) >+{ >+ // XXX: Compare IPv6 key as well >+#define FIELD_COMPARE(field) \ [Sai]: Nit: move return statement to next line. >+ if (key1->field != key2->field) return FALSE >+ >+ FIELD_COMPARE(src.addr.ipv4_aligned); >+ FIELD_COMPARE(dst.addr.ipv4_aligned); >+ FIELD_COMPARE(src.port); >+ FIELD_COMPARE(dst.port); >+ FIELD_COMPARE(zone); >+ return TRUE; >+#undef FIELD_COMPARE >+} >+ >+/* >+ *--------------------------------------------------------------------------- [Sai]: Nit - OvsNatGetBucket >+ * OvsNaGetBucket >+ * Returns the row of NAT table that has the same hash as the given NAT >+ * hash key. If isReverse is TRUE, returns the row of reverse NAT table >+ * instead. >+ *--------------------------------------------------------------------------- >+ */ >+static __inline PLIST_ENTRY >+OvsNatGetBucket(const OVS_CT_KEY *key, BOOLEAN isReverse) >+{ >+ uint32_t hash = OvsHashNatKey(key); >+ if (isReverse) { >+ return &ovsUnNatTable[hash & NAT_HASH_TABLE_MASK]; >+ } else { >+ return &ovsNatTable[hash & NAT_HASH_TABLE_MASK]; >+ } >+} >+ >+/* >+ *--------------------------------------------------------------------------- >+ * OvsNatInit >+ * Initialize NAT related resources. >+ *--------------------------------------------------------------------------- >+ */ >+NTSTATUS OvsNatInit() >+{ >+ ASSERT(ovsNatTable == NULL); >+ >+ /* Init the Hash Buffer */ >+ ovsNatTable = OvsAllocateMemoryWithTag( >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >+ OVS_CT_POOL_TAG); >+ if (ovsNatTable == NULL) { >+ goto failNoMem; >+ } >+ >+ ovsUnNatTable = OvsAllocateMemoryWithTag( >+ sizeof(LIST_ENTRY) * NAT_HASH_TABLE_SIZE, >+ OVS_CT_POOL_TAG); >+ if (ovsUnNatTable == NULL) { >+ goto freeNatTable; >+ } >+ >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >+ InitializeListHead(&ovsNatTable[i]); >+ InitializeListHead(&ovsUnNatTable[i]); >+ } >+ return STATUS_SUCCESS; >+ >+freeNatTable: >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >+failNoMem: >+ return STATUS_INSUFFICIENT_RESOURCES; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatFlush >+ * Flushes out all NAT entries that match the given zone. >+ *---------------------------------------------------------------------------- >+ */ >+VOID OvsNatFlush(UINT16 zone) >+{ >+ PLIST_ENTRY link, next; >+ for (int i = 0; i < NAT_HASH_TABLE_SIZE; i++) { >+ LIST_FORALL_SAFE(&ovsNatTable[i], link, next) { >+ POVS_NAT_ENTRY entry = >+ CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >+ /* zone is a non-zero value */ >+ if (!zone || zone == entry->key.zone) { >+ OvsNatDeleteEntry(entry); >+ } >+ } >+ } >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatCleanup >+ * Releases all NAT related resources. >+ *---------------------------------------------------------------------------- >+ */ >+VOID OvsNatCleanup() >+{ [Sai]: Nit: move return statement to next line. >+ if (ovsNatTable == NULL) return; >+ OvsFreeMemoryWithTag(ovsNatTable, OVS_CT_POOL_TAG); >+ OvsFreeMemoryWithTag(ovsUnNatTable, OVS_CT_POOL_TAG); >+ ovsNatTable = NULL; >+ ovsUnNatTable = NULL; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatPacket >+ * Performs NAT operation on the packet by replacing the source/destinaton >+ * address/port based on natAction. If reverse is TRUE, perform unNAT >+ * instead. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatPacket(OvsForwardingContext *ovsFwdCtx, >+ const OVS_CT_ENTRY *entry, >+ UINT16 natAction, >+ OvsFlowKey *key, >+ BOOLEAN reverse) >+{ >+ UINT32 natFlag; >+ const struct ct_endpoint* endpoint; >+ /* When it is NAT, only entry->rev_key contains NATTED address; >+ When it is unNAT, only entry->key contains the UNNATTED address;*/ >+ const OVS_CT_KEY *ctKey = reverse ? &entry->key : &entry->rev_key; >+ BOOLEAN isSrcNat; >+ >+ if (!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST))) { >+ return; >+ } >+ isSrcNat = (((natAction & NAT_ACTION_SRC) && !reverse) || >+ ((natAction & NAT_ACTION_DST) && reverse)); >+ >+ if (isSrcNat) { >+ /* Flag is set to SNAT for SNAT case and the reverse DNAT case */ >+ natFlag = OVS_CS_F_SRC_NAT; >+ /* Note that ctKey is the key in the other direction, so >+ endpoint has to be reverted, i.e. ctKey->dst for SNAT >+ and ctKey->src for DNAT */ >+ endpoint = &ctKey->dst; >+ } else { >+ natFlag = OVS_CS_F_DST_NAT; >+ endpoint = &ctKey->src; >+ } >+ key->ct.state |= natFlag; >+ if (ctKey->dl_type == htons(ETH_TYPE_IPV4)) { >+ OvsUpdateAddressAndPort(ovsFwdCtx, >+ endpoint->addr.ipv4_aligned, >+ endpoint->port, isSrcNat, >+ !reverse); >+ if (isSrcNat) { >+ key->ipKey.nwSrc = endpoint->addr.ipv4_aligned; >+ } else { >+ key->ipKey.nwDst = endpoint->addr.ipv4_aligned; >+ } >+ } else if (ctKey->dl_type == htons(ETH_TYPE_IPV6)){ >+ // XXX: IPv6 packet not supported yet. >+ return; >+ } >+ if (natAction & (NAT_ACTION_SRC_PORT | NAT_ACTION_DST_PORT)) { >+ if (isSrcNat) { >+ if (key->ipKey.l4.tpSrc != 0) { >+ key->ipKey.l4.tpSrc = endpoint->port; >+ } >+ } else { >+ if (key->ipKey.l4.tpDst != 0) { >+ key->ipKey.l4.tpDst = endpoint->port; >+ } >+ } >+ } >+} >+ >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatHashRange >+ * Compute hash for a range of addresses specified in natInfo. >+ *---------------------------------------------------------------------------- >+ */ >+static UINT32 OvsNatHashRange(const OVS_CT_ENTRY *entry, UINT32 basis) >+{ >+ UINT32 hash = basis; >+#define HASH_ADD(field) \ >+ hash = OvsJhashBytes(&field, sizeof(field), hash) >+ >+ HASH_ADD(entry->natInfo.minAddr); >+ HASH_ADD(entry->natInfo.maxAddr); >+ HASH_ADD(entry->key.dl_type); >+ HASH_ADD(entry->key.nw_proto); >+ HASH_ADD(entry->key.zone); >+#undef HASH_ADD >+ return hash; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatAddEntry >+ * Add an entry to the NAT table. Also updates the reverse NAT lookup >+ * table. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatAddEntry(OVS_NAT_ENTRY* entry) >+{ >+ InsertHeadList(OvsNatGetBucket(&entry->key, FALSE), >+ &entry->link); >+ InsertHeadList(OvsNatGetBucket(&entry->value, TRUE), >+ &entry->reverseLink); >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatCtEntry >+ * Update an Conntrack entry with NAT information. Translated address and >+ * port will be generated and write back to the conntrack entry as a >+ * result. >+ *---------------------------------------------------------------------------- >+ */ [Sai] - Can you name this OvsNatUpdateCtEntry? Or something to imply what this function does. >+BOOLEAN >+OvsNatCtEntry(OVS_CT_ENTRY *entry) >+{ >+ const uint16_t MIN_NAT_EPHEMERAL_PORT = 1024; >+ const uint16_t MAX_NAT_EPHEMERAL_PORT = 65535; >+ >+ uint16_t minPort; >+ uint16_t maxPort; >+ uint16_t firstPort; >+ >+ uint32_t hash = OvsNatHashRange(entry, 0); >+ >+ if ((entry->natInfo.natAction & NAT_ACTION_SRC) && >+ (!(entry->natInfo.natAction & NAT_ACTION_SRC_PORT))) { >+ firstPort = minPort = maxPort = ntohs(entry->key.src.port); >+ } else if ((entry->natInfo.natAction & NAT_ACTION_DST) && >+ (!(entry->natInfo.natAction & NAT_ACTION_DST_PORT))) { >+ firstPort = minPort = maxPort = ntohs(entry->key.dst.port); >+ } else { >+ uint16_t portDelta = entry->natInfo.maxPort - entry->natInfo.minPort; >+ uint16_t portIndex = (uint16_t) hash % (portDelta + 1); >+ firstPort = entry->natInfo.minPort + portIndex; >+ minPort = entry->natInfo.minPort; >+ maxPort = entry->natInfo.maxPort; >+ } >+ [Sai] - Can you move these definitions to the top? >+ uint32_t addrDelta = 0; >+ uint32_t addrIndex; >+ struct ct_addr ctAddr, maxCtAddr; >+ memset(&ctAddr, 0, sizeof ctAddr); >+ memset(&maxCtAddr, 0, sizeof maxCtAddr); >+ maxCtAddr = entry->natInfo.maxAddr; >+ >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >+ addrDelta = ntohl(entry->natInfo.maxAddr.ipv4_aligned) - >+ ntohl(entry->natInfo.minAddr.ipv4_aligned); >+ addrIndex = hash % (addrDelta + 1); >+ ctAddr.ipv4_aligned = htonl( >+ ntohl(entry->natInfo.minAddr.ipv4_aligned) + addrIndex); >+ } else { >+ // XXX: IPv6 not supported >+ return FALSE; >+ } [Sai] - Can you move these definitions to the top? >+ >+ uint16_t port = firstPort; >+ BOOLEAN allPortsTried = FALSE; >+ BOOLEAN originalPortsTried = FALSE; >+ struct ct_addr firstAddr = ctAddr; >+ for (;;) { >+ if (entry->natInfo.natAction & NAT_ACTION_SRC) { >+ entry->rev_key.dst.addr = ctAddr; >+ entry->rev_key.dst.port = htons(port); >+ } else { >+ entry->rev_key.src.addr = ctAddr; >+ entry->rev_key.src.port = htons(port); >+ } >+ >+ OVS_NAT_ENTRY *natEntry = OvsNatLookup(&entry->rev_key, TRUE); >+ >+ if (!natEntry) { >+ natEntry = OvsAllocateMemoryWithTag(sizeof(*natEntry), >+ OVS_CT_POOL_TAG); [Sai]: Need to check if natEntry is not NULL and handle the Insufficient resources case. nit: NdisMoveMemory instead of memcpy >+ memcpy(&natEntry->key, &entry->key, >+ sizeof natEntry->key); >+ memcpy(&natEntry->value, &entry->rev_key, >+ sizeof natEntry->value); >+ natEntry->ctEntry = entry; >+ OvsNatAddEntry(natEntry); >+ return TRUE; >+ } else if (!allPortsTried) { >+ if (minPort == maxPort) { >+ allPortsTried = TRUE; >+ } else if (port == maxPort) { >+ port = minPort; >+ } else { >+ port++; >+ } >+ if (port == firstPort) { >+ allPortsTried = TRUE; >+ } >+ } else { >+ if (memcmp(&ctAddr, &maxCtAddr, sizeof ctAddr)) { >+ if (entry->key.dl_type == htons(ETH_TYPE_IPV4)) { >+ ctAddr.ipv4_aligned = htonl( >+ ntohl(ctAddr.ipv4_aligned) + 1); >+ } else { >+ // XXX: IPv6 not supported [Sai] - This can be done later. However, it will be good to return NDIS_STATUS and return NDIS_STATUS_INVALID. At the very minimum, we should add a log msg. >+ return FALSE; >+ } >+ } else { >+ ctAddr = entry->natInfo.minAddr; >+ } >+ if (!memcmp(&ctAddr, &firstAddr, sizeof ctAddr)) { >+ if (!originalPortsTried) { >+ originalPortsTried = TRUE; >+ ctAddr = entry->natInfo.minAddr; >+ minPort = MIN_NAT_EPHEMERAL_PORT; >+ maxPort = MAX_NAT_EPHEMERAL_PORT; >+ } else { >+ break; >+ } >+ } >+ firstPort = minPort; >+ port = firstPort; >+ allPortsTried = FALSE; >+ } >+ } >+ return FALSE; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatLookup >+ * Look up a NAT entry with the given key in the NAT table. >+ * If reverse is TRUE, look up a NAT entry with the given value instead. >+ *---------------------------------------------------------------------------- >+ */ >+POVS_NAT_ENTRY >+OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse) >+{ >+ PLIST_ENTRY link; >+ POVS_NAT_ENTRY entry; >+ >+ LIST_FORALL(OvsNatGetBucket(ctKey, reverse), link) { >+ if (reverse) { >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, reverseLink); >+ >+ if (OvsNatKeyAreSame(ctKey, &entry->value)) { >+ return entry; >+ } >+ } else { >+ entry = CONTAINING_RECORD(link, OVS_NAT_ENTRY, link); >+ >+ if (OvsNatKeyAreSame(ctKey, &entry->key)) { >+ return entry; >+ } >+ } >+ } >+ return NULL; >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatDeleteEntry >+ * Delete a NAT entry. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatDeleteEntry(POVS_NAT_ENTRY entry) >+{ >+ if (entry == NULL) { >+ return; >+ } >+ RemoveEntryList(&entry->link); >+ RemoveEntryList(&entry->reverseLink); >+ OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); >+} >+ >+/* >+ *---------------------------------------------------------------------------- >+ * OvsNatDeleteKey >+ * Delete a NAT entry with the given key. >+ *---------------------------------------------------------------------------- >+ */ >+VOID >+OvsNatDeleteKey(const OVS_CT_KEY *key) >+{ >+ OvsNatDeleteEntry(OvsNatLookup(key, FALSE)); >+} >diff --git a/datapath-windows/ovsext/Conntrack-nat.h >b/datapath-windows/ovsext/Conntrack-nat.h >new file mode 100644 >index 0000000..aaa8ceb >--- /dev/null >+++ b/datapath-windows/ovsext/Conntrack-nat.h >@@ -0,0 +1,39 @@ >+#ifndef _CONNTRACK_NAT_H >+#define _CONNTRACK_NAT_H >+ >+#include "precomp.h" >+#include "Flow.h" >+#include "Debug.h" >+#include <stddef.h> >+#include "Conntrack.h" >+ >+#define NAT_HASH_TABLE_SIZE ((UINT32)1 << 10) >+#define NAT_HASH_TABLE_MASK (NAT_HASH_TABLE_SIZE - 1) >+ >+typedef struct OVS_NAT_ENTRY { >+ LIST_ENTRY link; >+ LIST_ENTRY reverseLink; >+ OVS_CT_KEY key; >+ OVS_CT_KEY value; >+ POVS_CT_ENTRY ctEntry; >+} OVS_NAT_ENTRY, *POVS_NAT_ENTRY; >+ >+__inline static BOOLEAN OvsIsForwardNat(UINT16 natAction) { [Sai] - What does the double !! do here? >+ return !!(natAction & (NAT_ACTION_SRC | NAT_ACTION_DST)); >+} >+ >+NTSTATUS OvsNatInit(); >+VOID OvsNatFlush(UINT16 zone); >+ >+VOID OvsNatAddEntry(OVS_NAT_ENTRY* entry); >+ >+VOID OvsNatDeleteEntry(POVS_NAT_ENTRY entry); >+VOID OvsNatDeleteKey(const OVS_CT_KEY *key); >+VOID OvsNatCleanup(); >+ >+POVS_NAT_ENTRY OvsNatLookup(const OVS_CT_KEY *ctKey, BOOLEAN reverse); >+BOOLEAN OvsNatCtEntry(OVS_CT_ENTRY *ctEntry); >+VOID OvsNatPacket(OvsForwardingContext *ovsFwdCtx, const OVS_CT_ENTRY *entry, >+ UINT16 natAction, OvsFlowKey *key, BOOLEAN reverse); >+ >+#endif >-- >2.10.2.windows.1 > >_______________________________________________ >dev mailing list >[email protected] >https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=Z6vowHUOjP5ysP_g372c49Nqc1vEKqHKNBkR5Q5Z7uo&m=veEopphpitZ1dkD8nov8CuLArrdnUU8WDczrz9UClwo&s=U88QV-uHXUzVrccSWR_EYp7F7ySpFOMRBlwZy34qfzA&e= > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
