Hey Alin, Thanks for the patch. I just had few minor suggestions. Looks good otherwise.
Acked-by: Sairam Venugopal <vsai...@vmware.com> - Sairam On 9/22/15, 3:09 PM, "Alin Serdean" <aserd...@cloudbasesolutions.com> wrote: >Windows does not support VXLAN hardware offloading. > >Currently we do not compute IP/TCP/UDP checksums for the inner packet. >This >patch computes the checksums mentioned above in regards with the enabled >settings. > >i.e. if IP checksum offloading is enabled for the inner packet we compute >it. >The same applies for TCP and UDP packets. > >This patch also revizes the computation of ones' complement over different >memory blocks, in the case the lengths are odd. > >Also per documentation: >https://urldefense.proofpoint.com/v2/url?u=https-3A__msdn.microsoft.com_en >-2Dus_library_windows_hardware_ff568840-2528v-3Dvs.85-2529.aspx&d=BQIGaQ&c >=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dcruz40PROJ40ROzSpxyQSLw6fc >rOWpJgEcEmNR3JEQ&m=u6uOvwTUL-b9_PmCsUgfXowIL9sCWNb75Q40Mpl9zVg&s=Fb1yC0mkE >hq3ZfMB5P7iEqkZgSP2n_CL_XcjjXbEdhA&e= >set the TCP flags FIN and PSH only for the last segment in the case LSO is >enabled. > >Signed-off-by: Alin Gabriel Serdean <aserd...@cloudbasesolutions.com> >--- >This patch is meant for branch-2.4 as well >v2 address comments >--- > datapath-windows/ovsext/BufferMgmt.c | 37 ++++++++++++---- > datapath-windows/ovsext/Checksum.c | 64 +++++++++++++++++---------- > datapath-windows/ovsext/Vxlan.c | 84 >++++++++++++++++++++++++++++++++---- > 3 files changed, 145 insertions(+), 40 deletions(-) > >diff --git a/datapath-windows/ovsext/BufferMgmt.c >b/datapath-windows/ovsext/BufferMgmt.c >index 3550e20..9a1cf96 100644 >--- a/datapath-windows/ovsext/BufferMgmt.c >+++ b/datapath-windows/ovsext/BufferMgmt.c >@@ -1116,7 +1116,8 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, > * >-------------------------------------------------------------------------- > */ > static NDIS_STATUS >-FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber) >+FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber, >+ BOOLEAN lastPacket, UINT16 packetCounter) > { > EthHdr *dstEth; > IPHdr *dstIP; >@@ -1141,18 +1142,34 @@ FixSegmentHeader(PNET_BUFFER nb, UINT16 >segmentSize, UINT32 seqNumber) > /* Fix IP length and checksum */ > ASSERT(dstIP->protocol == IPPROTO_TCP); > dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + >TCP_HDR_LEN(dstTCP)); >+ dstIP->id += packetCounter; > dstIP->check = 0; > dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0); > > /* Fix TCP checksum */ > dstTCP->seq = htonl(seqNumber); >- dstTCP->check = >- IPPseudoChecksum((UINT32 *)&dstIP->saddr, >- (UINT32 *)&dstIP->daddr, >- IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP)); >+ >+ /* >+ * Set the TCP FIN and PSH bit only for the last packet >+ * More information can be found under: >+ * >https://urldefense.proofpoint.com/v2/url?u=https-3A__msdn.microsoft.com_en >-2Dus_library_windows_hardware_ff568840-2528v-3Dvs.85-2529.aspx&d=BQIGaQ&c >=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dcruz40PROJ40ROzSpxyQSLw6fc >rOWpJgEcEmNR3JEQ&m=u6uOvwTUL-b9_PmCsUgfXowIL9sCWNb75Q40Mpl9zVg&s=Fb1yC0mkE >hq3ZfMB5P7iEqkZgSP2n_CL_XcjjXbEdhA&e= >+ */ Sai: Isn¹t it enough to just check if(lastPacket) and set the bits for psh/fin? >+ if (dstTCP->fin) { >+ dstTCP->fin = lastPacket; >+ } >+ if (dstTCP->psh) { >+ dstTCP->psh = lastPacket; >+ } >+ >+ UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP); Sai: I understand that this is existing code. Can you add a comment saying we don¹t support IPv6? Or mark it as a to-do item? >+ dstTCP->check = IPPseudoChecksum(&dstIP->saddr, >+ &dstIP->daddr, >+ IPPROTO_TCP, >+ csumLength); > dstTCP->check = CalculateChecksumNB(nb, >- (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - >dstIP->ihl * 4), >- sizeof *dstEth + dstIP->ihl * 4); >+ csumLength, >+ sizeof *dstEth + dstIP->ihl * 4); >+ > return STATUS_SUCCESS; > } > >@@ -1190,6 +1207,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, > NDIS_STATUS status; > UINT16 segmentSize; > ULONG copiedSize; >+ UINT16 packetCounter = 0; > > srcCtx = >(POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); > if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { >@@ -1232,7 +1250,9 @@ OvsTcpSegmentNBL(PVOID ovsContext, > goto nblcopy_error; > } > >- status = FixSegmentHeader(newNb, segmentSize, seqNumber); >+ status = FixSegmentHeader(newNb, segmentSize, seqNumber, >+ NET_BUFFER_NEXT_NB(newNb) == NULL, >+ packetCounter); > if (status != NDIS_STATUS_SUCCESS) { > goto nblcopy_error; > } >@@ -1241,6 +1261,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, > /* Move on to the next segment */ > size -= segmentSize; > seqNumber += segmentSize; >+ packetCounter++; > } > > status = OvsAllocateNBLContext(context, newNbl); >diff --git a/datapath-windows/ovsext/Checksum.c >b/datapath-windows/ovsext/Checksum.c >index 510a094..5d9b035 100644 >--- a/datapath-windows/ovsext/Checksum.c >+++ b/datapath-windows/ovsext/Checksum.c >@@ -68,34 +68,48 @@ CalculateOnesComplement(UINT8 *start, > { > UINT64 sum = 0, val; > UINT64 *src = (UINT64 *)start; >- union { >- UINT32 val; >- UINT8 b8[4]; >- } tmp; >- > while (totalLength > 7) { > val = *src; >- sum += (val >> 32) + (val & 0xffffffff); >+ sum += val; >+ if (sum < val) sum++; > src++; > totalLength -= 8; > } >+ >+ start = (UINT8 *)src; >+ > if (totalLength > 3) { >- sum += *(UINT32 *)src; >- src = (UINT64 *)((UINT8 *)src + 4); >+ UINT32 val = *(UINT32 *)start; >+ sum += val; >+ if (sum < val) sum++; >+ start += 4; > totalLength -= 4; > } >- start = (UINT8 *)src; >- tmp.val = 0; >- switch (totalLength) { >- case 3: >- tmp.b8[2] = start[2]; >- case 2: >- tmp.b8[1] = start[1]; >- case 1: >- tmp.b8[0] = start[0]; >- sum += tmp.val; >+ >+ if (totalLength > 1) { >+ UINT16 val = *(UINT16 *)start; >+ sum += val; >+ if (sum < val) sum++; >+ start += 2; >+ totalLength -= 2; > } >- sum = (isEvenStart ? sum : swap64(sum)) + initial; >+ >+ if (totalLength > 0) { >+ UINT8 val = *start; >+ sum += val; >+ if (sum < val) sum++; >+ start += 1; >+ totalLength -= 1; >+ } >+ ASSERT(totalLength == 0); >+ >+ if (!isEvenStart) { >+ sum = _byteswap_uint64(sum); >+ } >+ >+ sum += initial; >+ if (sum < initial) sum++; >+ > return sum; > } > >@@ -428,6 +442,7 @@ CalculateChecksumNB(const PNET_BUFFER nb, > ULONG firstMdlLen; > /* Running count of bytes in remainder of the MDLs including >current. */ > ULONG packetLen; >+ BOOLEAN swapEnd = 1 & csumDataLen; > > if ((nb == NULL) || (csumDataLen == 0) > || (offset >= NET_BUFFER_DATA_LENGTH(nb)) >@@ -482,10 +497,8 @@ CalculateChecksumNB(const PNET_BUFFER nb, > while (csumDataLen && (currentMdl != NULL)) { > ASSERT(mdlLen < 65536); > csLen = MIN((UINT16) mdlLen, csumDataLen); >- //XXX Not handling odd bytes yet. >- ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen); > >- csum = CalculateOnesComplement(src, csLen, csum, TRUE); >+ csum = CalculateOnesComplement(src, csLen, csum, !(1 & >csumDataLen)); > fold64(csum); > > csumDataLen -= csLen; >@@ -504,9 +517,14 @@ CalculateChecksumNB(const PNET_BUFFER nb, > } > } > >+ fold64(csum); > ASSERT(csumDataLen == 0); > ASSERT((csum & ~0xffff) == 0); >- return (UINT16) ~csum; >+ csum = (UINT16)~csum; >+ if (swapEnd) { >+ return _byteswap_ushort((UINT16)csum); >+ } >+ return (UINT16)csum; > } > > /* >diff --git a/datapath-windows/ovsext/Vxlan.c >b/datapath-windows/ovsext/Vxlan.c >index 2364f28..c0611da 100644 >--- a/datapath-windows/ovsext/Vxlan.c >+++ b/datapath-windows/ovsext/Vxlan.c >@@ -152,9 +152,9 @@ OvsCleanupVxlanTunnel(PIRP irp, > > if (vxlanPort->filterID != 0) { > status = OvsTunnelFilterDelete(irp, >- vxlanPort->filterID, >- callback, >- tunnelContext); >+ vxlanPort->filterID, >+ callback, >+ tunnelContext); > } else { > OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG); > vport->priv = NULL; >@@ -198,20 +198,24 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, > */ > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > packetLength = NET_BUFFER_DATA_LENGTH(curNb); >+ > if (layers->isTcp) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > > tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >- TcpLargeSendNetBufferListInfo); >- OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, >packetLength); >+ >TcpLargeSendNetBufferListInfo); >+ OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, >+ packetLength); > if (tsoInfo.LsoV1Transmit.MSS) { > OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); > *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, >- tsoInfo.LsoV1Transmit.MSS, headRoom); >+ tsoInfo.LsoV1Transmit.MSS, >headRoom); > if (*newNbl == NULL) { > OVS_LOG_ERROR("Unable to segment NBL"); > return NDIS_STATUS_FAILURE; > } >+ /* Clear out LSO flags after this point */ >+ NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) >= 0; > } > } > >@@ -226,6 +230,70 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, > OVS_LOG_ERROR("Unable to copy NBL"); > return NDIS_STATUS_FAILURE; > } >+ /* >+ * To this point we do not have VXLAN offloading. >+ * Apply defined checksums >+ */ >+ curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); >+ curMdl = NET_BUFFER_CURRENT_MDL(curNb); >+ bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, >LowPagePriority); >+ if (!bufferStart) { >+ status = NDIS_STATUS_RESOURCES; >+ goto ret_error; >+ } >+ >+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >+ csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >+ >TcpIpChecksumNetBufferListInfo); >+ >+ bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >+ >+ if (layers->isIPv4) { >+ IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); >+ >+ if (csumInfo.Transmit.IpHeaderChecksum) { >+ ip->check = 0; >+ ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0); >+ } >+ >+ if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { >+ UINT16 csumLength = (UINT16)(packetLength - >layers->l4Offset); >+ TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); >+ tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, >+ IPPROTO_TCP, csumLength); >+ tcp->check = CalculateChecksumNB(curNb, csumLength, >+ >(UINT32)(layers->l4Offset)); >+ } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { >+ UINT16 csumLength = (UINT16)(packetLength - >layers->l4Offset); >+ UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); >+ udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, >+ IPPROTO_UDP, csumLength); >+ udp->check = CalculateChecksumNB(curNb, csumLength, >+ >(UINT32)(layers->l4Offset)); >+ } >+ } else if (layers->isIPv6) { >+ IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset); >+ >+ if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { >+ UINT16 csumLength = (UINT16)(packetLength - >layers->l4Offset); >+ TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); >+ tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, >+ (UINT32 *) &ip->daddr, >+ IPPROTO_TCP, csumLength); >+ tcp->check = CalculateChecksumNB(curNb, csumLength, >+ >(UINT32)(layers->l4Offset)); >+ } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { >+ UINT16 csumLength = (UINT16)(packetLength - >layers->l4Offset); >+ UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); >+ udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, >+ (UINT32 *) &ip->daddr, >+ IPPROTO_UDP, csumLength); >+ udp->check = CalculateChecksumNB(curNb, csumLength, >+ >(UINT32)(layers->l4Offset)); >+ } >+ } >+ /* Clear out TcpIpChecksumNetBufferListInfo flag */ >+ NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = >0; > } > > curNbl = *newNbl; >@@ -257,9 +325,6 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, > sizeof ethHdr->Destination + sizeof >ethHdr->Source); > ethHdr->Type = htons(ETH_TYPE_IPV4); > >- // XXX: question: there are fields in the OvsIPv4TunnelKey for >ttl and such, >- // should we use those values instead? or will they end up being >- // uninitialized; > /* IP header */ > ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); > >@@ -277,6 +342,7 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, > ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); > ipHdr->saddr = fwdInfo->srcIpAddr; > ipHdr->daddr = fwdInfo->dstIpAddr; >+ > ipHdr->check = 0; > ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); > >-- >1.9.5.msysgit.0 >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dc >ruz40PROJ40ROzSpxyQSLw6fcrOWpJgEcEmNR3JEQ&m=u6uOvwTUL-b9_PmCsUgfXowIL9sCWN >b75Q40Mpl9zVg&s=7raD-kdicCNCvimJk61PlLMse60trIk25-oyY3_VVls&e= _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev