hi Sai, Thanks for implementing this. I had a few minor comments. I should be able to ack the v4.
thanks, -- Nithin > On Sep 18, 2015, at 5:20 PM, Sairam Venugopal <[email protected]> wrote: > > Enable support for Checksum offloads in STT if it's enabled in the Windows > VM. Set the Checksum Partial and Checksum Verified flags as mentioned in > the STT Draft - > https://urldefense.proofpoint.com/v2/url?u=https-3A__tools.ietf.org_html_draft-2Ddavie-2Dstt-2D06&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=pNHQcdr7B40b4h6Yb7FIedI1dnBsxdDuTLBYD3JqV80&m=e3YmoGr1-dIvbo58QHFnIo-Q4pNk3YsxJY1iS5I7afk&s=2Bumt-O8JxfVAmm3t96osyL6lGKdN5fJPtj1NTeH6Jk&e= > > > Signed-off-by: Sairam Venugopal <[email protected]> > --- > datapath-windows/ovsext/Stt.c | 164 +++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 156 insertions(+), 8 deletions(-) > > diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c > index b6272c3..1982a93 100644 > --- a/datapath-windows/ovsext/Stt.c > +++ b/datapath-windows/ovsext/Stt.c > @@ -146,10 +146,16 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > POVS_STT_VPORT vportStt; > UINT32 headRoom = OvsGetSttTunHdrSize(); > UINT32 tcpChksumLen; > + PUINT8 bufferStart; > > UNREFERENCED_PARAMETER(layers); > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > + > + /* Verify if inner checksum is verified */ > + BOOLEAN innerChecksumVerified = FALSE; > + BOOLEAN innerPartialChecksum = FALSE; > + > if (layers->isTcp) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; > > @@ -165,6 +171,9 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport); > ASSERT(vportStt); > > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > + TcpIpChecksumNetBufferListInfo); > *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, > FALSE /*copy NblInfo*/); > if (*newNbl == NULL) { > @@ -172,6 +181,30 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > return NDIS_STATUS_FAILURE; > } > > + curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); > + curMdl = NET_BUFFER_CURRENT_MDL(curNb); > + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, > + LowPagePriority); > + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); > + > + if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) { > + IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); > + ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); > + } > + if (layers->isTcp) { > + if(!csumInfo.Transmit.TcpChecksum) { > + innerChecksumVerified = TRUE; > + } else { > + innerPartialChecksum = TRUE; > + } > + } else if (layers->isUdp) { > + if(!csumInfo.Transmit.UdpChecksum) { > + innerChecksumVerified = TRUE; > + } else { > + innerPartialChecksum = TRUE; > + } > + } > + > curNbl = *newNbl; > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > /* NB Chain should be split before */ > @@ -243,7 +276,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > outerIpHdr->check = 0; > outerIpHdr->saddr = fwdInfo->srcIpAddr; > outerIpHdr->daddr = tunKey->dst; > - outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, > 0); > > /* L4 header */ > RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); > @@ -266,6 +298,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > > /* XXX need to peek into the inner packet, hard code for now */ > sttHdr->flags = STT_PROTO_IPV4; > + if (innerChecksumVerified)jjj { > + sttHdr->flags |= STT_CSUM_VERIFIED; > + } else if (innerPartialChecksum) { > + sttHdr->flags |= STT_CSUM_PARTIAL; > + } > sttHdr->l4Offset = 0; > > sttHdr->reserved = 0; > @@ -276,13 +313,13 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Zero out stt padding */ > *(uint16 *)(sttHdr + 1) = 0; > > - /* Calculate software tcp checksum */ > - outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, > - sizeof(EthHdr) + sizeof(IPHdr)); > - if (outerTcpHdr->check == 0) { > - status = NDIS_STATUS_FAILURE; > - goto ret_error; > - } > + /* Offload IP and TCP checksum */ > + csumInfo.Value = 0; > + csumInfo.Transmit.IpHeaderChecksum = 1; > + csumInfo.Transmit.TcpChecksum = 1; > + csumInfo.Transmit.IsIPv4 = 1; > + csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + sizeof > *outerIpHdr; > + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = > csumInfo.Value; > > return STATUS_SUCCESS; > > @@ -293,6 +330,48 @@ ret_error: > } > > /* > + > *---------------------------------------------------------------------------- > + * OvsCalculateTCPChecksum > + * Calculate TCP checksum > + > *---------------------------------------------------------------------------- > + */ > +static __inline NDIS_STATUS > +OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb) > +{ > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo); > + UINT16 checkSum; > + > + /* Check if TCP Checksum has been calculated by NIC */ > + if (csumInfo.Receive.TcpChecksumSucceeded) { > + return NDIS_STATUS_SUCCESS; > + } > + > + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), > + NULL, 1, 0); > + > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { > + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); > + UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4; > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); minor: it might be better to use: (PCHAR) ip + (ip->ihl * 4) > + checkSum = tcp->check; > + > + tcp->check = 0; > + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_TCP, (UINT16)l4Payload); > + tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload), > + sizeof(EthHdr) + ip->ihl * 4); > + if (checkSum != tcp->check) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + } I know we don’t support IPv6 right now. I looked up the RFC and it says that STT is supported over IPV6 as well. An ASSERT might not be a bad idea or just dropping the packet with a log that IPv6 is not supported yet. > + > + csumInfo.Receive.TcpChecksumSucceeded = 1; > + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = > csumInfo.Value; > + return NDIS_STATUS_SUCCESS; > +} > + > +/* > * -------------------------------------------------------------------------- > * OvsDecapStt -- > * Decapsulates an STT packet. > @@ -311,6 +390,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); > @@ -321,6 +401,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > return NDIS_STATUS_INVALID_LENGTH; > } > > + /* Verify outer TCP Checksum */ > + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo); > + > + /* Check if NIC has indicated TCP checksum failure */ > + if (csumInfo.Receive.TcpChecksumFailed) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + > + /* Calculate the TCP Checksum */ > + status = OvsCalculateTCPChecksum(curNbl, curNb); > + if (status != NDIS_STATUS_SUCCESS) { > + return NDIS_STATUS_INVALID_PACKET; > + } > + > /* Skip Eth header */ > hdrLen = sizeof(EthHdr); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > @@ -353,6 +447,60 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > hdrLen = STT_HDR_LEN; > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > + > + /* Verify checksum for inner packet if it's required */ > + if (!(sttHdr->flags & STT_CSUM_VERIFIED)) { > + BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL; > + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), > + NULL, 1, 0); > + > + /* XXX Figure out a way to offload checksum receives */ > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { > + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); > + UINT16 l4Payload = (UINT16)ntohs(ip->tot_len) - ip->ihl * 4; > + UINT32 offset = sizeof(EthHdr) + ip->ihl * 4; > + > + if (ip->protocol == IPPROTO_TCP) { > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); minor: You can use ‘offset’ to get to the TCP header. > + if (!innerChecksumPartial){ > + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_TCP, > + (UINT16)l4Payload); minor: indentation of arg #3, #4 should be inline with arg #1. > + } > + tcp->check = CalculateChecksumNB(curNb, l4Payload, offset); > + } else if (ip->protocol == IPPROTO_UDP) { > + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); > + if (!innerChecksumPartial){ > + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, > + IPPROTO_UDP, l4Payload); > + } > + udp->check = CalculateChecksumNB(curNb, l4Payload, offset); > + } > + } > + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) { minor: Can be an else-if > + IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth); > + UINT32 offset = (UINT32)(sizeof *eth + sizeof *ip); > + UINT16 totalLength = (UINT16)ntohs(ip->payload_len); > + if (ip->nexthdr == IPPROTO_TCP) { > + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); > + if (!innerChecksumPartial){ > + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, > + (UINT32 *)&ip->daddr, > + IPPROTO_TCP, > totalLength); > + } > + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); > + } > + else if (ip->nexthdr == IPPROTO_UDP) { > + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); > + if (!innerChecksumPartial) { > + udp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, > + (UINT32 *)&ip->daddr, > + IPPROTO_UDP, > totalLength); > + } > + udp->check = CalculateChecksumNB(curNb, totalLength, offset); > + } > + } > + } Since you are calculating the checksums, it is safe to either reset the csumInfo value for the inner packet or setup the receive flags appropriately. > > *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, > 0, FALSE /*copy NBL info*/); _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
