Ok, this makes sense now. Ive updated and tested the patch:
From 93bc4c59b4eb814ed4bc3da3b7459498863add1e Mon Sep 17 00:00:00 2001 From: John Hurley <john.hur...@netronome.com> Date: Thu, 5 Jan 2017 17:08:37 +0000 Subject: [PATCH 1/1] ensure correct checksum when a packet is sent to NAT --- datapath/conntrack.c | 47 +++++++++++++++++------------------------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/datapath/conntrack.c b/datapath/conntrack.c index d942884..3faa209 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -314,6 +314,11 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) u8 nexthdr; int err; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) + bool dst_set = false; + struct rtable rt = { .rt_flags = 0 }; +#endif + ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) return NF_ACCEPT; @@ -352,43 +357,25 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 proto) #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) /* Linux 4.5 and older depend on skb_dst being set when recalculating * checksums after NAT helper has mangled TCP or UDP packet payload. - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP - * has no checksum. - * - * The dependency is not triggered when the main NAT code updates - * checksums after translating the IP header (address, port), so this - * fix only needs to be executed on packets that are both being NATted - * and that have a helper assigned. + * skb_dst is cast to a rtable struct and the flags examined. + * Forcing these flags to have RTCF_LOCAL not set ensures checksum mod + * is carried out in the same way as kernel versions > 4.5 */ - if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) { - u8 ipproto = (proto == NFPROTO_IPV4) - ? ip_hdr(skb)->protocol : nexthdr; - u16 offset = 0; - - switch (ipproto) { - case IPPROTO_TCP: - offset = offsetof(struct tcphdr, check); - break; - case IPPROTO_UDP: - /* Skip if no csum. */ - if (udp_hdr(skb)->check) - offset = offsetof(struct udphdr, check); - break; - } - if (offset) { - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2))) - return NF_DROP; - - skb->csum_start = skb_headroom(skb) + protoff; - skb->csum_offset = offset; - skb->ip_summed = CHECKSUM_PARTIAL; - } + if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL + && !skb_rtable(skb)) { + dst_set = true; + skb_dst_set(skb, (struct dst_entry *)&rt); } #endif err = helper->help(skb, protoff, ct, ctinfo); if (err != NF_ACCEPT) return err; +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) + if (dst_set) + skb_dst_set(skb, NULL); +#endif + /* Adjust seqs after helper. This is needed due to some helpers (e.g., * FTP with NAT) adusting the TCP payload size when mangling IP * addresses and/or port numbers in the text-based control connection. -- 2.7.4 On Wed, Jan 4, 2017 at 9:33 PM, Jarno Rajahalme <ja...@ovn.org> wrote: > > On Jan 4, 2017, at 9:50 AM, John Hurley <john.hur...@netronome.com> wrote: > > From 64ce83672aab5634990426e0a51af16d882ac2f9 Mon Sep 17 00:00:00 2001 > From: John Hurley <john.hur...@netronome.com> > Date: Wed, 4 Jan 2017 16:52:43 +0000 > Subject: [PATCH] ensure correct checksum when a packet is sent to NAT > helper > in kernel < 4.6 > > --- > datapath/conntrack.c | 45 ++++++++++++++++----------------------------- > 1 file changed, 16 insertions(+), 29 deletions(-) > > diff --git a/datapath/conntrack.c b/datapath/conntrack.c > index d942884..fa3b0b5 100644 > --- a/datapath/conntrack.c > +++ b/datapath/conntrack.c > @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 > proto) > u8 nexthdr; > int err; > > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) > + struct rtable rt; > +#endif > + > > > This should be properly initialized (e.g., " = { .rt_flags = 0 };”). With > this RFCF_LOCAL will be unset (see below) and everything else will also be > initialized to 0. > > ct = nf_ct_get(skb, &ctinfo); > if (!ct || ctinfo == IP_CT_RELATED_REPLY) > return NF_ACCEPT; > @@ -352,43 +356,26 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 > proto) > #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) > /* Linux 4.5 and older depend on skb_dst being set when recalculating > * checksums after NAT helper has mangled TCP or UDP packet payload. > - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP > - * has no checksum. > - * > - * The dependency is not triggered when the main NAT code updates > - * checksums after translating the IP header (address, port), so this > - * fix only needs to be executed on packets that are both being NATted > - * and that have a helper assigned. > + * skb_dst is cast to a rtable struct and the flags examined. > + * Forcing these flags to have RTCF_LOCAL set allows checksum mod > + * to be carried out in the same way as kernel versions > 4.5 > > > As noted in the previous email, the reverse is the case. When RTCF_LOCAL > is not set, then the checksum update code will behave the same as in kernel > versions > 4.5. > > */ > if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) { > > > Maybe it would be a bit more robust to set the dst only if not already > set, i.e., add “!skb_rtable(skb)” to the condition above? > > - u8 ipproto = (proto == NFPROTO_IPV4) > - ? ip_hdr(skb)->protocol : nexthdr; > - u16 offset = 0; > - > - switch (ipproto) { > - case IPPROTO_TCP: > - offset = offsetof(struct tcphdr, check); > - break; > - case IPPROTO_UDP: > - /* Skip if no csum. */ > - if (udp_hdr(skb)->check) > - offset = offsetof(struct udphdr, check); > - break; > - } > - if (offset) { > - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2))) > - return NF_DROP; > - > - skb->csum_start = skb_headroom(skb) + protoff; > - skb->csum_offset = offset; > - skb->ip_summed = CHECKSUM_PARTIAL; > - } > + rt.rt_flags = RTCF_LOCAL; > > > Given the initialization above this line is not needed any more. > > + skb_dst_set(skb, (struct dst_entry *)&rt); > } > #endif > err = helper->help(skb, protoff, ct, ctinfo); > if (err != NF_ACCEPT) > return err; > > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) > + if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) { > > > The checksum code may change skb->ip_summed value, so we cannot use this > check here any more. Maybe set a boolean (e.g., "dst_set = true;”) if the > skb_dst_set call was done above and use that as the check to undo it here? > > + /* reset dst of skb to NULL */ > + skb_dst_set(skb, NULL); > + } > +#endif > + > /* Adjust seqs after helper. This is needed due to some helpers (e.g., > * FTP with NAT) adusting the TCP payload size when mangling IP > * addresses and/or port numbers in the text-based control connection. > -- > 2.7.4 > > > On Wed, Jan 4, 2017 at 5:50 PM, John Hurley <john.hur...@netronome.com> > wrote: > >> >> >> On Wed, Jan 4, 2017 at 12:53 AM, Jarno Rajahalme <ja...@ovn.org> wrote: >> >>> >>> > On Dec 28, 2016, at 3:05 PM, John Hurley <john.hur...@netronome.com> >>> wrote: >>> > >>> > sorry, updated patch…. >>> >>> This patch is still whitespace damaged. Maybe use git format-patch and >>> git send-email to send it? >>> >>> > >>> > -------------------------------- >>> > >>> > Fix for a bug when sending a NATed packet to helper function in kernels >>> > <4.6. >>> > >>> > Setting CHECKSUM_PARTIAL flag can lead to L4 checksum corruption. >>> > >>> > Corruption can occur in datapath.c/queue_userspace_packet(). >>> > >>> >>> You mean this code: >>> >>> /* Complete checksum if needed */ >>> if (skb->ip_summed == CHECKSUM_PARTIAL && >>> (err = skb_checksum_help(skb))) >>> goto out; >>> >>> >> >> >> >> Yes, my finding was that if a packet was sent to conntrack NAT and an FTP >> helper, but had only IP addresses/ports translated (no payload changes), >> that the TCP checksum would be correct prior to the call of >> skb_checksum_help and corrupt after. >> >> If the packet payload was mangled then the checksum was incorrect both >> before and after this call. >> >> >> >> >>> Would you care detailing why the corruption happens? Does it always >>> happen if ip_summed is CHECKSUM_PARTIAL? >>> >>> >> >> If ip_summed was not set to CHECKSUM_PARTIAL then the non payload mangled >> FTP packets on the tests I was running would have the correct checksum but >> mangled payload packets would not. >> >> My take on the CHECKSUM_PARTIAL flag from the documentation is that it >> signals that the checksum had been partially complete and that the >> remaining (payload section) section needs to be calculated and combined to >> the existing field. However, the NAT kernel modules seem to handle this >> update so we end up doing the 'remainder' of the calculation without >> needing to. It may also expect this to be corrected in the network card >> but the cards I was testing on were not running checksum offloading. >> >> >> >> >>> > Giving the packet an skb_dst allows the kernel to correct the checksum. >>> > >>> > Verified with packets mangled by Conntrack/NAT helpers. >>> > >>> >>> It would also be helpful to add the reference to the patch that this >>> fixes (“Fixes:”) >>> >>> > Signed-off-by: John Hurley <john.hur...@netronome.com> >>> > --- >>> > >>> > diff --git a/datapath/conntrack.c b/datapath/conntrack.c >>> > index d942884..fef67ba 100644 >>> > --- a/datapath/conntrack.c >>> > +++ b/datapath/conntrack.c >>> > @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 >>> > proto) >>> > u8 nexthdr; >>> > int err; >>> > >>> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) >>> > + struct rtable *rt = NULL; >>> > +#endif >>> > + >>> > ct = nf_ct_get(skb, &ctinfo); >>> > if (!ct || ctinfo == IP_CT_RELATED_REPLY) >>> > return NF_ACCEPT; >>> > @@ -352,43 +356,28 @@ static int ovs_ct_helper(struct sk_buff *skb, u16 >>> > proto) >>> > #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) >>> > /* Linux 4.5 and older depend on skb_dst being set when recalculating >>> > * checksums after NAT helper has mangled TCP or UDP packet payload. >>> > - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP >>> > - * has no checksum. >>> > * >>> > - * The dependency is not triggered when the main NAT code updates >>> > - * checksums after translating the IP header (address, port), so this >>> > - * fix only needs to be executed on packets that are both being NATted >>> > - * and that have a helper assigned. >>> > + * skb_dst is cast to a rtable struct and the flags examined. >>> > + * Forcing these flags to have RTCF_LOCAL set allows checksum mod >>> > + * to be carried out in the same way as kernel versions > 4.5 >>> >>> Are kernels > 4.5 treating all these packets as RTCF_LOCAL, or are some >>> of them possibly CHECKSUM_PARTIAL? >>> >>> >> >> >> yes, it appears so - assuming CHECKSUM_PARTIAL is not set already. >> >> The following are the nat checksum calculations from kernel 4.5: >> >> 126 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L126> >> static void nf_nat_ipv4_csum_recalc >> <http://lxr.free-electrons.com/ident?v=4.5;i=nf_nat_ipv4_csum_recalc>(struct >> sk_buff <http://lxr.free-electrons.com/ident?v=4.5;i=sk_buff> *skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,127 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L127> >> u8 >> <http://lxr.free-electrons.com/ident?v=4.5;i=u8> proto >> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, void *data >> <http://lxr.free-electrons.com/ident?v=4.5;i=data>, __sum16 >> <http://lxr.free-electrons.com/ident?v=4.5;i=__sum16> *check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check>,128 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L128> >> int datalen, int oldlen)129 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L129> >> {130 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L130> >> const struct iphdr >> <http://lxr.free-electrons.com/ident?v=4.5;i=iphdr> *iph = ip_hdr >> <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdr>(skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);131 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L131> >> struct rtable <http://lxr.free-electrons.com/ident?v=4.5;i=rtable> >> *rt <http://lxr.free-electrons.com/ident?v=4.5;i=rt> = skb_rtable >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_rtable>(skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);132 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L132> >> 133 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L133> >> if (skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed != >> CHECKSUM_PARTIAL >> <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>) {134 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L134> >> if (!(rt >> <http://lxr.free-electrons.com/ident?v=4.5;i=rt>->rt_flags & RTCF_LOCAL >> <http://lxr.free-electrons.com/ident?v=4.5;i=RTCF_LOCAL>) &&135 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L135> >> (!skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev >> <http://lxr.free-electrons.com/ident?v=4.5;i=dev> || skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev >> <http://lxr.free-electrons.com/ident?v=4.5;i=dev>->features >> <http://lxr.free-electrons.com/ident?v=4.5;i=features> &136 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L136> >> (NETIF_F_IP_CSUM >> <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_IP_CSUM> | >> NETIF_F_HW_CSUM >> <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_HW_CSUM>))) {137 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L137> >> skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed = >> CHECKSUM_PARTIAL >> <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>;138 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L138> >> skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_start = skb_headroom >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_headroom>(skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +139 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L139> >> skb_network_offset >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_network_offset>(skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +140 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L140> >> ip_hdrlen >> <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdrlen>(skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);141 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L141> >> skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_offset = (void >> *)check <http://lxr.free-electrons.com/ident?v=4.5;i=check> - data >> <http://lxr.free-electrons.com/ident?v=4.5;i=data>;142 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L142> >> *check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = >> ~csum_tcpudp_magic(iph->saddr >> <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr >> <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,143 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L143> >> datalen, proto >> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, 0);144 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L144> >> } else {145 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L145> >> *check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = 0;146 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L146> >> *check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = csum_tcpudp_magic >> <http://lxr.free-electrons.com/ident?v=4.5;i=csum_tcpudp_magic>(iph->saddr >> <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr >> <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,147 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L147> >> datalen, proto >> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>,148 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L148> >> csum_partial >> <http://lxr.free-electrons.com/ident?v=4.5;i=csum_partial>(data >> <http://lxr.free-electrons.com/ident?v=4.5;i=data>, datalen,149 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L149> >> 0));150 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L150> >> if (proto >> <http://lxr.free-electrons.com/ident?v=4.5;i=proto> == IPPROTO_UDP >> <http://lxr.free-electrons.com/ident?v=4.5;i=IPPROTO_UDP> && !*check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check>)151 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L151> >> *check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = CSUM_MANGLED_0 >> <http://lxr.free-electrons.com/ident?v=4.5;i=CSUM_MANGLED_0>;152 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L152> >> }153 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L153> >> } else154 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L154> >> inet_proto_csum_replace2 >> <http://lxr.free-electrons.com/ident?v=4.5;i=inet_proto_csum_replace2>(check >> <http://lxr.free-electrons.com/ident?v=4.5;i=check>, skb >> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,155 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L155> >> htons >> <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(oldlen), htons >> <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(datalen), true >> <http://lxr.free-electrons.com/ident?v=4.5;i=true>);156 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L156> >> } >> >> and version 4.6: >> >> 126 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L126> >> static void nf_nat_ipv4_csum_recalc >> <http://lxr.free-electrons.com/ident?v=4.6;i=nf_nat_ipv4_csum_recalc>(struct >> sk_buff <http://lxr.free-electrons.com/ident?v=4.6;i=sk_buff> *skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,127 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L127> >> u8 >> <http://lxr.free-electrons.com/ident?v=4.6;i=u8> proto >> <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, void *data >> <http://lxr.free-electrons.com/ident?v=4.6;i=data>, __sum16 >> <http://lxr.free-electrons.com/ident?v=4.6;i=__sum16> *check >> <http://lxr.free-electrons.com/ident?v=4.6;i=check>,128 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L128> >> int datalen, int oldlen)129 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L129> >> {130 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L130> >> if (skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed != >> CHECKSUM_PARTIAL >> <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>) {131 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L131> >> const struct iphdr >> <http://lxr.free-electrons.com/ident?v=4.6;i=iphdr> *iph = ip_hdr >> <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdr>(skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);132 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L132> >> 133 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L133> >> skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed = >> CHECKSUM_PARTIAL >> <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>;134 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L134> >> skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_start = skb_headroom >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb_headroom>(skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) + skb_network_offset >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb_network_offset>(skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) +135 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L135> >> ip_hdrlen >> <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdrlen>(skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);136 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L136> >> skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_offset = (void >> *)check <http://lxr.free-electrons.com/ident?v=4.6;i=check> - data >> <http://lxr.free-electrons.com/ident?v=4.6;i=data>;137 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L137> >> *check <http://lxr.free-electrons.com/ident?v=4.6;i=check> >> = ~csum_tcpudp_magic(iph->saddr >> <http://lxr.free-electrons.com/ident?v=4.6;i=saddr>, iph->daddr >> <http://lxr.free-electrons.com/ident?v=4.6;i=daddr>, datalen,138 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L138> >> proto >> <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, 0);139 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L139> >> } else140 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L140> >> inet_proto_csum_replace2 >> <http://lxr.free-electrons.com/ident?v=4.6;i=inet_proto_csum_replace2>(check >> <http://lxr.free-electrons.com/ident?v=4.6;i=check>, skb >> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,141 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L141> >> htons >> <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(oldlen), htons >> <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(datalen), true >> <http://lxr.free-electrons.com/ident?v=4.6;i=true>);142 >> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L142> >> } >> >> >> > */ >>> > if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) { >>> > - u8 ipproto = (proto == NFPROTO_IPV4) >>> > - ? ip_hdr(skb)->protocol : nexthdr; >>> > - u16 offset = 0; >>> > - >>> > - switch (ipproto) { >>> > - case IPPROTO_TCP: >>> > - offset = offsetof(struct tcphdr, check); >>> > - break; >>> > - case IPPROTO_UDP: >>> > - /* Skip if no csum. */ >>> > - if (udp_hdr(skb)->check) >>> > - offset = offsetof(struct udphdr, check); >>> > - break; >>> > - } >>> > - if (offset) { >>> > - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2))) >>> > - return NF_DROP; >>> > - >>> > - skb->csum_start = skb_headroom(skb) + protoff; >>> > - skb->csum_offset = offset; >>> > - skb->ip_summed = CHECKSUM_PARTIAL; >>> > - } >>> > + rt = kmalloc(sizeof(struct rtable), GFP_KERNEL); >>> >>> Could the struct rtable be allocated from stack instead? Or could it be >>> a static variable in the file scope? In either case we would avoid dynamic >>> memory allocation/free. I recall setting ip_summed to CHECKSUM_PARTIAL was >>> deemed a simpler way to backport as it (also) avoids the dynamic memory >>> allocations. >>> >>> >> >> yes, it could be added as a stack variable. As it is only added to the >> skb temporarily to force the checksum change then this would make more >> sense! >> I'll forward on a further email after this with a patch using a stack >> based variable - I tested this patch using an FTP session across OVS. >> >> >> >> >>> > + rt->rt_flags = RTCF_LOCAL; >>> > + skb_dst_set(skb, (struct dst_entry *)rt); >>> > } >>> > #endif >>> > err = helper->help(skb, protoff, ct, ctinfo); >>> > if (err != NF_ACCEPT) >>> > return err; >>> > >>> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0) >>> > + if (rt) { >>> > + skb_dst_set(skb, NULL); >>> > + kfree(rt); >>> > + } >>> > +#endif >>> > + >>> > /* Adjust seqs after helper. This is needed due to some helpers >>> (e.g., >>> > * FTP with NAT) adusting the TCP payload size when mangling IP >>> > * addresses and/or port numbers in the text-based control connection. >>> > >>> > -- >>> > >>> > On Wed, Dec 28, 2016 at 10:08 PM, Ben Pfaff <b...@ovn.org> wrote: >>> > >>> >> On Wed, Dec 28, 2016 at 09:37:30PM +0000, John Hurley wrote: >>> >>> Fix for a bug when sending a NATed packet to helper function in >>> kernels >>> >>> <4.6. >>> >>> >>> >>> Setting CHECKSUM_PARTIAL flag means packets could have L4 checksum >>> >>> corrupted in >>> >>> >>> >>> datapath.c/queue_userspace_packet(). >>> >>> >>> >>> Giving the packet an skb_dst allows the kernel to correct the >>> checksum if >>> >>> packet >>> >>> mangling happens in Conntrack/NAT helpers. >>> >>> >>> >>> Signed-off-by: John Hurley <john.hur...@netronome.com> >>> >> >>> >> I'm not the right person to review this but I did notice that the >>> patch >>> >> is wordwrapped and otherwise space-damaged. >>> >> >>> > _______________________________________________ >>> > dev mailing list >>> > d...@openvswitch.org >>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev >>> >>> >> > > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev