Ok, this makes sense now. Ive updated and tested the patch:

From 93bc4c59b4eb814ed4bc3da3b7459498863add1e Mon Sep 17 00:00:00 2001
From: John Hurley <john.hur...@netronome.com>
Date: Thu, 5 Jan 2017 17:08:37 +0000
Subject: [PATCH 1/1] ensure correct checksum when a packet is sent to NAT

---
 datapath/conntrack.c | 47 +++++++++++++++++------------------------------
 1 file changed, 17 insertions(+), 30 deletions(-)

diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index d942884..3faa209 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -314,6 +314,11 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
proto)
  u8 nexthdr;
  int err;

+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+ bool dst_set = false;
+ struct rtable rt = { .rt_flags = 0 };
+#endif
+
  ct = nf_ct_get(skb, &ctinfo);
  if (!ct || ctinfo == IP_CT_RELATED_REPLY)
  return NF_ACCEPT;
@@ -352,43 +357,25 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
proto)
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
  /* Linux 4.5 and older depend on skb_dst being set when recalculating
  * checksums after NAT helper has mangled TCP or UDP packet payload.
- * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
- * has no checksum.
- *
- * The dependency is not triggered when the main NAT code updates
- * checksums after translating the IP header (address, port), so this
- * fix only needs to be executed on packets that are both being NATted
- * and that have a helper assigned.
+ * skb_dst is cast to a rtable struct and the flags examined.
+ * Forcing these flags to have RTCF_LOCAL not set ensures checksum mod
+ * is carried out in the same way as kernel versions > 4.5
  */
- if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
- u8 ipproto = (proto == NFPROTO_IPV4)
- ? ip_hdr(skb)->protocol : nexthdr;
- u16 offset = 0;
-
- switch (ipproto) {
- case IPPROTO_TCP:
- offset = offsetof(struct tcphdr, check);
- break;
- case IPPROTO_UDP:
- /* Skip if no csum. */
- if (udp_hdr(skb)->check)
- offset = offsetof(struct udphdr, check);
- break;
- }
- if (offset) {
- if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
- return NF_DROP;
-
- skb->csum_start = skb_headroom(skb) + protoff;
- skb->csum_offset = offset;
- skb->ip_summed = CHECKSUM_PARTIAL;
- }
+ if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL
+ && !skb_rtable(skb)) {
+ dst_set = true;
+ skb_dst_set(skb, (struct dst_entry *)&rt);
  }
 #endif
  err = helper->help(skb, protoff, ct, ctinfo);
  if (err != NF_ACCEPT)
  return err;

+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
+ if (dst_set)
+ skb_dst_set(skb, NULL);
+#endif
+
  /* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
  * FTP with NAT) adusting the TCP payload size when mangling IP
  * addresses and/or port numbers in the text-based control connection.
-- 
2.7.4


On Wed, Jan 4, 2017 at 9:33 PM, Jarno Rajahalme <ja...@ovn.org> wrote:

>
> On Jan 4, 2017, at 9:50 AM, John Hurley <john.hur...@netronome.com> wrote:
>
> From 64ce83672aab5634990426e0a51af16d882ac2f9 Mon Sep 17 00:00:00 2001
> From: John Hurley <john.hur...@netronome.com>
> Date: Wed, 4 Jan 2017 16:52:43 +0000
> Subject: [PATCH] ensure correct checksum when a packet is sent to NAT
> helper
>  in kernel < 4.6
>
> ---
>  datapath/conntrack.c | 45 ++++++++++++++++-----------------------------
>  1 file changed, 16 insertions(+), 29 deletions(-)
>
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index d942884..fa3b0b5 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
> proto)
>   u8 nexthdr;
>   int err;
>
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> + struct rtable rt;
> +#endif
> +
>
>
> This should be properly initialized (e.g., " = { .rt_flags = 0 };”). With
> this RFCF_LOCAL will be unset (see below) and everything else will also be
> initialized to 0.
>
>   ct = nf_ct_get(skb, &ctinfo);
>   if (!ct || ctinfo == IP_CT_RELATED_REPLY)
>   return NF_ACCEPT;
> @@ -352,43 +356,26 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
> proto)
>  #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
>   /* Linux 4.5 and older depend on skb_dst being set when recalculating
>   * checksums after NAT helper has mangled TCP or UDP packet payload.
> - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
> - * has no checksum.
> - *
> - * The dependency is not triggered when the main NAT code updates
> - * checksums after translating the IP header (address, port), so this
> - * fix only needs to be executed on packets that are both being NATted
> - * and that have a helper assigned.
> + * skb_dst is cast to a rtable struct and the flags examined.
> + * Forcing these flags to have RTCF_LOCAL set allows checksum mod
> + * to be carried out in the same way as kernel versions > 4.5
>
>
> As noted in the previous email, the reverse is the case. When RTCF_LOCAL
> is not set, then the checksum update code will behave the same as in kernel
> versions > 4.5.
>
>   */
>   if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
>
>
> Maybe it would be a bit more robust to set the dst only if not already
> set, i.e., add “!skb_rtable(skb)” to the condition above?
>
> - u8 ipproto = (proto == NFPROTO_IPV4)
> - ? ip_hdr(skb)->protocol : nexthdr;
> - u16 offset = 0;
> -
> - switch (ipproto) {
> - case IPPROTO_TCP:
> - offset = offsetof(struct tcphdr, check);
> - break;
> - case IPPROTO_UDP:
> - /* Skip if no csum. */
> - if (udp_hdr(skb)->check)
> - offset = offsetof(struct udphdr, check);
> - break;
> - }
> - if (offset) {
> - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
> - return NF_DROP;
> -
> - skb->csum_start = skb_headroom(skb) + protoff;
> - skb->csum_offset = offset;
> - skb->ip_summed = CHECKSUM_PARTIAL;
> - }
> + rt.rt_flags = RTCF_LOCAL;
>
>
> Given the initialization above this line is not needed any more.
>
> + skb_dst_set(skb, (struct dst_entry *)&rt);
>   }
>  #endif
>   err = helper->help(skb, protoff, ct, ctinfo);
>   if (err != NF_ACCEPT)
>   return err;
>
> +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
> + if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
>
>
> The checksum code may change skb->ip_summed value, so we cannot use this
> check here any more. Maybe set a boolean (e.g., "dst_set = true;”) if the
> skb_dst_set call was done above and use that as the check to undo it here?
>
> + /* reset dst of skb to NULL */
> + skb_dst_set(skb, NULL);
> + }
> +#endif
> +
>   /* Adjust seqs after helper.  This is needed due to some helpers (e.g.,
>   * FTP with NAT) adusting the TCP payload size when mangling IP
>   * addresses and/or port numbers in the text-based control connection.
> --
> 2.7.4
>
>
> On Wed, Jan 4, 2017 at 5:50 PM, John Hurley <john.hur...@netronome.com>
> wrote:
>
>>
>>
>> On Wed, Jan 4, 2017 at 12:53 AM, Jarno Rajahalme <ja...@ovn.org> wrote:
>>
>>>
>>> > On Dec 28, 2016, at 3:05 PM, John Hurley <john.hur...@netronome.com>
>>> wrote:
>>> >
>>> > sorry, updated patch….
>>>
>>> This patch is still whitespace damaged. Maybe use git format-patch and
>>> git send-email to send it?
>>>
>>> >
>>> > --------------------------------
>>> >
>>> > Fix for a bug when sending a NATed packet to helper function in kernels
>>> > <4.6.
>>> >
>>> > Setting CHECKSUM_PARTIAL flag can lead to L4 checksum corruption.
>>> >
>>> > Corruption can occur in datapath.c/queue_userspace_packet().
>>> >
>>>
>>> You mean this code:
>>>
>>>         /* Complete checksum if needed */
>>>         if (skb->ip_summed == CHECKSUM_PARTIAL &&
>>>             (err = skb_checksum_help(skb)))
>>>                 goto out;
>>>
>>>
>>
>>
>>
>> Yes, my finding was that if a packet was sent to conntrack NAT and an FTP
>> helper, but had only IP addresses/ports translated (no payload changes),
>> that the TCP checksum would be correct prior to the call of
>> skb_checksum_help and corrupt after.
>>
>> If the packet payload was mangled then the checksum was incorrect both
>> before and after this call.
>>
>>
>>
>>
>>> Would you care detailing why the corruption happens? Does it always
>>> happen if ip_summed is CHECKSUM_PARTIAL?
>>>
>>>
>>
>> If ip_summed was not set to CHECKSUM_PARTIAL then the non payload mangled
>> FTP packets on the tests I was running would have the correct checksum but
>> mangled payload packets would not.
>>
>> My take on the CHECKSUM_PARTIAL flag from the documentation is that it
>> signals that the checksum had been partially complete and that the
>> remaining (payload section) section needs to be calculated and combined to
>> the existing field. However, the NAT kernel modules seem to handle this
>> update so we end up doing the 'remainder' of the calculation without
>> needing to.  It may also expect this to be corrected in the network card
>> but the cards I was testing on were not running checksum offloading.
>>
>>
>>
>>
>>> > Giving the packet an skb_dst allows the kernel to correct the checksum.
>>> >
>>> > Verified with packets mangled by Conntrack/NAT helpers.
>>> >
>>>
>>> It would also be helpful to add the reference to the patch that this
>>> fixes (“Fixes:”)
>>>
>>> > Signed-off-by: John Hurley <john.hur...@netronome.com>
>>> > ---
>>> >
>>> > diff --git a/datapath/conntrack.c b/datapath/conntrack.c
>>> > index d942884..fef67ba 100644
>>> > --- a/datapath/conntrack.c
>>> > +++ b/datapath/conntrack.c
>>> > @@ -314,6 +314,10 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
>>> > proto)
>>> >  u8 nexthdr;
>>> >  int err;
>>> >
>>> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
>>> > + struct rtable *rt = NULL;
>>> > +#endif
>>> > +
>>> >  ct = nf_ct_get(skb, &ctinfo);
>>> >  if (!ct || ctinfo == IP_CT_RELATED_REPLY)
>>> >  return NF_ACCEPT;
>>> > @@ -352,43 +356,28 @@ static int ovs_ct_helper(struct sk_buff *skb, u16
>>> > proto)
>>> > #if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
>>> >  /* Linux 4.5 and older depend on skb_dst being set when recalculating
>>> >  * checksums after NAT helper has mangled TCP or UDP packet payload.
>>> > - * This dependency is avoided when skb is CHECKSUM_PARTIAL or when UDP
>>> > - * has no checksum.
>>> >  *
>>> > - * The dependency is not triggered when the main NAT code updates
>>> > - * checksums after translating the IP header (address, port), so this
>>> > - * fix only needs to be executed on packets that are both being NATted
>>> > - * and that have a helper assigned.
>>> > + * skb_dst is cast to a rtable struct and the flags examined.
>>> > + * Forcing these flags to have RTCF_LOCAL set allows checksum mod
>>> > + * to be carried out in the same way as kernel versions > 4.5
>>>
>>> Are kernels > 4.5 treating all these packets as RTCF_LOCAL, or are some
>>> of them possibly CHECKSUM_PARTIAL?
>>>
>>>
>>
>>
>> yes, it appears so - assuming CHECKSUM_PARTIAL is not set already.
>>
>> The following are the nat checksum calculations from kernel 4.5:
>>
>> 126 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L126>
>>  static void nf_nat_ipv4_csum_recalc 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=nf_nat_ipv4_csum_recalc>(struct 
>> sk_buff <http://lxr.free-electrons.com/ident?v=4.5;i=sk_buff> *skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,127 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L127>
>>                                      u8 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=u8> proto 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, void *data 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=data>, __sum16 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=__sum16> *check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check>,128 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L128>
>>                                      int datalen, int oldlen)129 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L129>
>>  {130 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L130>
>>          const struct iphdr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=iphdr> *iph = ip_hdr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdr>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);131 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L131>
>>          struct rtable <http://lxr.free-electrons.com/ident?v=4.5;i=rtable> 
>> *rt <http://lxr.free-electrons.com/ident?v=4.5;i=rt> = skb_rtable 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_rtable>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);132 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L132>
>>  133 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L133>
>>          if (skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed != 
>> CHECKSUM_PARTIAL 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>) {134 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L134>
>>                  if (!(rt 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=rt>->rt_flags & RTCF_LOCAL 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=RTCF_LOCAL>) &&135 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L135>
>>                      (!skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=dev> || skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->dev 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=dev>->features 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=features> &136 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L136>
>>                       (NETIF_F_IP_CSUM 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_IP_CSUM> | 
>> NETIF_F_HW_CSUM 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=NETIF_F_HW_CSUM>))) {137 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L137>
>>                          skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->ip_summed = 
>> CHECKSUM_PARTIAL 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=CHECKSUM_PARTIAL>;138 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L138>
>>                          skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_start = skb_headroom 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_headroom>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +139 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L139>
>>                                            skb_network_offset 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb_network_offset>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>) +140 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L140>
>>                                            ip_hdrlen 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=ip_hdrlen>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>);141 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L141>
>>                          skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>->csum_offset = (void 
>> *)check <http://lxr.free-electrons.com/ident?v=4.5;i=check> - data 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=data>;142 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L142>
>>                          *check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = 
>> ~csum_tcpudp_magic(iph->saddr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,143 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L143>
>>                                                      datalen, proto 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>, 0);144 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L144>
>>                  } else {145 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L145>
>>                          *check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = 0;146 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L146>
>>                          *check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = csum_tcpudp_magic 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=csum_tcpudp_magic>(iph->saddr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=saddr>, iph->daddr 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=daddr>,147 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L147>
>>                                                     datalen, proto 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=proto>,148 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L148>
>>                                                     csum_partial 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=csum_partial>(data 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=data>, datalen,149 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L149>
>>                                                                  0));150 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L150>
>>                          if (proto 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=proto> == IPPROTO_UDP 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=IPPROTO_UDP> && !*check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check>)151 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L151>
>>                                  *check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check> = CSUM_MANGLED_0 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=CSUM_MANGLED_0>;152 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L152>
>>                  }153 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L153>
>>          } else154 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L154>
>>                  inet_proto_csum_replace2 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=inet_proto_csum_replace2>(check 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=check>, skb 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=skb>,155 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L155>
>>                                           htons 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(oldlen), htons 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=htons>(datalen), true 
>> <http://lxr.free-electrons.com/ident?v=4.5;i=true>);156 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.5#L156>
>>  }
>>
>> and version 4.6:
>>
>> 126 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L126>
>>  static void nf_nat_ipv4_csum_recalc 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=nf_nat_ipv4_csum_recalc>(struct 
>> sk_buff <http://lxr.free-electrons.com/ident?v=4.6;i=sk_buff> *skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,127 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L127>
>>                                      u8 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=u8> proto 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, void *data 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=data>, __sum16 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=__sum16> *check 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=check>,128 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L128>
>>                                      int datalen, int oldlen)129 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L129>
>>  {130 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L130>
>>          if (skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed != 
>> CHECKSUM_PARTIAL 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>) {131 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L131>
>>                  const struct iphdr 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=iphdr> *iph = ip_hdr 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdr>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);132 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L132>
>>  133 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L133>
>>                  skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->ip_summed = 
>> CHECKSUM_PARTIAL 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=CHECKSUM_PARTIAL>;134 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L134>
>>                  skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_start = skb_headroom 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb_headroom>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) + skb_network_offset 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb_network_offset>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>) +135 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L135>
>>                          ip_hdrlen 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=ip_hdrlen>(skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>);136 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L136>
>>                  skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>->csum_offset = (void 
>> *)check <http://lxr.free-electrons.com/ident?v=4.6;i=check> - data 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=data>;137 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L137>
>>                  *check <http://lxr.free-electrons.com/ident?v=4.6;i=check> 
>> = ~csum_tcpudp_magic(iph->saddr 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=saddr>, iph->daddr 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=daddr>, datalen,138 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L138>
>>                                              proto 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=proto>, 0);139 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L139>
>>          } else140 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L140>
>>                  inet_proto_csum_replace2 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=inet_proto_csum_replace2>(check 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=check>, skb 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=skb>,141 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L141>
>>                                           htons 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(oldlen), htons 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=htons>(datalen), true 
>> <http://lxr.free-electrons.com/ident?v=4.6;i=true>);142 
>> <http://lxr.free-electrons.com/source/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c?v=4.6#L142>
>>  }
>>
>>
>> >  */
>>> >  if (ct->status & IPS_NAT_MASK && skb->ip_summed != CHECKSUM_PARTIAL) {
>>> > - u8 ipproto = (proto == NFPROTO_IPV4)
>>> > - ? ip_hdr(skb)->protocol : nexthdr;
>>> > - u16 offset = 0;
>>> > -
>>> > - switch (ipproto) {
>>> > - case IPPROTO_TCP:
>>> > - offset = offsetof(struct tcphdr, check);
>>> > - break;
>>> > - case IPPROTO_UDP:
>>> > - /* Skip if no csum. */
>>> > - if (udp_hdr(skb)->check)
>>> > - offset = offsetof(struct udphdr, check);
>>> > - break;
>>> > - }
>>> > - if (offset) {
>>> > - if (unlikely(!pskb_may_pull(skb, protoff + offset + 2)))
>>> > - return NF_DROP;
>>> > -
>>> > - skb->csum_start = skb_headroom(skb) + protoff;
>>> > - skb->csum_offset = offset;
>>> > - skb->ip_summed = CHECKSUM_PARTIAL;
>>> > - }
>>> > + rt = kmalloc(sizeof(struct rtable), GFP_KERNEL);
>>>
>>> Could the struct rtable be allocated from stack instead? Or could it be
>>> a static variable in the file scope? In either case we would avoid dynamic
>>> memory allocation/free. I recall setting ip_summed to CHECKSUM_PARTIAL was
>>> deemed a simpler way to backport as it (also) avoids the dynamic memory
>>> allocations.
>>>
>>>
>>
>> yes, it could be added as a stack variable. As it is only added to the
>> skb temporarily to force the checksum change then this would make more
>> sense!
>> I'll forward on a further email after this with a patch using a stack
>> based variable - I tested this patch using an FTP session across OVS.
>>
>>
>>
>>
>>> > + rt->rt_flags = RTCF_LOCAL;
>>> > + skb_dst_set(skb, (struct dst_entry *)rt);
>>> >  }
>>> > #endif
>>> >  err = helper->help(skb, protoff, ct, ctinfo);
>>> >  if (err != NF_ACCEPT)
>>> >  return err;
>>> >
>>> > +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,6,0)
>>> > + if (rt) {
>>> > + skb_dst_set(skb, NULL);
>>> > + kfree(rt);
>>> > + }
>>> > +#endif
>>> > +
>>> >  /* Adjust seqs after helper.  This is needed due to some helpers
>>> (e.g.,
>>> >  * FTP with NAT) adusting the TCP payload size when mangling IP
>>> >  * addresses and/or port numbers in the text-based control connection.
>>> >
>>> > --
>>> >
>>> > On Wed, Dec 28, 2016 at 10:08 PM, Ben Pfaff <b...@ovn.org> wrote:
>>> >
>>> >> On Wed, Dec 28, 2016 at 09:37:30PM +0000, John Hurley wrote:
>>> >>> Fix for a bug when sending a NATed packet to helper function in
>>> kernels
>>> >>> <4.6.
>>> >>>
>>> >>> Setting CHECKSUM_PARTIAL flag means packets could have L4 checksum
>>> >>> corrupted in
>>> >>>
>>> >>> datapath.c/queue_userspace_packet().
>>> >>>
>>> >>> Giving the packet an skb_dst allows the kernel to correct the
>>> checksum if
>>> >>> packet
>>> >>> mangling happens in Conntrack/NAT helpers.
>>> >>>
>>> >>> Signed-off-by: John Hurley <john.hur...@netronome.com>
>>> >>
>>> >> I'm not the right person to review this but I did notice that the
>>> patch
>>> >> is wordwrapped and otherwise space-damaged.
>>> >>
>>> > _______________________________________________
>>> > dev mailing list
>>> > d...@openvswitch.org
>>> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>
>>>
>>
>
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to