[PATCH net-next] l2tp: device MTU setup, tunnel socket needs a lock

2017-04-12 Thread R. Parameswaran

The MTU overhead calculation in L2TP device set-up
merged via commit b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff
needs to be adjusted to lock the tunnel socket while
referencing the sub-data structures to derive the
socket's IP overhead.

Reported-by: Guillaume Nault <g.na...@alphalink.fr>
Tested-by: Guillaume Nault <g.na...@alphalink.fr>
Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h | 2 +-
 net/l2tp/l2tp_eth.c | 2 ++
 net/socket.c| 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index a42fab2..abcfa46 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,7 +298,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
-/* Following routine returns the IP overhead imposed by a socket.  */
+/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
 u32 kernel_sock_ip_overhead(struct sock *sk);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 138566a..b722d55 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -225,7 +225,9 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
dev->needed_headroom += session->hdr_len;
return;
}
+   lock_sock(tunnel->sock);
l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   release_sock(tunnel->sock);
if (l3_overhead == 0) {
/* L3 Overhead couldn't be identified, this could be
 * because tunnel->sock was NULL or the socket's
diff --git a/net/socket.c b/net/socket.c
index eea9970..c2564eb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3360,7 +3360,7 @@ EXPORT_SYMBOL(kernel_sock_shutdown);
 /* This routine returns the IP overhead imposed by a socket i.e.
  * the length of the underlying IP header, depending on whether
  * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket.
+ * on at the socket. Assumes that the caller has a lock on the socket.
  */
 u32 kernel_sock_ip_overhead(struct sock *sk)
 {
-- 
2.1.4



Re: [PATCH net-next v1 1/1] L2TP device MTU setup - tunnel socket needs a lock

2017-04-12 Thread R Parameswaran
Hi Guillaume,

Please see inline:

On Wed, Apr 12, 2017 at 12:53 AM, Guillaume Nault <g.na...@alphalink.fr> wrote:
> On Tue, Apr 11, 2017 at 08:14:37PM -0700, R. Parameswaran wrote:
>>
>> The MTU overhead calculation in L2TP device set-up
>> merged via commit b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff
>> needs to be adjusted to lock the tunnel socket while
>> referencing the sub-data structures to derive the
>> socket's IP overhead.
>
> Thanks.
>
> Tested-by: Guillaume Nault <g.na...@alphalink.fr>
>
> BTW, you don't need to add "v1" for the first version of a patch.
> There's also no need for numbering patches when there's only one in the
> series. And we normally prefix the commit message with "<subsystem>: ".
> For this patch, your subject would look like " [PATCH net-next] l2tp: ...".
>
> Also, you could have added a "Reported-by:" tag (I don't really mind
> in this case, but that's good practice).

Thanks for correcting these (and for testing the changes) and sorry
for the Reported-by omission. I'll respin by tonight
with these, per reply to Dave.

regards,

Ramkumar


Re: [PATCH net-next v1 1/1] L2TP device MTU setup - tunnel socket needs a lock

2017-04-12 Thread R Parameswaran
Hi Dave,

Please see inline:

On Wed, Apr 12, 2017 at 7:13 AM, David Miller <da...@davemloft.net> wrote:
> From: "R. Parameswaran" <parameswaran...@gmail.com>
> Date: Tue, 11 Apr 2017 20:14:37 -0700 (PDT)
>
>>
>> The MTU overhead calculation in L2TP device set-up
>> merged via commit b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff
>> needs to be adjusted to lock the tunnel socket while
>> referencing the sub-data structures to derive the
>> socket's IP overhead.
>
> This is missing a proper signoff.
>
> The subject line also needs to be fixed "[PATCH net-next] l2tp: " as explained
> by Guillaume.
>

Thanks, I will re-spin with these corrections by tonight PT.

regards,

Ramkumar
> Thanks.


[PATCH net-next v1 1/1] L2TP device MTU setup - tunnel socket needs a lock

2017-04-11 Thread R. Parameswaran

The MTU overhead calculation in L2TP device set-up
merged via commit b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff
needs to be adjusted to lock the tunnel socket while
referencing the sub-data structures to derive the
socket's IP overhead.
---
 include/linux/net.h | 2 +-
 net/l2tp/l2tp_eth.c | 2 ++
 net/socket.c| 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index a42fab2..abcfa46 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,7 +298,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
-/* Following routine returns the IP overhead imposed by a socket.  */
+/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
 u32 kernel_sock_ip_overhead(struct sock *sk);
 
 #define MODULE_ALIAS_NETPROTO(proto) \
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 138566a..b722d55 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -225,7 +225,9 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
dev->needed_headroom += session->hdr_len;
return;
}
+   lock_sock(tunnel->sock);
l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   release_sock(tunnel->sock);
if (l3_overhead == 0) {
/* L3 Overhead couldn't be identified, this could be
 * because tunnel->sock was NULL or the socket's
diff --git a/net/socket.c b/net/socket.c
index eea9970..c2564eb 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3360,7 +3360,7 @@ EXPORT_SYMBOL(kernel_sock_shutdown);
 /* This routine returns the IP overhead imposed by a socket i.e.
  * the length of the underlying IP header, depending on whether
  * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket.
+ * on at the socket. Assumes that the caller has a lock on the socket.
  */
 u32 kernel_sock_ip_overhead(struct sock *sk)
 {
-- 
2.1.4



Re: [PATCH net-next v5 2/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-04-11 Thread R Parameswaran
Hi Guillaume,

Please see inline:

On Tue, Apr 11, 2017 at 10:05 AM, Guillaume Nault <g.na...@alphalink.fr> wrote:
> On Tue, Apr 11, 2017 at 09:39:58AM -0700, R Parameswaran wrote:
>> Hi Guillaume,
>>
>> On Tue, Apr 11, 2017 at 3:40 AM, Guillaume Nault <g.na...@alphalink.fr> 
>> wrote:
>> > On Wed, Apr 05, 2017 at 05:00:07PM -0700, R. Parameswaran wrote:
>> >>
>> >> Change-set here uses the new kernel function, kernel_sock_ip_overhead(),
>> >> to factor the outer IP overhead on the L2TP tunnel socket (including
>> >> IP Options, if any) when calculating the default MTU for an Ethernet
>> >> pseudowire, along with consideration of the inner Ethernet header.
>> >>
>> > I get the following warning with CONFIG_LOCKDEP when creating a new
>> > session:
>> > # ip l2tp add tunnel local 10.1.8.64 remote 10.1.8.32 udp_sport 1701 
>> > udp_dport 1701 tunnel_id 1 peer_tunnel_id 1
>> > # ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
>> > ...
>>
>> Thanks for reporting this - I'll try and put up a patch soon,
>> hopefully the patch can stay in while I add this. One Q - how many CPU
>> cores do you have?
> This is a virtual machine with 4 vcores, but that shouldn't matter.
>
>> Can you give me some idea of how many tunnels and
>> sessions when you saw this?
>>
> Creating one session is enough. I simply used the following command:
> # ip l2tp add tunnel local 10.1.8.64 remote 10.1.8.32 udp_sport 1701 
> udp_dport 1701 tunnel_id 1 peer_tunnel_id 1
> # ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
>
>> I did not see this warning in my testing, possibly because
>> CONFIG_LOCKDEP_SUPPORT is turned off on the product build? Will
>> re-test with this turned on.
>>
> Yes, enabling lockdep should let you reproduce the problem.
>
> The issue goes away if the tunnel's socket is locked while calling
> kernel_sock_ip_overhead():
> +   lock_sock(tunnel->sock);
> kernel_sock_ip_overhead(tunnel->sock);
> +   release_sock(tunnel->sock);

Ack, thanks - was thinking along this line, since I see similar code
at other places in L2TP. I'll try and have a preliminary
patch out by tonight.

regards,

Ramkumar


Re: [PATCH net-next v5 2/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-04-11 Thread R Parameswaran
Hi Guillaume,

On Tue, Apr 11, 2017 at 3:40 AM, Guillaume Nault <g.na...@alphalink.fr> wrote:
> On Wed, Apr 05, 2017 at 05:00:07PM -0700, R. Parameswaran wrote:
>>
>> Change-set here uses the new kernel function, kernel_sock_ip_overhead(),
>> to factor the outer IP overhead on the L2TP tunnel socket (including
>> IP Options, if any) when calculating the default MTU for an Ethernet
>> pseudowire, along with consideration of the inner Ethernet header.
>>
> I get the following warning with CONFIG_LOCKDEP when creating a new
> session:
> # ip l2tp add tunnel local 10.1.8.64 remote 10.1.8.32 udp_sport 1701 
> udp_dport 1701 tunnel_id 1 peer_tunnel_id 1
> # ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
> ...

Thanks for reporting this - I'll try and put up a patch soon,
hopefully the patch can stay in while I add this. One Q - how many CPU
cores do you have? Can you give me some idea of how many tunnels and
sessions when you saw this?

I did not see this warning in my testing, possibly because
CONFIG_LOCKDEP_SUPPORT is turned off on the product build? Will
re-test with this turned on.

thanks,

Ramkumar

> [   45.524535] ------------[ cut here ]------------
> [   45.524570] WARNING: CPU: 3 PID: 732 at ./include/net/sock.h:1509 
> kernel_sock_ip_overhead+0x54/0x1a1
> [   45.524574] Modules linked in: l2tp_eth l2tp_netlink l2tp_core 
> ip6_udp_tunnel udp_tunnel crc32c_intel ghash_clmulni_intel pcbc aesni_intel 
> aes_x86_64 crypto_simd cryptd glue_helper evdev acpi_cpufreq processor button 
> serio_raw ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci 
> virtio_ring virtio
> [   45.524696] CPU: 3 PID: 732 Comm: ip Not tainted 4.11.0-rc5 #1
> [   45.524700] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.10.2-1 04/01/2014
> [   45.524704] Call Trace:
> [   45.524714]  dump_stack+0x67/0x90
> [   45.524725]  __warn+0xfd/0x118
> [   45.524739]  warn_slowpath_null+0x18/0x1a
> [   45.524747]  kernel_sock_ip_overhead+0x54/0x1a1
> [   45.524761]  l2tp_eth_create+0x1eb/0x557 [l2tp_eth]
> [   45.524768]  ? __mutex_unlock_slowpath+0xb5/0x2c2
> [   45.524787]  ? l2tp_eth_dev_uninit+0xd9/0xd9 [l2tp_eth]
> [   45.524800]  l2tp_nl_cmd_session_create+0x521/0x56b [l2tp_netlink]
> [   45.524827]  genl_family_rcv_msg+0x445/0x4b3
> [   45.524857]  genl_rcv_msg+0x60/0x84
> [   45.524867]  ? genl_family_rcv_msg+0x4b3/0x4b3
> [   45.524875]  netlink_rcv_skb+0x95/0x102
> [   45.524881]  ? down_read+0x41/0x62
> [   45.524893]  genl_rcv+0x23/0x32
> [   45.524901]  netlink_unicast+0x1b0/0x23b
> [   45.524915]  netlink_sendmsg+0x46f/0x48f
> [   45.524933]  ? netlink_unicast+0x23b/0x23b
> [   45.524942]  sock_sendmsg_nosec+0x41/0x51
> [   45.524953]  sock_sendmsg+0x33/0x38
> [   45.524962]  ___sys_sendmsg+0x2a0/0x374
> [   45.524991]  ? do_raw_spin_unlock+0xc2/0xcc
> [   45.525002]  ? _raw_spin_unlock+0x22/0x25
> [   45.525014]  ? match_held_lock+0x20/0x113
> [   45.525027]  ? __fget_light+0x89/0xae
> [   45.525045]  __sys_sendmsg+0x40/0x6b
> [   45.525052]  ? __sys_sendmsg+0x40/0x6b
> [   45.525075]  SyS_sendmsg+0x9/0xb
> [   45.525083]  entry_SYSCALL_64_fastpath+0x18/0xad
> [   45.525089] RIP: 0033:0x7fb224391690
> [   45.525094] RSP: 002b:7ffe53943dd8 EFLAGS: 0246 ORIG_RAX: 
> 002e
> [   45.525104] RAX: ffda RBX: 7ffe539480f0 RCX: 
> 7fb224391690
> [   45.525108] RDX:  RSI: 7ffe53943e20 RDI: 
> 0004
> [   45.525113] RBP: 810a1e49 R08:  R09: 
> 0005
> [   45.525119] R10:  R11: 0246 R12: 
> 88003436ff98
> [   45.525124] R13: 0046 R14: 7ffe539486a0 R15: 
> 7ffe53947ea0
> [   45.525136]  ? trace_hardirqs_off_caller+0x121/0x12f
> [   45.525157] ---[ end trace 0834023e7b30e761 ]---
>
> I guess you need to lock_sock(tunnel->socket) before calling
> kernel_sock_ip_overhead().


[PATCH net-next v5 0/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-04-05 Thread R. Parameswaran

Existing L2TP kernel code does not derive the optimal MTU for Ethernet
pseudowires and instead leaves this to a userspace L2TP daemon or
operator. If an MTU is not specified, the existing kernel code chooses
an MTU that does not take account of all tunnel header overheads, which
can lead to unwanted IP fragmentation. When L2TP is used without a
control plane (userspace daemon), we would prefer that the kernel does a
better job of choosing a default pseudowire MTU, taking account of all
tunnel header overheads, including IP header options, if any. This patch
addresses this.

Change-set is organized as a two part patch series, with one patch
introducing a new kernel function to compute the IP overhead on a
socket, and the other patch using this new kernel function to compute
the default L2TP MTU for an Ethernet pseudowire.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
change proposed here uses the PMTU mechanism and the dst entry in the
L2TP tunnel socket to directly pull up the underlay MTU (as the baseline
number on top of which the encapsulation headers are factored in).
A default MTU value of 1500 bytes is assumed as a fallback only if
this fails. 

Fixed the kbuild test robot error in the previous posting.

R. Parameswaran (2):
  New kernel function to get IP overhead on a socket.
  L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

 include/linux/net.h |  3 +++
 net/l2tp/l2tp_eth.c | 55 +
 net/socket.c| 46 
 3 files changed, 100 insertions(+), 4 deletions(-)

-- 
2.1.4



[PATCH net-next v5 2/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-04-05 Thread R. Parameswaran

Existing L2TP kernel code does not derive the optimal MTU for Ethernet
pseudowires and instead leaves this to a userspace L2TP daemon or
operator. If an MTU is not specified, the existing kernel code chooses
an MTU that does not take account of all tunnel header overheads, which
can lead to unwanted IP fragmentation. When L2TP is used without a
control plane (userspace daemon), we would prefer that the kernel does a
better job of choosing a default pseudowire MTU, taking account of all
tunnel header overheads, including IP header options, if any. This patch
addresses this.

Change-set here uses the new kernel function, kernel_sock_ip_overhead(),
to factor the outer IP overhead on the L2TP tunnel socket (including
IP Options, if any) when calculating the default MTU for an Ethernet
pseudowire, along with consideration of the inner Ethernet header.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 net/l2tp/l2tp_eth.c | 55 +
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5..9c18a4e 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+   u32 l3_overhead = 0;
+
+   /* if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+   overhead += sizeof(struct udphdr);
+   dev->needed_headroom += sizeof(struct udphdr);
+   }
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   return;
+   }
+   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   if (l3_overhead == 0) {
+   /* L3 Overhead couldn't be identified, this could be
+* because tunnel->sock was NULL or the socket's
+* address family was not IPv4 or IPv6,
+* dev mtu stays at 1500.
+*/
+   return;
+   }
+   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+* UDP overhead, if any, was already factored in above.
+*/
+   overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -253,12 +303,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, 
u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
 
priv = netdev_priv(dev);
priv->dev = dev;
-- 
2.1.4



[PATCH net-next v5 1/2] New kernel function to get IP overhead on a socket.

2017-04-05 Thread R. Parameswaran

A new function, kernel_sock_ip_overhead(), is provided
to calculate the cumulative overhead imposed by the IP
Header and IP options, if any, on a socket's payload.
The new function returns an overhead of zero for sockets
that do not belong to the IPv4 or IPv6 address families.
This is used in the L2TP code path to compute the
total outer IP overhead on the L2TP tunnel socket when
calculating the default MTU for Ethernet pseudowires.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h |  3 +++
 net/socket.c| 46 ++
 2 files changed, 49 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0620f5e..a42fab2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index 985ef06..eea9970 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3356,3 +3356,49 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   struct inet_sock *inet;
+   struct ip_options_rcu *opt;
+   u32 overhead = 0;
+   bool owned_by_user;
+#if IS_ENABLED(CONFIG_IPV6)
+   struct ipv6_pinfo *np;
+   struct ipv6_txoptions *optv6 = NULL;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+   if (!sk)
+   return overhead;
+
+   owned_by_user = sock_owned_by_user(sk);
+   switch (sk->sk_family) {
+   case AF_INET:
+   inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   opt = rcu_dereference_protected(inet->inet_opt,
+   owned_by_user);
+   if (opt)
+   overhead += opt->opt.optlen;
+   return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+   case AF_INET6:
+   np = inet6_sk(sk);
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+   if (optv6)
+   overhead += (optv6->opt_flen + optv6->opt_nflen);
+   return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+   return overhead;
+   }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
-- 
2.1.4



Re: [PATCH net-next v4 1/2] New kernel function to get IP overhead on a socket.

2017-04-03 Thread R. Parameswaran


Hi Dave,

Please see inline:

On Mon, 3 Apr 2017, David Miller wrote:

> From: "R. Parameswaran" <parameswaran...@gmail.com>
> Date: Mon, 3 Apr 2017 13:28:11 -0700 (PDT)
> 
> > Can I take this to mean that we do need to factor in IP options in 
> > the L2TP device MTU setup (i.e approach in the posted patch is okay)? 
> > 
> > If yes, please let me know if I can keep the socket IP option overhead 
> > calculations in a generic function, or it would be better to move it back 
> > into 
> > L2TP code? 
> 
> If the user creates and maintains this UDP socket, then yes we have to
> account for potential IP options.
> 

Can I take this to mean that the patch in its present form is 
acceptable (patch currently accounts for IP options on the socket)? 
Please let me know if any further change is needed (I'll clean up the 
krobot reported errors after this).

thanks,

Ramkumar


Re: [PATCH net-next v4 1/2] New kernel function to get IP overhead on a socket.

2017-04-03 Thread R. Parameswaran


Hi James, Dave,

Sorry for the delay (was away), please see inline:

On Fri, 24 Mar 2017, James Chapman wrote:

> On 24/03/17 01:51, R. Parameswaran wrote:
> > Hi Dave,
> >
> > Please see inline:
> >
> > On Thu, 23 Mar 2017, David Miller wrote:
> >
> >> From: "R. Parameswaran" <parameswaran...@gmail.com>
> >> Date: Wed, 22 Mar 2017 15:59:13 -0700 (PDT)
> >>
> >>> A new function, kernel_sock_ip_overhead(), is provided
> >>> to calculate the cumulative overhead imposed by the IP
> >>> Header and IP options, if any, on a socket's payload.
> >>> The new function returns an overhead of zero for sockets
> >>> that do not belong to the IPv4 or IPv6 address families.
> >>>
> >>> Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> >> Just use the IPv4/IPv6 header size for now, just like the VXLAN
> >> driver does.
> >>
> > Actually, that's how the original posting was - it was changed in 
> > response to a review comment from James Chapman requesting the IP
> > Options overhead to be factored in and for this to be calculated in
> > a new standalone function that can be reused in other situations. 
> > The review comment makes sense to me - the kernel seems to do a 
> > good job of accounting for the cumulative size of IP Options and
> > if the information is available, it may make sense to factor it in.
> >
> > I guess you are concerned about compatibility between vxlan and
> > L2TP? There may be one difference  - the socket for vxlan
> > appears to be opened/controlled entirely within kernel code (seems
> > to call udp_sock_create() which does not appear to turn on any options), 
> > but in the case of L2TP, it is possible for the tunnel socket to be 
> > opened from user space, if a user space control plane daemon is running.
> > Regardless of how user space daemons are written right now, it is 
> > possible in theory for the user space code to turn on options on the 
> > L2TP tunnel socket. So it seems that IP options might be enabled on the 
> > L2TP socket, but are probably unlikely on the vxlan socket? 
> >
> > I'd suggest giving this a few days for James to respond. 
> > At that time if there is agreement that we don't need to factor options, 
> > I can rework it.
> 
> The reason I suggested factoring in IP options here is because L2TP
> tunnel sockets can be opened by userspace daemons. When an L2TP control
> plane is used, the L2TP daemon opens the tunnel socket. When no control
> plane is used, L2TP connections are manually configured, usually by ip
> l2tp commands, and the tunnel socket is created by the kernel.
> 
> For sockets created by userspace, the L2TP daemon could derive the
> cumulative size of all IP options that it sets and use that to set the
> L2TP session's MTU. Setting an MTU when establishing L2TP connections
> overrides the kernel's own default MTU calculations anyway. But the L2TP
> packet header overhead depends on several other L2TP-specific options,
> e.g. cookie size and the kernel already takes these into account when
> calculating MTU. If IP options are ignored, we'd have the situation
> where the default MTU value avoids fragmentation but only if certain IP
> options are not enabled on the tunnel socket.
> 
> For L2TP, I'd prefer that the kernel takes IP options into account. If a
> function were added to the kernel to return the IP header overhead of an
> IP socket, initially for L2TP, I figured it might be useful for other
> tunnel protocols too, hence I suggested it was added as a generic
> function. Maybe that was a mistake. If instead this function is renamed
> and made L2TP-private, that would be ok for me.
> 
> James
> 

Can I take this to mean that we do need to factor in IP options in 
the L2TP device MTU setup (i.e approach in the posted patch is okay)? 

If yes, please let me know if I can keep the socket IP option overhead 
calculations in a generic function, or it would be better to move it back into 
L2TP code? 

There are a couple of krobot warnings which I can fix on the next upload. 

thanks,

Ramkumar

> 
> 


Re: [PATCH net-next v4 1/2] New kernel function to get IP overhead on a socket.

2017-03-23 Thread R. Parameswaran

Hi Dave,

Please see inline:

On Thu, 23 Mar 2017, David Miller wrote:

> From: "R. Parameswaran" <parameswaran...@gmail.com>
> Date: Wed, 22 Mar 2017 15:59:13 -0700 (PDT)
> 
> > A new function, kernel_sock_ip_overhead(), is provided
> > to calculate the cumulative overhead imposed by the IP
> > Header and IP options, if any, on a socket's payload.
> > The new function returns an overhead of zero for sockets
> > that do not belong to the IPv4 or IPv6 address families.
> > 
> > Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> 
> Just use the IPv4/IPv6 header size for now, just like the VXLAN
> driver does.
>

Actually, that's how the original posting was - it was changed in 
response to a review comment from James Chapman requesting the IP
Options overhead to be factored in and for this to be calculated in
a new standalone function that can be reused in other situations. 
The review comment makes sense to me - the kernel seems to do a 
good job of accounting for the cumulative size of IP Options and
if the information is available, it may make sense to factor it in.

I guess you are concerned about compatibility between vxlan and
L2TP? There may be one difference  - the socket for vxlan
appears to be opened/controlled entirely within kernel code (seems
to call udp_sock_create() which does not appear to turn on any options), 
but in the case of L2TP, it is possible for the tunnel socket to be 
opened from user space, if a user space control plane daemon is running.
Regardless of how user space daemons are written right now, it is 
possible in theory for the user space code to turn on options on the 
L2TP tunnel socket. So it seems that IP options might be enabled on the 
L2TP socket, but are probably unlikely on the vxlan socket? 

I'd suggest giving this a few days for James to respond. 
At that time if there is agreement that we don't need to factor options, 
I can rework it.

thanks,

Ramkumar
  
 
> Thanks.
> 


[PATCH net-next v4 1/2] New kernel function to get IP overhead on a socket.

2017-03-22 Thread R. Parameswaran

A new function, kernel_sock_ip_overhead(), is provided
to calculate the cumulative overhead imposed by the IP
Header and IP options, if any, on a socket's payload.
The new function returns an overhead of zero for sockets
that do not belong to the IPv4 or IPv6 address families.
This is used in the L2TP code path to compute the
total outer IP overhead on the L2TP tunnel socket when
calculating the default MTU for Ethernet pseudowires.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h |  3 +++
 net/socket.c| 44 
 2 files changed, 47 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0620f5e..a42fab2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index e034fe4..69598e1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3345,3 +3345,47 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   struct inet_sock *inet;
+   struct ipv6_pinfo *np;
+   struct ip_options_rcu *opt;
+   struct ipv6_txoptions *optv6 = NULL;
+   u32 overhead = 0;
+   bool owned_by_user;
+
+   if (!sk)
+   return overhead;
+
+   owned_by_user = sock_owned_by_user(sk);
+   switch (sk->sk_family) {
+   case AF_INET:
+   inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   opt = rcu_dereference_protected(inet->inet_opt,
+   owned_by_user);
+   if (opt)
+   overhead += opt->opt.optlen;
+   return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+   case AF_INET6:
+   np = inet6_sk(sk);
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+   if (optv6)
+   overhead += (optv6->opt_flen + optv6->opt_nflen);
+   return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+   return overhead;
+   }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
-- 
2.1.4



[PATCH net-next v4 0/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-03-22 Thread R. Parameswaran

Existing L2TP kernel code does not derive the optimal MTU for Ethernet
pseudowires and instead leaves this to a userspace L2TP daemon or
operator. If an MTU is not specified, the existing kernel code chooses
an MTU that does not take account of all tunnel header overheads, which
can lead to unwanted IP fragmentation. When L2TP is used without a
control plane (userspace daemon), we would prefer that the kernel does a
better job of choosing a default pseudowire MTU, taking account of all
tunnel header overheads, including IP header options, if any. This patch
addresses this.

Change-set is organized as a two part patch series, with one patch
introducing a new kernel function to compute the IP overhead on a
socket, and the other patch using this new kernel function to compute
the default L2TP MTU for an Ethernet pseudowire.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
change proposed here uses the PMTU mechanism and the dst entry in the
L2TP tunnel socket to directly pull up the underlay MTU (as the baseline
number on top of which the encapsulation headers are factored in).
A default MTU value of 1500 bytes is assumed as a fallback only if
this fails.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>

R. Parameswaran (2):
  New kernel function to get IP overhead on a socket.
  L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

 include/linux/net.h |  3 +++
 net/l2tp/l2tp_eth.c | 55 +
 net/socket.c| 44 ++
 3 files changed, 98 insertions(+), 4 deletions(-)

-- 
2.1.4



[PATCH net-next v4 2/2] L2TP:Adjust intf MTU, add underlay L3, L2 hdrs.

2017-03-22 Thread R. Parameswaran

Existing L2TP kernel code does not derive the optimal MTU for Ethernet
pseudowires and instead leaves this to a userspace L2TP daemon or
operator. If an MTU is not specified, the existing kernel code chooses
an MTU that does not take account of all tunnel header overheads, which
can lead to unwanted IP fragmentation. When L2TP is used without a
control plane (userspace daemon), we would prefer that the kernel does a
better job of choosing a default pseudowire MTU, taking account of all
tunnel header overheads, including IP header options, if any. This patch
addresses this.

Change-set here uses a new kernel function, kernel_sock_ip_overhead,
to factor the outer IP overhead on the L2TP tunnel socket (including
IP Options, if any) when calculating the default MTU for an Ethernet
pseudowire, along with consideration of the inner Ethernet header.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 net/l2tp/l2tp_eth.c | 55 +
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5..9c18a4e 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+   u32 l3_overhead = 0;
+
+   /* if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+   overhead += sizeof(struct udphdr);
+   dev->needed_headroom += sizeof(struct udphdr);
+   }
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   return;
+   }
+   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   if (l3_overhead == 0) {
+   /* L3 Overhead couldn't be identified, this could be
+* because tunnel->sock was NULL or the socket's
+* address family was not IPv4 or IPv6,
+* dev mtu stays at 1500.
+*/
+   return;
+   }
+   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+* UDP overhead, if any, was already factored in above.
+*/
+   overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -253,12 +303,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, 
u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
 
priv = netdev_priv(dev);
priv->dev = dev;
-- 
2.1.4



[PATCH net-next v4 1/2] New kernel function to get IP overhead on a socket.

2017-03-22 Thread R. Parameswaran

A new function, kernel_sock_ip_overhead(), is provided
to calculate the cumulative overhead imposed by the IP
Header and IP options, if any, on a socket's payload.
The new function returns an overhead of zero for sockets
that do not belong to the IPv4 or IPv6 address families.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h |  3 +++
 net/socket.c| 44 
 2 files changed, 47 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0620f5e..a42fab2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index e034fe4..69598e1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3345,3 +3345,47 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   struct inet_sock *inet;
+   struct ipv6_pinfo *np;
+   struct ip_options_rcu *opt;
+   struct ipv6_txoptions *optv6 = NULL;
+   u32 overhead = 0;
+   bool owned_by_user;
+
+   if (!sk)
+   return overhead;
+
+   owned_by_user = sock_owned_by_user(sk);
+   switch (sk->sk_family) {
+   case AF_INET:
+   inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   opt = rcu_dereference_protected(inet->inet_opt,
+   owned_by_user);
+   if (opt)
+   overhead += opt->opt.optlen;
+   return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+   case AF_INET6:
+   np = inet6_sk(sk);
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+   if (optv6)
+   overhead += (optv6->opt_flen + optv6->opt_nflen);
+   return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+   return overhead;
+   }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
-- 
2.1.4



Re: [PATCH net-next v4 1/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-20 Thread R. Parameswaran


Hi James,

Thanks for the response and suggestions, please see inline:

On Mon, 20 Mar 2017, James Chapman wrote:

> The patch comment of each patch should represent the changes of the
> patch. You seem to be using a common description for your two commits
> and this will look out of place when viewed using git log on one of the
> files modified by this patch. The patch summary line here is also
> inaccurate.
> 

For this specific patch, I was thinking of the following
header:

"New kernel API to get IP overhead on a socket.

A new API is needed to calculate the cumulative 
overhead imposed by the IP Header and IP options,
if any, on a socket's payload. Provided by the patch
here, this API is then used to determine the
default pseudowire MTU on an L2TP interface,
relative to the underlay MTU. The new API returns
an overhead of zero for sockets that do not belong
to the IPv4 or IPv6 address families."

Please feel free to edit or suggest changes.

> Are you using git format-patch? Its "Patch 0" can be useful to provide a
> summary description of a patch series to help reviewers.
>

Yes, I am using git format-patch, but was individually generating each
commit's patch. I just figured out how to generate a cover letter and
multiple patches in one shot with git format-patch, and will update with
the suggested changes in a day or so. I also tested the latest patch and
verified it to be working correctly.

thanks,

Ramkumar


 
> James
> 
> On 18/03/17 01:53, R. Parameswaran wrote:
> > In existing kernel code, when setting up the L2TP interface, all of the
> > tunnel encapsulation headers are not taken into account when setting
> > up the MTU on the  L2TP logical interface device. Due to this, the
> > packets created by the applications on top of the L2TP layer are larger
> > than they ought to be, relative to the underlay MTU, which leads to
> > needless fragmentation once the L2TP packet is encapsulated in an outer IP
> > packet.  Specifically, the MTU calculation  does not take into account the
> > (outer) IP header imposed on the encapsulated L2TP packet, and the Layer 2
> > header imposed on the inner L2TP packet prior to encapsulation.
> >
> > Change-set here (1/2) introduces a new kernel API to compute the IP overhead
> > on an IPv4 or IPv6 socket, which is then used in the L2TP code-path.
> >
> > Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> > ---
> >  include/linux/net.h |  3 +++
> >  net/socket.c| 44 
> >  2 files changed, 47 insertions(+)
> >
> > diff --git a/include/linux/net.h b/include/linux/net.h
> > index 0620f5e..a42fab2 100644
> > --- a/include/linux/net.h
> > +++ b/include/linux/net.h
> > @@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page 
> > *page, int offset,
> >  int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
> >  int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
> >  
> > +/* Following routine returns the IP overhead imposed by a socket.  */
> > +u32 kernel_sock_ip_overhead(struct sock *sk);
> > +
> >  #define MODULE_ALIAS_NETPROTO(proto) \
> > MODULE_ALIAS("net-pf-" __stringify(proto))
> >  
> > diff --git a/net/socket.c b/net/socket.c
> > index e034fe4..69598e1 100644
> > --- a/net/socket.c
> > +++ b/net/socket.c
> > @@ -3345,3 +3345,47 @@ int kernel_sock_shutdown(struct socket *sock, enum 
> > sock_shutdown_cmd how)
> > return sock->ops->shutdown(sock, how);
> >  }
> >  EXPORT_SYMBOL(kernel_sock_shutdown);
> > +
> > +/* This routine returns the IP overhead imposed by a socket i.e.
> > + * the length of the underlying IP header, depending on whether
> > + * this is an IPv4 or IPv6 socket and the length from IP options turned
> > + * on at the socket.
> > + */
> > +u32 kernel_sock_ip_overhead(struct sock *sk)
> > +{
> > +   struct inet_sock *inet;
> > +   struct ipv6_pinfo *np;
> > +   struct ip_options_rcu *opt;
> > +   struct ipv6_txoptions *optv6 = NULL;
> > +   u32 overhead = 0;
> > +   bool owned_by_user;
> > +
> > +   if (!sk)
> > +   return overhead;
> > +
> > +   owned_by_user = sock_owned_by_user(sk);
> > +   switch (sk->sk_family) {
> > +   case AF_INET:
> > +   inet = inet_sk(sk);
> > +   overhead += sizeof(struct iphdr);
> > +   opt = rcu_dereference_protected(inet->inet_opt,
> > +   owned_by_user);
> > +   if (opt)
> > +   overhead +=

Re: [PATCH net-next v4 2/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-20 Thread R. Parameswaran


Hi James,

Please see inline:

On Mon, 20 Mar 2017, James Chapman wrote:

> I suggest change the wording of the first paragraph in the patch comment
> to better represent why the changes are being made. Perhaps something
> like the following?
> 
> "Existing L2TP kernel code does not derive the optimal MTU for Ethernet
> pseudowires and instead leaves this to a userspace L2TP daemon or
> operator. If an MTU is not specified, the existing kernel code chooses
> an MTU that does not take account of all tunnel header overheads, which
> can lead to unwanted IP fragmentation. When L2TP is used without a
> control plane (userspace daemon), we would prefer that the kernel does a
> better job of choosing a default pseudowire MTU, taking account of all
> tunnel header overheads, including IP header options, if any. This patch
> addresses this."
> 

This reads quite a bit better, thanks for suggesting this. I will
pick it up. I plan to retain the second paragraph, removing the 1/2 and
2/2 references and keeping the patch rev at v4.
I'll also respond to your email on the other patch in a bit, with suggested
text which you could review/comment on. I'll re-post with changes after
that.

thanks,

Ramkumar

> 
> On 18/03/17 02:00, R. Parameswaran wrote:
> > In existing kernel code, when setting up the L2TP interface, all of the
> > tunnel encapsulation headers are not taken into account when setting
> > up the MTU on the  L2TP logical interface device. Due to this, the
> > packets created by the applications on top of the L2TP layer are larger
> > than they ought to be, relative to the underlay MTU, which leads to
> > needless fragmentation once the L2TP packet is encapsulated in an outer IP
> > packet.  Specifically, the MTU calculation  does not take into account the
> > (outer) IP header imposed on the encapsulated L2TP packet, and the Layer 2
> > header imposed on the inner L2TP packet prior to encapsulation.
> >
> > Change-set here (2/2) uses the new kernel API to compute the IP overhead
> > on an IPv4 or IPv6 socket, introduced in 1/2, in the L2TP Eth device setup
> > to factor the additional encap overheads from the underlay IP header and
> > Ethernet header on overlay (inner packet), to size the MTU on the L2TP
> > logical device to its correct value.
> >
> > Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> > ---
> >  net/l2tp/l2tp_eth.c | 55 
> > +
> >  1 file changed, 51 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
> > index 8bf18a5..f143fa4 100644
> > --- a/net/l2tp/l2tp_eth.c
> > +++ b/net/l2tp/l2tp_eth.c
> > @@ -30,6 +30,9 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> >  
> >  #include "l2tp_core.h"
> >  
> > @@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void 
> > *arg)
> >  }
> >  #endif
> >  
> > +static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
> > +   struct l2tp_session *session,
> > +   struct net_device *dev)
> > +{
> > +   unsigned int overhead = 0;
> > +   struct dst_entry *dst;
> > +   u32 l3_overhead = 0;
> > +
> > +   /* if the encap is UDP, account for UDP header size */
> > +   if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
> > +   overhead += sizeof(struct udphdr);
> > +   dev->needed_headroom += sizeof(struct udphdr);
> > +   }
> > +   if (session->mtu != 0) {
> > +   dev->mtu = session->mtu;
> > +   dev->needed_headroom += session->hdr_len;
> > +   return;
> > +   }
> > +   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
> > +   if (l3_overhead == 0) {
> > +   /* L3 Overhead couldn't be identified, this could be
> > +* because tunnel->sock was NULL or the socket's
> > +* address family was not IPv4 or IPv6,
> > +* dev mtu stays at 1500.
> > +*/
> > +   return;
> > +   }
> > +   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
> > +* UDP overhead, if any, was already factored in above.
> > +*/
> > +   overhead += session->hdr_len + ETH_HLEN + l3_overhead;
> > +
> > +   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
> > +   dst = sk_dst_get(tunnel->sock);
> > +   if (dst) {
> > +   /* dst_mtu will use PMTU if found, else fallba

[PATCH net-next v4 2/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-17 Thread R. Parameswaran

In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.  Specifically, the MTU calculation does not take into account the
(outer) IP header imposed on the encapsulated L2TP packet, and the Layer 2
header imposed on the inner L2TP packet prior to encapsulation.

Change-set here (2/2) uses the new kernel API to compute the IP overhead
on an IPv4 or IPv6 socket, introduced in 1/2, in the L2TP Eth device setup
to factor the additional encap overheads from the underlay IP header and
Ethernet header on overlay (inner packet), to size the MTU on the L2TP
logical device to its correct value.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 net/l2tp/l2tp_eth.c | 55 +
 1 file changed, 51 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5..f143fa4 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -204,6 +207,53 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+   u32 l3_overhead = 0;
+
+   /* if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+   overhead += sizeof(struct udphdr);
+   dev->needed_headroom += sizeof(struct udphdr);
+   }
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   return;
+   }
+   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   if (l3_overhead == 0) {
+   /* L3 Overhead couldn't be identified, this could be
+* because tunnel->sock was NULL or the socket's
+* address family was not IPv4 or IPv6,
+* dev mtu stays at 1500.
+*/
+   return;
+   }
+   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr
+* UDP overhead, if any, was already factored in above.
+*/
+   overhead += session->hdr_len + ETH_HLEN + l3_overhead;
+
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -253,13 +303,10 @@ static int l2tp_eth_create(struct net *net, u32 
tunnel_id, u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
 
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
priv->session = session;
-- 
2.1.4


Re: [PATCH net-next v3 1/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-17 Thread R Parameswaran
[Posting a v4 patch-set shortly, based on additional code review
comments received in internal review; please disregard the v3 patches]

On Thu, Mar 16, 2017 at 9:33 PM, R. Parameswaran
<parameswaran...@gmail.com> wrote:
>
>
> In existing kernel code, when setting up the L2TP interface, all of the
> tunnel encapsulation headers are not taken into account when setting
> up the MTU on the  L2TP logical interface device. Due to this, the
> packets created by the applications on top of the L2TP layer are larger
> than they ought to be, relative to the underlay MTU, which leads to
> needless fragmentation once the L2TP packet is encapsulated in an outer IP
> packet.
>
> Specifically, the MTU calculation  does not take into account the (outer)
> IP header imposed on the encapsulated L2TP packet, and the Layer 2 header
> imposed on the inner L2TP packet prior to encapsulation. The patch posted
> here takes care of these.
>
> Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
> patch uses the PMTU mechanism and the dst entry in the L2TP tunnel socket
> to directly pull up the underlay MTU (as the baseline number on top of
> which the encapsulation headers are factored in).  Ethernet MTU is
> assumed as a fallback only if this fails.
>
> Picked up review comments from James Chapman, added a function
> to compute ip header + ip option overhead on a socket, and factored it
> into L2TP change-set.
>
> Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> ---
>  include/linux/net.h |  3 +++
>  net/socket.c| 41 +
>  2 files changed, 44 insertions(+)
>
> diff --git a/include/linux/net.h b/include/linux/net.h
> index 0620f5e..a42fab2 100644
> --- a/include/linux/net.h
> +++ b/include/linux/net.h
> @@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page 
> *page, int offset,
>  int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
>  int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
>
> +/* Following routine returns the IP overhead imposed by a socket.  */
> +u32 kernel_sock_ip_overhead(struct sock *sk);
> +
>  #define MODULE_ALIAS_NETPROTO(proto) \
> MODULE_ALIAS("net-pf-" __stringify(proto))
>
> diff --git a/net/socket.c b/net/socket.c
> index e034fe4..af54b12 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -3345,3 +3345,44 @@ int kernel_sock_shutdown(struct socket *sock, enum 
> sock_shutdown_cmd how)
> return sock->ops->shutdown(sock, how);
>  }
>  EXPORT_SYMBOL(kernel_sock_shutdown);
> +
> +/* This routine returns the IP overhead imposed by a socket i.e.
> + * the length of the underlying IP header, depending on whether
> + * this is an IPv4 or IPv6 socket and the length from IP options turned
> + * on at the socket.
> + */
> +u32 kernel_sock_ip_overhead(struct sock *sk)
> +{
> +   struct inet_sock *inet;
> +   struct ipv6_pinfo *np;
> +   struct ip_options_rcu *opt = NULL;
> +   struct ipv6_txoptions *optv6 = NULL;
> +   u32 overhead = 0;
> +   bool owned_by_user = sock_owned_by_user(sk);
> +
> +   if (!sk)
> +   return overhead;
> +   switch (sk->sk_family) {
> +   case AF_INET:
> +   inet = inet_sk(sk);
> +   overhead += sizeof(struct iphdr);
> +   if (inet)
> +   opt = rcu_dereference_protected(inet->inet_opt,
> +   owned_by_user);
> +   if (opt)
> +   overhead += opt->opt.optlen;
> +   return overhead;
> +   case AF_INET6:
> +   np = inet6_sk(sk);
> +   overhead += sizeof(struct ipv6hdr);
> +   if (np)
> +   optv6 = rcu_dereference_protected(np->opt,
> + owned_by_user);
> +   if (optv6)
> +   overhead += (optv6->opt_flen + optv6->opt_nflen);
> +   return overhead;
> +   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
> +   return overhead;
> +   }
> +}
> +EXPORT_SYMBOL(kernel_sock_ip_overhead);
> --
> 2.1.4
>


[PATCH net-next v4 1/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-17 Thread R. Parameswaran

In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.  Specifically, the MTU calculation does not take into account the
(outer) IP header imposed on the encapsulated L2TP packet, and the Layer 2
header imposed on the inner L2TP packet prior to encapsulation.

Change-set here (1/2) introduces a new kernel API to compute the IP overhead
on an IPv4 or IPv6 socket, which is then used in the L2TP code-path.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h |  3 +++
 net/socket.c| 44 
 2 files changed, 47 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0620f5e..a42fab2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index e034fe4..69598e1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3345,3 +3345,47 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   struct inet_sock *inet;
+   struct ipv6_pinfo *np;
+   struct ip_options_rcu *opt;
+   struct ipv6_txoptions *optv6 = NULL;
+   u32 overhead = 0;
+   bool owned_by_user;
+
+   if (!sk)
+   return overhead;
+
+   owned_by_user = sock_owned_by_user(sk);
+   switch (sk->sk_family) {
+   case AF_INET:
+   inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   opt = rcu_dereference_protected(inet->inet_opt,
+   owned_by_user);
+   if (opt)
+   overhead += opt->opt.optlen;
+   return overhead;
+#if IS_ENABLED(CONFIG_IPV6)
+   case AF_INET6:
+   np = inet6_sk(sk);
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+   if (optv6)
+   overhead += (optv6->opt_flen + optv6->opt_nflen);
+   return overhead;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+   return overhead;
+   }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
-- 
2.1.4


[PATCH net-next v3 2/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-16 Thread R. Parameswaran

In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.

Specifically, the MTU calculation  does not take into account the (outer)
IP header imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner L2TP packet prior to encapsulation. The patch posted
here takes care of these.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
patch uses the PMTU mechanism and the dst entry in the L2TP tunnel socket
to directly pull up the underlay MTU (as the baseline number on top of
which the encapsulation headers are factored in).  Ethernet MTU is
assumed as a fallback only if this fails.

Picked up review comments from James Chapman, added a function
to compute ip header + ip option overhead on a socket, and factored it
into L2TP change-set.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 net/l2tp/l2tp_eth.c | 51 +++
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5..f512d97 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -204,6 +207,49 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+   u32 l3_overhead = 0;
+
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+   return;
+   }
+   overhead = session->hdr_len;
+   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   if (!tunnel->sock || (l3_overhead == 0)) {
+   /* L3 Overhead couldn't be identified, dev mtu stays at 1500 */
+   return;
+   }
+   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr */
+   overhead += ETH_HLEN + l3_overhead;
+   /* Additionally, if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+   overhead += sizeof(struct udphdr);
+   dev->needed_headroom += sizeof(struct udphdr);
+   }
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -253,12 +299,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, 
u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
dev->min_mtu = 0;
dev->max_mtu = ETH_MAX_MTU;
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
 
priv = netdev_priv(dev);
priv->dev = dev;
-- 
2.1.4



[PATCH net-next v3 1/2]L2TP:Adjust intf MTU, add underlay L3, L2 hdrs

2017-03-16 Thread R. Parameswaran


In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.

Specifically, the MTU calculation  does not take into account the (outer)
IP header imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner L2TP packet prior to encapsulation. The patch posted
here takes care of these.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
patch uses the PMTU mechanism and the dst entry in the L2TP tunnel socket
to directly pull up the underlay MTU (as the baseline number on top of
which the encapsulation headers are factored in).  Ethernet MTU is
assumed as a fallback only if this fails.

Picked up review comments from James Chapman, added a function
to compute ip header + ip option overhead on a socket, and factored it
into L2TP change-set.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>
---
 include/linux/net.h |  3 +++
 net/socket.c| 41 +
 2 files changed, 44 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0620f5e..a42fab2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index e034fe4..af54b12 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3345,3 +3345,44 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/* This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   struct inet_sock *inet;
+   struct ipv6_pinfo *np;
+   struct ip_options_rcu *opt = NULL;
+   struct ipv6_txoptions *optv6 = NULL;
+   u32 overhead = 0;
+   bool owned_by_user = sock_owned_by_user(sk);
+
+   if (!sk)
+   return overhead;
+   switch (sk->sk_family) {
+   case AF_INET:
+   inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   if (inet)
+   opt = rcu_dereference_protected(inet->inet_opt,
+   owned_by_user);
+   if (opt)
+   overhead += opt->opt.optlen;
+   return overhead;
+   case AF_INET6:
+   np = inet6_sk(sk);
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   optv6 = rcu_dereference_protected(np->opt,
+ owned_by_user);
+   if (optv6)
+   overhead += (optv6->opt_flen + optv6->opt_nflen);
+   return overhead;
+   default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
+   return overhead;
+   }
+}
+EXPORT_SYMBOL(kernel_sock_ip_overhead);
-- 
2.1.4



[RFC PATCH v3 2/2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-10-16 Thread R. Parameswaran


[v3: Picked up review comments from James Chapman, added a
 function to compute IP header + IP option overhead on a socket, and factored
 it into the L2TP change-set. This is an RFC; I would like early feedback on
 the name and placement of the new function while I test this.

 Part 2/2: Changes in l2tp_eth.c, using the new API from part 1]

>From f4066da53e781ef167055c1e89ca1a7819215a40 Mon Sep 17 00:00:00 2001
From: "R. Parameswaran" <rpara...@brocade.com>
Date: Sun, 16 Oct 2016 20:27:20 -0700

In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.

Specifically, the MTU calculation  does not take into account the (outer)
IP header imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner L2TP packet prior to encapsulation. The patch posted
here takes care of these.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
patch uses the PMTU mechanism and the dst entry in the L2TP tunnel socket
to directly pull up the underlay MTU (as the baseline number on top of
which the encapsulation headers are factored in).  Ethernet MTU is
assumed as a fallback only if this fails.

Picked up review comments from James Chapman, added a function
to compute ip header + ip option overhead on a socket, and factored it
into L2TP change-set.

Signed-off-by: nprac...@brocade.com,
Signed-off-by: bh...@brocade.com,
Signed-off-by: rshea...@brocade.com,
Signed-off-by: dfaw...@brocade.com
---
 net/l2tp/l2tp_eth.c | 51 +++
 1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e3..75eb5d3 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -206,6 +209,49 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+   u32 l3_overhead = 0;
+
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+   return;
+   }
+   overhead = session->hdr_len;
+   l3_overhead = kernel_sock_ip_overhead(tunnel->sock);
+   if (!tunnel->sock || (l3_overhead == 0)) {
+   /* L3 Overhead couldn't be identified, dev mtu stays at 1500 */
+   return;
+   }
+   /* Adjust MTU, factor overhead - underlay L3, overlay L2 hdr*/
+   overhead += ETH_HLEN + l3_overhead;
+   /* Additionally, if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   overhead += sizeof(struct udphdr);
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   /* dst_mtu will use PMTU if found, else fallback to intf MTU */
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -255,11 +301,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, 
u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
 
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
priv->session = session;
-- 
2.1.4



> 
> I think keep it simple. A function to return the size of the IP header
> associated with any IP socket, not necessarily a tunnel socket. Don't
> mix in any MTU derivation logic or UDP header size etc.
> 
> Post code early as an RFC. You're more likely to get review feedback
> from others.
> 
> 
> 
> 


[RFC PATCH v3 1/2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-10-16 Thread R. Parameswaran

[v3: Picked up review comments from James Chapman: added a
 function to compute the IP header + IP option overhead on a socket, and
 factored it into the L2TP change-set. This is an RFC; I would like early
 feedback on the name, placement, and logic of the new function while I
 test this]

>From 30c4b3900d09deb912fc6ce4af3c19e870f84e14 Mon Sep 17 00:00:00 2001
From: "R. Parameswaran" <rpara...@brocade.com>
Date: Sun, 16 Oct 2016 20:19:38 -0700

In existing kernel code, when setting up the L2TP interface, not all of
the tunnel encapsulation headers are taken into account when setting
up the MTU on the L2TP logical interface device. Due to this, the
packets created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, which leads to
needless fragmentation once the L2TP packet is encapsulated in an outer IP
packet.

Specifically, the MTU calculation  does not take into account the (outer)
IP header imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner L2TP packet prior to encapsulation. The patch posted
here takes care of these.

Existing code also seems to assume an Ethernet (non-jumbo) underlay. The
patch uses the PMTU mechanism and the dst entry in the L2TP tunnel socket
to directly pull up the underlay MTU (as the baseline number on top of
which the encapsulation headers are factored in).  Ethernet MTU is
assumed as a fallback only if this fails.

Picked up review comments from James Chapman, added a function
to compute ip header + ip option overhead on a socket, and factored it
into L2TP change-set.

Signed-off-by: nprac...@brocade.com,
Signed-off-by: bh...@brocade.com,
Signed-off-by: rshea...@brocade.com,
Signed-off-by: dfaw...@brocade.com
---
 include/linux/net.h |  3 +++
 net/socket.c| 37 +
 2 files changed, 40 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index cd0c8bd..2c8b092 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, 
int offset,
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
 
+/* Following routine returns the IP overhead imposed by a socket.  */
+u32 kernel_sock_ip_overhead(struct sock *sk);
+
 #define MODULE_ALIAS_NETPROTO(proto) \
MODULE_ALIAS("net-pf-" __stringify(proto))
 
diff --git a/net/socket.c b/net/socket.c
index 5a9bf5e..d5e79c2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3293,3 +3293,40 @@ int kernel_sock_shutdown(struct socket *sock, enum 
sock_shutdown_cmd how)
return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
+/*
+ * This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ *  this is an IPv4 or IPv6 socket and the length from IP options turned
+ *  on at the socket.
+ */
+u32 kernel_sock_ip_overhead(struct sock *sk)
+{
+   u32 overhead = 0;
+   if (!sk)
+   goto done;
+   if (sk->sk_family == AF_INET) {
+   struct ip_options_rcu *opt = NULL;
+   struct inet_sock *inet = inet_sk(sk);
+   overhead += sizeof(struct iphdr);
+   if (inet)
+   opt = rcu_dereference_protected(inet->inet_opt,
+   sock_owned_by_user(sk));
+   if (opt)
+   overhead += opt->opt.optlen;
+   }
+   else if (sk->sk_family == AF_INET6) {
+   struct ipv6_pinfo *np = inet6_sk(sk);
+   struct ipv6_txoptions *opt = NULL;
+   overhead += sizeof(struct ipv6hdr);
+   if (np)
+   opt = rcu_dereference_protected(np->opt,
+   sock_owned_by_user(sk));
+   if (opt)
+   overhead += (opt->opt_flen + opt->opt_nflen);
+   }
+
+done:
+   return overhead;
+}
+EXPORT_SYMBOL_GPL(kernel_sock_ip_overhead);
-- 
2.1.4


On Tue, 11 Oct 2016, James Chapman wrote:

> 
> I think keep it simple. A function to return the size of the IP header
> associated with any IP socket, not necessarily a tunnel socket. Don't
> mix in any MTU derivation logic or UDP header size etc.
> 
> Post code early as an RFC. You're more likely to get review feedback
> from others.
> 
> 
> 
> 


Re: [PATCH net v2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-10-03 Thread R. Parameswaran


Hi James, 

Please see inline, thanks for the reply:

On Sat, 1 Oct 2016, James Chapman wrote:

> On 30/09/16 03:39, R. Parameswaran wrote:
> >
> >>> + /* Adjust MTU, factor overhead - underlay L3 hdr, overlay L2 hdr*/
> >>> + if (tunnel->sock->sk_family == AF_INET)
> >>> + overhead += (ETH_HLEN + sizeof(struct iphdr));
> >>> + else if (tunnel->sock->sk_family == AF_INET6)
> >>> + overhead += (ETH_HLEN + sizeof(struct ipv6hdr));
> >> What about options in the IP header? If certain options are set on the
> >> socket, the IP header may be larger.
> >>
> > Thanks for the reply - It looks like IP options can only be 
> > enabled through setsockopt on an application's socket (if there's any 
> > other way to turn on IP options, please let me know - didn't see any 
> > sysctl setting for transmit). This scenario would come 
> > into picture when an application opens a raw IP or UDP socket such that it 
> > routes into the L2TP logical interface.
> 
> No. An L2TP daemon (userspace) will open a socket for each tunnel that
> it creates. Control and data packets use the same socket, which is the
> socket used by this code. It may set any options on its sockets. L2TP
> tunnel sockets can be created either by an L2TP daemon (managed tunnels)
> or by ip l2tp commands (unmanaged tunnels).
> 

One question I have is whether it would be sufficient to solve this for the
common case (i.e. no IP options) and have an expectation that the
administrator will explicitly provision the MTU using the 'ip link ...
mtu' command when dealing with infrequent occurrences like IP options?

But looking at the code, it looks to be possible to pick up whether 
options are enabled and how long the options are, from the ip_options struct 
embedded in the tunnel socket. If you want me to, I can repost the patch
with this change (will need a few days) - please let me know if this is 
what you had in mind.

thanks,

Ramkumar



> > If you take the case of a plain IP (ethernet) interface, even if an
> > application opened a socket turning on IP options, it would not change
> > the MTU of the underlying interface, and it would not affect other 
> > applications transacting packets on the same interface. I know it's not an 
> > exact parallel to this case, but since the IP option control is per 
> > application, we probably should not factor it into the L2TP logical 
> > interface?
> > We cannot affect other applications/processes running on the same L2TP 
> > tunnel. Also, since the application  using IP options knows that it has 
> > turned 
> > on IP options, maybe we can count on it to factor the size of the options 
> > into the size of the payload it sends into the socket, or set the mtu on 
> > the 
> > L2TP interface through config? 
> 
> No. See above.
> 
> >
> > Other than this, I don't see keepalives or anything else in which the 
> > kernel will source its own packet into the L2TP interface, outside of 
> > an application injected packet - if there is something like that, please
> > let me know. The user space L2TP daemon would probably fall in the 
> > category of applications.
> >
> > thanks,
> >
> > Ramkumar 
> >
> >
> 
> 
> 


Re: [PATCH net v2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-09-29 Thread R. Parameswaran

Hi James,

On Thu, 29 Sep 2016, James Chapman wrote:

> On 22/09/16 21:52, R. Parameswaran wrote:
> > From ed585bdd6d3d2b3dec58d414f514cd764d89159d Mon Sep 17 00:00:00 2001
> > From: "R. Parameswaran" <rpara...@brocade.com>
> > Date: Thu, 22 Sep 2016 13:19:25 -0700
> > Subject: [PATCH] L2TP:Adjust intf MTU,factor underlay L3,overlay L2
> >
> > Take into account all of the tunnel encapsulation headers when setting
> > up the MTU on the L2TP logical interface device. Otherwise, packets
> > created by the applications on top of the L2TP layer are larger
> > than they ought to be, relative to the underlay MTU, leading to
> > needless fragmentation once the outer IP encap is added.
> >
> > Specifically, take into account the (outer, underlay) IP header
> > imposed on the encapsulated L2TP packet, and the Layer 2 header
> > imposed on the inner IP packet prior to L2TP encapsulation.
> >
> > Do not assume an Ethernet (non-jumbo) underlay. Use the PMTU mechanism
> > and the dst entry in the L2TP tunnel socket to directly pull up
> > the underlay MTU (as the baseline number on top of which the
> > encapsulation headers are factored in).  Fall back to Ethernet MTU
> > if this fails.
> >
> > Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> >
> > Reviewed-by: "N. Prachanda" <nprac...@brocade.com>,
> > Reviewed-by: "R. Shearman" <rshea...@brocade.com>,
> > Reviewed-by: "D. Fawcus" <dfaw...@brocade.com>
> > ---
> >  net/l2tp/l2tp_eth.c | 48 
> >  1 file changed, 44 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
> > index 57fc5a4..dbcd6bd 100644
> > --- a/net/l2tp/l2tp_eth.c
> > +++ b/net/l2tp/l2tp_eth.c
> > @@ -30,6 +30,9 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> >  
> >  #include "l2tp_core.h"
> >  
> > @@ -206,6 +209,46 @@ static void l2tp_eth_show(struct seq_file *m, void 
> > *arg)
> >  }
> >  #endif
> >  
> > +static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
> > +   struct l2tp_session *session,
> > +   struct net_device *dev)
> > +{
> > +   unsigned int overhead = 0;
> > +   struct dst_entry *dst;
> > +
> > +   if (session->mtu != 0) {
> > +   dev->mtu = session->mtu;
> > +   dev->needed_headroom += session->hdr_len;
> > +   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
> > +   dev->needed_headroom += sizeof(struct udphdr);
> > +   return;
> > +   }
> > +   overhead = session->hdr_len;
> > +   /* Adjust MTU, factor overhead - underlay L3 hdr, overlay L2 hdr*/
> > +   if (tunnel->sock->sk_family == AF_INET)
> > +   overhead += (ETH_HLEN + sizeof(struct iphdr));
> > +   else if (tunnel->sock->sk_family == AF_INET6)
> > +   overhead += (ETH_HLEN + sizeof(struct ipv6hdr));
> What about options in the IP header? If certain options are set on the
> socket, the IP header may be larger.
> 

Thanks for the reply - It looks like IP options can only be 
enabled through setsockopt on an application's socket (if there's any 
other way to turn on IP options, please let me know - didn't see any 
sysctl setting for transmit). This scenario would come 
into picture when an application opens a raw IP or UDP socket such that it 
routes into the L2TP logical interface.

If you take the case of a plain IP (ethernet) interface, even if an
application opened a socket turning on IP options, it would not change
the MTU of the underlying interface, and it would not affect other 
applications transacting packets on the same interface. I know it's not an 
exact parallel to this case, but since the IP option control is per 
application, we probably should not factor it into the L2TP logical interface?
We cannot affect other applications/processes running on the same L2TP 
tunnel. Also, since the application  using IP options knows that it has turned 
on IP options, maybe we can count on it to factor the size of the options 
into the size of the payload it sends into the socket, or set the mtu on the 
L2TP interface through config? 

Other than this, I don't see keepalives or anything else in which the 
kernel will source its own packet into the L2TP interface, outside of 
an application injected packet - if there is something like that, please
let me know. The user space L2TP daemon would probably fall in the 
category of applications.

thanks

Re: [PATCH net v2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-09-28 Thread R. Parameswaran


Hi David,

Please see inline:

On Wed, 28 Sep 2016, David Miller wrote:

> From: "R. Parameswaran" <parameswaran...@gmail.com>
> Date: Tue, 27 Sep 2016 12:17:21 -0700 (PDT)
> 
> > Later, in vxlan_dev_configure(), called from vxlan_dev_create(), it gets 
> > adjusted to account for the headers:
> > 
> > vxlan_dev_configure():
> > ...
> >  if (!conf->mtu)
> > dev->mtu = lowerdev->mtu - (use_ipv6 ? 
> > VXLAN6_HEADROOM : VXLAN_HEADROOM);
> > 
> > 
> > where VXLAN_HEADROOM is defined as follows: 
> > 
> > /* IP header + UDP + VXLAN + Ethernet header */
> > #define VXLAN_HEADROOM (20 + 8 + 8 + 14)
> > /* IPv6 header + UDP + VXLAN + Ethernet header */
> > #define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
> 
> Right but I don't see it going through the effort to make use of the
> PMTU like you are.
> 
> I have another strong concern related to this.  There seems to be no
> mechanism used to propagate any PMTU events into the device's MTU.
> 
> Because if there is a limiting nexthop in the route to the other end
> of the UDP tunnel, you won't learn the PMTU until you (or some other
> entity on the machine) actually starts sending traffic to the tunnel's
> endpoint.
> 
> If the PMTU events aren't propagated into the tunnel's MTU or similar
> I think this is an ad-hoc solution.
> 
> I would suggest that you either:
> 
> 1) Do what VXLAN appears to do and ignore the PMTU
> 

I'd like to point out one difference with VXLAN - in VXLAN, the 
local physical interface is directly specified at the time of 
creation of the tunnel, and the data structure seems to have the ifindex 
of the local interface with which it is able to directly pull up the 
underlay interface device. Whereas in L2TP, we only have the IP
address of the remote tunnel end-point and thus only the socket and the 
dst from which we need to derive this. 

Also, dst_mtu references dst->ops->mtu, which if I followed the pointer
chain correctly, will dereference to ipv4_mtu() (for the IPv4 case, as
an example). The code in ipv4_mtu looks like the following:

ipv4_mtu():

unsigned int mtu = rt->rt_pmtu;

if (!mtu || time_after_eq(jiffies, rt->dst.expires))
mtu = dst_metric_raw(dst, RTAX_MTU);

if (mtu)
return mtu;

mtu = dst->dev->mtu;

if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}

return min_t(unsigned int, mtu, IP_MAX_MTU);

The code above does not depend on PMTU discovery being active. If no
PMTU-discovered MTU exists, it eventually falls back to the local
underlay device MTU - and this is the mode in which I tested the fix - PMTU
was off in my testbed, but it was picking up the local device MTU correctly.

Basically, this looks better than the VXLAN handling as far as I can 
tell - at least it will pick up the existing discovered PMTU on a best 
effort basis, while falling back to the underlay device if all else fails. 

I agree that something like 2. below would be needed in the long run (it
will need some effort and redesign - e.g. how do I look up the parent tunnel
from the socket when receiving a PMTU update, since the existing pointer
chain runs from tunnel to socket).

But since the existing (Ethernet over L2TP) MTU derivation is incorrect, I am 
hoping this may be acceptable as an interim solution. 

thanks,

Ramkumar


> 2) Add code to handle PMTU events that land on the UDP tunnel
>socket.
> 
> Thanks.
> 


Re: [PATCH net v2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-09-27 Thread R. Parameswaran

Hi David,

Thanks for the reply, please see inline:

On Tue, 27 Sep 2016, David Miller wrote:

> From: "R. Parameswaran" <parameswaran...@gmail.com>
> Date: Thu, 22 Sep 2016 13:52:43 -0700 (PDT)
> 
> > From ed585bdd6d3d2b3dec58d414f514cd764d89159d Mon Sep 17 00:00:00 2001
> > From: "R. Parameswaran" <rpara...@brocade.com>
> > Date: Thu, 22 Sep 2016 13:19:25 -0700
> > Subject: [PATCH] L2TP:Adjust intf MTU,factor underlay L3,overlay L2
> > 
> > Take into account all of the tunnel encapsulation headers when setting
> > up the MTU on the L2TP logical interface device. Otherwise, packets
> > created by the applications on top of the L2TP layer are larger
> > than they ought to be, relative to the underlay MTU, leading to
> > needless fragmentation once the outer IP encap is added.
> > 
> > Specifically, take into account the (outer, underlay) IP header
> > imposed on the encapsulated L2TP packet, and the Layer 2 header
> > imposed on the inner IP packet prior to L2TP encapsulation.
> > 
> > Do not assume an Ethernet (non-jumbo) underlay. Use the PMTU mechanism
> > and the dst entry in the L2TP tunnel socket to directly pull up
> > the underlay MTU (as the baseline number on top of which the
> > encapsulation headers are factored in).  Fall back to Ethernet MTU
> > if this fails.
> > 
> > Signed-off-by: R. Parameswaran <rpara...@brocade.com>
> > 
> > Reviewed-by: "N. Prachanda" <nprac...@brocade.com>,
> > Reviewed-by: "R. Shearman" <rshea...@brocade.com>,
> > Reviewed-by: "D. Fawcus" <dfaw...@brocade.com>
> 
> I have to ask, how do other tunnels over UDP such as VXLAN handle
> this problem?
> 

Specific to Vxlan, it appears to behave similarly.  I haven't functionally 
tested fragmentation on vxlan interfaces, but looking at the
code, it seems to account for the headers involved:


When the vxlan interface is created, from vxlan_dev_create(), in 
vxlan_setup(), it initially starts off with an ethernet MTU:

vxlan_setup(struct net_device *dev)
{
...
...
ether_setup(dev); <<<<<<< Will set device MTU to 1500


Later, in vxlan_dev_configure(), called from vxlan_dev_create(), it gets 
adjusted to account for the headers:

vxlan_dev_configure():
...
 if (!conf->mtu)
dev->mtu = lowerdev->mtu - (use_ipv6 ? 
VXLAN6_HEADROOM : VXLAN_HEADROOM);


where VXLAN_HEADROOM is defined as follows: 

/* IP header + UDP + VXLAN + Ethernet header */
#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
/* IPv6 header + UDP + VXLAN + Ethernet header */
#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)


This seems to match what I see with hand config:

sudo ip link add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth0 dstport 
4789 <<<< (eth0 has an MTU of 1500)


sudo ip -d link show vxlan0
36: vxlan0: <BROADCAST,MULTICAST> mtu 1450 qdisc noop state DOWN mode 
DEFAULT group default <<<< (1450 = 1500 -50)
link/ether e2:b8:2d:f4:f7:ae brd ff:ff:ff:ff:ff:ff promiscuity 0
vxlan id 42 group 239.1.1.1 dev eth0 srcport 32768 61000 dstport 4789 
ageing 300

thanks,

Ramkumar


Re: [PATCH] L2TP:Adjust intf MTU, add underlay L3, overlay L2

2016-09-22 Thread R. Parameswaran


On Thu, 22 Sep 2016, Derek Fawcus wrote:

> On Wed, Sep 21, 2016 at 02:11:04pm -0700, R. Parameswaran wrote:
> > 
> [snip]
> 
> > @@ -206,6 +209,46 @@ static void l2tp_eth_show(struct seq_file *m, void
> > *arg)
> >  }
> >  #endif
> [snip]
> 
> > +
> >  static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id,
> > u32 peer_session_id, struct l2tp_session_cfg *cfg)
> >  {
> > struct net_device *dev;
> > @@ -255,11 +298,8 @@ static int l2tp_eth_create(struct net *net, u32
> > tunnel_id, u32 session_id, u32 p
> > }
> > 
> 
> Your diff has whitespace errors,  probably where your MUA has decided to do
> 'intelligent' line wrapping.
> You should (re)send from a proper MUA which does not suffer from this issue.
> 
> DF
> 

Reposted the patch fixing this, and after rebasing the patch to the 
dmiller 'net' tree, verified that 'git am -c' applies the reposted patch 
successfully (after email header is removed) - thanks for identifying 
this.

regards,

Ramkumar


[PATCH net v2] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

2016-09-22 Thread R. Parameswaran
>From ed585bdd6d3d2b3dec58d414f514cd764d89159d Mon Sep 17 00:00:00 2001
From: "R. Parameswaran" <rpara...@brocade.com>
Date: Thu, 22 Sep 2016 13:19:25 -0700
Subject: [PATCH] L2TP:Adjust intf MTU,factor underlay L3,overlay L2

Take into account all of the tunnel encapsulation headers when setting
up the MTU on the L2TP logical interface device. Otherwise, packets
created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, leading to
needless fragmentation once the outer IP encap is added.

Specifically, take into account the (outer, underlay) IP header
imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner IP packet prior to L2TP encapsulation.

Do not assume an Ethernet (non-jumbo) underlay. Use the PMTU mechanism
and the dst entry in the L2TP tunnel socket to directly pull up
the underlay MTU (as the baseline number on top of which the
encapsulation headers are factored in).  Fall back to Ethernet MTU
if this fails.

Signed-off-by: R. Parameswaran <rpara...@brocade.com>

Reviewed-by: "N. Prachanda" <nprac...@brocade.com>,
Reviewed-by: "R. Shearman" <rshea...@brocade.com>,
Reviewed-by: "D. Fawcus" <dfaw...@brocade.com>
---
 net/l2tp/l2tp_eth.c | 48 
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 57fc5a4..dbcd6bd 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #include "l2tp_core.h"
 
@@ -206,6 +209,46 @@ static void l2tp_eth_show(struct seq_file *m, void *arg)
 }
 #endif
 
+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+   return;
+   }
+   overhead = session->hdr_len;
+   /* Adjust MTU, factor overhead - underlay L3 hdr, overlay L2 hdr*/
+   if (tunnel->sock->sk_family == AF_INET)
+   overhead += (ETH_HLEN + sizeof(struct iphdr));
+   else if (tunnel->sock->sk_family == AF_INET6)
+   overhead += (ETH_HLEN + sizeof(struct ipv6hdr));
+   /* Additionally, if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   overhead += sizeof(struct udphdr);
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   /* else (no PMTUD) L2TP dev MTU defaulted to Ethernet MTU in caller */
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 
peer_session_id, struct l2tp_session_cfg *cfg)
 {
struct net_device *dev;
@@ -255,11 +298,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, 
u32 session_id, u32 p
}
 
dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;
 
+   l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
priv->session = session;
-- 
2.1.4



[PATCH] L2TP:Adjust intf MTU, add underlay L3, overlay L2

2016-09-21 Thread R. Parameswaran


Take into account all of the tunnel encapsulation headers when setting
up the MTU on the L2TP logical interface device. Otherwise, packets
created by the applications on top of the L2TP layer are larger
than they ought to be, relative to the underlay MTU, leading to
needless fragmentation once the outer IP encap is added.

Specifically, take into account the (outer, underlay) IP header
imposed on the encapsulated L2TP packet, and the Layer 2 header
imposed on the inner IP packet prior to L2TP encapsulation.

Do not assume an Ethernet (non-jumbo) underlay. Use the PMTU mechanism
and the dst entry in the L2TP tunnel socket to directly pull up
the underlay MTU (as the baseline number on top of which the
encapsulation headers are factored in).  Fall back to Ethernet MTU
if this fails.

Signed-off-by: Ramkumar Parameswaran 

Reviewed-by: N. Prachanda ,
 R. Shearman  ,
 D. Fawcus


---
 net/l2tp/l2tp_eth.c | 48 
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 57fc5a4..dbcd6bd 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 

 #include "l2tp_core.h"

@@ -206,6 +209,46 @@ static void l2tp_eth_show(struct seq_file *m, void 
*arg)

 }
 #endif

+static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel,
+   struct l2tp_session *session,
+   struct net_device *dev)
+{
+   unsigned int overhead = 0;
+   struct dst_entry *dst;
+
+   if (session->mtu != 0) {
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+   return;
+   }
+   overhead = session->hdr_len;
+   /* Adjust MTU, factor overhead - underlay L3 hdr, overlay L2 hdr*/
+   if (tunnel->sock->sk_family == AF_INET)
+   overhead += (ETH_HLEN + sizeof(struct iphdr));
+   else if (tunnel->sock->sk_family == AF_INET6)
+   overhead += (ETH_HLEN + sizeof(struct ipv6hdr));
+   /* Additionally, if the encap is UDP, account for UDP header size */
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   overhead += sizeof(struct udphdr);
+   /* If PMTU discovery was enabled, use discovered MTU on L2TP device */
+   dst = sk_dst_get(tunnel->sock);
+   if (dst) {
+   u32 pmtu = dst_mtu(dst);
+
+   if (pmtu != 0)
+   dev->mtu = pmtu;
+   dst_release(dst);
+   }
+   /* else (no PMTUD) L2TP dev MTU defaulted to Ethernet MTU in caller */
+   session->mtu = dev->mtu - overhead;
+   dev->mtu = session->mtu;
+   dev->needed_headroom += session->hdr_len;
+   if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+   dev->needed_headroom += sizeof(struct udphdr);
+}
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 
session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)

 {
struct net_device *dev;
@@ -255,11 +298,8 @@ static int l2tp_eth_create(struct net *net, u32 
tunnel_id, u32 session_id, u32 p

}

dev_net_set(dev, net);
-   if (session->mtu == 0)
-   session->mtu = dev->mtu - session->hdr_len;
-   dev->mtu = session->mtu;
-   dev->needed_headroom += session->hdr_len;

+   l2tp_eth_adjust_mtu(tunnel, session, dev);
priv = netdev_priv(dev);
priv->dev = dev;
priv->session = session;
--
2.1.4