From: Dorian Craps <dorian.cr...@student.vinci.be>

Multipath TCP (MPTCP), standardized in RFC8684 [1], is a TCP extension
that enables a TCP connection to use different paths.

Multipath TCP has been used for several use cases. On smartphones, MPTCP
enables seamless handovers between cellular and Wi-Fi networks while
preserving established connections. This use-case is what pushed Apple
to use MPTCP since 2013 in multiple applications [2]. On dual-stack
hosts, Multipath TCP enables the TCP connection to automatically use the
best performing path, either IPv4 or IPv6. If one path fails, MPTCP
automatically uses the other path.

To benefit from MPTCP, both the client and the server have to support
it. Multipath TCP is a backward-compatible TCP extension that is enabled
by default on recent Linux distributions (Debian, Ubuntu, Red Hat, ...).
Multipath TCP has been included in the Linux kernel since version 5.6
[3]. To use it on Linux, an application must explicitly enable it when
creating the socket. Nothing else needs to change in the application.

The attached patch uses MPTCP by default instead of TCP on Linux. There
is a fallback to plain TCP if the creation of the MPTCP socket fails. A
new "no-mptcp" global option has been added to the configuration to
disable MPTCP support.

It sounds good to have MPTCP enabled by default, so the client can
decide when to use it or not. If the client didn't ask to use MPTCP, the
kernel will return a "plain" TCP socket to the server application after
an "accept()". [4]

IPPROTO_MPTCP is defined in case an old libc is being used that does
not provide it. Only the running kernel can tell whether MPTCP is
supported, so it is probably best not to check that at build time.

TCP_MAXSEG is currently not supported by MPTCP: is it an issue? The
MPTCP developers have not added support for it because nobody has
requested it with a use case. If you think it is important, I can
report that to them.

Due to the limited impact within a data center environment, MPTCP
support has only been added on the listening sockets, and not between
the proxy and the servers. The high-speed, low-latency nature of data
center networks reduces the benefits of MPTCP, making the complexity of
its implementation unnecessary in this context.

Developed with the help of Matthieu Baerts (matt...@kernel.org) and
Olivier Bonaventure (olivier.bonavent...@uclouvain.be)

Link: https://www.rfc-editor.org/rfc/rfc8684.html [1]
Link: https://www.tessares.net/apples-mptcp-story-so-far/ [2]
Link: https://www.mptcp.dev/ [3]
Link: https://www.mptcp.dev/faq.html#why--when-should-mptcp-be-enabled-by-default [4]
---
 doc/configuration.txt        |  4 ++++
 include/haproxy/global-t.h   |  1 +
 include/haproxy/protocol-t.h |  7 +++++++
 src/cfgparse-global.c        |  7 ++++++-
 src/cfgparse.c               |  2 +-
 src/proto_rhttp.c            |  5 +++++
 src/proto_tcp.c              | 10 ++++++++++
 src/protocol.c               | 12 +++++++++++-
 src/sock_inet.c              | 21 +++++++++++++++++++--
 9 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index d2d654c19..85b75de33 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1342,6 +1342,7 @@ The following keywords are supported in the "global" 
section :
    - maxsslrate
    - maxzlibmem
    - no-memory-trimming
+   - no-mptcp
    - noepoll
    - noevports
    - nogetaddrinfo
@@ -2974,6 +2975,9 @@ no-memory-trimming
   nice with the new process. Note that advanced memory allocators usually do
   not suffer from such a problem.
 
+no-mptcp
+  Disables Multipath TCP (MPTCP) support when the TCP protocol is requested.
+
 noepoll
   Disables the use of the "epoll" event polling system on Linux. It is
   equivalent to the command-line argument "-de". The next polling system
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h
index 92d2c6bc1..c2b81fb50 100644
--- a/include/haproxy/global-t.h
+++ b/include/haproxy/global-t.h
@@ -85,6 +85,7 @@
 #define GTUNE_LISTENER_MQ_OPT    (1<<28)
 #define GTUNE_LISTENER_MQ_ANY    (GTUNE_LISTENER_MQ_FAIR | 
GTUNE_LISTENER_MQ_OPT)
 #define GTUNE_QUIC_CC_HYSTART    (1<<29)
+#define GTUNE_NO_MPTCP           (1<<30)
 
 #define NO_ZERO_COPY_FWD             0x0001 /* Globally disable zero-copy FF */
 #define NO_ZERO_COPY_FWD_PT          0x0002 /* disable zero-copy FF for PT 
(recv & send are disabled automatically) */
diff --git a/include/haproxy/protocol-t.h b/include/haproxy/protocol-t.h
index b85f29cc0..6a7d45c52 100644
--- a/include/haproxy/protocol-t.h
+++ b/include/haproxy/protocol-t.h
@@ -28,6 +28,12 @@
 #include <import/ebtree-t.h>
 #include <haproxy/api-t.h>
 
+#ifdef __linux__
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262 /* Linux UAPI value; fallback for libcs that lack it */
+#endif
+#endif
+
 /* some pointer types referenced below */
 struct listener;
 struct receiver;
@@ -99,6 +105,7 @@ struct protocol {
        enum proto_type proto_type;                     /* protocol type at the 
socket layer (PROTO_TYPE_*) */
        int sock_type;                                  /* socket type, as 
passed to socket()     */
        int sock_prot;                                  /* socket protocol, as 
passed to socket() */
+       int sock_prot_fb;                               /* fallback socket 
protocol, as passed to socket() */
 
        /* functions acting on the listener */
        void (*add)(struct protocol *p, struct listener *l); /* add a listener 
for this protocol */
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c
index b173511c9..523c6b633 100644
--- a/src/cfgparse-global.c
+++ b/src/cfgparse-global.c
@@ -52,7 +52,7 @@ static const char *common_kw_list[] = {
        "presetenv", "unsetenv", "resetenv", "strict-limits", "localpeer",
        "numa-cpu-mapping", "defaults", "listen", "frontend", "backend",
        "peers", "resolvers", "cluster-secret", "no-quic", "limited-quic",
-       NULL /* must be last */
+       "no-mptcp", NULL /* must be last */
 };
 
 /*
@@ -1334,6 +1334,11 @@ int cfg_parse_global(const char *file, int linenum, char 
**args, int kwm)
                        HA_ATOMIC_STORE(&global.anon_key, tmp);
                }
        }
+       else if (strcmp(args[0], "no-mptcp") == 0) {
+               if (alertif_too_many_args(0, file, linenum, args, &err_code))
+                       goto out;
+               global.tune.options |= GTUNE_NO_MPTCP;
+       }
        else {
                struct cfg_kw_list *kwl;
                const char *best;
diff --git a/src/cfgparse.c b/src/cfgparse.c
index a2f7c4a74..a7dd2ade4 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -1193,7 +1193,7 @@ int cfg_parse_mailers(const char *file, int linenum, char 
**args, int kwm)
                        goto out;
                }
 
-               if (proto->sock_prot != IPPROTO_TCP) {
+               if (proto->sock_prot != IPPROTO_TCP && proto->sock_prot_fb != 
IPPROTO_TCP) {
                        ha_alert("parsing [%s:%d] : '%s %s' : TCP not supported 
for this address family.\n",
                                 file, linenum, args[0], args[1]);
                        err_code |= ERR_ALERT | ERR_FATAL;
diff --git a/src/proto_rhttp.c b/src/proto_rhttp.c
index 2f1004cde..3860a1d46 100644
--- a/src/proto_rhttp.c
+++ b/src/proto_rhttp.c
@@ -45,7 +45,12 @@ struct protocol proto_rhttp = {
        /* socket layer */
        .proto_type     = PROTO_TYPE_STREAM,
        .sock_type      = SOCK_STREAM,
+#ifdef __linux__
+       .sock_prot      = IPPROTO_MPTCP,
+       .sock_prot_fb   = IPPROTO_TCP,
+#else
        .sock_prot      = IPPROTO_TCP,
+#endif
        .rx_listening   = rhttp_accepting_conn,
        .receivers      = LIST_HEAD_INIT(proto_rhttp.receivers),
 };
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
index 45ce27f11..6e463f823 100644
--- a/src/proto_tcp.c
+++ b/src/proto_tcp.c
@@ -80,7 +80,12 @@ struct protocol proto_tcpv4 = {
        /* socket layer */
        .proto_type     = PROTO_TYPE_STREAM,
        .sock_type      = SOCK_STREAM,
+#ifdef __linux__
+       .sock_prot      = IPPROTO_MPTCP,
+       .sock_prot_fb   = IPPROTO_TCP,
+#else
        .sock_prot      = IPPROTO_TCP,
+#endif
        .rx_enable      = sock_enable,
        .rx_disable     = sock_disable,
        .rx_unbind      = sock_unbind,
@@ -126,7 +131,12 @@ struct protocol proto_tcpv6 = {
        /* socket layer */
        .proto_type     = PROTO_TYPE_STREAM,
        .sock_type      = SOCK_STREAM,
+#ifdef __linux__
+       .sock_prot      = IPPROTO_MPTCP,
+       .sock_prot_fb   = IPPROTO_TCP,
+#else
        .sock_prot      = IPPROTO_TCP,
+#endif
        .rx_enable      = sock_enable,
        .rx_disable     = sock_disable,
        .rx_unbind      = sock_unbind,
diff --git a/src/protocol.c b/src/protocol.c
index 399835a88..ff59ade9c 100644
--- a/src/protocol.c
+++ b/src/protocol.c
@@ -104,7 +104,17 @@ int protocol_supports_flag(struct protocol *proto, uint 
flag)
                        return 1;
 
                /* run a live check */
-               ret = _sock_supports_reuseport(proto->fam, proto->sock_type, 
proto->sock_prot);
+               if (proto->sock_prot_fb > 0 && global.tune.options & 
GTUNE_NO_MPTCP) {
+                       ret = _sock_supports_reuseport(proto->fam, 
proto->sock_type, proto->sock_prot_fb);
+               } else {
+                       ret = _sock_supports_reuseport(proto->fam, 
proto->sock_type, proto->sock_prot);
+
+                       /* retry with the fall back protocol */
+                       if (proto->sock_prot_fb > 0 && !ret)
+                               ret = _sock_supports_reuseport(proto->fam, 
proto->sock_type,
+                                                              
proto->sock_prot_fb);
+               }
+
                if (!ret)
                        _HA_ATOMIC_AND(&proto->flags, 
~PROTO_F_REUSEPORT_SUPPORTED);
 
diff --git a/src/sock_inet.c b/src/sock_inet.c
index 028ffaa68..706afdb3a 100644
--- a/src/sock_inet.c
+++ b/src/sock_inet.c
@@ -327,8 +327,25 @@ int sock_inet_bind_receiver(struct receiver *rx, char 
**errmsg)
        ext = (fd >= 0);
 
        if (!ext) {
-               fd = my_socketat(rx->settings->netns, 
rx->proto->fam->sock_domain,
-                                rx->proto->sock_type, rx->proto->sock_prot);
+               if (rx->proto->sock_prot_fb > 0 && global.tune.options & 
GTUNE_NO_MPTCP) {
+                       fd = my_socketat(rx->settings->netns, 
rx->proto->fam->sock_domain,
+                                        rx->proto->sock_type, 
rx->proto->sock_prot_fb);
+               } else {
+                       fd = my_socketat(rx->settings->netns, 
rx->proto->fam->sock_domain,
+                                        rx->proto->sock_type, 
rx->proto->sock_prot);
+
+                       /*
+                       * If we failed to create a socket, try to create a 
fallback socket
+                       * with the fallback protocol.
+                       */
+                       if (rx->proto->sock_prot_fb > 0 && fd == -1) 
+                               fd = my_socketat(rx->settings->netns, 
rx->proto->fam->sock_domain,
+                                                rx->proto->sock_type, 
rx->proto->sock_prot_fb);
+                               memprintf(errmsg, "protocol %d not supported, 
falling back to %d",
+                                 rx->proto->sock_prot, 
rx->proto->sock_prot_fb);
+                               err |= ERR_ALERT;
+               }
+
                if (fd == -1) {
                        err |= ERR_RETRYABLE | ERR_ALERT;
                        memprintf(errmsg, "cannot create receiving socket 
(%s)", strerror(errno));
-- 
2.34.1


Reply via email to