Hi Susant,

Thanks for this, looking forward getting this merged!

I have some comments below though.

On Fri, Apr 4, 2014 at 11:25 AM, Susant Sahani <sus...@redhat.com> wrote:
>  This patch enables basic ipip tunnel support.
> It works with kernel module ipip
>
> Example configuration
> File : ipip.netdev
>
>  [NetDev]
>  Name=ipip-tun
>  Kind=tunnel
>
>  [Tunnel]
>  Kind=ipip

Maybe we should simply have

[NetDev]
Kind=ipip

We can still use the same [Tunnel] section for each of the tunnel
kinds though. This way we are closer to the rtnl interface, and it
seems a bit simpler to me.

>  Local=192.168.8.102
>  Remote=10.4.4.4
>  Dev=em1

I don't think we should be using the interface name (anywhere, unless
we really must). I suggest we do the same with tunnel devices as with
other netdev devices. Simply add a "Tunnel=ipip-tun" to the [Network]
section of the corresponding interface and match in this way.

>  Ttl=64
>  Mtu=1480

I guess these should be upper-case, and MTUBytes should be used as in
.link files.

So to sum up, I suggest replacing your example with:

/////////////////

ipip.netdev:
[NetDev]
Name=ipip-tun
Kind=tunnel

[Tunnel]
Local=192.168.8.102
Remote=10.4.4.4
TTL=64
MTUBytes=1480

foo.network:
[Match]
Name=em1

[Network]
Tunnel=ipip-tun

//////////////

Also, we need to make sure that we only start setting up the tunnel
when the underlying device (em1) has reached the correct state, so we
really want to initiate the tunnel creation from networkd-link.c (so
hooking into this from the .network file is the most convenient).

In the future, we may want to allow a short-hand, where separate
.network and .netdev files are not necessarily in some cases, but
let's delay that for now.

>  Makefile.am                              |   7 +-
>  src/network/networkd-netdev-gperf.gperf  |   6 +
>  src/network/networkd-netdev.c            | 240 
> ++++++++++++++++++++++++++++++-
>  src/network/networkd-network-gperf.gperf |   1 +
>  src/network/networkd-network.c           |  37 +++++
>  src/network/networkd.h                   |  38 +++++
>  6 files changed, 322 insertions(+), 7 deletions(-)
>
> diff --git a/Makefile.am b/Makefile.am
> index c51f6ae..60c7016 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -4144,8 +4144,8 @@ systemd_networkd_SOURCES = \
>         src/network/networkd.c
>
>  systemd_networkd_LDADD = \
> -       libsystemd-networkd-core.la
> -
> +       libsystemd-networkd-core.la \
> +       -lkmod
>  noinst_LTLIBRARIES += \
>         libsystemd-networkd-core.la
>
> @@ -4189,7 +4189,8 @@ test_network_SOURCES = \
>         src/network/test-network.c
>
>  test_network_LDADD = \
> -       libsystemd-networkd-core.la
> +       libsystemd-networkd-core.la \
> +       -lkmod
>
>  tests += \
>         test-network
> diff --git a/src/network/networkd-netdev-gperf.gperf 
> b/src/network/networkd-netdev-gperf.gperf
> index ea7ba57..ecca2bd 100644
> --- a/src/network/networkd-netdev-gperf.gperf
> +++ b/src/network/networkd-netdev-gperf.gperf
> @@ -24,3 +24,9 @@ NetDev.Name,             config_parse_ifname,               
>  0,
>  NetDev.Kind,             config_parse_netdev_kind,           0,              
>                offsetof(NetDev, kind)
>  VLAN.Id,                 config_parse_uint64,                0,              
>                offsetof(NetDev, vlanid)
>  MACVLAN.Mode,            config_parse_macvlan_mode,          0,              
>                offsetof(NetDev, macvlan_mode)
> +Tunnel.Kind,             config_parse_tunnel_kind,           0,              
>                offsetof(NetDev, tunnel_kind)
> +Tunnel.Dev,              config_parse_ifname,                0,              
>                offsetof(NetDev, tunnel_dev)
> +Tunnel.Ttl,              config_parse_int,                   0,              
>                offsetof(NetDev, tunnel_ttl)
> +Tunnel.Mtu,              config_parse_int,                   0,              
>                offsetof(NetDev, tunnel_mtu)
> +Tunnel.Local,            config_parse_tunnel_address,        0,              
>                offsetof(NetDev, tunnel_local)
> +Tunnel.Remote,           config_parse_tunnel_address,        0,              
>                offsetof(NetDev, tunnel_remote)
> diff --git a/src/network/networkd-netdev.c b/src/network/networkd-netdev.c
> index 762eff2..6abaf12 100644
> --- a/src/network/networkd-netdev.c
> +++ b/src/network/networkd-netdev.c
> @@ -18,6 +18,12 @@
>    You should have received a copy of the GNU Lesser General Public License
>    along with systemd; If not, see <http://www.gnu.org/licenses/>.
>  ***/
> +#include <netinet/ether.h>
> +#include <arpa/inet.h>
> +#include <net/if.h>
> +#include <linux/ip.h>
> +#include <linux/if_tunnel.h>
> +#include <libkmod.h>
>
>  #include "networkd.h"
>  #include "network-internal.h"
> @@ -33,6 +39,7 @@ static const char* const 
> netdev_kind_table[_NETDEV_KIND_MAX] = {
>          [NETDEV_KIND_BOND] = "bond",
>          [NETDEV_KIND_VLAN] = "vlan",
>          [NETDEV_KIND_MACVLAN] = "macvlan",
> +        [NETDEV_KIND_TUNNEL] = "tunnel",
>  };
>
>  DEFINE_STRING_TABLE_LOOKUP(netdev_kind, NetDevKind);
> @@ -48,6 +55,16 @@ static const char* const 
> macvlan_mode_table[_NETDEV_MACVLAN_MODE_MAX] = {
>  DEFINE_STRING_TABLE_LOOKUP(macvlan_mode, MacVlanMode);
>  DEFINE_CONFIG_PARSE_ENUM(config_parse_macvlan_mode, macvlan_mode, 
> MacVlanMode, "Failed to parse macvlan mode");
>
> +static const char* const tunnel_kind_table[_TUNNEL_KIND_MAX] = {
> +        [TUNNEL_KIND_IPIP] = "ipip",
> +        [TUNNEL_KIND_GRE] = "gre",
> +        [TUNNEL_KIND_SIT] = "sit",
> +};
> +
> +DEFINE_STRING_TABLE_LOOKUP(tunnel_kind, TunnelKind);
> +DEFINE_CONFIG_PARSE_ENUM(config_parse_tunnel_kind, tunnel_kind, TunnelKind, 
> "Failed to parse tunnel kind");
> +
> +
>  void netdev_free(NetDev *netdev) {
>          netdev_enslave_callback *callback;
>
> @@ -66,6 +83,7 @@ void netdev_free(NetDev *netdev) {
>
>          free(netdev->description);
>          free(netdev->name);
> +        free(netdev->tunnel_dev);
>
>          condition_free_list(netdev->match_host);
>          condition_free_list(netdev->match_virt);
> @@ -242,6 +260,169 @@ static int netdev_create_handler(sd_rtnl *rtnl, 
> sd_rtnl_message *m, void *userda
>          return 1;
>  }
>
> +static int load_module(const char *mod_name) {
> +        struct kmod_ctx *ctx;
> +        struct kmod_list *list = NULL, *l;
> +        int r;
> +
> +        ctx = kmod_new(NULL, NULL);
> +        if (!ctx) {
> +                kmod_unref(ctx);
> +                return -ENOMEM;
> +        }

We probably want to avoid loading modules from networkd (see below),
but if we need it, then store the kmod context in the manager object,
so we only need to create it once (though may need to invalidate the
caches if/when we reload the rest of the daemon config (which we
currently don't)). The reason is that loading the configs is quite
expensive, but once they are loaded using kmod is cheap.

> +        r = kmod_module_new_from_lookup(ctx, mod_name, &list);
> +        if (r < 0)
> +                return -1;
> +
> +        kmod_list_foreach(l, list) {
> +                struct kmod_module *mod = kmod_module_get_module(l);
> +
> +                r = kmod_module_probe_insert_module(mod, 0, NULL, NULL, 
> NULL, NULL);
> +                if (r >= 0)
> +                        r = 0;
> +                else
> +                        r = -1;
> +
> +                kmod_module_unref(mod);
> +        }
> +
> +        kmod_module_unref_list(list);
> +        kmod_unref(ctx);
> +
> +        return r;
> +}
> +
> +int config_parse_tunnel_address(const char *unit,
> +                                const char *filename,
> +                                unsigned line,
> +                                const char *section,
> +                                unsigned section_line,
> +                                const char *lvalue,
> +                                int ltype,
> +                                const char *rvalue,
> +                                void *data,
> +                                void *userdata) {
> +        NetDev *n = userdata;
> +        int r;
> +
> +        assert(filename);
> +        assert(lvalue);
> +        assert(rvalue);
> +        assert(data);
> +
> +        if(streq(lvalue, "Local"))
> +                r = inet_pton(AF_INET, rvalue , (in_addr_t 
> *)&n->tunnel_local.s_addr);
> +        else
> +                r = inet_pton(AF_INET, rvalue , (in_addr_t 
> *)&n->tunnel_remote.s_addr);
> +
> +        if (r < 0) {
> +                log_syntax(unit, LOG_ERR, filename, line, EINVAL,
> +                           "Tunnel address is invalid, ignoring assignment: 
> %s", rvalue);
> +                return 0;
> +        }
> +
> +        return 0;
> +}

Hm, we can probably reuse some of the existing address parsing
functions don't you think? And we should also check the address
faimilies here right?

> +static int netdev_create_tunnel(NetDev *netdev, sd_rtnl_message *m) {
> +        int r;
> +
> +        r = sd_rtnl_message_append_string(m, IFLA_IFNAME, netdev->name);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_IFNAME, attribute: 
> %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_append_u32(m, IFLA_MTU, netdev->tunnel_mtu);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_MTU attribute: %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_LINKINFO attribute: 
> %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND,
> +                                          
> tunnel_kind_to_string(netdev->tunnel_kind));
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append  IFLA_INFO_KIND 
> attribute: %s",
> +                                 strerror(-r));
> +                return r;
> +        }

The way the sd_rtnl_message_open_container_union() works is that it
will automatically add the IFLA_INFO_KIND attribute, so you can drop
this.

> +        r = sd_rtnl_message_open_container_union(m, IFLA_INFO_DATA, "ipip");
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_INFO_DATA attribute: 
> %s",
> +                                 strerror(-r));
> +                return r;
> +        }

Maybe not hardcode "ipip", but look it up depending on the Kind?

> +        r = sd_rtnl_message_append_u32(m, IFLA_IPTUN_LINK, 
> if_nametoindex(netdev->tunnel_dev));

We should never use if_nametoindex(), both because it is racy (the
interface can be renamed at run-time) and because it is a synchronous
interfac to RTNL, which we really want to avoid. We keep track of the
ifindex of netdev's in their Link structure, so we should pass that
structure into this function and use the ifindex from there.

> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_IPTUN_LINK 
> attribute: %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_append_u32(m, IFLA_IPTUN_LOCAL, 
> netdev->tunnel_local.s_addr);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_IPTUN_LOCAL 
> attribute: %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_append_u32(m, IFLA_IPTUN_REMOTE, 
> netdev->tunnel_remote.s_addr);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_IPTUN_REMOTE 
> attribute: %s",
> +                                 strerror(-r));
> +                return r;
> +        }

Hm, I guess these should be _append_in_addr() to get the typesafety
right (might need to verify that we are using the right types for this
in rtnl-types.c.

> +        r = sd_rtnl_message_close_container(m);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_INFO_DATA attribute: 
> %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_message_close_container(m);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not append IFLA_LINKINFO attribute: 
> %s",
> +                                 strerror(-r));
> +                return r;
> +        }
> +
> +        r = sd_rtnl_call_async(netdev->manager->rtnl, m, 
> &netdev_create_handler, netdev, 0, NULL);
> +        if (r < 0) {
> +                log_error_netdev(netdev,
> +                                 "Could not send rtnetlink message: %s", 
> strerror(-r));
> +                return r;
> +        }
> +
> +        log_debug_netdev(netdev, "creating netdev tunnel");
> +
> +        netdev->state = NETDEV_STATE_CREATING;
> +
> +        return 0;
> +}
> +
>  static int netdev_create(NetDev *netdev, Link *link, 
> sd_rtnl_message_handler_t callback) {
>          _cleanup_rtnl_message_unref_ sd_rtnl_message *req = NULL;
>          const char *kind;
> @@ -262,6 +443,10 @@ static int netdev_create(NetDev *netdev, Link *link, 
> sd_rtnl_message_handler_t c
>                  return r;
>          }
>
> +        if(netdev->kind == NETDEV_KIND_TUNNEL) {
> +                return netdev_create_tunnel(netdev, req);
> +        }
> +
>          if (link) {
>                  r = sd_rtnl_message_append_u32(req, IFLA_LINK, 
> link->ifindex);
>                  if (r < 0) {
> @@ -418,9 +603,19 @@ int netdev_set_ifindex(NetDev *netdev, sd_rtnl_message 
> *message) {
>          }
>
>          if (!streq(kind, received_kind)) {
> -                log_error_netdev(netdev, "Received newlink with wrong KIND");
> -                netdev_enter_failed(netdev);
> -                return r;
> +                if(streq(kind, "tunnel")) {
> +                        if(streq(received_kind, "ipip")) {

This will be much nicer if we simply use "ipip" as the kind, rather
than "tunnel".

> +                                r = 0;
> +                        } else
> +                                r = -1;
> +                } else
> +                        r = -1;
> +
> +                if(r < 0) {
> +                        log_error_netdev(netdev, "Received newlink with 
> wrong KIND");
> +                        netdev_enter_failed(netdev);
> +                        return -EINVAL;
> +                }
>          }
>
>          r = sd_rtnl_message_link_get_ifindex(message, &ifindex);
> @@ -474,7 +669,7 @@ static int netdev_load_one(Manager *manager, const char 
> *filename) {
>          netdev->macvlan_mode = _NETDEV_MACVLAN_MODE_INVALID;
>          netdev->vlanid = VLANID_MAX + 1;
>
> -        r = config_parse(NULL, filename, file, 
> "Match\0NetDev\0VLAN\0MACVLAN\0",
> +        r = config_parse(NULL, filename, file, 
> "Match\0NetDev\0VLAN\0MACVLAN\0Tunnel\0",
>                           config_item_perf_lookup, (void*) 
> network_netdev_gperf_lookup,
>                           false, false, netdev);
>          if (r < 0) {
> @@ -510,6 +705,43 @@ static int netdev_load_one(Manager *manager, const char 
> *filename) {
>                  return 0;
>          }
>
> +
> +        if(netdev->kind == NETDEV_KIND_TUNNEL) {
> +                if(!netdev->tunnel_kind == _TUNNEL_KIND_INVALID) {
> +                        log_error_netdev(netdev, "Tunnel Kind is misssing 
> Ignoring");
> +                        return 0;
> +
> +                }
> +
> +                switch(netdev->tunnel_kind) {
> +                case TUNNEL_KIND_IPIP:
> +                        r = load_module("ipip");

This should be fixed in the kernel I think. All that is needed is to
add MODULE_ALIA_RTNL_LINK("ipip") to the ipip module (and the same for
the other modules). This is already done for many (most?) netdev
kinds, which is why this stuff "just works" for bridges, bonds, etc.

> +                        break;
> +                case TUNNEL_KIND_GRE:
> +                case TUNNEL_KIND_SIT:
> +                default:
> +                        r = -1;
> +                }
> +
> +                if (r < 0) {
> +                        log_error_netdev(netdev, "Could not load Kernel 
> module . Ignoring");
> +                        return 0;
> +
> +                }
> +
> +                if(netdev->tunnel_mtu <= 0) {
> +                        log_error_netdev(netdev, "MTU size shold be greater 
> than 0. Ignoring");
> +                        return 0;
> +                }

Hm, is this really necessary. Can we not simply ignore the MTU if it
is unset and let the kernel set a default, or does that not work?

> +                r = if_nametoindex(netdev->tunnel_dev);

Yeah, this is a no-no. We should simply wait for the device to appear
and then trigger the tunnel creation from networkd-link.c.

> +                if(!r) {
> +                        log_error_netdev(netdev,
> +                                         "Could not find interface : %s", 
> netdev->tunnel_dev);
> +                        return 0;
> +                }
> +        }
> +
>          netdev->filename = strdup(filename);
>          if (!netdev->filename)
>                  return log_oom();
> diff --git a/src/network/networkd-network-gperf.gperf 
> b/src/network/networkd-network-gperf.gperf
> index 6ba890f..bbc0f78 100644
> --- a/src/network/networkd-network-gperf.gperf
> +++ b/src/network/networkd-network-gperf.gperf
> @@ -34,6 +34,7 @@ Network.IPv4LL,              config_parse_bool,             
>      0,
>  Network.Address,             config_parse_address,               0,          
>                    0
>  Network.Gateway,             config_parse_gateway,               0,          
>                    0
>  Network.DNS,                 config_parse_dns,                   0,          
>                    offsetof(Network, dns)
> +Network.Tunnel,              config_parse_tunnel,                0,          
>                    offsetof(Network, tunnel)
>  Address.Address,             config_parse_address,               0,          
>                    0
>  Address.Broadcast,           config_parse_broadcast,             0,          
>                    0
>  Address.Label,               config_parse_label,                 0,          
>                    0
> diff --git a/src/network/networkd-network.c b/src/network/networkd-network.c
> index 47fab4e..f071e5d 100644
> --- a/src/network/networkd-network.c
> +++ b/src/network/networkd-network.c
> @@ -387,3 +387,40 @@ int config_parse_macvlan(const char *unit,
>
>          return 0;
>  }
> +
> +int config_parse_tunnel(const char *unit,
> +                        const char *filename,
> +                        unsigned line,
> +                        const char *section,
> +                        unsigned section_line,
> +                        const char *lvalue,
> +                        int ltype,
> +                        const char *rvalue,
> +                        void *data,
> +                        void *userdata) {
> +        Network *network = userdata;
> +        NetDev *netdev;
> +        int r;
> +
> +        assert(filename);
> +        assert(lvalue);
> +        assert(rvalue);
> +        assert(data);
> +
> +        r = netdev_get(network->manager, rvalue, &netdev);
> +        if (r < 0) {
> +                log_syntax(unit, LOG_ERR, filename, line, EINVAL,
> +                           "Tunnel is invalid, ignoring assignment: %s", 
> rvalue);
> +                return 0;
> +        }
> +
> +        if (netdev->kind != NETDEV_KIND_TUNNEL) {
> +                log_syntax(unit, LOG_ERR, filename, line, EINVAL,
> +                           "NetDev is not a tunnel, ignoring assignment: 
> %s", rvalue);
> +                return 0;
> +        }
> +
> +        network->tunnel = netdev;
> +
> +        return 0;
> +}
> diff --git a/src/network/networkd.h b/src/network/networkd.h
> index 36902e3..b3341b0 100644
> --- a/src/network/networkd.h
> +++ b/src/network/networkd.h
> @@ -68,6 +68,7 @@ typedef enum NetDevKind {
>          NETDEV_KIND_BOND,
>          NETDEV_KIND_VLAN,
>          NETDEV_KIND_MACVLAN,
> +        NETDEV_KIND_TUNNEL,
>          _NETDEV_KIND_MAX,
>          _NETDEV_KIND_INVALID = -1
>  } NetDevKind;
> @@ -80,6 +81,14 @@ typedef enum NetDevState {
>          _NETDEV_STATE_INVALID = -1,
>  } NetDevState;
>
> +typedef enum TunnelKind {
> +        TUNNEL_KIND_IPIP,
> +        TUNNEL_KIND_GRE,
> +        TUNNEL_KIND_SIT,
> +        _TUNNEL_KIND_MAX,
> +        _TUNNEL_KIND_INVALID = -1
> +} TunnelKind;
> +
>  struct NetDev {
>          Manager *manager;
>
> @@ -100,6 +109,13 @@ struct NetDev {
>          int ifindex;
>          NetDevState state;
>
> +        TunnelKind tunnel_kind;
> +        char *tunnel_dev;
> +        unsigned tunnel_ttl;
> +        unsigned tunnel_mtu;
> +        struct in_addr tunnel_local;
> +        struct in_addr tunnel_remote;
> +
>          LIST_HEAD(netdev_enslave_callback, callbacks);
>  };
>
> @@ -121,6 +137,7 @@ struct Network {
>          char *description;
>          NetDev *bridge;
>          NetDev *bond;
> +        NetDev *tunnel;
>          Hashmap *vlans;
>          Hashmap *macvlans;
>          bool dhcp;
> @@ -276,10 +293,16 @@ NetDevKind netdev_kind_from_string(const char *d) 
> _pure_;
>  const char *macvlan_mode_to_string(MacVlanMode d) _const_;
>  MacVlanMode macvlan_mode_from_string(const char *d) _pure_;
>
> +const char *tunnel_kind_to_string(TunnelKind d) _const_;
> +TunnelKind tunnel_kind_from_string(const char *d) _pure_;
> +
>  int config_parse_netdev_kind(const char *unit, const char *filename, 
> unsigned line, const char *section, unsigned section_line, const char 
> *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
>
>  int config_parse_macvlan_mode(const char *unit, const char *filename, 
> unsigned line, const char *section, unsigned section_line, const char 
> *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
>
> +int config_parse_tunnel_kind(const char *unit, const char *filename, 
> unsigned line, const char *section, unsigned section_line, const char 
> *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
> +
> +
>  /* gperf */
>  const struct ConfigPerfItem* network_netdev_gperf_lookup(const char *key, 
> unsigned length);
>
> @@ -311,6 +334,21 @@ int config_parse_macvlan(const char *unit, const char 
> *filename, unsigned line,
>                           const char *section, unsigned section_line, const 
> char *lvalue,
>                           int ltype, const char *rvalue, void *data, void 
> *userdata);
>
> +int config_parse_tunnel(const char *unit, const char *filename, unsigned 
> line,
> +                        const char *section, unsigned section_line, const 
> char *lvalue,
> +                        int ltype, const char *rvalue, void *data, void 
> *userdata);
> +
> +int config_parse_tunnel_address(const char *unit,
> +                                const char *filename,
> +                                unsigned line,
> +                                const char *section,
> +                                unsigned section_line,
> +                                const char *lvalue,
> +                                int ltype,
> +                                const char *rvalue,
> +                                void *data,
> +                                void *userdata);
> +
>  /* gperf */
>  const struct ConfigPerfItem* network_network_gperf_lookup(const char *key, 
> unsigned length);
>
> --
> 1.9.0
>
_______________________________________________
systemd-devel mailing list
systemd-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/systemd-devel

Reply via email to