On 08/23/2016 10:41 AM, Richard Alpe wrote:
> This patch introduces UDP replicast. A concept where we emulate
> multicast by sending multiple unicast messages to configured peers.
>
> The purpose of replicast is mainly to be able to use TIPC in cloud
> environments where IP multicast is disabled. Using replicas to unicast
> multicast messages is costly as we have to copy each skb and send the
> copies individually.
>
> Signed-off-by: Richard Alpe <[email protected]>
> ---
> include/uapi/linux/tipc_netlink.h | 1 +
> net/tipc/bearer.c | 44 +++++++++++++
> net/tipc/bearer.h | 1 +
> net/tipc/netlink.c | 5 ++
> net/tipc/udp_media.c | 126
> ++++++++++++++++++++++++++++++++++----
> net/tipc/udp_media.h | 44 +++++++++++++
> 6 files changed, 210 insertions(+), 11 deletions(-)
> create mode 100644 net/tipc/udp_media.h
>
> diff --git a/include/uapi/linux/tipc_netlink.h
> b/include/uapi/linux/tipc_netlink.h
> index bcb65ef..b15664c 100644
> --- a/include/uapi/linux/tipc_netlink.h
> +++ b/include/uapi/linux/tipc_netlink.h
> @@ -60,6 +60,7 @@ enum {
> TIPC_NL_MON_GET,
> TIPC_NL_MON_PEER_GET,
> TIPC_NL_PEER_REMOVE,
> + TIPC_NL_BEARER_ADD,
>
> __TIPC_NL_CMD_MAX,
> TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
> index 6fc4e3c..b82cb00 100644
> --- a/net/tipc/bearer.c
> +++ b/net/tipc/bearer.c
> @@ -42,6 +42,7 @@
> #include "monitor.h"
> #include "bcast.h"
> #include "netlink.h"
> +#include "udp_media.h"
>
> #define MAX_ADDR_STR 60
>
> @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct
> genl_info *info)
> return 0;
> }
>
> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
> +{
> + int err;
> + char *name;
> + struct tipc_bearer *b;
> + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
> + struct net *net = sock_net(skb->sk);
> +
> + if (!info->attrs[TIPC_NLA_BEARER])
> + return -EINVAL;
> +
> + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
> + info->attrs[TIPC_NLA_BEARER],
> + tipc_nl_bearer_policy);
> + if (err)
> + return err;
> +
> + if (!attrs[TIPC_NLA_BEARER_NAME])
> + return -EINVAL;
> + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
> +
> + rtnl_lock();
> + b = tipc_bearer_find(net, name);
> + if (!b) {
> + rtnl_unlock();
> + return -EINVAL;
> + }
> +
> +#ifdef CONFIG_TIPC_MEDIA_UDP
> + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
> + err = tipc_udp_nl_bearer_add(b,
> + attrs[TIPC_NLA_BEARER_UDP_OPTS]);
> + if (err) {
> + rtnl_unlock();
> + return err;
> + }
> + }
> +#endif
> + rtnl_unlock();
> +
> + return 0;
> +}
> +
> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
> {
> int err;
> diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
> index 83a9abb..78892e2f 100644
> --- a/net/tipc/bearer.h
> +++ b/net/tipc/bearer.h
> @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct
> genl_info *info);
> int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
> int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
>
> int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
> int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
> diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
> index 2718de6..3122f21 100644
> --- a/net/tipc/netlink.c
> +++ b/net/tipc/netlink.c
> @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
> .policy = tipc_nl_policy,
> },
> {
> + .cmd = TIPC_NL_BEARER_ADD,
> + .doit = tipc_nl_bearer_add,
> + .policy = tipc_nl_policy,
> + },
> + {
> .cmd = TIPC_NL_BEARER_SET,
> .doit = tipc_nl_bearer_set,
> .policy = tipc_nl_policy,
> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
> index b8ec1a1..d4517f4 100644
> --- a/net/tipc/udp_media.c
> +++ b/net/tipc/udp_media.c
> @@ -49,6 +49,7 @@
> #include "core.h"
> #include "bearer.h"
> #include "netlink.h"
> +#include "msg.h"
>
> /* IANA assigned UDP port */
> #define UDP_PORT_DEFAULT 6118
> @@ -70,6 +71,13 @@ struct udp_media_addr {
> };
> };
>
> +/* struct udp_replicast - container for UDP remote addresses */
> +struct udp_replicast {
> + struct udp_media_addr addr;
> + struct rcu_head rcu;
> + struct list_head list;
> +};
> +
> /**
> * struct udp_bearer - ip/udp bearer data structure
> * @bearer: associated generic tipc bearer
> @@ -82,6 +90,7 @@ struct udp_bearer {
> struct socket *ubsock;
> u32 ifindex;
> struct work_struct work;
> + struct udp_replicast rcast;
> };
>
> static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
> @@ -209,23 +218,79 @@ static int tipc_udp_send_msg(struct net *net, struct
> sk_buff *skb,
> if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
> err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
> if (err)
> - goto tx_error;
> + goto err_out;
> }
>
> skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
> ub = rcu_dereference_rtnl(b->media_ptr);
> if (!ub) {
> err = -ENODEV;
> - goto tx_error;
> + goto err_out;
> + }
> +
> + /* Replicast, send an skb to each configured IP address */
> + if (unlikely(addr->broadcast)) {
> + bool first = true;
> + struct udp_replicast *rcast;
> +
> + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
> + struct sk_buff *_skb;
> +
> + /* Avoid one extra skb copy */
> + if (first) {
> + dst = &rcast->addr;
> + first = false;
> + continue;
> + }
> +
> + _skb = pskb_copy(skb, GFP_ATOMIC);
> + if (!_skb) {
> + err = -ENOMEM;
> + goto err_out;
> + }
> +
> + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
> + if (err) {
> + kfree_skb(_skb);
> + goto err_out;
> + }
> + }
This is slightly confusing. Why don't you return here? Even when the list
is non-empty you fall through and send the original skb to the first
address in the list, which may or may not be a multicast address.
It is probably correct, but the logic is not intuitive. Maybe some
refactoring and comments would help?
///jon
> }
>
> return tipc_udp_xmit(net, skb, ub, src, dst);
>
> -tx_error:
> +err_out:
> kfree_skb(skb);
> return err;
> }
>
> +static int tipc_udp_rcast_add(struct tipc_bearer *b,
> + struct udp_media_addr *addr)
> +{
> + struct udp_replicast *rcast;
> + struct udp_bearer *ub;
> +
> + ub = rcu_dereference_rtnl(b->media_ptr);
> + if (!ub)
> + return -ENODEV;
> +
> + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
> + if (!rcast)
> + return -ENOMEM;
> +
> + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
> +
> + if (ntohs(addr->proto) == ETH_P_IP)
> + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
> +#if IS_ENABLED(CONFIG_IPV6)
> + else if (ntohs(addr->proto) == ETH_P_IPV6)
> + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
> +#endif
> +
> + list_add_rcu(&rcast->list, &ub->rcast.list);
> + return 0;
> +}
> +
> /* tipc_udp_recv - read data from bearer socket */
> static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
> {
> @@ -320,6 +385,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla,
> struct udp_media_addr *addr,
> return -EADDRNOTAVAIL;
> }
>
> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
> +{
> + int err;
> + struct udp_media_addr addr = {0};
> + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
> + struct udp_media_addr *dst;
> +
> + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
> + return -EINVAL;
> +
> + if (!opts[TIPC_NLA_UDP_REMOTE])
> + return -EINVAL;
> +
> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
> + if (err)
> + return err;
> +
> + dst = (struct udp_media_addr *)&b->bcast_addr.value;
> + if (tipc_udp_is_mcast_addr(dst)) {
> + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
> + return -EINVAL;
> + }
> +
> + return tipc_udp_rcast_add(b, &addr);
> +}
> +
> /**
> * tipc_udp_enable - callback to create a new udp bearer instance
> * @net: network namespace
> @@ -334,7 +425,7 @@ static int tipc_udp_enable(struct net *net, struct
> tipc_bearer *b,
> {
> int err = -EINVAL;
> struct udp_bearer *ub;
> - struct udp_media_addr *remote;
> + struct udp_media_addr remote = {0};
> struct udp_media_addr local = {0};
> struct udp_port_cfg udp_conf = {0};
> struct udp_tunnel_sock_cfg tuncfg = {NULL};
> @@ -344,6 +435,8 @@ static int tipc_udp_enable(struct net *net, struct
> tipc_bearer *b,
> if (!ub)
> return -ENOMEM;
>
> + INIT_LIST_HEAD(&ub->rcast.list);
> +
> if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
> goto err;
>
> @@ -362,9 +455,7 @@ static int tipc_udp_enable(struct net *net, struct
> tipc_bearer *b,
> if (err)
> goto err;
>
> - remote = (struct udp_media_addr *)&b->bcast_addr.value;
> - memset(remote, 0, sizeof(struct udp_media_addr));
> - err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL);
> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
> if (err)
> goto err;
>
> @@ -409,10 +500,17 @@ static int tipc_udp_enable(struct net *net, struct
> tipc_bearer *b,
> tuncfg.encap_destroy = NULL;
> setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
>
> - if (tipc_udp_is_mcast_addr(remote)) {
> - if (enable_mcast(ub, remote))
> - goto err;
> - }
> + /**
> + * The bcast media address port is used for all peers and the ip
> + * is used if it's a multicast address.
> + */
> + memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
> + if (tipc_udp_is_mcast_addr(&remote))
> + err = enable_mcast(ub, &remote);
> + else
> + err= tipc_udp_rcast_add(b, &remote);
> + if (err)
> + goto err;
>
> return 0;
> err:
> @@ -424,6 +522,12 @@ err:
> static void cleanup_bearer(struct work_struct *work)
> {
> struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
> + struct udp_replicast *rcast, *tmp;
> +
> + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
> + list_del_rcu(&rcast->list);
> + kfree_rcu(rcast, rcu);
> + }
>
> if (ub->ubsock)
> udp_tunnel_sock_release(ub->ubsock);
> diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
> new file mode 100644
> index 0000000..4dcb548
> --- /dev/null
> +++ b/net/tipc/udp_media.h
> @@ -0,0 +1,44 @@
> +/*
> + * net/tipc/udp_media.h: Include file for UDP bearer media
> + *
> + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
> + * Copyright (c) 2005, 2010-2011, Wind River Systems
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> met:
> + *
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + * 3. Neither the names of the copyright holders nor the names of its
> + * contributors may be used to endorse or promote products derived from
> + * this software without specific prior written permission.
> + *
> + * Alternatively, this software may be distributed under the terms of the
> + * GNU General Public License ("GPL") version 2 as published by the Free
> + * Software Foundation.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
> IS"
> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + * POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifdef CONFIG_TIPC_MEDIA_UDP
> +#ifndef _TIPC_UDP_MEDIA_H
> +#define _TIPC_UDP_MEDIA_H
> +
> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
> +
> +#endif
> +#endif
------------------------------------------------------------------------------
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion