Hey Serge, On Tue, Feb 18, 2014 at 11:55 PM, Serge Hallyn <[email protected]> wrote: > Quoting Stéphane Graber ([email protected]): >> On Tue, Feb 18, 2014 at 04:32:02PM -0600, Serge Hallyn wrote: >> Right, I'd be fine with us just having: >> - lxc discard lxc.network.mtu for unpriv containers > > Done implicitly by unpriv users not calling lxc_create_network(), > which calls instantiate_veth() where mtu is set. So unpriv veths > always have mtu set to 1500. > >> - lxc-user-nic mirror the bridge mtu to both interfaces of the veth pair > > the patch below does that. > >> - ensure that privileged lxc will always set the same mtu on both >> interfaces in a veth pair > > I was wrong before, that's being done now. > > From 99b3648323a468341f35e84ed9417b344b6fb8a7 Mon Sep 17 00:00:00 2001 > From: Serge Hallyn <[email protected]> > Date: Tue, 18 Feb 2014 22:42:34 -0600 > Subject: [PATCH 1/1] network: set mtu of unpriv veth to the bridge's mtu > > That's to make sure that if the bridge has a higher than 1500 > mtu, the new veth (defaulting to 1500) doesn't lower it. > > The netlink get_mtu fn is ugly. If it causes us any problems we should > ditch it in favor of /sys/class/net/$name/mtu.
Then why are we not reading that file instead? I have no objection; I'm just trying to learn the reason :) > Signed-off-by: Serge Hallyn <[email protected]> > --- > src/lxc/lxc_user_nic.c | 18 +++++++- > src/lxc/network.c | 110 > +++++++++++++++++++++++++++++++++++++++++++++++++ > src/lxc/network.h | 1 + > 3 files changed, 128 insertions(+), 1 deletion(-) > > diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c > index a67c63d..0e66549 100644 > --- a/src/lxc/lxc_user_nic.c > +++ b/src/lxc/lxc_user_nic.c > @@ -226,12 +226,18 @@ static int instanciate_veth(char *n1, char **n2) > return netdev_set_flag(n1, IFF_UP); > } > > +static int get_mtu(char *name) > +{ > + int idx = if_nametoindex(name); > + return netdev_get_mtu(idx); > +} > + > static bool create_nic(char *nic, char *br, int pid, char **cnic) > { > char *veth1buf, *veth2buf; > veth1buf = alloca(IFNAMSIZ); > veth2buf = alloca(IFNAMSIZ); > - int ret; > + int ret, mtu; > > ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic); > if (ret < 0 || ret >= IFNAMSIZ) { > @@ -245,6 +251,16 @@ static bool create_nic(char *nic, char *br, int pid, > char **cnic) > return false; > } > > + /* copy the bridge's mtu to both ends */ > + mtu = get_mtu(br); > + if (mtu != -1) { > + if (lxc_netdev_set_mtu(veth1buf, mtu) < 0 || > + lxc_netdev_set_mtu(veth2buf, mtu) < 0) { > + fprintf(stderr, "Failed setting mtu\n"); > + goto out_del; > + } > + } > + > /* attach veth1 to bridge */ > if (lxc_bridge_attach(br, veth1buf) < 0) { > fprintf(stderr, "Error attaching %s to %s\n", veth1buf, br); > diff --git a/src/lxc/network.c b/src/lxc/network.c > index 34845d1..090b9bd 100644 > --- a/src/lxc/network.c > +++ b/src/lxc/network.c > @@ -297,6 +297,116 @@ out: > return err; > } > > +int netdev_get_mtu(int ifindex) > +{ > + struct nl_handler nlh; > + struct nlmsg *nlmsg = NULL, *answer = NULL; > + struct ip_req *ip_req; > + struct nlmsghdr *msg; > + int err, res; > + int recv_len = 0, answer_len; > + int readmore = 0; > + > + err = 
netlink_open(&nlh, NETLINK_ROUTE); > + if (err) > + return err; > + > + err = -ENOMEM; > + nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); > + if (!nlmsg) > + goto out; > + > + answer = nlmsg_alloc(NLMSG_GOOD_SIZE); > + if (!answer) > + goto out; > + > + /* Save the answer buffer length, since it will be overwritten > + * on the first receive (and we might need to receive more than > + * once. */ > + answer_len = answer->nlmsghdr.nlmsg_len; > + > + ip_req = (struct ip_req *)nlmsg; > + ip_req->nlmsg.nlmsghdr.nlmsg_len = > + NLMSG_LENGTH(sizeof(struct ifaddrmsg)); > + ip_req->nlmsg.nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_DUMP; > + ip_req->nlmsg.nlmsghdr.nlmsg_type = RTM_GETLINK; > + ip_req->ifa.ifa_family = AF_UNSPEC; > + > + /* Send the request for addresses, which returns all addresses > + * on all interfaces. */ > + err = netlink_send(&nlh, nlmsg); > + if (err < 0) > + goto out; > + > + do { > + /* Restore the answer buffer length, it might have been > + * overwritten by a previous receive. */ > + answer->nlmsghdr.nlmsg_len = answer_len; > + > + /* Get the (next) batch of reply messages */ > + err = netlink_rcv(&nlh, answer); > + if (err < 0) > + goto out; > + > + recv_len = err; > + err = 0; > + > + /* Satisfy the typing for the netlink macros */ > + msg = &answer->nlmsghdr; > + > + while (NLMSG_OK(msg, recv_len)) { > + > + /* Stop reading if we see an error message */ > + if (msg->nlmsg_type == NLMSG_ERROR) { > + struct nlmsgerr *errmsg = (struct > nlmsgerr*)NLMSG_DATA(msg); > + err = errmsg->error; > + goto out; > + } > + > + /* Stop reading if we see a NLMSG_DONE message */ > + if (msg->nlmsg_type == NLMSG_DONE) { > + readmore = 0; > + break; > + } > + > + struct ifinfomsg *ifi = NLMSG_DATA(msg); > + if (ifi->ifi_index == ifindex) { > + struct rtattr *rta = IFLA_RTA(ifi); > + int attr_len = msg->nlmsg_len - > NLMSG_LENGTH(sizeof(*ifi)); > + res = 0; > + while(RTA_OK(rta, attr_len)) { > + /* Found a local address for the > requested interface, > + * return it. 
*/ > + if (rta->rta_type == IFLA_MTU) { > + memcpy(&res, RTA_DATA(rta), > sizeof(int)); > + err = res; > + goto out; > + } > + rta = RTA_NEXT(rta, attr_len); > + } > + > + } > + > + /* Keep reading more data from the socket if the > + * last message had the NLF_F_MULTI flag set */ > + readmore = (msg->nlmsg_flags & NLM_F_MULTI); > + > + /* Look at the next message received in this buffer */ > + msg = NLMSG_NEXT(msg, recv_len); > + } > + } while (readmore); > + > + /* If we end up here, we didn't find any result, so signal an > + * error */ > + err = -1; > + > +out: > + netlink_close(&nlh); > + nlmsg_free(answer); > + nlmsg_free(nlmsg); > + return err; > +} > + > int lxc_netdev_set_mtu(const char *name, int mtu) > { > struct nl_handler nlh; > diff --git a/src/lxc/network.h b/src/lxc/network.h > index 3ea99ac..97f6b4d 100644 > --- a/src/lxc/network.h > +++ b/src/lxc/network.h > @@ -138,4 +138,5 @@ extern char *lxc_mkifname(char *template); > > extern const char *lxc_net_type_to_str(int type); > extern int setup_private_host_hw_addr(char *veth1); > +extern int netdev_get_mtu(int ifindex); > #endif > -- > 1.9.rc1 > > _______________________________________________ > lxc-devel mailing list > [email protected] > http://lists.linuxcontainers.org/listinfo/lxc-devel -- S.Çağlar Onur <[email protected]> _______________________________________________ lxc-devel mailing list [email protected] http://lists.linuxcontainers.org/listinfo/lxc-devel
