On 10/29/2018 10:54 AM, Jason Wang wrote:

On 2018/10/27 下午8:04, Jianfeng Tan wrote:
Previously, the virtio net header size was hardcoded to 10, which makes
the mrg_rxbuf feature unavailable.

We redefine the PACKET_VNET_HDR ioctl: it used to treat the user input as a
boolean, but now treats it as an int. Accepted values are 0, 10, and 12;
any other value is treated as 10.

There is one case that is treated differently: if the user input is 12,
the header size was previously 10, but it is now 12.

Signed-off-by: Jianfeng Tan <jianfeng....@linux.alibaba.com>


This should go to net-next, which is currently closed. You may consider resubmitting when it reopens.

Thank you for the reminder. We'll re-evaluate the necessity of this patch.



---
  net/packet/af_packet.c | 97 ++++++++++++++++++++++++++----------------
  net/packet/diag.c      |  2 +-
  net/packet/internal.h  |  2 +-
  3 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..1bd7f4cdcc80 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1999,18 +1999,24 @@ static unsigned int run_filter(struct sk_buff *skb,
  }
    static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
-               size_t *len)
+               size_t *len, int vnet_hdr_len)
  {
+    int res;
      struct virtio_net_hdr vnet_hdr;
  -    if (*len < sizeof(vnet_hdr))
+    if (*len < vnet_hdr_len)
          return -EINVAL;
-    *len -= sizeof(vnet_hdr);
+    *len -= vnet_hdr_len;
        if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
          return -EINVAL;
  -    return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+    res = memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
+    if (res == 0)
+        iov_iter_advance(&msg->msg_iter,
+                 vnet_hdr_len - sizeof(vnet_hdr));
+
+    return res;
  }
    /*
@@ -2206,11 +2212,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
                    po->tp_reserve;
      } else {
          unsigned int maclen = skb_network_offset(skb);
+        int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+
          netoff = TPACKET_ALIGN(po->tp_hdrlen +
                         (maclen < 16 ? 16 : maclen)) +
                         po->tp_reserve;
-        if (po->has_vnet_hdr) {
-            netoff += sizeof(struct virtio_net_hdr);
+        if (vnet_hdr_sz) {
+            netoff += vnet_hdr_sz;
              do_vnet = true;
          }
          macoff = netoff - maclen;
@@ -2429,19 +2437,6 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
      return 0;
  }
  -static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
-                 struct virtio_net_hdr *vnet_hdr)
-{
-    if (*len < sizeof(*vnet_hdr))
-        return -EINVAL;
-    *len -= sizeof(*vnet_hdr);
-
-    if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
-        return -EFAULT;
-
-    return __packet_snd_vnet_parse(vnet_hdr, *len);
-}
-
  static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
          void *frame, struct net_device *dev, void *data, int tp_len,
          __be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2609,6 +2604,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
      int len_sum = 0;
      int status = TP_STATUS_AVAILABLE;
      int hlen, tlen, copylen = 0;
+    int vnet_hdr_sz;
        mutex_lock(&po->pg_vec_lock);
  @@ -2648,7 +2644,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
      size_max = po->tx_ring.frame_size
          - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
  -    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
+    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+    if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
          size_max = dev->mtu + reserve + VLAN_HLEN;
        do {
@@ -2668,10 +2665,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
          status = TP_STATUS_SEND_REQUEST;
          hlen = LL_RESERVED_SPACE(dev);
          tlen = dev->needed_tailroom;
-        if (po->has_vnet_hdr) {
+        if (vnet_hdr_sz) {
              vnet_hdr = data;
-            data += sizeof(*vnet_hdr);
-            tp_len -= sizeof(*vnet_hdr);
+            data += vnet_hdr_sz;
+            tp_len -= vnet_hdr_sz;
              if (tp_len < 0 ||
                  __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
                  tp_len = -EINVAL;
@@ -2696,7 +2693,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
                        addr, hlen, copylen, &sockc);
          if (likely(tp_len >= 0) &&
              tp_len > dev->mtu + reserve &&
-            !po->has_vnet_hdr &&
+            !vnet_hdr_sz &&
              !packet_extra_vlan_len_allowed(dev, skb))
              tp_len = -EMSGSIZE;
  @@ -2715,7 +2712,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
              }
          }
  -        if (po->has_vnet_hdr) {
+        if (vnet_hdr_sz) {
              if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
                  tp_len = -EINVAL;
                  goto tpacket_error;
@@ -2802,9 +2799,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
      int err, reserve = 0;
      struct sockcm_cookie sockc;
      struct virtio_net_hdr vnet_hdr = { 0 };
+    int vnet_hdr_sz;
      int offset = 0;
      struct packet_sock *po = pkt_sk(sk);
-    bool has_vnet_hdr = false;
      int hlen, tlen, linear;
      int extra_len = 0;
  @@ -2844,11 +2841,29 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        if (sock->type == SOCK_RAW)
          reserve = dev->hard_header_len;
-    if (po->has_vnet_hdr) {
-        err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
-        if (err)
+
+    vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
+    if (vnet_hdr_sz) {
+        if (len < vnet_hdr_sz) {
+            err = -EINVAL;
              goto out_unlock;
-        has_vnet_hdr = true;
+        }
+        len -= vnet_hdr_sz;
+
+        if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr),
+                     &msg->msg_iter)) {
+            err = -EFAULT;
+            goto out_unlock;
+        }
+
+        if (__packet_snd_vnet_parse(&vnet_hdr, len)) {
+            err = -EINVAL;
+            goto out_unlock;
+        }


Any reason to open code packet_snd_vnet_parse() here?

No particular reason. I will try to add a parameter and keep the vnet-related code inside that function if the patch is resubmitted.



+
+        /* TODO: check hdr_len with len? */
+
+        iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(vnet_hdr));
      }
        if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2912,7 +2927,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
      skb->mark = sockc.mark;
      skb->tstamp = sockc.transmit_time;
  -    if (has_vnet_hdr) {
+    if (vnet_hdr_sz) {
          err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
          if (err)
              goto out_free;
@@ -3307,11 +3322,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
      if (pkt_sk(sk)->pressure)
          packet_rcv_has_room(pkt_sk(sk), NULL);
  -    if (pkt_sk(sk)->has_vnet_hdr) {
-        err = packet_rcv_vnet(msg, skb, &len);
+    vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
+    if (vnet_hdr_len) {
+        err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
          if (err)
              goto out_free;
-        vnet_hdr_len = sizeof(struct virtio_net_hdr);
      }
        /* You lose any data beyond the buffer you gave. If it worries
@@ -3772,7 +3787,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
          if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
              ret = -EBUSY;
          } else {
-            po->has_vnet_hdr = !!val;
+            /* Previouly we treat user input as boolean (!!val),
+             * now we treat it as int. After the below correction,
+             * the only violation case is 12, which results in
+             * vnet header size of 12 instead of 10.
+             */
+            if (val &&
+                val != sizeof(struct virtio_net_hdr) &&
+                val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
+                val = sizeof(struct virtio_net_hdr);
+
+            po->vnet_hdr_sz = val;
              ret = 0;
          }
          release_sock(sk);
@@ -3903,7 +3928,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
          val = po->origdev;
          break;
      case PACKET_VNET_HDR:
-        val = po->has_vnet_hdr;
+        val = po->vnet_hdr_sz;


So the change here is noticeable by userspace. Maybe we need a new opt for this?

Nice catch, users may assume that only 0 or 1 is returned.

Thanks,
Jianfeng


Thanks


          break;
      case PACKET_VERSION:
          val = po->tp_version;
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 7ef1c881ae74..950015b6704f 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -26,7 +26,7 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
          pinfo.pdi_flags |= PDI_AUXDATA;
      if (po->origdev)
          pinfo.pdi_flags |= PDI_ORIGDEV;
-    if (po->has_vnet_hdr)
+    if (po->vnet_hdr_sz)
          pinfo.pdi_flags |= PDI_VNETHDR;
      if (po->tp_loss)
          pinfo.pdi_flags |= PDI_LOSS;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 3bb7c5fb3bff..11bc75950f28 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -115,9 +115,9 @@ struct packet_sock {
      unsigned int        running;    /* bind_lock must be held */
      unsigned int        auxdata:1,    /* writer must hold sock lock */
                  origdev:1,
-                has_vnet_hdr:1,
                  tp_loss:1,
                  tp_tx_has_off:1;
+    int            vnet_hdr_sz;
      int            pressure;
      int            ifindex;    /* bound device        */
      __be16            num;

Reply via email to