From: Willem de Bruijn <will...@google.com>

Add MSG_ZEROCOPY support to inet/raw when passing IP_HDRINCL.
Tested:
  raw loopback test //net/socket:snd_zerocopy_lo -r -z passes:

  without zerocopy (-r):
    rx=69348 (4327 MB) tx=69348 txc=0
    rx=145590 (9085 MB) tx=145590 txc=0
    rx=219210 (13679 MB) tx=219210 txc=0
    rx=293688 (18327 MB) tx=293688 txc=0

  with zerocopy (-r -z):
    rx=258132 (16108 MB) tx=258132 txc=258122
    rx=541266 (33777 MB) tx=541266 txc=541256
    rx=822606 (51334 MB) tx=822606 txc=822596
    rx=1105776 (69005 MB) tx=1105776 txc=1105766

  raw hdrincl loopback test //net/socket:snd_zerocopy_lo -R -z passes:

  without zerocopy (-R):
    rx=101904 (6359 MB) tx=101904 txc=0
    rx=215256 (13432 MB) tx=215256 txc=0
    rx=328584 (20505 MB) tx=328584 txc=0
    rx=442008 (27583 MB) tx=442008 txc=0

  with zerocopy (-R -z):
    rx=265398 (16562 MB) tx=265398 txc=265392
    rx=558744 (34868 MB) tx=558744 txc=558738
    rx=853308 (53250 MB) tx=853308 txc=853302
    rx=1148142 (71649 MB) tx=1148142 txc=1148136

Signed-off-by: Willem de Bruijn <will...@google.com>
---
 net/ipv4/raw.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 561cd4b..c4fa57d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -347,7 +347,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, unsigned int iphlen; int err; struct rtable *rt = *rtp; - int hlen, tlen; + int hlen, tlen, linear; if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -359,8 +359,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, hlen = LL_RESERVED_SPACE(rt->dst.dev); tlen = rt->dst.dev->needed_tailroom; + linear = length; + + if (flags & MSG_ZEROCOPY && length && + sock_can_zerocopy(sk, rt, CHECKSUM_UNNECESSARY)) + linear = min_t(int, length, MAX_HEADER); + skb = sock_alloc_send_skb(sk, - length + hlen + tlen + 15, + linear + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (!skb) goto error; @@ -373,15 +379,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_reset_network_header(skb); iph = 
ip_hdr(skb); - skb_put(skb, length); + skb_put(skb, linear); skb->ip_summed = CHECKSUM_NONE; sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_from_msg(iph, msg, length)) + if (memcpy_from_msg(iph, msg, linear)) goto error_free; iphlen = iph->ihl * 4; @@ -397,6 +402,17 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, if (iphlen > length) goto error_free; + if (length != linear) { + size_t datalen = length - linear; + + if (!skb_zerocopy_alloc(skb, datalen)) + goto error_zcopy; + err = skb_zerocopy_add_frags_iter(sk, skb, &msg->msg_iter, + datalen, skb_uarg(skb)); + if (err != datalen) + goto error_zcopy; + } + if (iphlen >= sizeof(*iph)) { if (!iph->saddr) iph->saddr = fl4->saddr; @@ -420,6 +436,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, out: return 0; +error_zcopy: + sock_zerocopy_put_abort(skb_zcopy(skb)); error_free: kfree_skb(skb); error: -- 2.5.0.276.gf5e568e -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html