This is a well-known diff, originally from thib and dlg, with a length fix
from yours truly, that roughly doubles throughput (from 300kpps to
500-600kpps on selected hardware).
The idea is to save only the IP header and 8 bytes of payload (good enough
for tcp state tracking) instead of the recommended 68 bytes.
We've been running with this diff for quite some time and haven't seen
any problems, though there were some concerns in the past. Please
evaluate it for inclusion once again.
diff --git sys/netinet/ip_input.c sys/netinet/ip_input.c
index 7fa3d66..28b2b47 100644
--- sys/netinet/ip_input.c
+++ sys/netinet/ip_input.c
@@ -1450,12 +1450,12 @@ int inetctlerrmap[PRC_NCMDS] = {
void
ip_forward(struct mbuf *m, int srcrt)
{
+ struct mbuf mfake, *mcopy = NULL;
struct ip *ip = mtod(m, struct ip *);
struct sockaddr_in *sin;
struct rtentry *rt;
- int error, type = 0, code = 0, destmtu = 0;
+ int error, type = 0, code = 0, destmtu = 0, fake = 0, len;
u_int rtableid = 0;
- struct mbuf *mcopy;
n_long dest;
dest = 0;
@@ -1498,13 +1498,18 @@ ip_forward(struct mbuf *m, int srcrt)
}
/*
- * Save at most 68 bytes of the packet in case
- * we need to generate an ICMP message to the src.
- * Pullup to avoid sharing mbuf cluster between m and mcopy.
+ * Fake an mbuf in case we'll need to generate an ICMP message
*/
- mcopy = m_copym(m, 0, min(ntohs(ip->ip_len), 68), M_DONTWAIT);
- if (mcopy)
- mcopy = m_pullup(mcopy, min(ntohs(ip->ip_len), 68));
+ bzero(&mfake.m_hdr, sizeof(mfake.m_hdr));
+ mfake.m_type = m->m_type;
+ if (m_dup_pkthdr(&mfake, m, M_DONTWAIT) == 0) {
+ mfake.m_data = mfake.m_pktdat;
+ len = ((ip->ip_hl << 2) + min(ntohs(ip->ip_len) -
+ (ip->ip_hl << 2), 8));
+ m_copydata(m, 0, len, mfake.m_pktdat);
+ mfake.m_pkthdr.len = mfake.m_len = len;
+ fake = 1;
+ }
ip->ip_ttl -= IPTTLDEC;
@@ -1553,8 +1558,6 @@ ip_forward(struct mbuf *m, int srcrt)
else
goto freecopy;
}
- if (mcopy == NULL)
- goto freert;
switch (error) {
@@ -1604,13 +1607,15 @@ ip_forward(struct mbuf *m, int srcrt)
goto freecopy;
}
- icmp_error(mcopy, type, code, dest, destmtu);
- goto freert;
+ if (fake) {
+ mcopy = m_copym(&mfake, 0, len, M_DONTWAIT);
+ if (mcopy)
+ icmp_error(mcopy, type, code, dest, destmtu);
+ }
freecopy:
- if (mcopy)
- m_freem(mcopy);
- freert:
+ if (fake)
+ m_tag_delete_chain(&mfake);
#ifndef SMALL_KERNEL
if (ipmultipath && ipforward_rt.ro_rt &&
(ipforward_rt.ro_rt->rt_flags & RTF_MPATH)) {