When arp entries expire on busy gateways the system will most probably
lose some packets. This comes from a) very small limits of how many
packets can be queued during lookup and b) because we expire entries
before they get refreshed.
This diff fixes these issues for me.
It removes the LA_HOLD_TOTAL limit of 100 packets which is way to low one
routers getting port scanned. It adds a check for the expiry time in the
common case to issue a arp refresh if more than 7/8 of the lifetime of the
entry passed. With the default timeout of 20min the refresh will happen
around 2.5 min before timing out and will retry after 30sec if there was
no response.
There is some further cleanup when it comes to setting rt_expire (esp. it
removes the expire of cloning routes and simplifies the code further to
what our network stack assumes).
This needs lots of testing.
--
:wq Claudio
Index: netinet/if_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.237
diff -u -p -r1.237 if_ether.c
--- netinet/if_ether.c 30 Nov 2018 09:27:56 -0000 1.237
+++ netinet/if_ether.c 17 Jan 2019 10:42:51 -0000
@@ -67,11 +67,11 @@
struct llinfo_arp {
LIST_ENTRY(llinfo_arp) la_list;
struct rtentry *la_rt; /* backpointer to rtentry */
- long la_asked; /* last time we QUERIED */
struct mbuf_list la_ml; /* packet hold queue */
+ time_t la_refreshed; /* when was refresh sent */
+ int la_asked; /* number of queries sent */
};
#define LA_HOLD_QUEUE 10
-#define LA_HOLD_TOTAL 100
/* timer values */
int arpt_prune = (5 * 60); /* walk list every 5 minutes */
@@ -119,7 +119,7 @@ arptimer(void *arg)
LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
struct rtentry *rt = la->la_rt;
- if (rt->rt_expire && rt->rt_expire <= time_uptime)
+ if (rt->rt_expire && rt->rt_expire < time_uptime)
arptfree(rt); /* timer has expired; clear */
}
NET_UNLOCK();
@@ -130,7 +130,6 @@ arp_rtrequest(struct ifnet *ifp, int req
{
struct sockaddr *gate = rt->rt_gateway;
struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
- struct ifaddr *ifa;
if (!arpinit_done) {
static struct timeout arptimer_to;
@@ -140,7 +139,7 @@ arp_rtrequest(struct ifnet *ifp, int req
IPL_SOFTNET, 0, "arp", NULL);
timeout_set_proc(&arptimer_to, arptimer, &arptimer_to);
- timeout_add_sec(&arptimer_to, 1);
+ timeout_add_sec(&arptimer_to, arpt_prune);
}
if (ISSET(rt->rt_flags, RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST))
@@ -149,17 +148,12 @@ arp_rtrequest(struct ifnet *ifp, int req
switch (req) {
case RTM_ADD:
- if (rt->rt_flags & RTF_CLONING ||
- ((rt->rt_flags & (RTF_LLINFO | RTF_LOCAL)) && !la)) {
- /*
- * Give this route an expiration time, even though
- * it's a "permanent" route, so that routes cloned
- * from it do not need their expiration time set.
- */
- rt->rt_expire = time_uptime;
- if ((rt->rt_flags & RTF_CLONING) != 0)
- break;
+ if (rt->rt_flags & RTF_CLONING) {
+ rt->rt_expire = 0;
+ break;
}
+ if ((rt->rt_flags & RTF_LOCAL) && !la)
+ rt->rt_expire = 0;
/*
* Announce a new entry if requested or warn the user
* if another station has this IP address.
@@ -179,7 +173,7 @@ arp_rtrequest(struct ifnet *ifp, int req
}
satosdl(gate)->sdl_type = ifp->if_type;
satosdl(gate)->sdl_index = ifp->if_index;
- if (la != 0)
+ if (la != NULL)
break; /* This happens on a route change */
/*
* Case 2: This route may come from cloning, or a manual route
@@ -195,18 +189,9 @@ arp_rtrequest(struct ifnet *ifp, int req
ml_init(&la->la_ml);
la->la_rt = rt;
rt->rt_flags |= RTF_LLINFO;
+ if ((rt->rt_flags & RTF_LOCAL) == 0)
+ rt->rt_expire = time_uptime;
LIST_INSERT_HEAD(&arp_list, la, la_list);
-
- TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
- if ((ifa->ifa_addr->sa_family == AF_INET) &&
- ifatoia(ifa)->ia_addr.sin_addr.s_addr ==
- satosin(rt_key(rt))->sin_addr.s_addr)
- break;
- }
- if (ifa) {
- KASSERT(ifa == rt->rt_ifa);
- rt->rt_expire = 0;
- }
break;
case RTM_DELETE:
@@ -314,7 +299,7 @@ arpresolve(struct ifnet *ifp, struct rte
struct sockaddr *dst, u_char *desten)
{
struct arpcom *ac = (struct arpcom *)ifp;
- struct llinfo_arp *la = NULL;
+ struct llinfo_arp *la;
struct sockaddr_dl *sdl;
struct rtentry *rt = NULL;
char addr[INET_ADDRSTRLEN];
@@ -331,7 +316,7 @@ arpresolve(struct ifnet *ifp, struct rte
rt = rt_getll(rt0);
if (ISSET(rt->rt_flags, RTF_REJECT) &&
- (rt->rt_expire == 0 || time_uptime < rt->rt_expire)) {
+ (rt->rt_expire == 0 || rt->rt_expire > time_uptime )) {
m_freem(m);
return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
}
@@ -352,6 +337,9 @@ arpresolve(struct ifnet *ifp, struct rte
goto bad;
}
+ la = (struct llinfo_arp *)rt->rt_llinfo;
+ KASSERT(la != NULL);
+
/*
* Check the address family and length is valid, the address
* is resolved; otherwise, try to resolve.
@@ -359,6 +347,17 @@ arpresolve(struct ifnet *ifp, struct rte
if ((rt->rt_expire == 0 || rt->rt_expire > time_uptime) &&
sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
+
+ /* refresh ARP entry when timeout gets close */
+ if (rt->rt_expire != 0 &&
+ rt->rt_expire - arpt_keep / 8 < time_uptime &&
+ la->la_refreshed + 30 < time_uptime) {
+ la->la_refreshed = time_uptime;
+ arprequest(ifp,
+ &satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
+ &satosin(dst)->sin_addr.s_addr,
+ ac->ac_enaddr);
+ }
return (0);
}
@@ -370,9 +369,7 @@ arpresolve(struct ifnet *ifp, struct rte
* response yet. Insert mbuf in hold queue if below limit
* if above the limit free the queue without queuing the new packet.
*/
- la = (struct llinfo_arp *)rt->rt_llinfo;
- KASSERT(la != NULL);
- if (la_hold_total < LA_HOLD_TOTAL && la_hold_total < nmbclust / 64) {
+ if (la_hold_total < nmbclust / 64) {
struct mbuf *mh;
if (ml_len(&la->la_ml) >= LA_HOLD_QUEUE) {
@@ -411,6 +408,7 @@ arpresolve(struct ifnet *ifp, struct rte
rt->rt_flags |= RTF_REJECT;
rt->rt_expire += arpt_down;
la->la_asked = 0;
+ la->la_refreshed = 0;
la_hold_total -= ml_purge(&la->la_ml);
}
}
@@ -668,6 +666,7 @@ arpcache(struct ifnet *ifp, struct ether
}
la->la_asked = 0;
+ la->la_refreshed = 0;
while ((len = ml_len(&la->la_ml)) != 0) {
struct mbuf *mh;