The reading of neighbour table entries can be converted from a slow
reader/writer lock to a fast lockless sequence number check.
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>
---
include/net/neighbour.h | 2
net/core/neighbour.c | 117 +++++++++++++++++++++++++++++-------------------
net/ipv4/arp.c | 101 +++++++++++++++++++++++++----------------
net/ipv6/ndisc.c | 16 +++---
net/ipv6/route.c | 12 ++--
net/sched/sch_teql.c | 11 +++-
6 files changed, 155 insertions(+), 104 deletions(-)
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -100,7 +100,7 @@ struct neighbour
__u8 type;
__u8 dead;
atomic_t probes;
- rwlock_t lock;
+ seqlock_t lock;
unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
struct hh_cache *hh;
atomic_t refcnt;
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -143,17 +143,17 @@ static int neigh_forced_gc(struct neigh_
* - nobody refers to it.
* - it is not permanent
*/
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
shrunk = 1;
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
call_rcu(&n->rcu, neigh_rcu_release);
continue;
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
}
}
@@ -198,7 +198,7 @@ static void neigh_flush_dev(struct neigh
continue;
hlist_del_rcu(&n->hlist);
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -220,7 +220,7 @@ static void neigh_flush_dev(struct neigh
n->nud_state = NUD_NONE;
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
neigh_release(n);
}
}
@@ -267,7 +267,7 @@ static struct neighbour *neigh_alloc(str
memset(n, 0, tbl->entry_size);
skb_queue_head_init(&n->arp_queue);
- rwlock_init(&n->lock);
+ seqlock_init(&n->lock);
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
@@ -615,7 +615,7 @@ void neigh_destroy(struct neighbour *nei
/* Neighbour state is suspicious;
disable fast path.
- Called with write_locked neigh.
+ Called with locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
@@ -632,7 +632,7 @@ static void neigh_suspect(struct neighbo
/* Neighbour state is OK;
enable fast path.
- Called with write_locked neigh.
+ Called with locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
@@ -676,7 +676,7 @@ static void neigh_periodic_timer(unsigne
hlist_for_each_entry_safe(n, node, tmp, head, hlist) {
unsigned int state;
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
state = n->nud_state;
if (state & (NUD_PERMANENT | NUD_IN_TIMER))
@@ -690,12 +690,12 @@ static void neigh_periodic_timer(unsigne
time_after(now, n->used + n->parms->gc_staletime))) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
neigh_release(n);
continue;
}
next_elt:
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
}
/* Cycle through all hash buckets every base_reachable_time/2 ticks.
@@ -738,7 +738,7 @@ static void neigh_timer_handler(unsigned
unsigned state;
int notify = 0;
- write_lock(&neigh->lock);
+ write_seqlock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
@@ -748,6 +748,7 @@ static void neigh_timer_handler(unsigned
#ifndef CONFIG_SMP
printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
+ write_sequnlock(&neigh->lock);
goto out;
}
@@ -808,9 +809,9 @@ static void neigh_timer_handler(unsigned
*/
while (neigh->nud_state == NUD_FAILED &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
neigh->ops->error_report(neigh, skb);
- write_lock(&neigh->lock);
+ write_seqlock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
@@ -821,20 +822,22 @@ static void neigh_timer_handler(unsigned
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
+
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb_get(skb);
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
if (skb)
kfree_skb(skb);
} else {
-out:
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
}
+
+out:
if (notify)
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
@@ -850,11 +853,11 @@ int __neigh_event_send(struct neighbour
int rc;
unsigned long now;
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
- goto out_unlock_bh;
+ goto out;
now = jiffies;
@@ -868,7 +871,7 @@ int __neigh_event_send(struct neighbour
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
- write_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
if (skb)
kfree_skb(skb);
@@ -896,8 +899,8 @@ int __neigh_event_send(struct neighbour
}
rc = 1;
}
-out_unlock_bh:
- write_unlock_bh(&neigh->lock);
+out:
+ write_sequnlock_bh(&neigh->lock);
return rc;
}
@@ -948,7 +951,7 @@ int neigh_update(struct neighbour *neigh
struct net_device *dev;
int update_isrouter = 0;
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
dev = neigh->dev;
old = neigh->nud_state;
@@ -1052,22 +1055,23 @@ int neigh_update(struct neighbour *neigh
while (neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
struct neighbour *n1 = neigh;
- write_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
if (skb->dst && skb->dst->neighbour)
n1 = skb->dst->neighbour;
n1->output(skb);
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
-out:
+
if (update_isrouter) {
neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
(neigh->flags | NTF_ROUTER) :
(neigh->flags & ~NTF_ROUTER);
}
- write_unlock_bh(&neigh->lock);
+out:
+ write_sequnlock_bh(&neigh->lock);
if (notify)
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
@@ -1144,6 +1148,30 @@ int neigh_compat_output(struct sk_buff *
return dev_queue_xmit(skb);
}
+/* Build skb's link-layer header from neigh->ha without taking the lock.
+ * A concurrent writer forces a retry; the header from the failed pass is
+ * pulled first (presumably rc is its length). Negative rc is never retried.
+ */
+static int neigh_hard_header(struct sk_buff *skb, struct net_device *dev,
+			     const struct neighbour *neigh)
+{
+	unsigned seq;
+	int rc;
+
+	for (;;) {
+		seq = read_seqbegin(&neigh->lock);
+		rc = dev->hard_header(skb, dev, ntohs(skb->protocol),
+				      neigh->ha, NULL, skb->len);
+
+		/* Consistent read, or an error: either way we are done. */
+		if (likely(!read_seqretry(&neigh->lock, seq)) || rc < 0)
+			return rc;
+
+		/* Writer interfered: undo this pass's header and go again. */
+		__skb_pull(skb, rc);
+	}
+}
+
/* Slow and careful. */
int neigh_resolve_output(struct sk_buff *skb)
@@ -1160,19 +1188,17 @@ int neigh_resolve_output(struct sk_buff
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
+
if (dev->hard_header_cache && !dst->hh) {
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
if (!dst->hh)
neigh_hh_init(neigh, dst, dst->ops->protocol);
err = dev->hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
- write_unlock_bh(&neigh->lock);
- } else {
- read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
- }
+ write_sequnlock_bh(&neigh->lock);
+ } else
+ err = neigh_hard_header(skb, dev, neigh);
+
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
@@ -1196,14 +1222,11 @@ int neigh_connected_output(struct sk_buf
int err;
struct dst_entry *dst = skb->dst;
struct neighbour *neigh = dst->neighbour;
- struct net_device *dev = neigh->dev;
__skb_pull(skb, skb->nh.raw - skb->data);
- read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ err = neigh_hard_header(skb, neigh->dev, neigh);
+
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
else {
@@ -1960,11 +1983,15 @@ static int neigh_fill_info(struct sk_buf
NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
- read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
+
+ /* Not really updating this neighbour but don't want to
+ * deal with the unwind case when seqlock needs retry
+ */
+ write_seqlock_bh(&neigh->lock);
if ((neigh->nud_state & NUD_VALID) &&
nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
- read_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
goto nla_put_failure;
}
@@ -1972,7 +1999,7 @@ static int neigh_fill_info(struct sk_buf
ci.ndm_confirmed = now - neigh->confirmed;
ci.ndm_updated = now - neigh->updated;
ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
- read_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
@@ -2077,13 +2104,13 @@ void __neigh_for_each_release(struct nei
&tbl->hash_buckets[chain], hlist) {
int release;
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
release = cb(n);
if (release) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
if (release)
call_rcu(&n->rcu, neigh_rcu_release);
}
--- net-2.6.19.orig/net/ipv4/arp.c
+++ net-2.6.19/net/ipv4/arp.c
@@ -328,6 +328,31 @@ static void arp_error_report(struct neig
kfree_skb(skb);
}
+
+/* Translate the neighbour's NUD state bits into ATF_* flag bits. */
+static unsigned arp_state_to_flags(const struct neighbour *neigh)
+{
+	if (neigh->nud_state & NUD_PERMANENT)
+		return ATF_PERM | ATF_COM;
+	if (neigh->nud_state & NUD_VALID)
+		return ATF_COM;
+	return 0;
+}
+
+/* Seqlock-consistent copy of neigh->ha (and, if flags != NULL, ATF_* flags). */
+static void arp_get_neigh_addr(u8 *ha, const struct neighbour *neigh,
+			       unsigned len, unsigned *flags)
+{
+	unsigned start;
+
+	do {
+		start = read_seqbegin(&neigh->lock);
+		memcpy(ha, neigh->ha, len);
+		if (flags != NULL)
+			*flags = arp_state_to_flags(neigh);
+	} while (read_seqretry(&neigh->lock, start));
+}
+
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
u32 saddr = 0;
@@ -369,8 +394,12 @@ static void arp_solicit(struct neighbour
if ((probes -= neigh->parms->ucast_probes) < 0) {
if (!(neigh->nud_state&NUD_VALID))
printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
- dst_ha = neigh->ha;
- read_lock_bh(&neigh->lock);
+
+ dst_ha = kmalloc(MAX_ADDR_LEN, GFP_ATOMIC);
+ if (!dst_ha)
+ return;
+
+ arp_get_neigh_addr(dst_ha, neigh, MAX_ADDR_LEN, NULL);
} else if ((probes -= neigh->parms->app_probes) < 0) {
#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
@@ -380,8 +409,9 @@ static void arp_solicit(struct neighbour
arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
dst_ha, dev->dev_addr, NULL);
+
if (dst_ha)
- read_unlock_bh(&neigh->lock);
+ kfree(dst_ha);
}
static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
@@ -489,10 +519,8 @@ int arp_find(unsigned char *haddr, struc
if (n) {
n->used = jiffies;
if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
- read_lock_bh(&n->lock);
- memcpy(haddr, n->ha, dev->addr_len);
- read_unlock_bh(&n->lock);
+ arp_get_neigh_addr(haddr, n, dev->addr_len, NULL);
neigh_release(n);
return 0;
}
neigh_release(n);
@@ -1047,16 +1074,6 @@ static int arp_req_set(struct arpreq *r,
return err;
}
-static unsigned arp_state_to_flags(struct neighbour *neigh)
-{
- unsigned flags = 0;
- if (neigh->nud_state&NUD_PERMANENT)
- flags = ATF_PERM|ATF_COM;
- else if (neigh->nud_state&NUD_VALID)
- flags = ATF_COM;
- return flags;
-}
-
/*
* Get an ARP cache entry.
*/
@@ -1069,10 +1086,8 @@ static int arp_req_get(struct arpreq *r,
neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
+ arp_get_neigh_addr(r->arp_ha.sa_data, neigh, dev->addr_len,
+ &r->arp_flags);
r->arp_ha.sa_family = dev->type;
strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
neigh_release(neigh);
@@ -1258,7 +1273,7 @@ void __init arp_init(void)
/*
* ax25 -> ASCII conversion
*/
-static char *ax2asc2(ax25_address *a, char *buf)
+static char *ax2asc2(const ax25_address *a, char *buf)
{
char c, *s;
int n;
@@ -1290,35 +1305,41 @@ static char *ax2asc2(ax25_address *a, ch
#define HBUFFERLEN 30
static void arp_format_neigh_entry(struct seq_file *seq,
- struct neighbour *n)
+ const struct neighbour *n)
{
char hbuffer[HBUFFERLEN];
const char hexbuf[] = "0123456789ABCDEF";
int k, j;
+ unsigned hflags, seqno;
char tbuf[16];
struct net_device *dev = n->dev;
int hatype = dev->type;
- read_lock(&n->lock);
- /* Convert hardware address to XX:XX:XX:XX ... form. */
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
- ax2asc2((ax25_address *)n->ha, hbuffer);
- else {
-#endif
- for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
- hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
- hbuffer[k++] = hexbuf[n->ha[j] & 15];
- hbuffer[k++] = ':';
- }
- hbuffer[--k] = 0;
+ do {
+ seqno = read_seqbegin(&n->lock);
+
+ /* Convert hardware address to XX:XX:XX:XX ... form. */
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- }
+ if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
+ ax2asc2((const ax25_address *)n->ha, hbuffer);
+ else
#endif
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+ {
+ for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
+ hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
+ hbuffer[k++] = hexbuf[n->ha[j] & 15];
+ hbuffer[k++] = ':';
+ }
+ hbuffer[--k] = 0;
+ }
+
+ sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+ hflags = arp_state_to_flags(n);
+ } while (read_seqretry(&n->lock, seqno));
+
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
- tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
- read_unlock(&n->lock);
+ tbuf, hatype, hflags, hbuffer, dev->name);
+
}
static void arp_format_pneigh_entry(struct seq_file *seq,
--- net-2.6.19.orig/net/ipv6/ndisc.c
+++ net-2.6.19/net/ipv6/ndisc.c
@@ -1412,15 +1412,15 @@ void ndisc_send_redirect(struct sk_buff
return;
}
- if (dev->addr_len) {
- read_lock_bh(&neigh->lock);
- if (neigh->nud_state & NUD_VALID) {
+ if (dev->addr_len && (neigh->nud_state & NUD_VALID)) {
+ unsigned seq;
+ do {
+ seq = read_seqbegin(&neigh->lock);
memcpy(ha_buf, neigh->ha, dev->addr_len);
- read_unlock_bh(&neigh->lock);
- ha = ha_buf;
- len += ndisc_opt_addr_space(dev);
- } else
- read_unlock_bh(&neigh->lock);
+ } while (read_seqretry(&neigh->lock, seq));
+
+ ha = ha_buf;
+ len += ndisc_opt_addr_space(dev);
}
rd_len = min_t(unsigned int,
--- net-2.6.19.orig/net/ipv6/route.c
+++ net-2.6.19/net/ipv6/route.c
@@ -279,20 +279,19 @@ static void rt6_probe(struct rt6_info *r
*/
if (!neigh || (neigh->nud_state & NUD_VALID))
return;
- read_lock_bh(&neigh->lock);
+
if (!(neigh->nud_state & NUD_VALID) &&
- time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ time_after(jiffies,
+ neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
struct in6_addr mcaddr;
struct in6_addr *target;
neigh->updated = jiffies;
- read_unlock_bh(&neigh->lock);
target = (struct in6_addr *)&neigh->primary_key;
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
- } else
- read_unlock_bh(&neigh->lock);
+ }
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -323,10 +322,9 @@ static int inline rt6_check_neigh(struct
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
else if (neigh) {
- read_lock_bh(&neigh->lock);
+ smp_rmb();
if (neigh->nud_state & NUD_VALID)
m = 2;
- read_unlock_bh(&neigh->lock);
}
return m;
}
--- net-2.6.19.orig/net/sched/sch_teql.c
+++ net-2.6.19/net/sched/sch_teql.c
@@ -248,9 +248,18 @@ __teql_resolve(struct sk_buff *skb, stru
}
if (neigh_event_send(n, skb_res) == 0) {
int err;
- read_lock(&n->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
- read_unlock(&n->lock);
+ unsigned seq;
+
+ for (;;) {
+ seq = read_seqbegin(&n->lock);
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+ n->ha, NULL, skb->len);
+ if (likely(!read_seqretry(&n->lock, seq)) || err < 0)
+ break;
+ /* Raced with a writer: drop the header just built before retrying. */
+ __skb_pull(skb, err);
+ }
+
if (err < 0) {
neigh_release(n);
return -EINVAL;
--
Stephen Hemminger <[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html