Module Name: src Committed By: bouyer Date: Sun Nov 17 19:17:04 UTC 2013
Modified Files: src/sys/net/npf [netbsd-6-1]: npf_impl.h npf_nat.c npf_session.c Log Message: Pull up following revision(s) (requested by rmind in ticket #985): sys/net/npf/npf_impl.h: revision 1.35 sys/net/npf/npf_nat.c: revision 1.21 sys/net/npf/npf_session.c: revision 1.26 npf_session_setnat: fix the race condition when the old connection is still being expired while a new/duplicate is being created. To generate a diff of this commit: cvs rdiff -u -r1.10.2.14 -r1.10.2.14.2.1 src/sys/net/npf/npf_impl.h cvs rdiff -u -r1.10.2.8 -r1.10.2.8.2.1 src/sys/net/npf/npf_nat.c cvs rdiff -u -r1.10.4.9 -r1.10.4.9.2.1 src/sys/net/npf/npf_session.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/net/npf/npf_impl.h diff -u src/sys/net/npf/npf_impl.h:1.10.2.14 src/sys/net/npf/npf_impl.h:1.10.2.14.2.1 --- src/sys/net/npf/npf_impl.h:1.10.2.14 Mon Feb 18 18:26:14 2013 +++ src/sys/net/npf/npf_impl.h Sun Nov 17 19:17:04 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: npf_impl.h,v 1.10.2.14 2013/02/18 18:26:14 riz Exp $ */ +/* $NetBSD: npf_impl.h,v 1.10.2.14.2.1 2013/11/17 19:17:04 bouyer Exp $ */ /*- * Copyright (c) 2009-2013 The NetBSD Foundation, Inc. @@ -285,7 +285,7 @@ void npf_session_release(npf_session_t void npf_session_expire(npf_session_t *); bool npf_session_pass(const npf_session_t *, npf_rproc_t **); void npf_session_setpass(npf_session_t *, npf_rproc_t *); -int npf_session_setnat(npf_session_t *, npf_nat_t *, const int); +int npf_session_setnat(npf_session_t *, npf_nat_t *, u_int); npf_nat_t * npf_session_retnat(npf_session_t *, const int, bool *); int npf_session_save(prop_array_t, prop_array_t); Index: src/sys/net/npf/npf_nat.c diff -u src/sys/net/npf/npf_nat.c:1.10.2.8 src/sys/net/npf/npf_nat.c:1.10.2.8.2.1 --- src/sys/net/npf/npf_nat.c:1.10.2.8 Mon Feb 11 21:49:49 2013 +++ src/sys/net/npf/npf_nat.c Sun Nov 17 19:17:04 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: npf_nat.c,v 1.10.2.8 2013/02/11 21:49:49 riz Exp $ */ +/* $NetBSD: npf_nat.c,v 1.10.2.8.2.1 2013/11/17 19:17:04 bouyer Exp $ */ /*- * Copyright (c) 2010-2013 The NetBSD Foundation, Inc. @@ -76,7 +76,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.8 2013/02/11 21:49:49 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_nat.c,v 1.10.2.8.2.1 2013/11/17 19:17:04 bouyer Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -676,7 +676,7 @@ translate: * Note: packet now has a translated address in the cache. */ nt->nt_session = se; - error = npf_session_setnat(se, nt, di); + error = npf_session_setnat(se, nt, np->n_type); out: if (error) { /* If session was for NAT only - expire it. */ Index: src/sys/net/npf/npf_session.c diff -u src/sys/net/npf/npf_session.c:1.10.4.9 src/sys/net/npf/npf_session.c:1.10.4.9.2.1 --- src/sys/net/npf/npf_session.c:1.10.4.9 Mon Feb 11 21:49:49 2013 +++ src/sys/net/npf/npf_session.c Sun Nov 17 19:17:04 2013 @@ -1,4 +1,4 @@ -/* $NetBSD: npf_session.c,v 1.10.4.9 2013/02/11 21:49:49 riz Exp $ */ +/* $NetBSD: npf_session.c,v 1.10.4.9.2.1 2013/11/17 19:17:04 bouyer Exp $ */ /*- * Copyright (c) 2010-2012 The NetBSD Foundation, Inc. @@ -80,7 +80,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.9 2013/02/11 21:49:49 riz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: npf_session.c,v 1.10.4.9.2.1 2013/11/17 19:17:04 bouyer Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -140,7 +140,7 @@ struct npf_session { uint16_t if_idx; } s_common_id; /* Flags and the protocol state. */ - int s_flags; + u_int s_flags; npf_state_t s_state; /* Association of rule procedure data. */ npf_rproc_t * s_rproc; @@ -163,18 +163,20 @@ struct npf_sehash { }; /* - * Session flags: - * - PFIL_IN and PFIL_OUT values are reserved for direction. - * - SE_ACTIVE: session is active i.e. visible on inspection. - * - SE_PASS: a "pass" session. - * - SE_EXPIRE: explicitly expire the session. - * - SE_REMOVING: session is being removed (indicate need to enter G/C list). + * Session flags: PFIL_IN and PFIL_OUT values are reserved for direction. */ CTASSERT(PFIL_ALL == (0x001 | 0x002)); -#define SE_ACTIVE 0x004 -#define SE_PASS 0x008 -#define SE_EXPIRE 0x010 -#define SE_REMOVING 0x020 +#define SE_ACTIVE 0x004 /* visible on inspection */ +#define SE_PASS 0x008 /* perform implicit passing */ +#define SE_EXPIRE 0x010 /* explicitly expire */ + +/* + * Flags to indicate removal of forwards/backwards session entries or + * completion of session removal itself (i.e. both entries). + */ +#define SE_REMFORW 0x020 +#define SE_REMBACK 0x040 +#define SE_REMOVED (SE_REMFORW | SE_REMBACK) /* * Session tracking state: disabled (off), enabled (on) or flush request. @@ -466,7 +468,7 @@ npf_session_lookup(const npf_cache_t *np npf_sentry_t senkey, *sen; npf_session_t *se; npf_sehash_t *sh; - int flags; + u_int flags; switch (proto) { case IPPROTO_TCP: { @@ -762,7 +764,6 @@ out: static void npf_session_destroy(npf_session_t *se) { - if (se->s_nat) { /* Release any NAT related structures. */ npf_nat_expire(se->s_nat); @@ -786,7 +787,7 @@ npf_session_destroy(npf_session_t *se) * and re-insert session entry accordingly. */ int -npf_session_setnat(npf_session_t *se, npf_nat_t *nt, const int di) +npf_session_setnat(npf_session_t *se, npf_nat_t *nt, u_int ntype) { npf_sehash_t *sh; npf_sentry_t *sen; @@ -798,56 +799,60 @@ npf_session_setnat(npf_session_t *se, np /* First, atomically check and associate NAT entry. */ if (atomic_cas_ptr(&se->s_nat, NULL, nt) != NULL) { - /* Race: see below for description. */ + /* Race with a duplicate packet. */ npf_stats_inc(NPF_STAT_RACE_NAT); return EISCONN; } - /* - * Update, re-hash and re-insert "backwards" entry, according to - * the translation. First, remove the entry from tree. Note that - * a duplicate packet may establish a duplicate session while lock - * will be released. In such case, caller will drop this packet - * and structures associated with it. Such race condition should - * never happen in practice, though. - */ sen = &se->s_back_entry; sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen); + /* + * Note: once the lock is release, the session might be a G/C + * target, therefore keep SE_REMBACK bit set until re-insert. + */ rw_enter(&sh->sh_lock, RW_WRITER); rb_tree_remove_node(&sh->sh_tree, sen); sh->sh_count--; rw_exit(&sh->sh_lock); /* - * New source/destination and hash. Note that source/destination - * are inverted, since we are handling "backwards" entry. + * Update the source/destination IDs and rehash. Note that we are + * handling the "backwards" entry, therefore the opposite mapping. */ npf_nat_gettrans(nt, &taddr, &tport); - if (di == PFIL_OUT) { - /* NPF_NATOUT: source in "forwards" = destination. */ + switch (ntype) { + case NPF_NATOUT: + /* Source in "forwards" => destination. */ memcpy(&sen->se_dst_addr, taddr, sen->se_alen); - if (tport) { + if (tport) sen->se_dst_id = tport; - } - } else { - /* NPF_NATIN: destination in "forwards" = source. */ + break; + case NPF_NATIN: + /* Destination in "forwards" => source. */ memcpy(&sen->se_src_addr, taddr, sen->se_alen); - if (tport) { + if (tport) sen->se_src_id = tport; - } + break; } sh = sess_hash_bucket(sess_hashtbl, &se->s_common_id, sen); - /* Insert into the new bucket. */ + /* + * Insert the entry back into a potentially new bucket. + * + * Note: synchronise with the G/C thread here for a case when the + * old session is still being expired while a duplicate is being + * created here. This race condition is rare. + */ rw_enter(&sh->sh_lock, RW_WRITER); - ok = (rb_tree_insert_node(&sh->sh_tree, sen) == sen); + ok = rb_tree_insert_node(&sh->sh_tree, sen) == sen; if (__predict_true(ok)) { sh->sh_count++; NPF_PRINTF(("NPF: se %p assoc with nat %p\n", se, se->s_nat)); } else { - /* FIXMEgc */ - printf("npf_session_setnat: Houston, we've had a problem.\n"); + /* Race: mark a removed entry and explicitly expire. */ + atomic_or_uint(&se->s_flags, SE_REMBACK | SE_EXPIRE); + npf_stats_inc(NPF_STAT_RACE_NAT); } rw_exit(&sh->sh_lock); return ok ? 0 : EISCONN; @@ -859,7 +864,6 @@ npf_session_setnat(npf_session_t *se, np void npf_session_expire(npf_session_t *se) { - /* KASSERT(se->s_refcnt > 0); XXX: npf_nat_freepolicy() */ atomic_or_uint(&se->s_flags, SE_EXPIRE); } @@ -870,7 +874,6 @@ npf_session_expire(npf_session_t *se) bool npf_session_pass(const npf_session_t *se, npf_rproc_t **rp) { - KASSERT(se->s_refcnt > 0); if ((se->s_flags & SE_PASS) != 0) { *rp = se->s_rproc; @@ -906,7 +909,6 @@ npf_session_setpass(npf_session_t *se, n void npf_session_release(npf_session_t *se) { - KASSERT(se->s_refcnt > 0); if ((se->s_flags & SE_ACTIVE) == 0) { /* Activate: after this point, session is globally visible. */ @@ -922,7 +924,6 @@ npf_session_release(npf_session_t *se) npf_nat_t * npf_session_retnat(npf_session_t *se, const int di, bool *forw) { - KASSERT(se->s_refcnt > 0); *forw = (se->s_flags & PFIL_ALL) == di; return se->s_nat; @@ -967,6 +968,7 @@ npf_session_gc(struct npf_sesslist *gc_l if (sh->sh_count == 0) { continue; } + rw_enter(&sh->sh_lock, RW_WRITER); /* For each (left -> right) ... */ sen = rb_tree_iterate(&sh->sh_tree, NULL, RB_DIR_LEFT); @@ -975,25 +977,27 @@ npf_session_gc(struct npf_sesslist *gc_l se = sen->se_backptr; nsen = rb_tree_iterate(&sh->sh_tree, sen, RB_DIR_RIGHT); if (!npf_session_expired(se, &tsnow) && !flushall) { - KASSERT((se->s_flags & SE_REMOVING) == 0); + KASSERT((se->s_flags & SE_REMOVED) == 0); sen = nsen; continue; } - /* Expired - remove from the tree. */ + /* Expired: remove from the tree. */ + atomic_or_uint(&se->s_flags, SE_EXPIRE); rb_tree_remove_node(&sh->sh_tree, sen); sh->sh_count--; /* - * Set removal bit when the first entry is removed. - * If already set, then second entry has been removed, - * therefore move the session into the G/C list. + * Remove the session and move it to the G/C list, + * if we are removing the forwards entry. The list + * is protected by its bucket lock. */ - if (se->s_flags & SE_REMOVING) { + if (&se->s_forw_entry == sen) { + atomic_or_uint(&se->s_flags, SE_REMFORW); LIST_REMOVE(se, s_list); LIST_INSERT_HEAD(gc_list, se, s_list); } else { - atomic_or_uint(&se->s_flags, SE_REMOVING); + atomic_or_uint(&se->s_flags, SE_REMBACK); } /* Next.. */ @@ -1015,9 +1019,11 @@ npf_session_freelist(struct npf_sesslist se = LIST_FIRST(gc_list); while (se != NULL) { + bool removed = (se->s_flags & SE_REMOVED) == SE_REMOVED; + nse = LIST_NEXT(se, s_list); - if (se->s_refcnt == 0) { - /* Destroy only if no references. */ + if (removed && se->s_refcnt == 0) { + /* Destroy only if removed and no references. */ LIST_REMOVE(se, s_list); npf_session_destroy(se); }