Author: glebius
Date: Fri Feb 14 10:05:21 2014
New Revision: 261882
URL: http://svnweb.freebsd.org/changeset/base/261882

Log:
  Once pf became not covered by a single mutex, many counters in it became
  race prone. Some just gather statistics, but some are later used in
  different calculations.
  
  A real problem was the race provoked underflow of the states_cur counter
  on a rule. Once it goes below zero, it wraps to UINT32_MAX. Later this
  value is used in pf_state_expires() and any state created by this rule
  is immediately expired.
  
  Thus, make fields states_cur, states_tot and src_nodes of struct
  pf_rule be counter(9)s.
  
  Thanks to Dennis for providing me shell access to problematic box and
  his help with reproducing, debugging and investigating the problem.
  
  Thanks to:            Dennis Yusupoff <dyr smartspb.net>
  Also reported by:     dumbbell, pgj, Rambler
  Sponsored by:         Nginx, Inc.

Modified:
  head/sbin/pfctl/pfctl.c
  head/sys/net/pfvar.h
  head/sys/netpfil/pf/if_pfsync.c
  head/sys/netpfil/pf/pf.c
  head/sys/netpfil/pf/pf_ioctl.c

Modified: head/sbin/pfctl/pfctl.c
==============================================================================
--- head/sbin/pfctl/pfctl.c     Fri Feb 14 08:42:39 2014        (r261881)
+++ head/sbin/pfctl/pfctl.c     Fri Feb 14 10:05:21 2014        (r261882)
@@ -791,17 +791,17 @@ pfctl_print_rule_counters(struct pf_rule
        }
        if (opts & PF_OPT_VERBOSE) {
                printf("  [ Evaluations: %-8llu  Packets: %-8llu  "
-                           "Bytes: %-10llu  States: %-6u]\n",
+                           "Bytes: %-10llu  States: %-6lu]\n",
                            (unsigned long long)rule->evaluations,
                            (unsigned long long)(rule->packets[0] +
                            rule->packets[1]),
                            (unsigned long long)(rule->bytes[0] +
-                           rule->bytes[1]), rule->states_cur);
+                           rule->bytes[1]), (uint64_t)rule->states_cur);
                if (!(opts & PF_OPT_DEBUG))
                        printf("  [ Inserted: uid %u pid %u "
-                           "State Creations: %-6u]\n",
+                           "State Creations: %-6lu]\n",
                            (unsigned)rule->cuid, (unsigned)rule->cpid,
-                           rule->states_tot);
+                           (uint64_t)rule->states_tot);
        }
 }
 

Modified: head/sys/net/pfvar.h
==============================================================================
--- head/sys/net/pfvar.h        Fri Feb 14 08:42:39 2014        (r261881)
+++ head/sys/net/pfvar.h        Fri Feb 14 10:05:21 2014        (r261882)
@@ -35,6 +35,7 @@
 
 #include <sys/param.h>
 #include <sys/queue.h>
+#include <sys/counter.h>
 #include <sys/refcount.h>
 #include <sys/tree.h>
 
@@ -512,13 +513,9 @@ struct pf_rule {
 
        int                      rtableid;
        u_int32_t                timeout[PFTM_MAX];
-       u_int32_t                states_cur;
-       u_int32_t                states_tot;
        u_int32_t                max_states;
-       u_int32_t                src_nodes;
        u_int32_t                max_src_nodes;
        u_int32_t                max_src_states;
-       u_int32_t                spare1;                        /* netgraph */
        u_int32_t                max_src_conn;
        struct {
                u_int32_t               limit;
@@ -532,6 +529,10 @@ struct pf_rule {
        uid_t                    cuid;
        pid_t                    cpid;
 
+       counter_u64_t            states_cur;
+       counter_u64_t            states_tot;
+       counter_u64_t            src_nodes;
+
        u_int16_t                return_icmp;
        u_int16_t                return_icmp6;
        u_int16_t                max_mss;

Modified: head/sys/netpfil/pf/if_pfsync.c
==============================================================================
--- head/sys/netpfil/pf/if_pfsync.c     Fri Feb 14 08:42:39 2014        
(r261881)
+++ head/sys/netpfil/pf/if_pfsync.c     Fri Feb 14 10:05:21 2014        
(r261882)
@@ -438,7 +438,8 @@ pfsync_state_import(struct pfsync_state 
        else
                r = &V_pf_default_rule;
 
-       if ((r->max_states && r->states_cur >= r->max_states))
+       if ((r->max_states &&
+           counter_u64_fetch(r->states_cur) >= r->max_states))
                goto cleanup;
 
        /*
@@ -516,18 +517,15 @@ pfsync_state_import(struct pfsync_state 
        st->pfsync_time = time_uptime;
        st->sync_state = PFSYNC_S_NONE;
 
-       /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
-       r->states_cur++;
-       r->states_tot++;
-
        if (!(flags & PFSYNC_SI_IOCTL))
                st->state_flags |= PFSTATE_NOSYNC;
 
-       if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
-               /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
-               r->states_cur--;
+       if ((error = pf_state_insert(kif, skw, sks, st)) != 0)
                goto cleanup_state;
-       }
+
+       /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
+       counter_u64_add(r->states_cur, 1);
+       counter_u64_add(r->states_tot, 1);
 
        if (!(flags & PFSYNC_SI_IOCTL)) {
                st->state_flags &= ~PFSTATE_NOSYNC;

Modified: head/sys/netpfil/pf/pf.c
==============================================================================
--- head/sys/netpfil/pf/pf.c    Fri Feb 14 08:42:39 2014        (r261881)
+++ head/sys/netpfil/pf/pf.c    Fri Feb 14 10:05:21 2014        (r261882)
@@ -335,27 +335,27 @@ enum { PF_ICMP_MULTI_NONE, PF_ICMP_MULTI
 #define        BOUND_IFACE(r, k) \
        ((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all
 
-#define        STATE_INC_COUNTERS(s)                           \
-       do {                                            \
-               s->rule.ptr->states_cur++;              \
-               s->rule.ptr->states_tot++;              \
-               if (s->anchor.ptr != NULL) {            \
-                       s->anchor.ptr->states_cur++;    \
-                       s->anchor.ptr->states_tot++;    \
-               }                                       \
-               if (s->nat_rule.ptr != NULL) {          \
-                       s->nat_rule.ptr->states_cur++;  \
-                       s->nat_rule.ptr->states_tot++;  \
-               }                                       \
+#define        STATE_INC_COUNTERS(s)                                           
\
+       do {                                                            \
+               counter_u64_add(s->rule.ptr->states_cur, 1);            \
+               counter_u64_add(s->rule.ptr->states_tot, 1);            \
+               if (s->anchor.ptr != NULL) {                            \
+                       counter_u64_add(s->anchor.ptr->states_cur, 1);  \
+                       counter_u64_add(s->anchor.ptr->states_tot, 1);  \
+               }                                                       \
+               if (s->nat_rule.ptr != NULL) {                          \
+                       counter_u64_add(s->nat_rule.ptr->states_cur, 1);\
+                       counter_u64_add(s->nat_rule.ptr->states_tot, 1);\
+               }                                                       \
        } while (0)
 
-#define        STATE_DEC_COUNTERS(s)                           \
-       do {                                            \
-               if (s->nat_rule.ptr != NULL)            \
-                       s->nat_rule.ptr->states_cur--;  \
-               if (s->anchor.ptr != NULL)              \
-                       s->anchor.ptr->states_cur--;    \
-               s->rule.ptr->states_cur--;              \
+#define        STATE_DEC_COUNTERS(s)                                           
\
+       do {                                                            \
+               if (s->nat_rule.ptr != NULL)                            \
+                       counter_u64_add(s->nat_rule.ptr->states_cur, -1);\
+               if (s->anchor.ptr != NULL)                              \
+                       counter_u64_add(s->anchor.ptr->states_cur, -1); \
+               counter_u64_add(s->rule.ptr->states_cur, -1);           \
        } while (0)
 
 static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
@@ -647,7 +647,7 @@ pf_insert_src_node(struct pf_src_node **
                PF_HASHROW_ASSERT(sh);
 
                if (!rule->max_src_nodes ||
-                   rule->src_nodes < rule->max_src_nodes)
+                   counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
                        (*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
                else
                        V_pf_status.lcounters[LCNT_SRCNODES]++;
@@ -667,7 +667,7 @@ pf_insert_src_node(struct pf_src_node **
                (*sn)->creation = time_uptime;
                (*sn)->ruletype = rule->action;
                if ((*sn)->rule.ptr != NULL)
-                       (*sn)->rule.ptr->src_nodes++;
+                       counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
                PF_HASHROW_UNLOCK(sh);
                V_pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
                V_pf_status.src_nodes++;
@@ -692,7 +692,7 @@ pf_unlink_src_node_locked(struct pf_src_
 #endif
        LIST_REMOVE(src, entry);
        if (src->rule.ptr)
-               src->rule.ptr->src_nodes--;
+               counter_u64_add(src->rule.ptr->src_nodes, -1);
        V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
        V_pf_status.src_nodes--;
 }
@@ -1478,7 +1478,7 @@ pf_state_expires(const struct pf_state *
        start = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
        if (start) {
                end = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
-               states = state->rule.ptr->states_cur;   /* XXXGL */
+               states = counter_u64_fetch(state->rule.ptr->states_cur);
        } else {
                start = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
                end = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
@@ -1587,11 +1587,7 @@ pf_unlink_state(struct pf_state *s, u_in
        if (pfsync_delete_state_ptr != NULL)
                pfsync_delete_state_ptr(s);
 
-       --s->rule.ptr->states_cur;
-       if (s->nat_rule.ptr != NULL)
-               --s->nat_rule.ptr->states_cur;
-       if (s->anchor.ptr != NULL)
-               --s->anchor.ptr->states_cur;
+       STATE_DEC_COUNTERS(s);
 
        s->timeout = PFTM_UNLINKED;
 
@@ -3606,7 +3602,8 @@ pf_create_state(struct pf_rule *r, struc
        u_short                  reason;
 
        /* check maximums */
-       if (r->max_states && (r->states_cur >= r->max_states)) {
+       if (r->max_states &&
+           (counter_u64_fetch(r->states_cur) >= r->max_states)) {
                V_pf_status.lcounters[LCNT_STATES]++;
                REASON_SET(&reason, PFRES_MAXSTATES);
                return (PF_DROP);

Modified: head/sys/netpfil/pf/pf_ioctl.c
==============================================================================
--- head/sys/netpfil/pf/pf_ioctl.c      Fri Feb 14 08:42:39 2014        
(r261881)
+++ head/sys/netpfil/pf/pf_ioctl.c      Fri Feb 14 10:05:21 2014        
(r261882)
@@ -229,6 +229,10 @@ pfattach(void)
        V_pf_default_rule.nr = -1;
        V_pf_default_rule.rtableid = -1;
 
+       V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
+       V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
+       V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);
+
        /* initialize default timeouts */
        my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
        my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
@@ -398,6 +402,9 @@ pf_free_rule(struct pf_rule *rule)
                pfi_kif_unref(rule->kif);
        pf_anchor_remove(rule);
        pf_empty_pool(&rule->rpool.list);
+       counter_u64_free(rule->states_cur);
+       counter_u64_free(rule->states_tot);
+       counter_u64_free(rule->src_nodes);
        free(rule, M_PFRULE);
 }
 
@@ -1149,6 +1156,9 @@ pfioctl(struct cdev *dev, u_long cmd, ca
                bcopy(&pr->rule, rule, sizeof(struct pf_rule));
                if (rule->ifname[0])
                        kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+               rule->states_cur = counter_u64_alloc(M_WAITOK);
+               rule->states_tot = counter_u64_alloc(M_WAITOK);
+               rule->src_nodes = counter_u64_alloc(M_WAITOK);
                rule->cuid = td->td_ucred->cr_ruid;
                rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
                TAILQ_INIT(&rule->rpool.list);
@@ -1265,6 +1275,9 @@ pfioctl(struct cdev *dev, u_long cmd, ca
 #undef ERROUT
 DIOCADDRULE_error:
                PF_RULES_WUNLOCK();
+               counter_u64_free(rule->states_cur);
+               counter_u64_free(rule->states_tot);
+               counter_u64_free(rule->src_nodes);
                free(rule, M_PFRULE);
                if (kif)
                        free(kif, PFI_MTYPE);
@@ -1336,6 +1349,16 @@ DIOCADDRULE_error:
                        break;
                }
                bcopy(rule, &pr->rule, sizeof(struct pf_rule));
+               /*
+                * XXXGL: this is what happens when internal kernel
+                * structures are used as ioctl API structures.
+                */
+               pr->rule.states_cur =
+                   (counter_u64_t )counter_u64_fetch(rule->states_cur);
+               pr->rule.states_tot =
+                   (counter_u64_t )counter_u64_fetch(rule->states_tot);
+               pr->rule.src_nodes =
+                   (counter_u64_t )counter_u64_fetch(rule->src_nodes);
                if (pf_anchor_copyout(ruleset, rule, pr)) {
                        PF_RULES_WUNLOCK();
                        error = EBUSY;
@@ -1354,7 +1377,7 @@ DIOCADDRULE_error:
                        rule->evaluations = 0;
                        rule->packets[0] = rule->packets[1] = 0;
                        rule->bytes[0] = rule->bytes[1] = 0;
-                       rule->states_tot = 0;
+                       counter_u64_zero(rule->states_tot);
                }
                PF_RULES_WUNLOCK();
                break;
@@ -1394,15 +1417,14 @@ DIOCADDRULE_error:
 #endif /* INET6 */
                        newrule = malloc(sizeof(*newrule), M_PFRULE, M_WAITOK);
                        bcopy(&pcr->rule, newrule, sizeof(struct pf_rule));
+                       if (newrule->ifname[0])
+                               kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
+                       newrule->states_cur = counter_u64_alloc(M_WAITOK);
+                       newrule->states_tot = counter_u64_alloc(M_WAITOK);
+                       newrule->src_nodes = counter_u64_alloc(M_WAITOK);
                        newrule->cuid = td->td_ucred->cr_ruid;
                        newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
                        TAILQ_INIT(&newrule->rpool.list);
-                       /* Initialize refcounting. */
-                       newrule->states_cur = 0;
-                       newrule->entries.tqe_prev = NULL;
-
-                       if (newrule->ifname[0])
-                               kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK);
                }
 
 #define        ERROUT(x)       { error = (x); goto DIOCCHANGERULE_error; }
@@ -1570,8 +1592,12 @@ DIOCADDRULE_error:
 #undef ERROUT
 DIOCCHANGERULE_error:
                PF_RULES_WUNLOCK();
-               if (newrule != NULL)
+               if (newrule != NULL) {
+                       counter_u64_free(newrule->states_cur);
+                       counter_u64_free(newrule->states_tot);
+                       counter_u64_free(newrule->src_nodes);
                        free(newrule, M_PFRULE);
+               }
                if (kif != NULL)
                        free(kif, PFI_MTYPE);
                break;
@@ -3427,6 +3453,11 @@ shutdown_pf(void)
        char nn = '\0';
 
        V_pf_status.running = 0;
+
+       counter_u64_free(V_pf_default_rule.states_cur);
+       counter_u64_free(V_pf_default_rule.states_tot);
+       counter_u64_free(V_pf_default_rule.src_nodes);
+
        do {
                if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
                    != 0) {
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "[email protected]"

Reply via email to