Hi,

I think I got this now.
- replace time_seconds with time_uptime
- with that flow_finish in pflow can be simplified (pointed out by benno@)
  this should take care of flows with finish < start for localy created
  states
- change various variables from unsigned to signed (mainly) in pfsync which
  could underflow (inspired by a patch from dlg@ from last year, pointed out
  by cameild@). This fixes pflow and at the same time an underflow problem
  in pfsync which dlg's patch was addressing. If I understand the history
  correctly cameild@ noticed this problem last year.

Hrvoje Popovski tested an older version without the pfsync fix.

Since this changes the semantics of a field in pfsync both pfsync machines
need to be updated (I think it's ok to import 5.1 states into this version
but the other way around will cause problems.)

We are running with this patch since yesterday on a redundant firewall
(amd64) with pfsync and no longer see broken flows (i.e. where finish
< start). The pair survived various failovers.

Index: share/man/man4/pflow.4
===================================================================
RCS file: /opt/OpenBSD-CVS/src/share/man/man4/pflow.4,v
retrieving revision 1.11
diff -u -p -u -r1.11 pflow.4
--- share/man/man4/pflow.4      2 Feb 2012 12:34:37 -0000       1.11
+++ share/man/man4/pflow.4      22 Aug 2012 18:20:08 -0000
@@ -127,6 +127,12 @@ The
 device first appeared in
 .Ox 4.5 .
 .Sh BUGS
+A state created by
+.Xr pfsync 4
+can have a creation or expiration time before the machine came up.
+.Nm
+pretends the flow was created or expired when the machine came up.
+
 The IPFIX implementation is incomplete:
 The required transport protocol SCTP is not supported.
 Transport over TCP and DTLS protected flow export is also not supported.
Index: sys/net/if_pflow.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/if_pflow.c,v
retrieving revision 1.20
diff -u -p -u -r1.20 if_pflow.c
--- sys/net/if_pflow.c  11 Apr 2012 17:42:53 -0000      1.20
+++ sys/net/if_pflow.c  22 Aug 2012 18:20:08 -0000
@@ -553,12 +553,15 @@ copy_flow_data(struct pflow_flow *flow1,
        flow1->flow_octets = htonl(st->bytes[0]);
        flow2->flow_octets = htonl(st->bytes[1]);
 
-       flow1->flow_start = flow2->flow_start =
+       /*
+        * Pretend the flow was created or expired when this machine
+        * came up when the state's creation or expiration is in the past.
+        */
+       flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
            htonl(st->creation * 1000);
-       flow1->flow_finish = flow2->flow_finish =
-           htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
-           st->rule.ptr->timeout[st->timeout] :
-           pf_default_rule.timeout[st->timeout])) * 1000);
+       flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+           htonl(st->expire * 1000);
+
        flow1->tcp_flags = flow2->tcp_flags = 0;
        flow1->protocol = flow2->protocol = sk->proto;
        flow1->tos = flow2->tos = st->rule.ptr->tos;
@@ -580,12 +583,14 @@ copy_flow4_data(struct pflow_flow4 *flow
        flow1->flow_octets = htobe64(st->bytes[0]);
        flow2->flow_octets = htobe64(st->bytes[1]);
 
-       flow1->flow_start = flow2->flow_start =
+       /*
+        * Pretend the flow was created or expired when this machine
+        * came up when the state's creation or expiration is in the past.
+        */
+       flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
            htonl(st->creation * 1000);
-       flow1->flow_finish = flow2->flow_finish =
-           htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
-           st->rule.ptr->timeout[st->timeout] :
-           pf_default_rule.timeout[st->timeout])) * 1000);
+       flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+           htonl(st->expire * 1000);
 
        flow1->protocol = flow2->protocol = sk->proto;
        flow1->tos = flow2->tos = st->rule.ptr->tos;
@@ -608,12 +613,14 @@ copy_flow6_data(struct pflow_flow6 *flow
        flow1->flow_octets = htobe64(st->bytes[0]);
        flow2->flow_octets = htobe64(st->bytes[1]);
 
-       flow1->flow_start = flow2->flow_start =
+       /*
+        * Pretend the flow was created or expired when this machine
+        * came up when the state's creation or expiration is in the past.
+        */
+       flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
            htonl(st->creation * 1000);
-       flow1->flow_finish = flow2->flow_finish =
-           htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
-           st->rule.ptr->timeout[st->timeout] :
-           pf_default_rule.timeout[st->timeout])) * 1000);
+       flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+           htonl(st->expire * 1000);
 
        flow1->protocol = flow2->protocol = sk->proto;
        flow1->tos = flow2->tos = st->rule.ptr->tos;
Index: sys/net/if_pfsync.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/if_pfsync.c,v
retrieving revision 1.189
diff -u -p -u -r1.189 if_pfsync.c
--- sys/net/if_pfsync.c 26 Jul 2012 12:25:31 -0000      1.189
+++ sys/net/if_pfsync.c 22 Aug 2012 18:20:08 -0000
@@ -464,6 +464,7 @@ void
 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
 {
        pf_state_export(sp, st);
+       sp->expire = htonl(time_uptime - st->expire);
 }
 
 int
@@ -574,17 +575,7 @@ pfsync_state_import(struct pfsync_state 
        /* copy to state */
        bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
        st->creation = time_uptime - ntohl(sp->creation);
-       st->expire = time_second;
-       if (sp->expire) {
-               u_int32_t timeout;
-
-               timeout = r->timeout[sp->timeout];
-               if (!timeout)
-                       timeout = pf_default_rule.timeout[sp->timeout];
-
-               /* sp->expire may have been adaptively scaled by export. */
-               st->expire -= timeout - ntohl(sp->expire);
-       }
+       st->expire = time_uptime - ntohl(sp->expire);
 
        st->direction = sp->direction;
        st->log = sp->log;
@@ -948,7 +939,7 @@ pfsync_in_upd(caddr_t buf, int len, int 
                if (sync < 2) {
                        pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
                        pf_state_peer_ntoh(&sp->dst, &st->dst);
-                       st->expire = time_second;
+                       st->expire = time_uptime;
                        st->timeout = sp->timeout;
                }
                st->pfsync_time = time_uptime;
@@ -1022,7 +1013,7 @@ pfsync_in_upd_c(caddr_t buf, int len, in
                if (sync < 2) {
                        pfsync_alloc_scrub_memory(&up->dst, &st->dst);
                        pf_state_peer_ntoh(&up->dst, &st->dst);
-                       st->expire = time_second;
+                       st->expire = time_uptime;
                        st->timeout = up->timeout;
                }
                st->pfsync_time = time_uptime;
Index: sys/net/pf.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf.c,v
retrieving revision 1.809
diff -u -p -u -r1.809 pf.c
--- sys/net/pf.c        26 Jul 2012 12:25:31 -0000      1.809
+++ sys/net/pf.c        22 Aug 2012 18:20:08 -0000
@@ -389,13 +389,13 @@ pf_init_threshold(struct pf_threshold *t
        threshold->limit = limit * PF_THRESHOLD_MULT;
        threshold->seconds = seconds;
        threshold->count = 0;
-       threshold->last = time_second;
+       threshold->last = time_uptime;
 }
 
 void
 pf_add_threshold(struct pf_threshold *threshold)
 {
-       u_int32_t t = time_second, diff = t - threshold->last;
+       u_int32_t t = time_uptime, diff = t - threshold->last;
 
        if (diff >= threshold->seconds)
                threshold->count = 0;
@@ -582,7 +582,7 @@ pf_insert_src_node(struct pf_src_node **
 void
 pf_remove_src_node(struct pf_src_node *sn)
 {
-       if (sn->states > 0 || sn->expire > time_second)
+       if (sn->states > 0 || sn->expire > time_uptime)
                return;
 
        if (sn->rule.ptr != NULL) {
@@ -1105,10 +1105,10 @@ pf_state_export(struct pfsync_state *sp,
        bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
        sp->creation = htonl(time_uptime - st->creation);
        sp->expire = pf_state_expires(st);
-       if (sp->expire <= time_second)
+       if (sp->expire <= time_uptime)
                sp->expire = htonl(0);
        else
-               sp->expire = htonl(sp->expire - time_second);
+               sp->expire = htonl(sp->expire - time_uptime);
 
        sp->direction = st->direction;
        sp->log = st->log;
@@ -1169,7 +1169,7 @@ pf_purge_thread(void *v)
        }
 }
 
-u_int32_t
+int32_t
 pf_state_expires(const struct pf_state *state)
 {
        u_int32_t       timeout;
@@ -1179,7 +1179,7 @@ pf_state_expires(const struct pf_state *
 
        /* handle all PFTM_* > PFTM_MAX here */
        if (state->timeout == PFTM_PURGE)
-               return (time_second);
+               return (time_uptime);
        KASSERT(state->timeout != PFTM_UNLINKED);
        KASSERT(state->timeout < PFTM_MAX);
        timeout = state->rule.ptr->timeout[state->timeout];
@@ -1199,7 +1199,7 @@ pf_state_expires(const struct pf_state *
                        return (state->expire + timeout * (end - states) /
                            (end - start));
                else
-                       return (time_second);
+                       return (time_uptime);
        }
        return (state->expire + timeout);
 }
@@ -1213,7 +1213,7 @@ pf_purge_expired_src_nodes(int waslocked
        for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
        next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
 
-               if (cur->states <= 0 && cur->expire <= time_second) {
+               if (cur->states <= 0 && cur->expire <= time_uptime) {
                        if (! locked) {
                                rw_enter_write(&pf_consistency_lock);
                                next = RB_NEXT(pf_src_tree,
@@ -1243,7 +1243,7 @@ pf_src_tree_remove_state(struct pf_state
                        if (!timeout)
                                timeout =
                                    pf_default_rule.timeout[PFTM_SRC_NODE];
-                       sni->sn->expire = time_second + timeout;
+                       sni->sn->expire = time_uptime + timeout;
                }
                pool_put(&pf_sn_item_pl, sni);
        }
@@ -1343,7 +1343,7 @@ pf_purge_expired_states(u_int32_t maxche
                                locked = 1;
                        }
                        pf_free_state(cur);
-               } else if (pf_state_expires(cur) <= time_second) {
+               } else if (pf_state_expires(cur) <= time_uptime) {
                        /* unlink and free expired state */
                        pf_unlink_state(cur);
                        if (! locked) {
@@ -3761,7 +3761,7 @@ pf_create_state(struct pf_pdesc *pd, str
        }
 
        s->creation = time_uptime;
-       s->expire = time_second;
+       s->expire = time_uptime;
 
        if (pd->proto == IPPROTO_TCP) {
                if (s->state_flags & PFSTATE_SCRUB_TCP &&
@@ -4198,7 +4198,7 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
                        src->state = dst->state = TCPS_TIME_WAIT;
 
                /* update expire time */
-               (*state)->expire = time_second;
+               (*state)->expire = time_uptime;
                if (src->state >= TCPS_FIN_WAIT_2 &&
                    dst->state >= TCPS_FIN_WAIT_2)
                        (*state)->timeout = PFTM_TCP_CLOSED;
@@ -4375,7 +4375,7 @@ pf_tcp_track_sloppy(struct pf_pdesc *pd,
                src->state = dst->state = TCPS_TIME_WAIT;
 
        /* update expire time */
-       (*state)->expire = time_second;
+       (*state)->expire = time_uptime;
        if (src->state >= TCPS_FIN_WAIT_2 &&
            dst->state >= TCPS_FIN_WAIT_2)
                (*state)->timeout = PFTM_TCP_CLOSED;
@@ -4620,7 +4620,7 @@ pf_test_state_udp(struct pf_pdesc *pd, s
                dst->state = PFUDPS_MULTIPLE;
 
        /* update expire time */
-       (*state)->expire = time_second;
+       (*state)->expire = time_uptime;
        if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
                (*state)->timeout = PFTM_UDP_MULTIPLE;
        else
@@ -4762,7 +4762,7 @@ pf_test_state_icmp(struct pf_pdesc *pd, 
                                return (ret);
                }
 
-               (*state)->expire = time_second;
+               (*state)->expire = time_uptime;
                (*state)->timeout = PFTM_ICMP_ERROR_REPLY;
 
                /* translate source/destination address, if necessary */
@@ -5570,7 +5570,7 @@ pf_test_state_other(struct pf_pdesc *pd,
                dst->state = PFOTHERS_MULTIPLE;
 
        /* update expire time */
-       (*state)->expire = time_second;
+       (*state)->expire = time_uptime;
        if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
                (*state)->timeout = PFTM_OTHER_MULTIPLE;
        else
Index: sys/net/pf_ioctl.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf_ioctl.c,v
retrieving revision 1.253
diff -u -p -u -r1.253 pf_ioctl.c
--- sys/net/pf_ioctl.c  8 Jul 2012 07:58:09 -0000       1.253
+++ sys/net/pf_ioctl.c  22 Aug 2012 18:20:08 -0000
@@ -2333,7 +2333,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
 
                p = psn->psn_src_nodes;
                RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
-                       int     secs = time_second, diff;
+                       int     secs = time_uptime, diff;
 
                        if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
                                break;
Index: sys/net/pf_norm.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf_norm.c,v
retrieving revision 1.154
diff -u -p -u -r1.154 pf_norm.c
--- sys/net/pf_norm.c   12 May 2012 13:08:48 -0000      1.154
+++ sys/net/pf_norm.c   22 Aug 2012 18:20:08 -0000
@@ -173,7 +173,7 @@ void
 pf_purge_expired_fragments(void)
 {
        struct pf_fragment      *frag;
-       u_int32_t                expire = time_second -
+       int32_t                  expire = time_uptime -
                                    pf_default_rule.timeout[PFTM_FRAG];
 
        while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
@@ -238,7 +238,7 @@ pf_find_fragment(struct pf_fragment_cmp 
        frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
        if (frag != NULL) {
                /* XXX Are we sure we want to update the timeout? */
-               frag->fr_timeout = time_second;
+               frag->fr_timeout = time_uptime;
                TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
                TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
        }
@@ -314,7 +314,7 @@ pf_fillup_fragment(struct pf_fragment_cm
 
                *(struct pf_fragment_cmp *)frag = *key;
                TAILQ_INIT(&frag->fr_queue);
-               frag->fr_timeout = time_second;
+               frag->fr_timeout = time_uptime;
                frag->fr_maxlen = frent->fe_len;
 
                RB_INSERT(pf_frag_tree, &pf_frag_tree, frag);
Index: sys/net/pfvar.h
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pfvar.h,v
retrieving revision 1.367
diff -u -p -u -r1.367 pfvar.h
--- sys/net/pfvar.h     26 Jul 2012 12:25:31 -0000      1.367
+++ sys/net/pfvar.h     22 Aug 2012 18:20:08 -0000
@@ -824,8 +824,8 @@ struct pf_state {
        struct pfi_kif          *rt_kif;
        u_int64_t                packets[2];
        u_int64_t                bytes[2];
-       u_int32_t                creation;
-       u_int32_t                expire;
+       int32_t                  creation;
+       int32_t                  expire;
        u_int32_t                pfsync_time;
        u_int16_t                qid;
        u_int16_t                pqid;
@@ -895,8 +895,8 @@ struct pfsync_state {
        u_int32_t        rule;
        u_int32_t        anchor;
        u_int32_t        nat_rule;
-       u_int32_t        creation;
-       u_int32_t        expire;
+       int32_t          creation;
+       int32_t          expire;
        u_int32_t        packets[2][2];
        u_int32_t        bytes[2][2];
        u_int32_t        creatorid;
@@ -1818,7 +1818,7 @@ int       pf_normalize_tcp_stateful(struct pf_
            int *);
 int    pf_normalize_mss(struct pf_pdesc *, u_int16_t);
 void   pf_scrub(struct mbuf *, u_int16_t, sa_family_t, u_int8_t, u_int8_t);
-u_int32_t
+int32_t
        pf_state_expires(const struct pf_state *);
 void   pf_purge_expired_fragments(void);
 int    pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,

-- 
I'm not entirely sure you are real.

Reply via email to