Hi,
I think I got this now.
- replace time_seconds with time_uptime
- with that flow_finish in pflow can be simplified (pointed out by benno@)
this should take care of flows with finish < start for localy created
states
- change various variables from unsigned to signed (mainly) in pfsync which
could underflow (inspired by a patch from dlg@ from last year, pointed out
by cameild@). This fixes pflow and at the same time an underflow problem
in pfsync which dlg's patch was addressing. If I understand the history
correctly cameild@ noticed this problem last year.
Hrvoje Popovski tested an older version without the pfsync fix.
Since this changes the semantics of a field in pfsync both pfsync machines
need to be updated (I think it's ok to import 5.1 states into this version
but the other way around will cause problems.)
We are running with this patch since yesterday on a redundant firewall
(amd64) with pfsync and no longer see broken flows (i.e. where finish
< start). The pair survived various failovers.
Index: share/man/man4/pflow.4
===================================================================
RCS file: /opt/OpenBSD-CVS/src/share/man/man4/pflow.4,v
retrieving revision 1.11
diff -u -p -u -r1.11 pflow.4
--- share/man/man4/pflow.4 2 Feb 2012 12:34:37 -0000 1.11
+++ share/man/man4/pflow.4 22 Aug 2012 18:20:08 -0000
@@ -127,6 +127,12 @@ The
device first appeared in
.Ox 4.5 .
.Sh BUGS
+A state created by
+.Xr pfsync 4
+can have a creation or expiration time before the machine came up.
+.Nm
+pretends the flow was created or expired when the machine came up.
+
The IPFIX implementation is incomplete:
The required transport protocol SCTP is not supported.
Transport over TCP and DTLS protected flow export is also not supported.
Index: sys/net/if_pflow.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/if_pflow.c,v
retrieving revision 1.20
diff -u -p -u -r1.20 if_pflow.c
--- sys/net/if_pflow.c 11 Apr 2012 17:42:53 -0000 1.20
+++ sys/net/if_pflow.c 22 Aug 2012 18:20:08 -0000
@@ -553,12 +553,15 @@ copy_flow_data(struct pflow_flow *flow1,
flow1->flow_octets = htonl(st->bytes[0]);
flow2->flow_octets = htonl(st->bytes[1]);
- flow1->flow_start = flow2->flow_start =
+ /*
+ * Pretend the flow was created or expired when this machine
+ * came up when the state's creation or expiration is in the past.
+ */
+ flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
htonl(st->creation * 1000);
- flow1->flow_finish = flow2->flow_finish =
- htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
- st->rule.ptr->timeout[st->timeout] :
- pf_default_rule.timeout[st->timeout])) * 1000);
+ flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+ htonl(st->expire * 1000);
+
flow1->tcp_flags = flow2->tcp_flags = 0;
flow1->protocol = flow2->protocol = sk->proto;
flow1->tos = flow2->tos = st->rule.ptr->tos;
@@ -580,12 +583,14 @@ copy_flow4_data(struct pflow_flow4 *flow
flow1->flow_octets = htobe64(st->bytes[0]);
flow2->flow_octets = htobe64(st->bytes[1]);
- flow1->flow_start = flow2->flow_start =
+ /*
+ * Pretend the flow was created or expired when this machine
+ * came up when the state's creation or expiration is in the past.
+ */
+ flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
htonl(st->creation * 1000);
- flow1->flow_finish = flow2->flow_finish =
- htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
- st->rule.ptr->timeout[st->timeout] :
- pf_default_rule.timeout[st->timeout])) * 1000);
+ flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+ htonl(st->expire * 1000);
flow1->protocol = flow2->protocol = sk->proto;
flow1->tos = flow2->tos = st->rule.ptr->tos;
@@ -608,12 +613,14 @@ copy_flow6_data(struct pflow_flow6 *flow
flow1->flow_octets = htobe64(st->bytes[0]);
flow2->flow_octets = htobe64(st->bytes[1]);
- flow1->flow_start = flow2->flow_start =
+ /*
+ * Pretend the flow was created or expired when this machine
+ * came up when the state's creation or expiration is in the past.
+ */
+ flow1->flow_start = flow2->flow_start = st->creation < 0 ? 0 :
htonl(st->creation * 1000);
- flow1->flow_finish = flow2->flow_finish =
- htonl((time_uptime - (st->rule.ptr->timeout[st->timeout] ?
- st->rule.ptr->timeout[st->timeout] :
- pf_default_rule.timeout[st->timeout])) * 1000);
+ flow1->flow_finish = flow2->flow_finish = st->expire < 0 ? 0 :
+ htonl(st->expire * 1000);
flow1->protocol = flow2->protocol = sk->proto;
flow1->tos = flow2->tos = st->rule.ptr->tos;
Index: sys/net/if_pfsync.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/if_pfsync.c,v
retrieving revision 1.189
diff -u -p -u -r1.189 if_pfsync.c
--- sys/net/if_pfsync.c 26 Jul 2012 12:25:31 -0000 1.189
+++ sys/net/if_pfsync.c 22 Aug 2012 18:20:08 -0000
@@ -464,6 +464,7 @@ void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
pf_state_export(sp, st);
+ sp->expire = htonl(time_uptime - st->expire);
}
int
@@ -574,17 +575,7 @@ pfsync_state_import(struct pfsync_state
/* copy to state */
bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
st->creation = time_uptime - ntohl(sp->creation);
- st->expire = time_second;
- if (sp->expire) {
- u_int32_t timeout;
-
- timeout = r->timeout[sp->timeout];
- if (!timeout)
- timeout = pf_default_rule.timeout[sp->timeout];
-
- /* sp->expire may have been adaptively scaled by export. */
- st->expire -= timeout - ntohl(sp->expire);
- }
+ st->expire = time_uptime - ntohl(sp->expire);
st->direction = sp->direction;
st->log = sp->log;
@@ -948,7 +939,7 @@ pfsync_in_upd(caddr_t buf, int len, int
if (sync < 2) {
pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
pf_state_peer_ntoh(&sp->dst, &st->dst);
- st->expire = time_second;
+ st->expire = time_uptime;
st->timeout = sp->timeout;
}
st->pfsync_time = time_uptime;
@@ -1022,7 +1013,7 @@ pfsync_in_upd_c(caddr_t buf, int len, in
if (sync < 2) {
pfsync_alloc_scrub_memory(&up->dst, &st->dst);
pf_state_peer_ntoh(&up->dst, &st->dst);
- st->expire = time_second;
+ st->expire = time_uptime;
st->timeout = up->timeout;
}
st->pfsync_time = time_uptime;
Index: sys/net/pf.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf.c,v
retrieving revision 1.809
diff -u -p -u -r1.809 pf.c
--- sys/net/pf.c 26 Jul 2012 12:25:31 -0000 1.809
+++ sys/net/pf.c 22 Aug 2012 18:20:08 -0000
@@ -389,13 +389,13 @@ pf_init_threshold(struct pf_threshold *t
threshold->limit = limit * PF_THRESHOLD_MULT;
threshold->seconds = seconds;
threshold->count = 0;
- threshold->last = time_second;
+ threshold->last = time_uptime;
}
void
pf_add_threshold(struct pf_threshold *threshold)
{
- u_int32_t t = time_second, diff = t - threshold->last;
+ u_int32_t t = time_uptime, diff = t - threshold->last;
if (diff >= threshold->seconds)
threshold->count = 0;
@@ -582,7 +582,7 @@ pf_insert_src_node(struct pf_src_node **
void
pf_remove_src_node(struct pf_src_node *sn)
{
- if (sn->states > 0 || sn->expire > time_second)
+ if (sn->states > 0 || sn->expire > time_uptime)
return;
if (sn->rule.ptr != NULL) {
@@ -1105,10 +1105,10 @@ pf_state_export(struct pfsync_state *sp,
bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
sp->creation = htonl(time_uptime - st->creation);
sp->expire = pf_state_expires(st);
- if (sp->expire <= time_second)
+ if (sp->expire <= time_uptime)
sp->expire = htonl(0);
else
- sp->expire = htonl(sp->expire - time_second);
+ sp->expire = htonl(sp->expire - time_uptime);
sp->direction = st->direction;
sp->log = st->log;
@@ -1169,7 +1169,7 @@ pf_purge_thread(void *v)
}
}
-u_int32_t
+int32_t
pf_state_expires(const struct pf_state *state)
{
u_int32_t timeout;
@@ -1179,7 +1179,7 @@ pf_state_expires(const struct pf_state *
/* handle all PFTM_* > PFTM_MAX here */
if (state->timeout == PFTM_PURGE)
- return (time_second);
+ return (time_uptime);
KASSERT(state->timeout != PFTM_UNLINKED);
KASSERT(state->timeout < PFTM_MAX);
timeout = state->rule.ptr->timeout[state->timeout];
@@ -1199,7 +1199,7 @@ pf_state_expires(const struct pf_state *
return (state->expire + timeout * (end - states) /
(end - start));
else
- return (time_second);
+ return (time_uptime);
}
return (state->expire + timeout);
}
@@ -1213,7 +1213,7 @@ pf_purge_expired_src_nodes(int waslocked
for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
- if (cur->states <= 0 && cur->expire <= time_second) {
+ if (cur->states <= 0 && cur->expire <= time_uptime) {
if (! locked) {
rw_enter_write(&pf_consistency_lock);
next = RB_NEXT(pf_src_tree,
@@ -1243,7 +1243,7 @@ pf_src_tree_remove_state(struct pf_state
if (!timeout)
timeout =
pf_default_rule.timeout[PFTM_SRC_NODE];
- sni->sn->expire = time_second + timeout;
+ sni->sn->expire = time_uptime + timeout;
}
pool_put(&pf_sn_item_pl, sni);
}
@@ -1343,7 +1343,7 @@ pf_purge_expired_states(u_int32_t maxche
locked = 1;
}
pf_free_state(cur);
- } else if (pf_state_expires(cur) <= time_second) {
+ } else if (pf_state_expires(cur) <= time_uptime) {
/* unlink and free expired state */
pf_unlink_state(cur);
if (! locked) {
@@ -3761,7 +3761,7 @@ pf_create_state(struct pf_pdesc *pd, str
}
s->creation = time_uptime;
- s->expire = time_second;
+ s->expire = time_uptime;
if (pd->proto == IPPROTO_TCP) {
if (s->state_flags & PFSTATE_SCRUB_TCP &&
@@ -4198,7 +4198,7 @@ pf_tcp_track_full(struct pf_pdesc *pd, s
src->state = dst->state = TCPS_TIME_WAIT;
/* update expire time */
- (*state)->expire = time_second;
+ (*state)->expire = time_uptime;
if (src->state >= TCPS_FIN_WAIT_2 &&
dst->state >= TCPS_FIN_WAIT_2)
(*state)->timeout = PFTM_TCP_CLOSED;
@@ -4375,7 +4375,7 @@ pf_tcp_track_sloppy(struct pf_pdesc *pd,
src->state = dst->state = TCPS_TIME_WAIT;
/* update expire time */
- (*state)->expire = time_second;
+ (*state)->expire = time_uptime;
if (src->state >= TCPS_FIN_WAIT_2 &&
dst->state >= TCPS_FIN_WAIT_2)
(*state)->timeout = PFTM_TCP_CLOSED;
@@ -4620,7 +4620,7 @@ pf_test_state_udp(struct pf_pdesc *pd, s
dst->state = PFUDPS_MULTIPLE;
/* update expire time */
- (*state)->expire = time_second;
+ (*state)->expire = time_uptime;
if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
(*state)->timeout = PFTM_UDP_MULTIPLE;
else
@@ -4762,7 +4762,7 @@ pf_test_state_icmp(struct pf_pdesc *pd,
return (ret);
}
- (*state)->expire = time_second;
+ (*state)->expire = time_uptime;
(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
/* translate source/destination address, if necessary */
@@ -5570,7 +5570,7 @@ pf_test_state_other(struct pf_pdesc *pd,
dst->state = PFOTHERS_MULTIPLE;
/* update expire time */
- (*state)->expire = time_second;
+ (*state)->expire = time_uptime;
if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
(*state)->timeout = PFTM_OTHER_MULTIPLE;
else
Index: sys/net/pf_ioctl.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf_ioctl.c,v
retrieving revision 1.253
diff -u -p -u -r1.253 pf_ioctl.c
--- sys/net/pf_ioctl.c 8 Jul 2012 07:58:09 -0000 1.253
+++ sys/net/pf_ioctl.c 22 Aug 2012 18:20:08 -0000
@@ -2333,7 +2333,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
p = psn->psn_src_nodes;
RB_FOREACH(n, pf_src_tree, &tree_src_tracking) {
- int secs = time_second, diff;
+ int secs = time_uptime, diff;
if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
break;
Index: sys/net/pf_norm.c
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pf_norm.c,v
retrieving revision 1.154
diff -u -p -u -r1.154 pf_norm.c
--- sys/net/pf_norm.c 12 May 2012 13:08:48 -0000 1.154
+++ sys/net/pf_norm.c 22 Aug 2012 18:20:08 -0000
@@ -173,7 +173,7 @@ void
pf_purge_expired_fragments(void)
{
struct pf_fragment *frag;
- u_int32_t expire = time_second -
+ int32_t expire = time_uptime -
pf_default_rule.timeout[PFTM_FRAG];
while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
@@ -238,7 +238,7 @@ pf_find_fragment(struct pf_fragment_cmp
frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
if (frag != NULL) {
/* XXX Are we sure we want to update the timeout? */
- frag->fr_timeout = time_second;
+ frag->fr_timeout = time_uptime;
TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
}
@@ -314,7 +314,7 @@ pf_fillup_fragment(struct pf_fragment_cm
*(struct pf_fragment_cmp *)frag = *key;
TAILQ_INIT(&frag->fr_queue);
- frag->fr_timeout = time_second;
+ frag->fr_timeout = time_uptime;
frag->fr_maxlen = frent->fe_len;
RB_INSERT(pf_frag_tree, &pf_frag_tree, frag);
Index: sys/net/pfvar.h
===================================================================
RCS file: /opt/OpenBSD-CVS/src/sys/net/pfvar.h,v
retrieving revision 1.367
diff -u -p -u -r1.367 pfvar.h
--- sys/net/pfvar.h 26 Jul 2012 12:25:31 -0000 1.367
+++ sys/net/pfvar.h 22 Aug 2012 18:20:08 -0000
@@ -824,8 +824,8 @@ struct pf_state {
struct pfi_kif *rt_kif;
u_int64_t packets[2];
u_int64_t bytes[2];
- u_int32_t creation;
- u_int32_t expire;
+ int32_t creation;
+ int32_t expire;
u_int32_t pfsync_time;
u_int16_t qid;
u_int16_t pqid;
@@ -895,8 +895,8 @@ struct pfsync_state {
u_int32_t rule;
u_int32_t anchor;
u_int32_t nat_rule;
- u_int32_t creation;
- u_int32_t expire;
+ int32_t creation;
+ int32_t expire;
u_int32_t packets[2][2];
u_int32_t bytes[2][2];
u_int32_t creatorid;
@@ -1818,7 +1818,7 @@ int pf_normalize_tcp_stateful(struct pf_
int *);
int pf_normalize_mss(struct pf_pdesc *, u_int16_t);
void pf_scrub(struct mbuf *, u_int16_t, sa_family_t, u_int8_t, u_int8_t);
-u_int32_t
+int32_t
pf_state_expires(const struct pf_state *);
void pf_purge_expired_fragments(void);
int pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *,
--
I'm not entirely sure you are real.