Hi,

The diff is to fix a problem in a complex setup.

Normal setup of divert-reply for TCP connection:

  client --- relayd  --- server

- transparently forward TCP connections
- divert-reply is configured the outbound connection to the server
  - so that the PF state is removed when the PCB is deleted
  - otherwise if packets from server is comming after the PCB is
    deleted, they are accidentally forwarded directly to the client

In addtion to this, "match out nat-to" is configured for the outbound
connection instead of dropping "transparent".  The purpose of doing
this is to expand the space of ephemeral ports of NAT.  Ephemeral
ports of PCB is limitted in one 2^16 space, but ephemeral ports of PF
is limitted in 2^16 for each remote address.

In this case, if the PF state is dropped immediately after the PCB is
dropped, the port number of NAT might be reused quickly, then a
problem can happen on the server side since the port is used for the
old connection.

So the diff is to keep the state until timeout.

comment?

Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1138
diff -u -p -r1.1138 pf.c
--- sys/net/pf.c        30 Aug 2022 11:53:03 -0000      1.1138
+++ sys/net/pf.c        2 Sep 2022 12:54:36 -0000
@@ -1148,6 +1148,8 @@ pf_find_state(struct pf_pdesc *pd, struc
 
        if (s == NULL)
                return (PF_DROP);
+       if (ISSET(s->state_flags, PFSTATE_INP_UNLINKED))
+               return (PF_DROP);
 
        if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
                pf_add_threshold(&s->rule.ptr->pktrate);
@@ -1461,7 +1463,23 @@ pf_remove_divert_state(struct pf_state_k
                if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
                    (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
                    si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
-                       pf_remove_state(si->s);
+                       if (si->s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
+                           si->s->key[PF_SK_WIRE] != si->s->key[PF_SK_STACK]) {
+                               /*
+                                * If the local address is translated, keep
+                                * the state for "tcp.closed" seconds to
+                                * prevent its source port from being reused.
+                                */
+                               if (si->s->src.state < TCPS_FIN_WAIT_2 ||
+                                   si->s->dst.state < TCPS_FIN_WAIT_2) {
+                                       pf_set_protostate(si->s, PF_PEER_BOTH,
+                                           TCPS_TIME_WAIT);
+                                       si->s->timeout = PFTM_TCP_CLOSED;
+                                       si->s->expire = getuptime();
+                               }
+                               si->s->state_flags |= PFSTATE_INP_UNLINKED;
+                       } else
+                               pf_remove_state(si->s);
                        break;
                }
        }
Index: sys/net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.509
diff -u -p -r1.509 pfvar.h
--- sys/net/pfvar.h     20 Jul 2022 09:33:11 -0000      1.509
+++ sys/net/pfvar.h     2 Sep 2022 12:54:37 -0000
@@ -784,6 +784,7 @@ struct pf_state {
 #define        PFSTATE_RANDOMID        0x0080
 #define        PFSTATE_SCRUB_TCP       0x0100
 #define        PFSTATE_SETPRIO         0x0200
+#define        PFSTATE_INP_UNLINKED    0x0400
 #define        PFSTATE_SCRUBMASK 
(PFSTATE_NODF|PFSTATE_RANDOMID|PFSTATE_SCRUB_TCP)
 #define        PFSTATE_SETMASK   (PFSTATE_SETTOS|PFSTATE_SETPRIO)
        u_int8_t                 log;




Reply via email to