i am in an annoying situation where i peer with a campus network on an
ospf link with a 9k mtu, but some corners of that network have layer 2
hops that don't support 9k packets. i sometimes want to tunnel large
(1500 byte) packets to hosts in those corners of the network by
letting the encapsulation protocol fragment. the tunnel endpoint
will then reassemble the packet and forward the full sized frame
as if nothing untoward happened.

the problem is that pf on the ospf hop "helps" by reassembling these
fragmented tunnel packets before sending them out the 9k ospf link.
the layer 2 hops then drop the packet because it's too big. i do
want pf to reassemble the packets so it can check it, but i also
want it to refragment it again afterward.

it turns out this is something that happens for ipv6 already, because
fragmentation in v6 is only supposed to be done by the endpoints. this
diff allows this same semantic for v4 packets if requested. to enable
it, configure "set reassemble yes refragment" in your pf.conf and it
will do the same for v4 that it does for v6.

i've only tested this lightly and now i need sleep. anyone have any
thoughts on this?

note that m_tag_find is really cheap if the tag doesnt exist thanks to
henning@.

Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1122
diff -u -p -r1.1122 pf.c
--- sys/net/pf.c        7 Jul 2021 18:38:25 -0000       1.1122
+++ sys/net/pf.c        31 Aug 2021 12:42:51 -0000
@@ -6049,6 +6049,7 @@ void
 pf_route(struct pf_pdesc *pd, struct pf_state *s)
 {
        struct mbuf             *m0;
+       struct m_tag            *mtag;
        struct mbuf_list         fml;
        struct sockaddr_in      *dst, sin;
        struct rtentry          *rt = NULL;
@@ -6132,6 +6133,15 @@ pf_route(struct pf_pdesc *pd, struct pf_
                ip = mtod(m0, struct ip *);
        }
 
+       /*
+        * If packet has been reassembled by PF earlier, we might have to
+        * use pf_refragment4() here to turn it back to fragments.
+        */
+       if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
+               (void) pf_refragment4(&m0, mtag, dst, ifp, rt);
+               goto done;
+       }
+
        in_proto_cksum_out(m0, ifp);
 
        if (ntohs(ip->ip_len) <= ifp->if_mtu) {
@@ -7357,16 +7367,20 @@ done:
                break;
        }
 
-#ifdef INET6
        /* if reassembled packet passed, create new fragments */
-       if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD &&
-           pd.af == AF_INET6) {
+       if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD) {
                struct m_tag    *mtag;
 
-               if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)))
+               mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL);
+               if (mtag == NULL)
+                       ; /* no reassembly required */
+#ifdef INET6
+               else if (pd.af == AF_INET6)
                        action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
-       }
 #endif /* INET6 */
+               else
+                       action = pf_refragment4(&pd.m, mtag, NULL, NULL, NULL);
+       }
        if (s && action != PF_DROP) {
                if (!s->if_index_in && dir == PF_IN)
                        s->if_index_in = ifp->if_index;
Index: sys/net/pf_norm.c
===================================================================
RCS file: /cvs/src/sys/net/pf_norm.c,v
retrieving revision 1.223
diff -u -p -r1.223 pf_norm.c
--- sys/net/pf_norm.c   10 Mar 2021 10:21:48 -0000      1.223
+++ sys/net/pf_norm.c   31 Aug 2021 12:42:51 -0000
@@ -782,7 +782,7 @@ pf_reassemble(struct mbuf **m0, int dir,
        struct pf_frent         *frent;
        struct pf_fragment      *frag;
        struct pf_frnode         key;
-       u_int16_t                total, hdrlen;
+       u_int16_t                total, maxlen, hdrlen;
 
        /* Get an entry for the fragment queue */
        if ((frent = pf_create_fragment(reason)) == NULL)
@@ -821,6 +821,7 @@ pf_reassemble(struct mbuf **m0, int dir,
        /* We have all the data */
        frent = TAILQ_FIRST(&frag->fr_queue);
        KASSERT(frent != NULL);
+       maxlen = frag->fr_maxlen;
        total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
            TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
        hdrlen = frent->fe_hdrlen;
@@ -843,6 +844,63 @@ pf_reassemble(struct mbuf **m0, int dir,
 
        PF_FRAG_UNLOCK();
        DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
+
+       if (ISSET(pf_status.reass, PF_REASS_REFRAG)) {
+               struct m_tag *mtag;
+               struct pf_fragment_tag *ftag;
+
+               mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(*ftag),
+                   M_NOWAIT);
+               if (mtag == NULL) {
+                       REASON_SET(reason, PFRES_MEMORY);
+                       return (PF_DROP);
+               }
+
+               ftag = (struct pf_fragment_tag *)(mtag + 1);
+               ftag->ft_hdrlen = hdrlen;
+               ftag->ft_maxlen = maxlen;
+               m_tag_prepend(m, mtag);
+       }
+
+       return (PF_PASS);
+}
+
+int
+pf_refragment4(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in *dst,
+    struct ifnet *ifp, struct rtentry *rt)
+{
+       struct mbuf             *m = *m0;
+       struct mbuf_list         fml;
+       struct pf_fragment_tag  *ftag = (struct pf_fragment_tag *)(mtag + 1);
+       u_int32_t                mtu;
+       u_int16_t                maxlen, hdrlen;
+       int                      error;
+
+       hdrlen = ftag->ft_hdrlen;
+       maxlen = ftag->ft_maxlen;
+       m_tag_delete(m, mtag);
+       mtag = NULL;
+       ftag = NULL;
+
+       /* Checksum must be calculated for the whole packet */
+       in_proto_cksum_out(m, NULL);
+
+       mtu = hdrlen + maxlen;
+       error = ip_fragment(m, &fml, ifp, mtu);
+       *m0 = NULL;     /* ip_fragment() has consumed original packet. */
+       if (error) {
+               DPFPRINTF(LOG_NOTICE, "ip_fragment error %d", error);
+               return (PF_DROP);
+       }
+
+       while ((m = ml_dequeue(&fml)) != NULL) {
+               m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
+               if (ifp == NULL)
+                       ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0);
+               else
+                       ifp->if_output(ifp, m, sintosa(dst), rt);
+       }
+
        return (PF_PASS);
 }
 
Index: sys/net/pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.502
diff -u -p -r1.502 pfvar.h
--- sys/net/pfvar.h     23 Jun 2021 06:53:52 -0000      1.502
+++ sys/net/pfvar.h     31 Aug 2021 12:42:51 -0000
@@ -1329,6 +1329,7 @@ struct pf_status {
 
 #define PF_REASS_ENABLED       0x01
 #define PF_REASS_NODF          0x02
+#define PF_REASS_REFRAG                0x04
 
 #define        PF_SYNCOOKIES_NEVER     0
 #define        PF_SYNCOOKIES_ALWAYS    1
@@ -1770,6 +1771,8 @@ int       pf_match_gid(u_int8_t, gid_t, gid_t,
 
 int    pf_refragment6(struct mbuf **, struct m_tag *mtag,
            struct sockaddr_in6 *, struct ifnet *, struct rtentry *);
+int    pf_refragment4(struct mbuf **, struct m_tag *mtag,
+           struct sockaddr_in *, struct ifnet *, struct rtentry *);
 void   pf_normalize_init(void);
 int    pf_normalize_ip(struct pf_pdesc *, u_short *);
 int    pf_normalize_ip6(struct pf_pdesc *, u_short *);
Index: share/man/man5/pf.conf.5
===================================================================
RCS file: /cvs/src/share/man/man5/pf.conf.5,v
retrieving revision 1.587
diff -u -p -r1.587 pf.conf.5
--- share/man/man5/pf.conf.5    19 Jul 2021 16:23:56 -0000      1.587
+++ share/man/man5/pf.conf.5    31 Aug 2021 12:42:51 -0000
@@ -1317,7 +1317,7 @@ Alias for
 .Pp
 The default value is
 .Cm normal .
-.It Ic set Cm reassemble yes | no Op Cm no-df
+.It Ic set Cm reassemble yes | no Oo Cm no-df Oc Oo Cm refragment Oc
 The
 .Cm reassemble
 option is used to enable or disable the reassembly of fragmented packets,
@@ -1336,6 +1336,10 @@ the reassembled packet will have the
 bit cleared.
 The default value is
 .Cm yes .
+If
+.Cm refragment
+is also specified, a reassembled packet will be refragmented before
+being forwarded.
 .It Ic set Cm ruleset-optimization Ar level
 .Bl -tag -width profile -compact
 .It Cm basic
@@ -2775,7 +2779,10 @@ option         = "set" ( [ "timeout" ( t
                  [ "skip on" ifspec ] |
                  [ "debug" ( "emerg" | "alert" | "crit" | "err" |
                  "warning" | "notice" | "info" | "debug" ) ] |
-                 [ "reassemble" ( "yes" | "no" ) [ "no-df" ] ] )
+                 [ "reassemble" ( "yes" | "no" ) [ reassembleopts ] ] )
+
+reassembleopts = reassembleopt [ reassembleopts ]
+reassembleopt  = "no-df" | "refragment"
 
 pf-rule        = action [ ( "in" | "out" ) ]
                  [ "log" [ "(" logopts ")"] ] [ "quick" ]
Index: sbin/pfctl/parse.y
===================================================================
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.709
diff -u -p -r1.709 parse.y
--- sbin/pfctl/parse.y  1 Feb 2021 00:31:04 -0000       1.709
+++ sbin/pfctl/parse.y  31 Aug 2021 12:42:51 -0000
@@ -470,7 +470,7 @@ int parseport(char *, struct range *r, i
 
 %token PASS BLOCK MATCH SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS
 %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE
-%token ICMP6TYPE CODE KEEP MODULATE STATE PORT BINATTO NODF
+%token ICMP6TYPE CODE KEEP MODULATE STATE PORT BINATTO NODF REFRAGMENT
 %token MINTTL ERROR ALLOWOPTS FILENAME ROUTETO DUPTO REPLYTO NO LABEL
 %token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE
 %token REASSEMBLE ANCHOR SYNCOOKIES
@@ -490,11 +490,12 @@ int       parseport(char *, struct range *r, i
 %token <v.i>                   PORTBINARY
 %type  <v.interface>           interface if_list if_item_not if_item
 %type  <v.number>              number icmptype icmp6type uid gid
-%type  <v.number>              tos not yesno optnodf
+%type  <v.number>              tos not yesno
 %type  <v.probability>         probability
 %type  <v.weight>              optweight
 %type  <v.i>                   dir af optimizer syncookie_val
 %type  <v.i>                   sourcetrack flush unaryop statelock
+%type  <v.i>                   setreass_opts setreass_opts_l setreass_opt
 %type  <v.b>                   action
 %type  <v.b>                   flags flag blockspec prio
 %type  <v.range>               portplain portstar portrange
@@ -588,11 +589,19 @@ optimizer : string        {
                }
                ;
 
-optnodf                : /* empty */   { $$ = 0; }
-               | NODF          { $$ = 1; }
+setreass_opts  : /* empty */                           { $$ = 0; }
+               | setreass_opts_l                       { $$ = $1; }
                ;
 
-option         : SET REASSEMBLE yesno optnodf          {
+setreass_opts_l        : setreass_opts_l setreass_opt          { $$ = $1 | $2; 
}
+               | setreass_opt                          { $$ = $1; }
+               ;
+
+setreass_opt   : NODF          { $$ = PF_REASS_NODF; }
+               | REFRAGMENT    { $$ = PF_REASS_REFRAG; }
+               ;
+
+option         : SET REASSEMBLE yesno setreass_opts    {
                        pfctl_set_reassembly(pf, $3, $4);
                }
                | SET OPTIMIZATION STRING               {
@@ -5014,6 +5023,7 @@ lookup(char *s)
                { "rdr-to",             RDRTO},
                { "reassemble",         REASSEMBLE},
                { "received-on",        RECEIVEDON},
+               { "refragment",         REFRAGMENT},
                { "reply-to",           REPLYTO},
                { "return",             RETURN},
                { "return-icmp",        RETURNICMP},
Index: sbin/pfctl/pfctl.c
===================================================================
RCS file: /cvs/src/sbin/pfctl/pfctl.c,v
retrieving revision 1.383
diff -u -p -r1.383 pfctl.c
--- sbin/pfctl/pfctl.c  14 Oct 2020 19:30:37 -0000      1.383
+++ sbin/pfctl/pfctl.c  31 Aug 2021 12:42:51 -0000
@@ -1907,20 +1907,20 @@ pfctl_set_synflwats(struct pfctl *pf, u_
 }
 
 int
-pfctl_set_reassembly(struct pfctl *pf, int on, int nodf)
+pfctl_set_reassembly(struct pfctl *pf, int on, u_int32_t reass)
 {
        pf->reass_set = 1;
        if (on) {
-               pf->reassemble = PF_REASS_ENABLED;
-               if (nodf)
-                       pf->reassemble |= PF_REASS_NODF;
+               pf->reassemble = PF_REASS_ENABLED | reass;
        } else {
                pf->reassemble = 0;
        }
 
-       if (pf->opts & PF_OPT_VERBOSE)
-               printf("set reassemble %s %s\n", on ? "yes" : "no",
-                   nodf ? "no-df" : "");
+       if (pf->opts & PF_OPT_VERBOSE) {
+               printf("set reassemble %s%s%s\n", on ? "yes" : "no",
+                   (reass & PF_REASS_NODF) ? " no-df" : "",
+                   (reass & PF_REASS_REFRAG) ? " refragment" : "");
+       }
 
        return (0);
 }
Index: sbin/pfctl/pfctl_parser.h
===================================================================
RCS file: /cvs/src/sbin/pfctl/pfctl_parser.h,v
retrieving revision 1.117
diff -u -p -r1.117 pfctl_parser.h
--- sbin/pfctl/pfctl_parser.h   21 Jul 2020 14:10:51 -0000      1.117
+++ sbin/pfctl/pfctl_parser.h   31 Aug 2021 12:42:51 -0000
@@ -221,7 +221,7 @@ int     add_opt_table(struct pfctl *, st
 void   pfctl_add_rule(struct pfctl *, struct pf_rule *);
 
 int    pfctl_set_timeout(struct pfctl *, const char *, int, int);
-int    pfctl_set_reassembly(struct pfctl *, int, int);
+int    pfctl_set_reassembly(struct pfctl *, int, u_int32_t);
 int    pfctl_set_syncookies(struct pfctl *, u_int8_t,
            struct pfctl_watermarks *);
 int    pfctl_set_optimization(struct pfctl *, const char *);

Reply via email to