i am in an annoying situation where i peer with a campus network on an ospf link with a 9k mtu, but some corners of that network have layer 2 hops that don't support 9k packets. i sometimes want to tunnel large (1500-byte) packets to hosts in those corners of the network by letting the encapsulation protocol fragment them. the tunnel endpoint will then reassemble the packet and forward the full-sized frame as if nothing untoward happened.
the problem is that pf on the ospf hop "helps" by reassembling these fragmented tunnel packets before sending them out the 9k ospf link. the layer 2 hops then drop the packet because it's too big. i do want pf to reassemble the packets so it can check them, but i also want it to refragment them again afterward. it turns out this is something that happens for ipv6 already, because fragmentation in v6 is only supposed to be done by the endpoints. this diff allows the same semantics for v4 packets if requested. to enable it, configure "set reassemble yes refragment" in your pf.conf and it will do the same for v4 that it does for v6. i've only tested this lightly and now i need sleep. anyone have any thoughts on this? note that m_tag_find is really cheap if the tag doesn't exist, thanks to henning@. Index: sys/net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.1122 diff -u -p -r1.1122 pf.c --- sys/net/pf.c 7 Jul 2021 18:38:25 -0000 1.1122 +++ sys/net/pf.c 31 Aug 2021 12:42:51 -0000 @@ -6049,6 +6049,7 @@ void pf_route(struct pf_pdesc *pd, struct pf_state *s) { struct mbuf *m0; + struct m_tag *mtag; struct mbuf_list fml; struct sockaddr_in *dst, sin; struct rtentry *rt = NULL; @@ -6132,6 +6133,15 @@ pf_route(struct pf_pdesc *pd, struct pf_ ip = mtod(m0, struct ip *); } + /* + * If packet has been reassembled by PF earlier, we might have to + * use pf_refragment4() here to turn it back to fragments. 
+ */ + if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { + (void) pf_refragment4(&m0, mtag, dst, ifp, rt); + goto done; + } + in_proto_cksum_out(m0, ifp); if (ntohs(ip->ip_len) <= ifp->if_mtu) { @@ -7357,16 +7367,20 @@ done: break; } -#ifdef INET6 /* if reassembled packet passed, create new fragments */ - if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD && - pd.af == AF_INET6) { + if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD) { struct m_tag *mtag; - if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL))) + mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL); + if (mtag == NULL) + ; /* no reassembly required */ +#ifdef INET6 + else if (pd.af == AF_INET6) action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL); - } #endif /* INET6 */ + else + action = pf_refragment4(&pd.m, mtag, NULL, NULL, NULL); + } if (s && action != PF_DROP) { if (!s->if_index_in && dir == PF_IN) s->if_index_in = ifp->if_index; Index: sys/net/pf_norm.c =================================================================== RCS file: /cvs/src/sys/net/pf_norm.c,v retrieving revision 1.223 diff -u -p -r1.223 pf_norm.c --- sys/net/pf_norm.c 10 Mar 2021 10:21:48 -0000 1.223 +++ sys/net/pf_norm.c 31 Aug 2021 12:42:51 -0000 @@ -782,7 +782,7 @@ pf_reassemble(struct mbuf **m0, int dir, struct pf_frent *frent; struct pf_fragment *frag; struct pf_frnode key; - u_int16_t total, hdrlen; + u_int16_t total, maxlen, hdrlen; /* Get an entry for the fragment queue */ if ((frent = pf_create_fragment(reason)) == NULL) @@ -821,6 +821,7 @@ pf_reassemble(struct mbuf **m0, int dir, /* We have all the data */ frent = TAILQ_FIRST(&frag->fr_queue); KASSERT(frent != NULL); + maxlen = frag->fr_maxlen; total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; hdrlen = frent->fe_hdrlen; @@ -843,6 +844,63 @@ pf_reassemble(struct mbuf **m0, int dir, PF_FRAG_UNLOCK(); DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, 
ntohs(ip->ip_len)); + + if (ISSET(pf_status.reass, PF_REASS_REFRAG)) { + struct m_tag *mtag; + struct pf_fragment_tag *ftag; + + mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(*ftag), + M_NOWAIT); + if (mtag == NULL) { + REASON_SET(reason, PFRES_MEMORY); + return (PF_DROP); + } + + ftag = (struct pf_fragment_tag *)(mtag + 1); + ftag->ft_hdrlen = hdrlen; + ftag->ft_maxlen = maxlen; + m_tag_prepend(m, mtag); + } + + return (PF_PASS); +} + +int +pf_refragment4(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in *dst, + struct ifnet *ifp, struct rtentry *rt) +{ + struct mbuf *m = *m0; + struct mbuf_list fml; + struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); + u_int32_t mtu; + u_int16_t maxlen, hdrlen; + int error; + + hdrlen = ftag->ft_hdrlen; + maxlen = ftag->ft_maxlen; + m_tag_delete(m, mtag); + mtag = NULL; + ftag = NULL; + + /* Checksum must be calculated for the whole packet */ + in_proto_cksum_out(m, NULL); + + mtu = hdrlen + maxlen; + error = ip_fragment(m, &fml, ifp, mtu); + *m0 = NULL; /* ip_fragment() has consumed original packet. 
*/ + if (error) { + DPFPRINTF(LOG_NOTICE, "ip_fragment error %d", error); + return (PF_DROP); + } + + while ((m = ml_dequeue(&fml)) != NULL) { + m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; + if (ifp == NULL) + ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0); + else + ifp->if_output(ifp, m, sintosa(dst), rt); + } + return (PF_PASS); } Index: sys/net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.502 diff -u -p -r1.502 pfvar.h --- sys/net/pfvar.h 23 Jun 2021 06:53:52 -0000 1.502 +++ sys/net/pfvar.h 31 Aug 2021 12:42:51 -0000 @@ -1329,6 +1329,7 @@ struct pf_status { #define PF_REASS_ENABLED 0x01 #define PF_REASS_NODF 0x02 +#define PF_REASS_REFRAG 0x04 #define PF_SYNCOOKIES_NEVER 0 #define PF_SYNCOOKIES_ALWAYS 1 @@ -1770,6 +1771,8 @@ int pf_match_gid(u_int8_t, gid_t, gid_t, int pf_refragment6(struct mbuf **, struct m_tag *mtag, struct sockaddr_in6 *, struct ifnet *, struct rtentry *); +int pf_refragment4(struct mbuf **, struct m_tag *mtag, + struct sockaddr_in *, struct ifnet *, struct rtentry *); void pf_normalize_init(void); int pf_normalize_ip(struct pf_pdesc *, u_short *); int pf_normalize_ip6(struct pf_pdesc *, u_short *); Index: share/man/man5/pf.conf.5 =================================================================== RCS file: /cvs/src/share/man/man5/pf.conf.5,v retrieving revision 1.587 diff -u -p -r1.587 pf.conf.5 --- share/man/man5/pf.conf.5 19 Jul 2021 16:23:56 -0000 1.587 +++ share/man/man5/pf.conf.5 31 Aug 2021 12:42:51 -0000 @@ -1317,7 +1317,7 @@ Alias for .Pp The default value is .Cm normal . -.It Ic set Cm reassemble yes | no Op Cm no-df +.It Ic set Cm reassemble yes | no Oo Cm no-df Oc Oo Cm refragment Oc The .Cm reassemble option is used to enable or disable the reassembly of fragmented packets, @@ -1336,6 +1336,10 @@ the reassembled packet will have the bit cleared. The default value is .Cm yes . 
+If +.Cm refragment +is also specified, a reassembled packet will be refragmented before +being forwarded. .It Ic set Cm ruleset-optimization Ar level .Bl -tag -width profile -compact .It Cm basic @@ -2775,7 +2779,10 @@ option = "set" ( [ "timeout" ( t [ "skip on" ifspec ] | [ "debug" ( "emerg" | "alert" | "crit" | "err" | "warning" | "notice" | "info" | "debug" ) ] | - [ "reassemble" ( "yes" | "no" ) [ "no-df" ] ] ) + [ "reassemble" ( "yes" | "no" ) [ reassembleopts ] ] ) + +reassembleopts = reassembleopt [ reassembleopts ] +reassembleopt = "no-df" | "refragment" pf-rule = action [ ( "in" | "out" ) ] [ "log" [ "(" logopts ")"] ] [ "quick" ] Index: sbin/pfctl/parse.y =================================================================== RCS file: /cvs/src/sbin/pfctl/parse.y,v retrieving revision 1.709 diff -u -p -r1.709 parse.y --- sbin/pfctl/parse.y 1 Feb 2021 00:31:04 -0000 1.709 +++ sbin/pfctl/parse.y 31 Aug 2021 12:42:51 -0000 @@ -470,7 +470,7 @@ int parseport(char *, struct range *r, i %token PASS BLOCK MATCH SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS %token RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE -%token ICMP6TYPE CODE KEEP MODULATE STATE PORT BINATTO NODF +%token ICMP6TYPE CODE KEEP MODULATE STATE PORT BINATTO NODF REFRAGMENT %token MINTTL ERROR ALLOWOPTS FILENAME ROUTETO DUPTO REPLYTO NO LABEL %token NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE %token REASSEMBLE ANCHOR SYNCOOKIES @@ -490,11 +490,12 @@ int parseport(char *, struct range *r, i %token <v.i> PORTBINARY %type <v.interface> interface if_list if_item_not if_item %type <v.number> number icmptype icmp6type uid gid -%type <v.number> tos not yesno optnodf +%type <v.number> tos not yesno %type <v.probability> probability %type <v.weight> optweight %type <v.i> dir af optimizer syncookie_val %type <v.i> sourcetrack flush unaryop statelock +%type <v.i> setreass_opts setreass_opts_l setreass_opt %type <v.b> action %type <v.b> flags flag blockspec 
prio %type <v.range> portplain portstar portrange @@ -588,11 +589,19 @@ optimizer : string { } ; -optnodf : /* empty */ { $$ = 0; } - | NODF { $$ = 1; } +setreass_opts : /* empty */ { $$ = 0; } + | setreass_opts_l { $$ = $1; } ; -option : SET REASSEMBLE yesno optnodf { +setreass_opts_l : setreass_opts_l setreass_opt { $$ = $1 | $2; } + | setreass_opt { $$ = $1; } + ; + +setreass_opt : NODF { $$ = PF_REASS_NODF; } + | REFRAGMENT { $$ = PF_REASS_REFRAG; } + ; + +option : SET REASSEMBLE yesno setreass_opts { pfctl_set_reassembly(pf, $3, $4); } | SET OPTIMIZATION STRING { @@ -5014,6 +5023,7 @@ lookup(char *s) { "rdr-to", RDRTO}, { "reassemble", REASSEMBLE}, { "received-on", RECEIVEDON}, + { "refragment", REFRAGMENT}, { "reply-to", REPLYTO}, { "return", RETURN}, { "return-icmp", RETURNICMP}, Index: sbin/pfctl/pfctl.c =================================================================== RCS file: /cvs/src/sbin/pfctl/pfctl.c,v retrieving revision 1.383 diff -u -p -r1.383 pfctl.c --- sbin/pfctl/pfctl.c 14 Oct 2020 19:30:37 -0000 1.383 +++ sbin/pfctl/pfctl.c 31 Aug 2021 12:42:51 -0000 @@ -1907,20 +1907,20 @@ pfctl_set_synflwats(struct pfctl *pf, u_ } int -pfctl_set_reassembly(struct pfctl *pf, int on, int nodf) +pfctl_set_reassembly(struct pfctl *pf, int on, u_int32_t reass) { pf->reass_set = 1; if (on) { - pf->reassemble = PF_REASS_ENABLED; - if (nodf) - pf->reassemble |= PF_REASS_NODF; + pf->reassemble = PF_REASS_ENABLED | reass; } else { pf->reassemble = 0; } - if (pf->opts & PF_OPT_VERBOSE) - printf("set reassemble %s %s\n", on ? "yes" : "no", - nodf ? "no-df" : ""); + if (pf->opts & PF_OPT_VERBOSE) { + printf("set reassemble %s%s%s\n", on ? "yes" : "no", + (reass & PF_REASS_NODF) ? " no-df" : "", + (reass & PF_REASS_REFRAG) ? 
" refragment" : ""); + } return (0); } Index: sbin/pfctl/pfctl_parser.h =================================================================== RCS file: /cvs/src/sbin/pfctl/pfctl_parser.h,v retrieving revision 1.117 diff -u -p -r1.117 pfctl_parser.h --- sbin/pfctl/pfctl_parser.h 21 Jul 2020 14:10:51 -0000 1.117 +++ sbin/pfctl/pfctl_parser.h 31 Aug 2021 12:42:51 -0000 @@ -221,7 +221,7 @@ int add_opt_table(struct pfctl *, st void pfctl_add_rule(struct pfctl *, struct pf_rule *); int pfctl_set_timeout(struct pfctl *, const char *, int, int); -int pfctl_set_reassembly(struct pfctl *, int, int); +int pfctl_set_reassembly(struct pfctl *, int, u_int32_t); int pfctl_set_syncookies(struct pfctl *, u_int8_t, struct pfctl_watermarks *); int pfctl_set_optimization(struct pfctl *, const char *);