On Mon, Aug 15, 2022 at 11:47:03AM +0200, Alexander Bluhm wrote:
> On Sun, Aug 14, 2022 at 02:04:09AM +0300, Vitaliy Makkoveev wrote:
> > On Sat, Aug 13, 2022 at 09:08:36PM +0200, Alexander Bluhm wrote:
> > > Hi,
> > >
> > > While running forwarding in parallel, I have introduced a hard
> > > barrier for parallel local protocol processing. The packets are
> > > requeued from shared to exclusive netlock.
> > >
> > > Unless we unlock all protocol input routines at once, we need some
> > > mechanism to move from one queue to the other. The problem
> > > is that we have to remember the next protocol field and offset of the
> > > parsed header chain.
> > >
> > > A simple example is IPv6 hop-by-hop options processing. This code
> > > is MP safe and can be moved from ip6_local() to ip6_ours() to run
> > > in parallel. If there were any options, the offset and next protocol
> > > are stored in an mbuf tag. Without a tag we know that it is a regular
> > > IPv6 header.
> > >
> > > Of course mbuf tags kill performance, but who uses hop-by-hop options
> > > anyway? pf drops such packets by default.
> > >
> > > ok?
> > >
> >
> > Isn't it better to use ip6_offnxt?
>
> Maybe I want to use the same trick for the IP deliver loop one day.
> But until that happens ip6_offnxt is consistent with PACKET_TAG_IP6_OFFNXT.
>
> bluhm
>
ok mvs@
>
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.378
> diff -u -p -r1.378 ip_input.c
> --- netinet/ip_input.c 12 Aug 2022 14:49:15 -0000 1.378
> +++ netinet/ip_input.c 15 Aug 2022 09:38:53 -0000
> @@ -560,11 +560,13 @@ ip_input_if(struct mbuf **mp, int *offp,
> int
> ip_local(struct mbuf **mp, int *offp, int nxt, int af)
> {
> - struct ip *ip;
> + if (*offp == 0) {
> + struct ip *ip;
>
> - ip = mtod(*mp, struct ip *);
> - *offp = ip->ip_hl << 2;
> - nxt = ip->ip_p;
> + ip = mtod(*mp, struct ip *);
> + *offp = ip->ip_hl << 2;
> + nxt = ip->ip_p;
> + }
>
> /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
> if (af == AF_UNSPEC)
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.252
> diff -u -p -r1.252 ip6_input.c
> --- netinet6/ip6_input.c 12 Aug 2022 14:49:15 -0000 1.252
> +++ netinet6/ip6_input.c 15 Aug 2022 09:43:20 -0000
> @@ -167,6 +167,11 @@ ip6_init(void)
> #endif
> }
>
> +struct ip6_offnxt {
> + int ion_off;
> + int ion_nxt;
> +};
> +
> /*
> * Enqueue packet for local delivery. Queuing is used as a boundary
> * between the network layer (input/forward path) running with
> @@ -175,10 +180,37 @@ ip6_init(void)
> int
> ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> {
> + /* ip6_hbhchcheck() may be run before, then off and nxt are set */
> + if (*offp == 0) {
> + nxt = ip6_hbhchcheck(mp, offp, NULL);
> + if (nxt == IPPROTO_DONE)
> + return IPPROTO_DONE;
> + }
> +
> /* We are already in a IPv4/IPv6 local deliver loop. */
> if (af != AF_UNSPEC)
> return ip6_local(mp, offp, nxt, af);
>
> + /* save values for later, use after dequeue */
> + if (*offp != sizeof(struct ip6_hdr)) {
> + struct m_tag *mtag;
> + struct ip6_offnxt *ion;
> +
> + /* mbuf tags are expensive, but only used for header options */
> + mtag = m_tag_get(PACKET_TAG_IP6_OFFNXT, sizeof(*ion),
> + M_NOWAIT);
> + if (mtag == NULL) {
> + ip6stat_inc(ip6s_idropped);
> + m_freemp(mp);
> + return IPPROTO_DONE;
> + }
> + ion = (struct ip6_offnxt *)(mtag + 1);
> + ion->ion_off = *offp;
> + ion->ion_nxt = nxt;
> +
> + m_tag_prepend(*mp, mtag);
> + }
> +
> niq_enqueue(&ip6intrq, *mp);
> *mp = NULL;
> return IPPROTO_DONE;
> @@ -584,9 +616,27 @@ ip6_input_if(struct mbuf **mp, int *offp
> int
> ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
> {
> - nxt = ip6_hbhchcheck(mp, offp, NULL);
> - if (nxt == IPPROTO_DONE)
> - return IPPROTO_DONE;
> + if (*offp == 0) {
> + struct m_tag *mtag;
> +
> + mtag = m_tag_find(*mp, PACKET_TAG_IP6_OFFNXT, NULL);
> + if (mtag != NULL) {
> + struct ip6_offnxt *ion;
> +
> + ion = (struct ip6_offnxt *)(mtag + 1);
> + *offp = ion->ion_off;
> + nxt = ion->ion_nxt;
> +
> + m_tag_delete(*mp, mtag);
> + } else {
> + struct ip6_hdr *ip6;
> +
> + ip6 = mtod(*mp, struct ip6_hdr *);
> + *offp = sizeof(struct ip6_hdr);
> + nxt = ip6->ip6_nxt;
> +
> + }
> + }
>
> /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
> if (af == AF_UNSPEC)
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.254
> diff -u -p -r1.254 mbuf.h
> --- sys/mbuf.h 14 Feb 2022 04:33:18 -0000 1.254
> +++ sys/mbuf.h 15 Aug 2022 09:43:14 -0000
> @@ -479,6 +479,7 @@ struct m_tag *m_tag_next(struct mbuf *,
> #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
> #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
> #define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced
> marker */
> +#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next
> proto */
>
> #define MTAG_BITS \
> ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
>