On Sat, Aug 13, 2022 at 09:08:36PM +0200, Alexander Bluhm wrote:
> Hi,
>
> While running forwarding in parallel, I have introduced a hard
> barrier for parallel local protocol processing. The packets are
> requeued from shared to exclusive netlock.
>
> Unless we unlock all protocol input routines at once, we need some
> mechanism to move packets from one queue to the other. The problem
> is that we have to remember the next protocol field and offset of the
> parsed header chain.
>
> A simple example is IPv6 hop-by-hop options processing. This code
> is MP safe and can be moved from ip6_local() to ip6_ours() to run
> in parallel. If there were any options, the offset and next protocol
> are stored in an mbuf tag. Without a tag we know that it is a regular
> IPv6 header.
>
> Of course mbuf tags kill performance, but who uses hop-by-hop options
> anyway? pf drops such packets by default.
>
> ok?
>
Isn't it better to name this struct ip6_offnxt, since it is only used for IPv6?
> +struct ip_offnxt {
> + int ion_off;
> + int ion_nxt;
> +};
> +
> bluhm
>
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.378
> diff -u -p -r1.378 ip_input.c
> --- netinet/ip_input.c 12 Aug 2022 14:49:15 -0000 1.378
> +++ netinet/ip_input.c 13 Aug 2022 13:36:17 -0000
> @@ -560,11 +560,13 @@ ip_input_if(struct mbuf **mp, int *offp,
> int
> ip_local(struct mbuf **mp, int *offp, int nxt, int af)
> {
> - struct ip *ip;
> + if (*offp == 0) {
> + struct ip *ip;
>
> - ip = mtod(*mp, struct ip *);
> - *offp = ip->ip_hl << 2;
> - nxt = ip->ip_p;
> + ip = mtod(*mp, struct ip *);
> + *offp = ip->ip_hl << 2;
> + nxt = ip->ip_p;
> + }
>
> /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
> if (af == AF_UNSPEC)
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.252
> diff -u -p -r1.252 ip6_input.c
> --- netinet6/ip6_input.c 12 Aug 2022 14:49:15 -0000 1.252
> +++ netinet6/ip6_input.c 13 Aug 2022 13:42:42 -0000
> @@ -167,6 +167,11 @@ ip6_init(void)
> #endif
> }
>
> +struct ip_offnxt {
> + int ion_off;
> + int ion_nxt;
> +};
> +
> /*
> * Enqueue packet for local delivery. Queuing is used as a boundary
> * between the network layer (input/forward path) running with
> @@ -175,10 +180,37 @@ ip6_init(void)
> int
> ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
> {
> + /* ip6_hbhchcheck() may be run before, then off and nxt are set */
> + if (*offp == 0) {
> + nxt = ip6_hbhchcheck(mp, offp, NULL);
> + if (nxt == IPPROTO_DONE)
> + return IPPROTO_DONE;
> + }
> +
> /* We are already in a IPv4/IPv6 local deliver loop. */
> if (af != AF_UNSPEC)
> return ip6_local(mp, offp, nxt, af);
>
> + /* save values for later, use after dequeue */
> + if (*offp != sizeof(struct ip6_hdr)) {
> + struct m_tag *mtag;
> + struct ip_offnxt *ion;
> +
> + /* mbuf tags are expensive, but only used for header options */
> + mtag = m_tag_get(PACKET_TAG_IP6_OFF_NXT, sizeof(*ion),
> + M_NOWAIT);
> + if (mtag == NULL) {
> + ip6stat_inc(ip6s_idropped);
> + m_freemp(mp);
> + return IPPROTO_DONE;
> + }
> + ion = (struct ip_offnxt *)(mtag + 1);
> + ion->ion_off = *offp;
> + ion->ion_nxt = nxt;
> +
> + m_tag_prepend(*mp, mtag);
> + }
> +
> niq_enqueue(&ip6intrq, *mp);
> *mp = NULL;
> return IPPROTO_DONE;
> @@ -584,9 +616,27 @@ ip6_input_if(struct mbuf **mp, int *offp
> int
> ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
> {
> - nxt = ip6_hbhchcheck(mp, offp, NULL);
> - if (nxt == IPPROTO_DONE)
> - return IPPROTO_DONE;
> + if (*offp == 0) {
> + struct m_tag *mtag;
> +
> + mtag = m_tag_find(*mp, PACKET_TAG_IP6_OFF_NXT, NULL);
> + if (mtag != NULL) {
> + struct ip_offnxt *ion;
> +
> + ion = (struct ip_offnxt *)(mtag + 1);
> + *offp = ion->ion_off;
> + nxt = ion->ion_nxt;
> +
> + m_tag_delete(*mp, mtag);
> + } else {
> + struct ip6_hdr *ip6;
> +
> + ip6 = mtod(*mp, struct ip6_hdr *);
> + *offp = sizeof(struct ip6_hdr);
> + nxt = ip6->ip6_nxt;
> +
> + }
> + }
>
> /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
> if (af == AF_UNSPEC)
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.254
> diff -u -p -r1.254 mbuf.h
> --- sys/mbuf.h 14 Feb 2022 04:33:18 -0000 1.254
> +++ sys/mbuf.h 13 Aug 2022 13:36:17 -0000
> @@ -479,6 +479,7 @@ struct m_tag *m_tag_next(struct mbuf *,
> #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
> #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
> #define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
> +#define PACKET_TAG_IP6_OFF_NXT 0x8000 /* IPv6 offset and next proto */
>
> #define MTAG_BITS \
> ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
>