On Mon, Aug 15, 2022 at 11:47:03AM +0200, Alexander Bluhm wrote:
> On Sun, Aug 14, 2022 at 02:04:09AM +0300, Vitaliy Makkoveev wrote:
> > On Sat, Aug 13, 2022 at 09:08:36PM +0200, Alexander Bluhm wrote:
> > > Hi,
> > > 
> > > While running forwarding in parallel, I have introduced a hard
> > > barrier for parallel local protocol processing.  The packets are
> > > requeued from shared to exclusive netlock.
> > > 
> > > Unless we unlock all protocol input routines at once, we need some
> > > mechanism to move packets from one queue to the other.  The problem
> > > is that we have to remember the next protocol field and the offset
> > > of the parsed header chain.
> > > 
> > > A simple example is IPv6 hop-by-hop options processing.  This code
> > > is MP safe and can be moved from ip6_local() to ip6_ours() to run
> > > in parallel.  If there were any options, the offset and next protocol
> > > are stored in an mbuf tag.  Without a tag we know that it is a regular
> > > IPv6 header.
> > > 
> > > Of course mbuf tags kill performance, but who uses hop-by-hop options
> > > anyway?  pf drops such packets by default.
> > > 
> > > ok?
> > > 
> > 
> > Isn't it better to use ip6_offnxt? 
> 
> Maybe I want to use the same trick for the IP deliver loop one day.
> But until that happens, ip6_offnxt is consistent with PACKET_TAG_IP6_OFFNXT.
> 
> bluhm
> 

ok mvs@

> 
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.378
> diff -u -p -r1.378 ip_input.c
> --- netinet/ip_input.c        12 Aug 2022 14:49:15 -0000      1.378
> +++ netinet/ip_input.c        15 Aug 2022 09:38:53 -0000
> @@ -560,11 +560,13 @@ ip_input_if(struct mbuf **mp, int *offp,
>  int
>  ip_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
> -     struct ip *ip;
> +     if (*offp == 0) {
> +             struct ip *ip;
>  
> -     ip = mtod(*mp, struct ip *);
> -     *offp = ip->ip_hl << 2;
> -     nxt = ip->ip_p;
> +             ip = mtod(*mp, struct ip *);
> +             *offp = ip->ip_hl << 2;
> +             nxt = ip->ip_p;
> +     }
>  
>       /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
>       if (af == AF_UNSPEC)
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.252
> diff -u -p -r1.252 ip6_input.c
> --- netinet6/ip6_input.c      12 Aug 2022 14:49:15 -0000      1.252
> +++ netinet6/ip6_input.c      15 Aug 2022 09:43:20 -0000
> @@ -167,6 +167,11 @@ ip6_init(void)
>  #endif
>  }
>  
> +struct ip6_offnxt {
> +     int     ion_off;
> +     int     ion_nxt;
> +};
> +
>  /*
>   * Enqueue packet for local delivery.  Queuing is used as a boundary
>   * between the network layer (input/forward path) running with
> @@ -175,10 +180,37 @@ ip6_init(void)
>  int
>  ip6_ours(struct mbuf **mp, int *offp, int nxt, int af)
>  {
> +     /* ip6_hbhchcheck() may be run before, then off and nxt are set */
> +     if (*offp == 0) {
> +             nxt = ip6_hbhchcheck(mp, offp, NULL);
> +             if (nxt == IPPROTO_DONE)
> +                     return IPPROTO_DONE;
> +     }
> +
>       /* We are already in a IPv4/IPv6 local deliver loop. */
>       if (af != AF_UNSPEC)
>               return ip6_local(mp, offp, nxt, af);
>  
> +     /* save values for later, use after dequeue */
> +     if (*offp != sizeof(struct ip6_hdr)) {
> +             struct m_tag *mtag;
> +             struct ip6_offnxt *ion;
> +
> +             /* mbuf tags are expensive, but only used for header options */
> +             mtag = m_tag_get(PACKET_TAG_IP6_OFFNXT, sizeof(*ion),
> +                 M_NOWAIT);
> +             if (mtag == NULL) {
> +                     ip6stat_inc(ip6s_idropped);
> +                     m_freemp(mp);
> +                     return IPPROTO_DONE;
> +             }
> +             ion = (struct ip6_offnxt *)(mtag + 1);
> +             ion->ion_off = *offp;
> +             ion->ion_nxt = nxt;
> +
> +             m_tag_prepend(*mp, mtag);
> +     }
> +
>       niq_enqueue(&ip6intrq, *mp);
>       *mp = NULL;
>       return IPPROTO_DONE;
> @@ -584,9 +616,27 @@ ip6_input_if(struct mbuf **mp, int *offp
>  int
>  ip6_local(struct mbuf **mp, int *offp, int nxt, int af)
>  {
> -     nxt = ip6_hbhchcheck(mp, offp, NULL);
> -     if (nxt == IPPROTO_DONE)
> -             return IPPROTO_DONE;
> +     if (*offp == 0) {
> +             struct m_tag *mtag;
> +
> +             mtag = m_tag_find(*mp, PACKET_TAG_IP6_OFFNXT, NULL);
> +             if (mtag != NULL) {
> +                     struct ip6_offnxt *ion;
> +
> +                     ion = (struct ip6_offnxt *)(mtag + 1);
> +                     *offp = ion->ion_off;
> +                     nxt = ion->ion_nxt;
> +
> +                     m_tag_delete(*mp, mtag);
> +             } else {
> +                     struct ip6_hdr *ip6;
> +
> +                     ip6 = mtod(*mp, struct ip6_hdr *);
> +                     *offp = sizeof(struct ip6_hdr);
> +                     nxt = ip6->ip6_nxt;
> +                     
> +             }
> +     }
>  
>       /* Check whether we are already in a IPv4/IPv6 local deliver loop. */
>       if (af == AF_UNSPEC)
> Index: sys/mbuf.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.254
> diff -u -p -r1.254 mbuf.h
> --- sys/mbuf.h        14 Feb 2022 04:33:18 -0000      1.254
> +++ sys/mbuf.h        15 Aug 2022 09:43:14 -0000
> @@ -479,6 +479,7 @@ struct m_tag *m_tag_next(struct mbuf *, 
>  #define PACKET_TAG_SRCROUTE          0x1000 /* IPv4 source routing options */
>  #define PACKET_TAG_TUNNEL            0x2000  /* Tunnel endpoint address */
>  #define PACKET_TAG_CARP_BAL_IP               0x4000  /* carp(4) ip balanced marker */
> +#define PACKET_TAG_IP6_OFFNXT                0x8000  /* IPv6 offset and next proto */
>  
>  #define MTAG_BITS \
>      ("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
> 

Reply via email to