Hi,

I think that is a good idea.

On Wed, 22 Jan 2020 08:35:05 +1000
David Gwynne <da...@gwynne.id.au> wrote:
> Has anyone got an opinion on this? I am still interested in doing more
> packet capture things on OpenBSD using GRE as a transport, and the idea
> of maintaining this out of tree just makes me feel tired.
> 
> On Tue, Oct 29, 2019 at 06:34:50PM +1000, David Gwynne wrote:
>> i've been toying with this idea of implementing GRE as a datagram
>> protocol that userland can use just like UDP. the idea is to make it
>> easy to support the implementation of NHRP in userland for mgre(4),
>> and also for ERSPAN* support without going down the path linux took**.
>> 
>> so this is the result of having a go at implementing the idea. the diff
>> includes several independent parts, but they all work together to make
>> GRE as comfortable to use as UDP. the two main parts are the actual
>> protocol implementation in src/sys/netinet/ip_gre.c, and the tweaks to
>> getaddrinfo to allow the resolution of gre services. the /etc/services
>> chunk gets used by the getaddrinfo bits.
>> 
>> so, the first chunk lets you do this (as root in userland):
>> 
>>      int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_GRE);
>> 
>> that gives you a file descriptor you can then use with bind(),
>> connect(), sendto(), recvfrom(), etc. you write a message to the
>> kernel and it prepends the GRE and IP headers and pushes it out.
>> it is set up so the GRE protocol is handed to the kernel via the
>> sin_port or sin6_port member of struct sockaddr_in and sockaddr_in6
>> respectively. there's no source and destination protocol fields, just
>> one that both ends agree on, so if you connect then bind, your
>> sockaddrs have to agree on the proto. unfortunately there's no such
>> thing as a wildcard or reserved protocol in GRE, so 0 can't be used
>> as a wildcard like it can in udp and tcp.
>> 
>> the sockets support the configuration of optional GRE headers, as
>> defined in RFC 2890, using setsockopt. importantly you can enable
>> the key and sequence number headers, which again, the kernel offloads
>> for you.
>> 
>> the second chunk tweaks getaddrinfo so it lets you specify things other
>> than IPPROTO_UDP and IPPROTO_TCP. protocols other than those are now
>> looked up in /etc/protocols to get their name, which in turn is used to
>> look up entries in /etc/services. while i was there and reading rfcs, i
>> noted different behaviour for wildcarded socktypes and protocols, which
>> i've tried to implement. eric@ seems generally ok with this stuff, and
>> suggested the tweak to pledge to allow access to /etc/protocols using
>> the dns pledge. tcp and udp are still special though, and are still
>> omgoptimised.
>> 
>> all this together lets the program at
>> https://mild.embarrassm.net/~dlg/diff/egred.c work. it is a userland
>> reimplementation of a simplified egre(4) using tap(4) and a gre socket.
>> the io path is literally reading from one fd and writing it to the other,
>> everything else is boilerplate.
>> 
>> i suspect the kernel stuff is a bit rough as i haven't had to test every
>> path, but it supports common functionality.
>> 
>> thoughts? i am pretty pleased with how this has turned out, and would be
>> keen to put it in the tree and work on it some more.
>> 
>> * https://tools.ietf.org/html/draft-foschiano-erspan-03
>> ** http://vger.kernel.org/lpc_net2018_talks/erspan-linux-presentation.pdf
>> 
>> Index: etc/services
>> ===================================================================
>> RCS file: /cvs/src/etc/services,v
>> retrieving revision 1.96
>> diff -u -p -r1.96 services
>> --- etc/services     27 Jan 2019 20:35:06 -0000      1.96
>> +++ etc/services     29 Oct 2019 07:57:44 -0000
>> @@ -332,6 +332,21 @@ spamd-cfg       8026/tcp                        # 
>> spamd(8) configur
>>  dhcpd-sync  8067/udp                        # dhcpd(8) synchronisation
>>  hunt                26740/udp                       # hunt(6)
>>  #
>> +# GRE Protocol Types
>> +#
>> +keepalive   0/gre                           # 0x0000: IP tunnel keepalive
>> +ipv4                2048/gre                        # 0x0800: IPv4
>> +nhrp                8193/gre                        # 0x2001: Next Hop 
>> Resolution Protocol
>> +erspan3             8939/gre                        # 0x22eb: ERSPAN III
>> +transether  25944/gre       ethernet        # 0x6558: Trans Ether Bridging
>> +ipv6                34525/gre                       # 0x86dd: IPv6
>> +wccp                34878/gre                       # 0x883e: Web Content 
>> Cache Protocol
>> +mpls                34887/gre                       # 0x8847: MPLS
>> +#mpls               34888/gre                       # 0x8848: MPLS Multicast
>> +erspan              35006/gre       erspan2         # 0x88be: ERSPAN I/II
>> +nsh         35151/gre                       # 0x894f: Network Service Header
>> +control             47082/gre                       # 0xb7ea: RFC 8157
>> +#
>>  # Appletalk
>>  #
>>  rtmp                1/ddp                           # Routing Table 
>> Maintenance Protocol
>> Index: lib/libc/asr/getaddrinfo_async.c
>> ===================================================================
>> RCS file: /cvs/src/lib/libc/asr/getaddrinfo_async.c,v
>> retrieving revision 1.56
>> diff -u -p -r1.56 getaddrinfo_async.c
>> --- lib/libc/asr/getaddrinfo_async.c 3 Nov 2018 09:13:24 -0000       1.56
>> +++ lib/libc/asr/getaddrinfo_async.c 29 Oct 2019 07:57:54 -0000
>> @@ -34,36 +34,15 @@
>>  
>>  #include "asr_private.h"
>>  
>> -struct match {
>> -    int family;
>> -    int socktype;
>> -    int protocol;
>> -};
>> -
>>  static int getaddrinfo_async_run(struct asr_query *, struct asr_result *);
>>  static int get_port(const char *, const char *, int);
>> +static int get_service(const char *, int, int);
>>  static int iter_family(struct asr_query *, int);
>>  static int addrinfo_add(struct asr_query *, const struct sockaddr *, const 
>> char *);
>>  static int addrinfo_from_file(struct asr_query *, int,  FILE *);
>>  static int addrinfo_from_pkt(struct asr_query *, char *, size_t);
>>  static int addrconfig_setup(struct asr_query *);
>>  
>> -static const struct match matches[] = {
>> -    { PF_INET,      SOCK_DGRAM,     IPPROTO_UDP     },
>> -    { PF_INET,      SOCK_STREAM,    IPPROTO_TCP     },
>> -    { PF_INET,      SOCK_RAW,       0               },
>> -    { PF_INET6,     SOCK_DGRAM,     IPPROTO_UDP     },
>> -    { PF_INET6,     SOCK_STREAM,    IPPROTO_TCP     },
>> -    { PF_INET6,     SOCK_RAW,       0               },
>> -    { -1,           0,              0,              },
>> -};
>> -
>> -#define MATCH_FAMILY(a, b) ((a) == matches[(b)].family || (a) == PF_UNSPEC)
>> -#define MATCH_PROTO(a, b) ((a) == matches[(b)].protocol || (a) == 0 || 
>> matches[(b)].protocol == 0)
>> -/* Do not match SOCK_RAW unless explicitly specified */
>> -#define MATCH_SOCKTYPE(a, b) ((a) == matches[(b)].socktype || ((a) == 0 && \
>> -                            matches[(b)].socktype != SOCK_RAW))
>> -
>>  enum {
>>      DOM_INIT,
>>      DOM_DOMAIN,
>> @@ -199,24 +178,27 @@ getaddrinfo_async_run(struct asr_query *
>>                      }
>>              }
>>  
>> -            /* Make sure there is at least a valid combination */
>> -            for (i = 0; matches[i].family != -1; i++)
>> -                    if (MATCH_FAMILY(ai->ai_family, i) &&
>> -                        MATCH_SOCKTYPE(ai->ai_socktype, i) &&
>> -                        MATCH_PROTO(ai->ai_protocol, i))
>> -                            break;
>> -            if (matches[i].family == -1) {
>> -                    ar->ar_gai_errno = EAI_BADHINTS;
>> -                    async_set_state(as, ASR_STATE_HALT);
>> -                    break;
>> -            }
>> -
>> -            if (ai->ai_protocol == 0 || ai->ai_protocol == IPPROTO_UDP)
>> +            switch (ai->ai_protocol) {
>> +            case 0:
>>                      as->as.ai.port_udp = get_port(as->as.ai.servname, "udp",
>>                          as->as.ai.hints.ai_flags & AI_NUMERICSERV);
>> -            if (ai->ai_protocol == 0 || ai->ai_protocol == IPPROTO_TCP)
>>                      as->as.ai.port_tcp = get_port(as->as.ai.servname, "tcp",
>>                          as->as.ai.hints.ai_flags & AI_NUMERICSERV);
>> +                    break;
>> +            case IPPROTO_TCP:
>> +                    as->as.ai.port_tcp = get_port(as->as.ai.servname, "tcp",
>> +                        as->as.ai.hints.ai_flags & AI_NUMERICSERV);
>> +                    break;
>> +            case IPPROTO_UDP:
>> +                    as->as.ai.port_udp = get_port(as->as.ai.servname, "udp",
>> +                        as->as.ai.hints.ai_flags & AI_NUMERICSERV);
>> +                    break;
>> +            default:
>> +                    as->as.ai.port_udp = get_service(as->as.ai.servname,
>> +                        ai->ai_protocol,
>> +                        as->as.ai.hints.ai_flags & AI_NUMERICSERV);
>> +                    break;
>> +            }
>>              if (as->as.ai.port_tcp == -2 || as->as.ai.port_udp == -2 ||
>>                  (as->as.ai.port_tcp == -1 && as->as.ai.port_udp == -1) ||
>>                  (ai->ai_protocol && (as->as.ai.port_udp == -1 ||
>> @@ -491,6 +473,24 @@ get_port(const char *servname, const cha
>>      return (port);
>>  }
>>  
>> +static int
>> +get_service(const char *servname, int protocol, int numonly)
>> +{
>> +    struct protoent pe;
>> +    struct protoent_data ped;
>> +    int rv;
>> +
>> +    memset(&ped, 0, sizeof(ped));
>> +    rv = getprotobynumber_r(protocol, &pe, &ped);
>> +    if (rv == -1)
>> +            return (-1);
>> +
>> +    rv = get_port(servname, pe.p_name, numonly);
>> +    endprotoent_r(&ped);
>> +
>> +    return (rv);
>> +}
>> +
>>  /*
>>   * Iterate over the address families that are to be queried. Use the
>>   * list on the async context, unless a specific family was given in hints.
>> @@ -519,65 +519,107 @@ iter_family(struct asr_query *as, int fi
>>   * entry per protocol/socktype match.
>>   */
>>  static int
>> -addrinfo_add(struct asr_query *as, const struct sockaddr *sa, const char 
>> *cname)
>> +addrinfo_add_ai(struct asr_query *as, const struct sockaddr *sa,
>> +    const char *cname, int socktype, int proto, int port)
>>  {
>>      struct addrinfo         *ai;
>> -    int                      i, port, proto;
>> -
>> -    for (i = 0; matches[i].family != -1; i++) {
>> -            if (matches[i].family != sa->sa_family ||
>> -                !MATCH_SOCKTYPE(as->as.ai.hints.ai_socktype, i) ||
>> -                !MATCH_PROTO(as->as.ai.hints.ai_protocol, i))
>> -                    continue;
>> -
>> -            proto = as->as.ai.hints.ai_protocol;
>> -            if (!proto)
>> -                    proto = matches[i].protocol;
>> -
>> -            if (proto == IPPROTO_TCP)
>> -                    port = as->as.ai.port_tcp;
>> -            else if (proto == IPPROTO_UDP)
>> -                    port = as->as.ai.port_udp;
>> -            else
>> -                    port = 0;
>> +    int                      i;
>>  
>> -            /* servname specified, but not defined for this protocol */
>> -            if (port == -1)
>> -                    continue;
>> +    if (port == -1)
>> +            return (0);
>>  
>> -            ai = calloc(1, sizeof(*ai) + sa->sa_len);
>> -            if (ai == NULL)
>> +    ai = calloc(1, sizeof(*ai) + sa->sa_len);
>> +    if (ai == NULL)
>> +            return (EAI_MEMORY);
>> +    ai->ai_family = sa->sa_family;
>> +    ai->ai_socktype = socktype;
>> +    ai->ai_protocol = proto;
>> +    ai->ai_flags = as->as.ai.hints.ai_flags;
>> +    ai->ai_addrlen = sa->sa_len;
>> +    ai->ai_addr = (void *)(ai + 1);
>> +    if (cname &&
>> +        as->as.ai.hints.ai_flags & (AI_CANONNAME | AI_FQDN)) {
>> +            if ((ai->ai_canonname = strdup(cname)) == NULL) {
>> +                    free(ai);
>>                      return (EAI_MEMORY);
>> -            ai->ai_family = sa->sa_family;
>> -            ai->ai_socktype = matches[i].socktype;
>> -            ai->ai_protocol = proto;
>> -            ai->ai_flags = as->as.ai.hints.ai_flags;
>> -            ai->ai_addrlen = sa->sa_len;
>> -            ai->ai_addr = (void *)(ai + 1);
>> -            if (cname &&
>> -                as->as.ai.hints.ai_flags & (AI_CANONNAME | AI_FQDN)) {
>> -                    if ((ai->ai_canonname = strdup(cname)) == NULL) {
>> -                            free(ai);
>> -                            return (EAI_MEMORY);
>> -                    }
>>              }
>> -            memmove(ai->ai_addr, sa, sa->sa_len);
>> -            if (sa->sa_family == PF_INET)
>> -                    ((struct sockaddr_in *)ai->ai_addr)->sin_port =
>> -                        htons(port);
>> -            else if (sa->sa_family == PF_INET6)
>> -                    ((struct sockaddr_in6 *)ai->ai_addr)->sin6_port =
>> -                        htons(port);
>> -
>> -            if (as->as.ai.aifirst == NULL)
>> -                    as->as.ai.aifirst = ai;
>> -            if (as->as.ai.ailast)
>> -                    as->as.ai.ailast->ai_next = ai;
>> -            as->as.ai.ailast = ai;
>> -            as->as_count += 1;
>>      }
>> +    memmove(ai->ai_addr, sa, sa->sa_len);
>> +    if (sa->sa_family == PF_INET)
>> +            ((struct sockaddr_in *)ai->ai_addr)->sin_port =
>> +                htons(port);
>> +    else if (sa->sa_family == PF_INET6)
>> +            ((struct sockaddr_in6 *)ai->ai_addr)->sin6_port =
>> +                htons(port);
>> +
>> +    if (as->as.ai.aifirst == NULL)
>> +            as->as.ai.aifirst = ai;
>> +    if (as->as.ai.ailast)
>> +            as->as.ai.ailast->ai_next = ai;
>> +    as->as.ai.ailast = ai;
>> +    as->as_count += 1;
>>  
>>      return (0);
>> +}
>> +
>> +static int
>> +addrinfo_add_proto(struct asr_query *as, const struct sockaddr *sa,
>> +    const char *cname, int proto, int port)
>> +{
>> +    int rv;
>> +
>> +    switch (as->as.ai.hints.ai_socktype) {
>> +    case 0:
>> +            rv = addrinfo_add_ai(as, sa, cname, SOCK_STREAM, proto, port);
>> +            if (rv != 0)
>> +                    break;
>> +
>> +            rv = addrinfo_add_ai(as, sa, cname, SOCK_DGRAM, proto, port);
>> +            if (rv != 0)
>> +                    break;
>> +
>> +            break;
>> +
>> +    default:
>> +            rv = addrinfo_add_ai(as, sa, cname,
>> +                as->as.ai.hints.ai_socktype, proto, port);
>> +            break;
>> +    }
>> +
>> +    return (rv);
>> +}
>> +
>> +static int
>> +addrinfo_add(struct asr_query *as, const struct sockaddr *sa, const char 
>> *cname)
>> +{
>> +    int rv;
>> +
>> +    switch (as->as.ai.hints.ai_protocol) {
>> +    case 0:
>> +            rv = addrinfo_add_proto(as, sa, cname,
>> +                IPPROTO_TCP, as->as.ai.port_tcp);
>> +            if (rv != 0)
>> +                    break;
>> +
>> +            rv = addrinfo_add_proto(as, sa, cname,
>> +                IPPROTO_UDP, as->as.ai.port_udp);
>> +            if (rv != 0)
>> +                    break;
>> +
>> +            break;
>> +
>> +    case IPPROTO_TCP:
>> +            rv = addrinfo_add_proto(as, sa, cname,
>> +                IPPROTO_TCP, as->as.ai.port_tcp);
>> +            break;
>> +
>> +    default: /* includes IPPROTO_UDP */
>> +            rv = addrinfo_add_proto(as, sa, cname,
>> +                as->as.ai.hints.ai_protocol, as->as.ai.port_udp);
>> +            break;
>> +    }
>> +
>> +    return (rv);
>>  }
>>  
>>  static int
>> Index: sys/conf/files
>> ===================================================================
>> RCS file: /cvs/src/sys/conf/files,v
>> retrieving revision 1.675
>> diff -u -p -r1.675 files
>> --- sys/conf/files   5 Oct 2019 05:33:14 -0000       1.675
>> +++ sys/conf/files   29 Oct 2019 07:57:58 -0000
>> @@ -862,7 +862,7 @@ file netinet/tcp_subr.c
>>  file netinet/tcp_timer.c
>>  file netinet/tcp_usrreq.c
>>  file netinet/udp_usrreq.c
>> -file netinet/ip_gre.c
>> +file netinet/ip_gre.c                       gre
>>  file netinet/ip_ipsp.c                      ipsec | tcp_signature
>>  file netinet/ip_spd.c                       ipsec | tcp_signature
>>  file netinet/ip_ipip.c
>> Index: sys/kern/kern_pledge.c
>> ===================================================================
>> RCS file: /cvs/src/sys/kern/kern_pledge.c,v
>> retrieving revision 1.255
>> diff -u -p -r1.255 kern_pledge.c
>> --- sys/kern/kern_pledge.c   25 Aug 2019 18:46:40 -0000      1.255
>> +++ sys/kern/kern_pledge.c   29 Oct 2019 07:57:58 -0000
>> @@ -666,7 +666,7 @@ pledge_namei(struct proc *p, struct name
>>                      }
>>              }
>>  
>> -            /* DNS needs /etc/{resolv.conf,hosts,services}. */
>> +            /* DNS needs /etc/{resolv.conf,hosts,services,protocols}. */
>>              if ((ni->ni_pledge == PLEDGE_RPATH) &&
>>                  (p->p_p->ps_pledge & PLEDGE_DNS)) {
>>                      if (strcmp(path, "/etc/resolv.conf") == 0) {
>> @@ -678,6 +678,10 @@ pledge_namei(struct proc *p, struct name
>>                              return (0);
>>                      }
>>                      if (strcmp(path, "/etc/services") == 0) {
>> +                            ni->ni_cnd.cn_flags |= BYPASSUNVEIL;
>> +                            return (0);
>> +                    }
>> +                    if (strcmp(path, "/etc/protocols") == 0) {
>>                              ni->ni_cnd.cn_flags |= BYPASSUNVEIL;
>>                              return (0);
>>                      }
>> Index: sys/net/if_gre.c
>> ===================================================================
>> RCS file: /cvs/src/sys/net/if_gre.c,v
>> retrieving revision 1.152
>> diff -u -p -r1.152 if_gre.c
>> --- sys/net/if_gre.c 29 Jul 2019 16:28:25 -0000      1.152
>> +++ sys/net/if_gre.c 29 Oct 2019 07:57:58 -0000
>> @@ -69,6 +69,8 @@
>>  #include <netinet/ip_var.h>
>>  #include <netinet/ip_ecn.h>
>>  
>> +#include <netinet/gre_proto.h>
>> +
>>  #ifdef INET6
>>  #include <netinet/ip6.h>
>>  #include <netinet6/ip6_var.h>
>> @@ -103,28 +105,6 @@
>>  /*
>>   * packet formats
>>   */
>> -struct gre_header {
>> -    uint16_t                gre_flags;
>> -#define GRE_CP                              0x8000  /* Checksum Present */
>> -#define GRE_KP                              0x2000  /* Key Present */
>> -#define GRE_SP                              0x1000  /* Sequence Present */
>> -
>> -#define GRE_VERS_MASK                       0x0007
>> -#define GRE_VERS_0                  0x0000
>> -#define GRE_VERS_1                  0x0001
>> -
>> -    uint16_t                gre_proto;
>> -} __packed __aligned(4);
>> -
>> -struct gre_h_cksum {
>> -    uint16_t                gre_cksum;
>> -    uint16_t                gre_reserved1;
>> -} __packed __aligned(4);
>> -
>> -struct gre_h_key {
>> -    uint32_t                gre_key;
>> -} __packed __aligned(4);
>> -
>>  #define GRE_EOIP            0x6400
>>  
>>  struct gre_h_key_eoip {
>> @@ -132,13 +112,7 @@ struct gre_h_key_eoip {
>>      uint16_t                eoip_tunnel_id; /* little endian */
>>  } __packed __aligned(4);
>>  
>> -#define NVGRE_VSID_RES_MIN  0x000000 /* reserved for future use */
>> -#define NVGRE_VSID_RES_MAX  0x000fff
>> -#define NVGRE_VSID_NVE2NVE  0xffffff /* vendor specific NVE-to-NVE comms */
>> -
>> -struct gre_h_seq {
>> -    uint32_t                gre_seq;
>> -} __packed __aligned(4);
>> +#define GRE_WCCP 0x883e
>>  
>>  struct gre_h_wccp {
>>      uint8_t                 wccp_flags;
>> @@ -147,7 +121,10 @@ struct gre_h_wccp {
>>      uint8_t                 pri_bucket;
>>  } __packed __aligned(4);
>>  
>> -#define GRE_WCCP 0x883e
>> +
>> +#define NVGRE_VSID_RES_MIN  0x000000 /* reserved for future use */
>> +#define NVGRE_VSID_RES_MAX  0x000fff
>> +#define NVGRE_VSID_NVE2NVE  0xffffff /* vendor specific NVE-to-NVE comms */
>>  
>>  #define GRE_HDRLEN (sizeof(struct ip) + sizeof(struct gre_header))
>>  
>> @@ -289,8 +266,8 @@ static int       gre_up(struct gre_softc *);
>>  static int  gre_down(struct gre_softc *);
>>  static void gre_link_state(struct ifnet *, unsigned int);
>>  
>> -static int  gre_input_key(struct mbuf **, int *, int, int, uint8_t,
>> -                struct gre_tunnel *);
>> +static struct mbuf *
>> +            gre_if_input(struct mbuf *, int, uint8_t, struct gre_tunnel *);
>>  
>>  static struct mbuf *
>>              gre_ipv4_patch(const struct gre_tunnel *, struct mbuf *,
>> @@ -893,10 +870,9 @@ eoip_clone_destroy(struct ifnet *ifp)
>>      return (0);
>>  }
>>  
>> -int
>> -gre_input(struct mbuf **mp, int *offp, int type, int af)
>> +struct mbuf *
>> +gre_if4_input(struct mbuf *m, int hlen)
>>  {
>> -    struct mbuf *m = *mp;
>>      struct gre_tunnel key;
>>      struct ip *ip;
>>  
>> @@ -908,17 +884,13 @@ gre_input(struct mbuf **mp, int *offp, i
>>      key.t_src4 = ip->ip_dst;
>>      key.t_dst4 = ip->ip_src;
>>  
>> -    if (gre_input_key(mp, offp, type, af, ip->ip_tos, &key) == -1)
>> -            return (rip_input(mp, offp, type, af));
>> -
>> -    return (IPPROTO_DONE);
>> +    return (gre_if_input(m, hlen, ip->ip_tos, &key));
>>  }
>>  
>>  #ifdef INET6
>> -int
>> -gre_input6(struct mbuf **mp, int *offp, int type, int af)
>> +struct mbuf *
>> +gre_if6_input(struct mbuf *m, int hlen)
>>  {
>> -    struct mbuf *m = *mp;
>>      struct gre_tunnel key;
>>      struct ip6_hdr *ip6;
>>      uint32_t flow;
>> @@ -933,10 +905,7 @@ gre_input6(struct mbuf **mp, int *offp, 
>>  
>>      flow = bemtoh32(&ip6->ip6_flow);
>>  
>> -    if (gre_input_key(mp, offp, type, af, flow >> 20, &key) == -1)
>> -            return (rip6_input(mp, offp, type, af));
>> -
>> -    return (IPPROTO_DONE);
>> +    return (gre_if_input(m, hlen, flow >> 20, &key));
>>  }
>>  #endif /* INET6 */
>>  
>> @@ -996,12 +965,10 @@ gre_input_1(struct gre_tunnel *key, stru
>>      return (m);
>>  }
>>  
>> -static int
>> -gre_input_key(struct mbuf **mp, int *offp, int type, int af, uint8_t otos,
>> -    struct gre_tunnel *key)
>> +static struct mbuf *
>> +gre_if_input(struct mbuf *m, int iphlen, uint8_t otos, struct gre_tunnel 
>> *key)
>>  {
>> -    struct mbuf *m = *mp;
>> -    int iphlen = *offp, hlen, rxprio;
>> +    int hlen, rxprio;
>>      struct ifnet *ifp;
>>      const struct gre_tunnel *tunnel;
>>      caddr_t buf;
>> @@ -1025,7 +992,7 @@ gre_input_key(struct mbuf **mp, int *off
>>  
>>      m = m_pullup(m, hlen);
>>      if (m == NULL)
>> -            return (IPPROTO_DONE);
>> +            return (NULL);
>>  
>>      buf = mtod(m, caddr_t);
>>      gh = (struct gre_header *)(buf + iphlen);
>> @@ -1038,7 +1005,7 @@ gre_input_key(struct mbuf **mp, int *off
>>      case htons(GRE_VERS_1):
>>              m = gre_input_1(key, m, gh, otos, iphlen);
>>              if (m == NULL)
>> -                    return (IPPROTO_DONE);
>> +                    return (NULL);
>>              /* FALLTHROUGH */
>>      default:
>>              goto decline;
>> @@ -1055,7 +1022,7 @@ gre_input_key(struct mbuf **mp, int *off
>>  
>>              m = m_pullup(m, hlen);
>>              if (m == NULL)
>> -                    return (IPPROTO_DONE);
>> +                    return (NULL);
>>  
>>              buf = mtod(m, caddr_t);
>>              gh = (struct gre_header *)(buf + iphlen);
>> @@ -1071,7 +1038,7 @@ gre_input_key(struct mbuf **mp, int *off
>>                  nvgre_input(key, m, hlen, otos) == -1)
>>                      goto decline;
>>  
>> -            return (IPPROTO_DONE);
>> +            return (NULL);
>>      }
>>  
>>      ifp = gre_find(key);
>> @@ -1148,7 +1115,7 @@ gre_input_key(struct mbuf **mp, int *off
>>  
>>              m_adj(m, hlen);
>>              gre_keepalive_recv(ifp, m);
>> -            return (IPPROTO_DONE);
>> +            return (NULL);
>>  
>>      default:
>>              goto decline;
>> @@ -1162,7 +1129,7 @@ gre_input_key(struct mbuf **mp, int *off
>>  
>>      m = (*patch)(tunnel, m, &itos, otos);
>>      if (m == NULL)
>> -            return (IPPROTO_DONE); 
>> +            return (NULL); 
>>  
>>      if (tunnel->t_key_mask == GRE_KEY_ENTROPY) {
>>              m->m_pkthdr.ph_flowid = M_FLOWID_VALID |
>> @@ -1203,10 +1170,9 @@ gre_input_key(struct mbuf **mp, int *off
>>  #endif
>>  
>>      (*input)(ifp, m);
>> -    return (IPPROTO_DONE);
>> +    return (NULL);
>>  decline:
>> -    *mp = m;
>> -    return (-1);
>> +    return (m);
>>  }
>>  
>>  static struct mbuf *
>> Index: sys/netinet/gre_proto.h
>> ===================================================================
>> RCS file: sys/netinet/gre_proto.h
>> diff -N sys/netinet/gre_proto.h
>> --- /dev/null        1 Jan 1970 00:00:00 -0000
>> +++ sys/netinet/gre_proto.h  29 Oct 2019 07:57:58 -0000
>> @@ -0,0 +1,48 @@
>> +/* $OpenBSD$ */
>> +
>> +/*
>> + * Copyright (c) 2019 David Gwynne <d...@openbsd.org>
>> + *
>> + * Permission to use, copy, modify, and distribute this software for any
>> + * purpose with or without fee is hereby granted, provided that the above
>> + * copyright notice and this permission notice appear in all copies.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>> + */
>> +
>> +#ifndef _NETINET_GRE_H_
>> +#define _NETINET_GRE_H_
>> +
>> +struct gre_header {
>> +    uint16_t        gre_flags;
>> +#define GRE_CP                      0x8000  /* Checksum Present */
>> +#define GRE_KP                      0x2000  /* Key Present */
>> +#define GRE_SP                      0x1000  /* Sequence Present */
>> +
>> +#define GRE_VERS_MASK               0x0007
>> +#define GRE_VERS_0          0x0000
>> +#define GRE_VERS_1          0x0001
>> +
>> +    uint16_t        gre_proto;
>> +};
>> +
>> +struct gre_h_cksum {
>> +    uint16_t        gre_cksum;
>> +    uint16_t        gre_reserved1;
>> +};
>> +
>> +struct gre_h_key {
>> +    uint32_t        gre_key;
>> +};
>> +
>> +struct gre_h_seq {
>> +    uint32_t        gre_seq;
>> +};
>> +
>> +#endif /* _NETINET_GRE_H_ */
>> Index: sys/netinet/gre_var.h
>> ===================================================================
>> RCS file: sys/netinet/gre_var.h
>> diff -N sys/netinet/gre_var.h
>> --- /dev/null        1 Jan 1970 00:00:00 -0000
>> +++ sys/netinet/gre_var.h    29 Oct 2019 07:57:58 -0000
>> @@ -0,0 +1,64 @@
>> +/* $OpenBSD$ */
>> +
>> +/*
>> + * Copyright (c) 2019 David Gwynne <d...@openbsd.org>
>> + *
>> + * Permission to use, copy, modify, and distribute this software for any
>> + * purpose with or without fee is hereby granted, provided that the above
>> + * copyright notice and this permission notice appear in all copies.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>> + */
>> +
>> +#ifndef _NETINET_GRE_VAR_H_
>> +#define _NETINET_GRE_VAR_H_
>> +
>> +/*
>> + * setsockopt(s, IPPROTO_GRE, ...
>> + */
>> +
>> +#define GRE_CKSUM   1       /* bool; enable GRE checksum headers */
>> +#define GRE_KEY             2       /* uint32_t; enable and set GRE key */
>> +                            /* NULL; disable GRE key header */ 
>> +#define GRE_SEQ             3       /* uint32_t; enable and set GRE seq */
>> +                            /* NULL; disable GRE seq header */
>> +
>> +#define GRE_RECVSEQ 4       /* bool; enable reception of seq numbers */
>> +#define GRE_SENDSEQ GRE_RECVSEQ
>> +
>> +#ifdef _KERNEL
>> +int gre_raw_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
>> +        struct mbuf *, struct proc *);
>> +int gre_sysctl(int *, u_int, void *, size_t *, void *, size_t);
>> +
>> +void        gre_init(void);
>> +
>> +int gre_attach(struct socket *, int);
>> +int gre_detach(struct socket *);
>> +
>> +int gre_ip4_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
>> +        struct mbuf *, struct proc *);
>> +int gre_ip4_ctloutput(int, struct socket *, int, int, struct mbuf *);
>> +
>> +int gre_ip4_input(struct mbuf **, int *, int, int);
>> +
>> +struct mbuf *
>> +    gre_if4_input(struct mbuf *, int); /* interface glue */
>> +
>> +#ifdef INET6
>> +int gre_ip6_usrreq(struct socket *, int, struct mbuf *, struct mbuf *,
>> +        struct mbuf *, struct proc *);
>> +int gre_ip6_ctloutput(int, struct socket *, int, int, struct mbuf *);
>> +int gre_ip6_input(struct mbuf **, int *, int, int);
>> +
>> +struct mbuf *
>> +    gre_if6_input(struct mbuf *, int); /* interface glue */
>> +#endif /* INET6 */
>> +#endif /* _KERNEL */
>> +#endif /* _NETINET_GRE_VAR_H_ */
>> Index: sys/netinet/in_proto.c
>> ===================================================================
>> RCS file: /cvs/src/sys/netinet/in_proto.c,v
>> retrieving revision 1.93
>> diff -u -p -r1.93 in_proto.c
>> --- sys/netinet/in_proto.c   15 Jul 2019 12:40:42 -0000      1.93
>> +++ sys/netinet/in_proto.c   29 Oct 2019 07:57:58 -0000
>> @@ -147,8 +147,7 @@
>>  
>>  #include "gre.h"
>>  #if NGRE > 0
>> -#include <netinet/ip_gre.h>
>> -#include <net/if_gre.h>
>> +#include <netinet/gre_var.h>
>>  #endif
>>  
>>  #include "carp.h"
>> @@ -346,11 +345,24 @@ const struct protosw inetsw[] = {
>>    .pr_domain        = &inetdomain,
>>    .pr_protocol      = IPPROTO_GRE,
>>    .pr_flags = PR_ATOMIC|PR_ADDR,
>> -  .pr_input = gre_input,
>> +  .pr_input = gre_ip4_input,
>>    .pr_ctloutput     = rip_ctloutput,
>> -  .pr_usrreq        = gre_usrreq,
>> +  .pr_usrreq        = gre_raw_usrreq,
>>    .pr_attach        = rip_attach,
>>    .pr_detach        = rip_detach,
>> +  .pr_sysctl        = gre_sysctl
>> +},
>> +{
>> +  .pr_type  = SOCK_DGRAM,
>> +  .pr_domain        = &inetdomain,
>> +  .pr_protocol      = IPPROTO_GRE,
>> +  .pr_flags = PR_ATOMIC|PR_ADDR,
>> +  .pr_input = gre_ip4_input,
>> +  .pr_ctloutput     = gre_ip4_ctloutput,
>> +  .pr_usrreq        = gre_ip4_usrreq,
>> +  .pr_attach        = gre_attach,
>> +  .pr_detach        = gre_detach,
>> +  .pr_init  = gre_init,
>>    .pr_sysctl        = gre_sysctl
>>  },
>>  #endif /* NGRE > 0 */
>> Index: sys/netinet/ip_gre.c
>> ===================================================================
>> RCS file: /cvs/src/sys/netinet/ip_gre.c,v
>> retrieving revision 1.71
>> diff -u -p -r1.71 ip_gre.c
>> --- sys/netinet/ip_gre.c     7 Feb 2018 22:30:59 -0000       1.71
>> +++ sys/netinet/ip_gre.c     29 Oct 2019 07:57:58 -0000
>> @@ -1,7 +1,23 @@
>> -/*      $OpenBSD: ip_gre.c,v 1.71 2018/02/07 22:30:59 dlg Exp $ */
>> +/*  $OpenBSD: ip_gre.c,v 1.71 2018/02/07 22:30:59 dlg Exp $ */
>>  /*  $NetBSD: ip_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
>>  
>>  /*
>> + * Copyright (c) 2019 David Gwynne <d...@openbsd.org>
>> + *
>> + * Permission to use, copy, modify, and distribute this software for any
>> + * purpose with or without fee is hereby granted, provided that the above
>> + * copyright notice and this permission notice appear in all copies.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
>> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
>> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
>> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
>> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
>> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
>> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>> + */
>> +
>> +/*
>>   * Copyright (c) 1998 The NetBSD Foundation, Inc.
>>   * All rights reserved.
>>   *
>> @@ -30,16 +46,6 @@
>>   * POSSIBILITY OF SUCH DAMAGE.
>>   */
>>  
>> -/*
>> - * decapsulate tunneled packets and send them on
>> - * output half is in net/if_gre.[ch]
>> - * This currently handles IPPROTO_GRE, IPPROTO_MOBILE
>> - */
>> -
>> -
>> -#include "gre.h"
>> -#if NGRE > 0
>> -
>>  #include <sys/param.h>
>>  #include <sys/systm.h>
>>  #include <sys/mbuf.h>
>> @@ -47,24 +53,40 @@
>>  #include <sys/socket.h>
>>  #include <sys/socketvar.h>
>>  #include <sys/sysctl.h>
>> +#include <sys/proc.h>
>> +#include <sys/atomic.h>
>> +#include <sys/pool.h>
>> +#include <sys/tree.h>
>> +
>> +#include <sys/domain.h>
>>  
>>  #include <net/if.h>
>> +#include <net/if_var.h>
>>  #include <net/route.h>
>>  
>>  #include <netinet/in.h>
>> +#include <netinet/in_var.h>
>>  #include <netinet/ip.h>
>>  #include <netinet/ip_var.h>
>>  #include <netinet/in_pcb.h>
>>  
>> +#include <netinet/gre_var.h>
>> +#include <netinet/gre_proto.h>
>> +
>>  #ifdef PIPEX
>>  #include <net/pipex.h>
>>  #endif
>>  
>> +
>> +/*
>> + * socket({AF_INET,AF_INET6}, SOCK_RAW, IPPROTO_GRE);
>> + */
>> +
>>  int
>> -gre_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
>> +gre_raw_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
>>      struct mbuf *control, struct proc *p)
>>  {
>> -#ifdef  PIPEX 
>> +#ifdef  PIPEX
>>      struct inpcb *inp = sotoinpcb(so);
>>  
>>      if (inp != NULL && inp->inp_pipex && req == PRU_SEND) {
>> @@ -92,4 +114,1817 @@ gre_usrreq(struct socket *so, int req, s
>>  #endif
>>      return rip_usrreq(so, req, m, nam, control, p);
>>  }
>> -#endif /* if NGRE > 0 */
>> +
>> +/*
>> + * support socket({AF_INET,AF_INET6}, SOCK_DGRAM, IPPROTO_GRE);
>> + *
>> + * GRE datagram sockets provide support for GRE version 0 packets
>> + * in the kernel.
>> + */
>> +
>> +#define GRE_VALID_MASK      (GRE_VERS_MASK | GRE_CP | GRE_KP | GRE_SP)
>> +
>> +struct gre_pcb_key;
>> +
>> +/*
>> + * Per-socket state for a GRE datagram socket.
>> + *
>> + * gpcb_inpcb has to stay the first member: inp_gpcb() turns a
>> + * struct inpcb pointer into a struct gre_pcb pointer with a plain cast.
>> + */
>> +struct gre_pcb {
>> +    struct inpcb             gpcb_inpcb;
>> +    unsigned int             gpcb_pflags;   /* pcb flags */
>> +#define GREPCB_RECVSEQ                      (1 << 0)
>> +    uint16_t                 gpcb_flags;    /* network byteorder */
>> +    uint32_t                 gpcb_key;      /* network byteorder */
>> +    uint32_t                 gpcb_seq;      /* host byteorder */
>> +
>> +    /* lookup key this pcb uses; gre_output treats NULL as disconnected */
>> +    struct gre_pcb_key      *gpcb_pcb_key;
>> +    /* mask tested against another socket's so_options in gre_output */
>> +    int                      gpcb_reuse;
>> +    /* copied into ip_ttl / ip6_hlim by the output routines */
>> +    uint8_t                  gpcb_ttl;
>> +};
>> +TAILQ_HEAD(gre_pcb_list, inpcb);
>> +
>> +/*
>> + * Convert an inpcb pointer to the gre_pcb that embeds it.  This is a
>> + * plain cast, so it relies on gpcb_inpcb being the first member.
>> + */
>> +static inline struct gre_pcb *
>> +inp_gpcb(struct inpcb *inp)
>> +{
>> +    struct gre_pcb *gpcb = (struct gre_pcb *)inp;
>> +
>> +    return (gpcb);
>> +}
>> +
>> +#define gpcb_inp(_gpcb) (&(_gpcb)->gpcb_inpcb)
>> +
>> +/* Return the socket that owns this GRE pcb. */
>> +static inline struct socket *
>> +gpcb_so(struct gre_pcb *gpcb)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +
>> +    return (inp->inp_socket);
>> +}
>> +
>> +struct gre_pcb_key {
>> +    union inpaddru          gk_laddr;
>> +#define gk_laddr4   gk_laddr.iau_a4u.inaddr
>> +#define gk_laddr6   gk_laddr.iau_addr6
>> +    union inpaddru          gk_faddr;
>> +#define gk_faddr4   gk_faddr.iau_a4u.inaddr
>> +#define gk_faddr6   gk_faddr.iau_addr6
>> +
>> +    uint16_t                gk_flags;       /* network byteorder */
>> +    uint16_t                gk_proto;       /* network byteorder */
>> +    uint32_t                gk_key;         /* network byteorder */
>> +
>> +    unsigned int            gk_rtableid;
>> +    sa_family_t             gk_family;
>> +
>> +    RBT_ENTRY(gre_pcb_key)  gk_entry;
>> +    struct gre_pcb_list     gk_pcbs;
>> +    unsigned int            gk_state;
>> +#define GRE_S_DISCONNECTED          0
>> +#define GRE_S_WILDCARD                      1
>> +#define GRE_S_BOUND                 2
>> +#define GRE_S_CONNECTED                     3
>> +};
>> +
>> +RBT_HEAD(gre_tree_wildcards, gre_pcb_key);
>> +RBT_HEAD(gre_tree_bound, gre_pcb_key);
>> +RBT_HEAD(gre_tree_connected, gre_pcb_key);
>> +
>> +static int  gre_pcb_key_cmp_wildcard(const struct gre_pcb_key *,
>> +                const struct gre_pcb_key *);
>> +static int  gre_pcb_key_cmp_bound(const struct gre_pcb_key *,
>> +                const struct gre_pcb_key *);
>> +
>> +RBT_PROTOTYPE(gre_tree_wildcards, gre_pcb_key, gk_entry,
>> +    gre_pcb_key_cmp_wildcard);
>> +RBT_PROTOTYPE(gre_tree_bound, gre_pcb_key, gk_entry,
>> +    gre_pcb_key_cmp_bound);
>> +RBT_PROTOTYPE(gre_tree_connected, gre_pcb_key, gk_entry,
>> +    gre_pcb_key_cmp_connected);
>> +
>> +/*
>> + * Address family specific operations used by the shared GRE datagram
>> + * socket code.  gre_ip4_ops and gre_ip6_ops are the two instances.
>> + */
>> +struct gre_ops {
>> +    int     (*op_nametosa)(struct inpcb *, struct mbuf *,
>> +                struct sockaddr **);
>> +    int     (*op_is_wildcard)(struct sockaddr *);
>> +    int     (*op_is_multicast)(struct sockaddr *);
>> +    int     (*op_is_broadcast)(unsigned int, struct sockaddr *);
>> +    /* returns 0 if the address is local, otherwise an errno */
>> +    int     (*op_is_local)(unsigned int, struct sockaddr *);
>> +
>> +    /* the GRE protocol travels in the sockaddr port field */
>> +    uint16_t (*op_proto)(struct sockaddr *);
>> +    void    (*op_addr)(union inpaddru *, struct sockaddr *);
>> +
>> +    int     (*op_selsrc)(struct inpcb *, struct sockaddr *,
>> +                union inpaddru *, void *);
>> +    int     (*op_control)(struct socket *, u_long, caddr_t, struct ifnet *);
>> +    void    (*op_getsockname)(struct inpcb *, struct mbuf *);
>> +    void    (*op_getpeername)(struct inpcb *, struct mbuf *);
>> +    int     (*op_ctloutput)(int, struct socket *, int, int, struct mbuf *);
>> +
>> +    void    (*op_sbappend)(struct gre_pcb *, struct mbuf *, int, uint32_t);
>> +
>> +    /* PRU_SEND is split into two parts, with gre_output in the middle: */
>> +
>> +    /* 1: the "pre" phase, so ipv6 can do its stupid pktopts stuff */
>> +    int     (*op_send)(const struct gre_ops *, struct gre_pcb *,
>> +                struct mbuf *, struct mbuf *, struct mbuf *);
>> +    /* 2: actually doing the ip encap and output */
>> +    int     (*op_output)(struct gre_pcb *, const struct gre_pcb_key *,
>> +                struct mbuf *, void *);
>> +
>> +    /* points at ip_defttl or ip6_defhlim */
>> +    int * const defttl;
>> +};
>> +struct gre_tree_wildcards gre_wildcards = RBT_INITIALIZER();
>> +struct gre_tree_bound gre_bound = RBT_INITIALIZER();
>> +struct gre_tree_connected gre_connected = RBT_INITIALIZER();
>> +
>> +struct pool gre_pcb_key_pool;
>> +struct pool gre_pcb_pool;
>> +
>> +static struct mbuf *
>> +             gre_ip_input(const struct gre_ops *, struct mbuf *, int,
>> +                uint8_t, struct gre_pcb_key *);
>> +
>> +static int   gre_usrreq(const struct gre_ops *, struct socket *, int,
>> +                struct mbuf *, struct mbuf *, struct mbuf *,
>> +                struct proc *);
>> +static int   gre_ctloutput(const struct gre_ops *, int, struct socket *,
>> +                int, int, struct mbuf *);
>> +
>> +static int   gre_send(const struct gre_ops *, struct gre_pcb *,
>> +                struct mbuf *, struct mbuf *, struct mbuf *);
>> +static int   gre_output(const struct gre_ops *, struct gre_pcb *,
>> +                struct mbuf *, struct mbuf *, struct mbuf *, void *);
>> +static int   gre_disconnect(struct gre_pcb *);
>> +
>> +#define GRE_OPT_EINVAL      ((void *)-1)
>> +static void *gre_opt(struct mbuf *, int, int, socklen_t);
>> +
>> +unsigned int gre_sendspace = 9216; /* XXX sysctl? */
>> +unsigned int gre_recvspace = (40 * 1024);
>> +
>> +/*
>> + * Allocate a zeroed lookup key (PR_NOWAIT|PR_ZERO) and seed it with the
>> + * GRE flags and key configured on the pcb.  Returns NULL if the pool
>> + * allocation fails.
>> + */
>> +static struct gre_pcb_key *
>> +gre_pcb_key_get(const struct gre_pcb *gpcb)
>> +{
>> +    struct gre_pcb_key *nk = pool_get(&gre_pcb_key_pool,
>> +        PR_NOWAIT|PR_ZERO);
>> +
>> +    if (nk != NULL) {
>> +            nk->gk_flags = gpcb->gpcb_flags;
>> +            nk->gk_key = gpcb->gpcb_key;
>> +            TAILQ_INIT(&nk->gk_pcbs);
>> +    }
>> +
>> +    return (nk);
>> +}
>> +
>> +static void
>> +gre_pcb_key_put(struct gre_pcb_key *gk)
>> +{
>> +    pool_put(&gre_pcb_key_pool, gk);
>> +}
>> +
>> +/*
>> + * Record the state on the key and insert it into the tree that belongs
>> + * to that state (wildcard, bound or connected).
>> + *
>> + * The return value is whatever RBT_INSERT returns; per tree(3) that is
>> + * presumably NULL on success or the already present colliding key.
>> + * Any state other than the three handled below panics.
>> + */
>> +static struct gre_pcb_key *
>> +gre_pcb_key_insert(unsigned int state, struct gre_pcb_key *gk)
>> +{
>> +    struct gre_pcb_key *ogk;
>> +
>> +    /* gk_state is set even if the insert below finds a collision */
>> +    gk->gk_state = state;
>> +
>> +    switch (state) {
>> +    case GRE_S_WILDCARD:
>> +            ogk = RBT_INSERT(gre_tree_wildcards, &gre_wildcards, gk);
>> +            break;
>> +    case GRE_S_BOUND:
>> +            ogk = RBT_INSERT(gre_tree_bound, &gre_bound, gk);
>> +            break;
>> +    case GRE_S_CONNECTED:
>> +            ogk = RBT_INSERT(gre_tree_connected, &gre_connected, gk);
>> +            break;
>> +    default:
>> +            panic("%s unexpected state %u", __func__, state);
>> +    }
>> +
>> +    return (ogk);
>> +}
>> +
>> +static inline int
>> +gre_pcb_empty(struct gre_pcb_list *l)
>> +{
>> +    return (TAILQ_EMPTY(l));
>> +}
>> +
>> +static inline void
>> +gre_pcb_insert(struct gre_pcb_list *l, struct gre_pcb *gpcb)
>> +{
>> +    TAILQ_INSERT_TAIL(l, gpcb_inp(gpcb), inp_queue);
>> +}
>> +
>> +static inline void
>> +gre_pcb_remove(struct gre_pcb_list *l, struct gre_pcb *gpcb)
>> +{
>> +    TAILQ_REMOVE(l, gpcb_inp(gpcb), inp_queue);
>> +}
>> +
>> +/* Return the first pcb on the list; an empty list yields NULL. */
>> +static inline struct gre_pcb *
>> +gre_pcb_first(struct gre_pcb_list *l)
>> +{
>> +    struct inpcb *head = TAILQ_FIRST(l);
>> +
>> +    return (inp_gpcb(head));
>> +}
>> +
>> +/* Return the pcb queued after gpcb via inp_queue. */
>> +static inline struct gre_pcb *
>> +gre_pcb_next(struct gre_pcb *gpcb)
>> +{
>> +    struct inpcb *cur = gpcb_inp(gpcb);
>> +
>> +    return (inp_gpcb(TAILQ_NEXT(cur, inp_queue)));
>> +}
>> +
>> +/*
>> + * INET4
>> + */
>> +
>> +static int  gre_ip4_nametosa(struct inpcb *, struct mbuf *,
>> +                struct sockaddr **);
>> +static int  gre_ip4_is_wildcard(struct sockaddr *);
>> +static int  gre_ip4_is_multicast(struct sockaddr *);
>> +static int  gre_ip4_is_broadcast(unsigned int, struct sockaddr *);
>> +static int  gre_ip4_is_local(unsigned int, struct sockaddr *);
>> +static uint16_t     gre_ip4_proto(struct sockaddr *);
>> +static void gre_ip4_addr(union inpaddru *, struct sockaddr *);
>> +static int  gre_ip4_selsrc(struct inpcb *, struct sockaddr *,
>> +                union inpaddru *, void *);
>> +static void gre_ip4_sbappend(struct gre_pcb *, struct mbuf *, int,
>> +                uint32_t);
>> +static int  gre_ip4_send(const struct gre_ops *, struct gre_pcb *,
>> +                struct mbuf *, struct mbuf *, struct mbuf *);
>> +static int  gre_ip4_output(struct gre_pcb *, const struct gre_pcb_key *,
>> +                struct mbuf *, void *);
>> +
>> +static const struct gre_ops gre_ip4_ops = {
>> +    .op_nametosa            = gre_ip4_nametosa,
>> +    .op_is_wildcard         = gre_ip4_is_wildcard,
>> +    .op_is_multicast        = gre_ip4_is_multicast,
>> +    .op_is_broadcast        = gre_ip4_is_broadcast,
>> +    .op_is_local            = gre_ip4_is_local,
>> +
>> +    .op_proto               = gre_ip4_proto,
>> +    .op_addr                = gre_ip4_addr,
>> +
>> +    .op_selsrc              = gre_ip4_selsrc,
>> +    .op_control             = in_control,
>> +    .op_getsockname         = in_setsockaddr,
>> +    .op_getpeername         = in_setpeeraddr,
>> +    .op_ctloutput           = ip_ctloutput,
>> +
>> +    .op_sbappend            = gre_ip4_sbappend,
>> +    .op_send                = gre_ip4_send,
>> +    .op_output              = gre_ip4_output,
>> +
>> +    .defttl                 = &ip_defttl,
>> +};
>> +
>> +
>> +static int
>> +gre_ip4_nametosa(struct inpcb *inp, struct mbuf *addr, struct sockaddr **sa)
>> +{
>> +    return (in_nam2sin(addr, (struct sockaddr_in **)sa));
>> +}
>> +
>> +/* True if the IPv4 address in sa is INADDR_ANY. */
>> +static int
>> +gre_ip4_is_wildcard(struct sockaddr *sa)
>> +{
>> +    const struct in_addr *ina = &satosin(sa)->sin_addr;
>> +
>> +    return (ina->s_addr == INADDR_ANY);
>> +}
>> +
>> +/* Apply IN_MULTICAST() to the IPv4 address in sa. */
>> +static int
>> +gre_ip4_is_multicast(struct sockaddr *sa)
>> +{
>> +    return (IN_MULTICAST(satosin(sa)->sin_addr.s_addr));
>> +}
>> +
>> +/*
>> + * True if the IPv4 address in sa is the limited broadcast address or
>> + * in_broadcast() reports it as a broadcast address in rtableid.
>> + */
>> +static int
>> +gre_ip4_is_broadcast(unsigned int rtableid, struct sockaddr *sa)
>> +{
>> +    struct sockaddr_in *dst = satosin(sa);
>> +
>> +    if (dst->sin_addr.s_addr == INADDR_BROADCAST)
>> +            return (1);
>> +
>> +    return (in_broadcast(dst->sin_addr, rtableid) != 0);
>> +}
>> +
>> +/*
>> + * Check that the IPv4 address in sa is configured on an interface in
>> + * rtableid.
>> + *
>> + * Despite the name this does not return a boolean: it returns 0 when
>> + * ifa_ifwithaddr() finds the address and EADDRNOTAVAIL when it does not.
>> + */
>> +static int
>> +gre_ip4_is_local(unsigned int rtableid, struct sockaddr *sa)
>> +{
>> +    struct sockaddr_in *sin = satosin(sa);
>> +    /* cope with ifa_ifwithaddr using memcmp */
>> +    /* only len, family and address are copied, so sin_port stays zero */
>> +    struct sockaddr_in needle = {
>> +            .sin_len = sin->sin_len,
>> +            .sin_family = sin->sin_family,
>> +            .sin_addr = sin->sin_addr,
>> +    };
>> +    struct ifaddr *ifa;
>> +
>> +    ifa = ifa_ifwithaddr(sintosa(&needle), rtableid);
>> +    if (ifa == NULL)
>> +            return (EADDRNOTAVAIL);
>> +
>> +    return (0);
>> +}
>> +
>> +/* The GRE protocol is carried in sin_port; return it unmodified. */
>> +static uint16_t
>> +gre_ip4_proto(struct sockaddr *sa)
>> +{
>> +    return (satosin(sa)->sin_port);
>> +}
>> +
>> +/* Copy the IPv4 address out of sa into the IPv4 slot of addr. */
>> +static void
>> +gre_ip4_addr(union inpaddru *addr, struct sockaddr *sa)
>> +{
>> +    addr->iau_a4u.inaddr = satosin(sa)->sin_addr;
>> +}
>> +
>> +/*
>> + * Pick the IPv4 source address for a packet going to sa and store it in
>> + * laddr.
>> + *
>> + * opts is the control mbuf of the send (it may be NULL).  If it carries
>> + * an IP_SENDSRCADDR message, that address is used after
>> + * in_pcbaddrisavail() accepts it; otherwise in_pcbselsrc() chooses.
>> + * Returns 0 on success, EINVAL for a malformed control mbuf, or the
>> + * error from the helper that rejected the address.
>> + */
>> +static int
>> +gre_ip4_selsrc(struct inpcb *inp, struct sockaddr *sa, union inpaddru 
>> *laddr,
>> +    void *opts)
>> +{
>> +    struct in_addr *insrc;
>> +    int error;
>> +
>> +    insrc = gre_opt(opts, IPPROTO_IP, IP_SENDSRCADDR, sizeof(*insrc));
>> +    if (insrc == GRE_OPT_EINVAL)
>> +            return (EINVAL);
>> +
>> +    if (insrc == NULL) {
>> +            /* no per-packet source given: let the stack choose one */
>> +            error = in_pcbselsrc(&insrc, satosin(sa), inp);
>> +            if (error != 0)
>> +                    return (error);
>> +    } else {
>> +            struct sockaddr_in sin = {
>> +                    .sin_len = sizeof(sin),
>> +                    .sin_family = AF_INET,
>> +                    .sin_addr = *insrc,
>> +            };
>> +
>> +            /* XXX sigh. */
>> +            error = in_pcbaddrisavail(inp, &sin, 0, NULL);
>> +            if (error != 0)
>> +                    return (error);
>> +    }
>> +
>> +    laddr->iau_a4u.inaddr = *insrc;
>> +
>> +    return (0);
>> +}
>> +
>> +static int
>> +gre_ip4_send(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *m, struct mbuf *addr, struct mbuf *control)
>> +{
>> +    return (gre_output(ops, gpcb, m, addr, control, control));
>> +}
>> +
>> +/*
>> + * Prepend an IPv4 header to a finished GRE packet and pass it to
>> + * ip_output().
>> + *
>> + * The addresses come from gk and the TTL from the pcb.  opts is the
>> + * control mbuf of the send; it is only looked at for IP_IPSECFLOWINFO
>> + * when IPSEC is compiled in and the pcb has INP_IPSECFLOWINFO set.
>> + *
>> + * m is freed here on the EMSGSIZE and EINVAL paths.  On the ENOBUFS
>> + * path it is not freed again, presumably because m_prepend() already
>> + * released the chain (see mbuf(9)).
>> + *
>> + * Returns 0 or an errno.
>> + */
>> +static int
>> +gre_ip4_output(struct gre_pcb *gpcb, const struct gre_pcb_key *gk,
>> +    struct mbuf *m, void *opts)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct ip *ip;
>> +    int error;
>> +    uint32_t ipsecflowinfo = 0;
>> +
>> +    m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
>> +    if (m == NULL) {
>> +            error = ENOBUFS;
>> +            goto dropped;
>> +    }
>> +    /* ip_len is 16 bits wide, the whole packet has to fit */
>> +    if (m->m_pkthdr.len > IP_MAXPACKET) {
>> +            error = EMSGSIZE;
>> +            goto drop;
>> +    }
>> +
>> +#ifdef IPSEC
>> +    if (ISSET(inp->inp_flags, INP_IPSECFLOWINFO)) {
>> +            uint32_t *p = gre_opt(opts, IPPROTO_IP, IP_IPSECFLOWINFO,
>> +                sizeof(*p));
>> +            if (p == GRE_OPT_EINVAL) {
>> +                    error = EINVAL;
>> +                    goto drop;
>> +            }
>> +
>> +            if (p != NULL)
>> +                    ipsecflowinfo = *p;
>> +    }
>> +#endif /* IPSEC */
>> +
>> +    ip = mtod(m, struct ip *);
>> +    ip->ip_v = IPVERSION;
>> +    ip->ip_hl = sizeof(*ip) >> 2;
>> +    ip->ip_off = 0; /* XXX nodf? */
>> +    ip->ip_tos = 0; /* XXX */
>> +    ip->ip_len = htons(m->m_pkthdr.len);
>> +    ip->ip_ttl = gpcb->gpcb_ttl;
>> +    ip->ip_p = IPPROTO_GRE;
>> +    ip->ip_src = gk->gk_laddr4;
>> +    ip->ip_dst = gk->gk_faddr4;
>> +
>> +    /* only the SO_BROADCAST bit of so_options is passed on as a flag */
>> +    error = ip_output(m, inp->inp_options, &inp->inp_route,
>> +        gpcb_so(gpcb)->so_options & SO_BROADCAST, inp->inp_moptions, inp,
>> +        ipsecflowinfo);
>> +
>> +    return (error);
>> +
>> +drop:
>> +    m_freem(m);
>> +dropped:
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Look for a control message of the given level and type whose length
>> + * is exactly CMSG_LEN(len) in a single control mbuf.
>> + *
>> + * Returns a pointer to the message data inside the mbuf, NULL if
>> + * control is NULL or no such message exists, and GRE_OPT_EINVAL if the
>> + * mbuf is chained or a message header is malformed.  A message with a
>> + * matching level and type but a different length is skipped rather
>> + * than rejected.
>> + */
>> +static void *
>> +gre_opt(struct mbuf *control, int level, int type, socklen_t len)
>> +{
>> +    u_int clen;
>> +    struct cmsghdr *cm;
>> +    caddr_t cmsgs;
>> +    size_t mlen;
>> +
>> +    if (control == NULL)
>> +            return (NULL);
>> +
>> +    /* only a single mbuf of control data is supported */
>> +    if (control->m_next != NULL)
>> +            return (GRE_OPT_EINVAL);
>> +
>> +    mlen = CMSG_LEN(len);
>> +
>> +    clen = control->m_len;
>> +    cmsgs = mtod(control, caddr_t);
>> +    do {
>> +            /* an empty mbuf or a truncated trailing header ends up here */
>> +            if (clen < CMSG_LEN(0))
>> +                    return (GRE_OPT_EINVAL);
>> +
>> +            cm = (struct cmsghdr *)cmsgs;
>> +            if (cm->cmsg_len < CMSG_LEN(0) ||
>> +                CMSG_ALIGN(cm->cmsg_len) > clen)
>> +                    return (GRE_OPT_EINVAL);
>> +
>> +            if (cm->cmsg_level == level &&
>> +                cm->cmsg_type == type &&
>> +                cm->cmsg_len == mlen)
>> +                    return (CMSG_DATA(cm));
>> +
>> +            /* step over this message, including its alignment padding */
>> +            clen -= CMSG_ALIGN(cm->cmsg_len);
>> +            cmsgs += CMSG_ALIGN(cm->cmsg_len);
>> +    } while (clen);
>> +
>> +    return (NULL);
>> +}
>> +
>> +/*
>> + * Run the address family specific send routine and free the control
>> + * mbuf afterwards, whether or not the send succeeded.  The op_send
>> + * implementation is therefore expected not to free control itself.
>> + */
>> +static int
>> +gre_send(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *m, struct mbuf *addr, struct mbuf *control)
>> +{
>> +    int error;
>> +
>> +    error = (*ops->op_send)(ops, gpcb, m, addr, control);
>> +
>> +    m_freem(control);
>> +
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Build the GRE header (and the optional checksum, key and sequence
>> + * number headers) in front of the payload in m, then pass the packet to
>> + * the address family specific op_output.
>> + *
>> + * With addr set this is a sendto() style call: the socket must not be
>> + * connected, and a temporary key is assembled on the stack from the
>> + * destination, the selected source address and the pcb's GRE settings.
>> + * Without addr the socket has to be connected and its own key is used.
>> + *
>> + * control is only read here (for GRE_SENDSEQ); it is not freed.  opts
>> + * is handed through untouched to op_selsrc and op_output.  m is freed
>> + * on the "drop" path; after a failed m_prepend() it is not freed again,
>> + * presumably because m_prepend() already released it (see mbuf(9)).
>> + *
>> + * Returns 0 or an errno.
>> + */
>> +static int
>> +gre_output(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *m, struct mbuf *addr, struct mbuf *control, void *opts)
>> +{
>> +    const struct gre_pcb_key *gk = gpcb->gpcb_pcb_key;
>> +    struct gre_pcb_key key;
>> +    struct gre_header *gh;
>> +    int error;
>> +
>> +    if (addr != NULL) {
>> +            struct gre_pcb_key *ogk;
>> +            struct inpcb *inp = gpcb_inp(gpcb);
>> +            struct sockaddr *sa;
>> +            int state = GRE_S_DISCONNECTED;
>> +
>> +            /* a pcb without a key counts as disconnected */
>> +            if (gk != NULL)
>> +                    state = gk->gk_state;
>> +
>> +            if (state == GRE_S_CONNECTED) {
>> +                    error = EISCONN;
>> +                    goto drop;
>> +            }
>> +
>> +            error = (*ops->op_nametosa)(inp, addr, &sa);
>> +            if (error != 0)
>> +                    goto drop;
>> +
>> +            key.gk_family = sa->sa_family;
>> +            key.gk_proto = (*ops->op_proto)(sa);
>> +
>> +            /* a bound or wildcard socket can only send its own proto */
>> +            if (state != GRE_S_DISCONNECTED) {
>> +                    KASSERT(key.gk_family == gk->gk_family);
>> +                    if (key.gk_proto != gk->gk_proto) {
>> +                            error = EADDRNOTAVAIL;
>> +                            goto drop;
>> +                    }
>> +            }
>> +            if (state == GRE_S_BOUND) {
>> +                    key.gk_laddr = gk->gk_laddr;
>> +            } else {
>> +                    error = (*ops->op_selsrc)(inp, sa, &key.gk_laddr,
>> +                        opts);
>> +                    if (error != 0)
>> +                            goto drop;
>> +            }
>> +
>> +            (*ops->op_addr)(&key.gk_faddr, sa);
>> +
>> +            key.gk_rtableid = inp->inp_rtableid;
>> +            key.gk_flags = gpcb->gpcb_flags;
>> +            key.gk_key = gpcb->gpcb_key;
>> +
>> +            /*
>> +             * If a connected socket already uses this exact key, only
>> +             * send when gpcb_reuse shares a bit with the so_options of
>> +             * the first pcb on that key.
>> +             */
>> +            ogk = RBT_FIND(gre_tree_connected, &gre_connected, &key);
>> +            if (ogk != NULL) {
>> +                    struct gre_pcb *ogpcb;
>> +                    int reuse = gpcb->gpcb_reuse;
>> +
>> +                    ogpcb = gre_pcb_first(&ogk->gk_pcbs);
>> +                    if (ogpcb != NULL) {
>> +                            struct socket *oso = gpcb_so(ogpcb);
>> +                            if (!ISSET(reuse, oso->so_options)) {
>> +                                    error = EADDRINUSE;
>> +                                    goto drop;
>> +                            }
>> +                    }
>> +            }
>> +
>> +            gk = &key;
>> +    } else {
>> +            if (gk == NULL || gk->gk_state != GRE_S_CONNECTED) {
>> +                    error = ENOTCONN;
>> +                    goto drop;
>> +            }
>> +    }
>> +
>> +    /*
>> +     * The optional headers are prepended innermost first, so on the
>> +     * wire they follow the GRE header as: checksum, key, sequence.
>> +     */
>> +    if (ISSET(gk->gk_flags, htons(GRE_SP))) {
>> +            struct gre_h_seq *gsh;
>> +            uint32_t *seqno;
>> +
>> +            seqno = gre_opt(control, IPPROTO_GRE, GRE_SENDSEQ,
>> +                sizeof(*seqno));
>> +            if (seqno == GRE_OPT_EINVAL) {
>> +                    error = EINVAL;
>> +                    goto drop;
>> +            }
>> +
>> +            m = m_prepend(m, sizeof(*gsh), M_DONTWAIT);
>> +            if (m == NULL)
>> +                    return (ENOBUFS);
>> +
>> +            /*
>> +             * A GRE_SENDSEQ control message overrides and resets the
>> +             * counter; otherwise the counter is incremented first and
>> +             * the new value is sent.
>> +             */
>> +            gsh = mtod(m, struct gre_h_seq *);
>> +            htobem32(&gsh->gre_seq, seqno != NULL ?
>> +                (gpcb->gpcb_seq = *seqno) : /* keep track of new start */
>> +                atomic_inc_int_nv(&gpcb->gpcb_seq));
>> +    }
>> +
>> +    if (ISSET(gk->gk_flags, htons(GRE_KP))) {
>> +            struct gre_h_key *gkh;
>> +
>> +            m = m_prepend(m, sizeof(*gkh), M_DONTWAIT);
>> +            if (m == NULL)
>> +                    return (ENOBUFS);
>> +
>> +            gkh = mtod(m, struct gre_h_key *);
>> +            gkh->gre_key = gk->gk_key;
>> +    }
>> +
>> +    if (ISSET(gk->gk_flags, htons(GRE_CP))) {
>> +            struct gre_h_cksum *gch;
>> +
>> +            m = m_prepend(m, sizeof(*gch), M_DONTWAIT);
>> +            if (m == NULL)
>> +                    return (ENOBUFS);
>> +
>> +            /* the checksum field is sent as zero for now, see the XXX */
>> +            gch = mtod(m, struct gre_h_cksum *);
>> +            gch->gre_cksum = 0; /* XXX need to checksum */
>> +            gch->gre_reserved1 = 0;
>> +    }
>> +
>> +    m = m_prepend(m, sizeof(*gh), M_DONTWAIT);
>> +    if (m == NULL)
>> +            return (ENOBUFS);
>> +
>> +    /* flags and proto are already in network byte order in the key */
>> +    gh = mtod(m, struct gre_header *);
>> +    gh->gre_flags = gk->gk_flags;
>> +    gh->gre_proto = gk->gk_proto;
>> +
>> +    KASSERT(ISSET(m->m_flags, M_PKTHDR));
>> +
>> +    m->m_pkthdr.ph_rtableid = gpcb_inp(gpcb)->inp_rtableid;
>> +
>> +    return ((*ops->op_output)(gpcb, gk, m, opts));
>> +
>> +drop:
>> +    m_freem(m);
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Queue a received packet on the socket's receive buffer.
>> + *
>> + * sa is the address recorded with the packet, opts is a chain of
>> + * control messages built by the caller (may be NULL), hlen is the
>> + * number of leading bytes trimmed from m before queueing, and seqno is
>> + * reported in a GRE_RECVSEQ control message when the pcb has
>> + * GREPCB_RECVSEQ set.
>> + *
>> + * m and opts are consumed: they are either queued or freed here.
>> + */
>> +static void
>> +gre_sbappend(struct gre_pcb *gpcb, struct sockaddr *sa, struct mbuf *m,
>> +    struct mbuf *opts, int hlen, uint32_t seqno)
>> +{
>> +    struct socket *so = gpcb_so(gpcb);
>> +
>> +    if (ISSET(gpcb->gpcb_pflags, GREPCB_RECVSEQ)) {
>> +            struct mbuf *opt = sbcreatecontrol(&seqno, sizeof(seqno),
>> +                GRE_RECVSEQ, IPPROTO_GRE);
>> +            /* if this fails the packet is delivered without the seqno */
>> +            if (opt != NULL) {
>> +                    opt->m_next = opts;
>> +                    opts = opt;
>> +            }
>> +    }
>> +
>> +    m_adj(m, hlen);
>> +    /* a zero return means the packet was not queued, so drop it */
>> +    if (sbappendaddr(so, &so->so_rcv, sa, m, opts) == 0) {
>> +            m_freem(m);
>> +            m_freem(opts);
>> +            return;
>> +    }
>> +
>> +    sorwakeup(so);
>> +}
>> +
>> +int
>> +gre_ip4_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf 
>> *addr,
>> +    struct mbuf *control, struct proc *p)
>> +{
>> +    return (gre_usrreq(&gre_ip4_ops, so, req, m, addr, control, p));
>> +}
>> +
>> +int
>> +gre_ip4_ctloutput(int op, struct socket *so, int level, int optname,
>> +    struct mbuf *m)
>> +{
>> +    return (gre_ctloutput(&gre_ip4_ops, op, so, level, optname, m));
>> +}
>> +
>> +/*
>> + * IPv4 receive path: build the source sockaddr and any requested
>> + * control messages for a packet, then queue it with gre_sbappend().
>> + *
>> + * m still starts with the IPv4 header here; the source address is read
>> + * from it, and the sockaddr port carries the pcb's local port, i.e. the
>> + * GRE protocol.  hlen and seqno are passed through to gre_sbappend().
>> + */
>> +static void
>> +gre_ip4_sbappend(struct gre_pcb *gpcb, struct mbuf *m, int hlen, uint32_t 
>> seqno)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct socket *so = gpcb_so(gpcb);
>> +    struct mbuf *opts = NULL;
>> +    struct sockaddr_in sin = {
>> +            .sin_len = sizeof(sin),
>> +            .sin_family = AF_INET,
>> +            .sin_port = inp->inp_lport,
>> +            .sin_addr = mtod(m, struct ip *)->ip_src,
>> +    };
>> +
>> +    if (ISSET(inp->inp_flags, INP_CONTROLOPTS) ||
>> +        ISSET(so->so_options, SO_TIMESTAMP))
>> +            ip_savecontrol(inp, &opts, mtod(m, struct ip *), m);
>> +
>> +    if (ISSET(inp->inp_flags, INP_RECVDSTPORT)) {
>> +            struct mbuf *opt;
>> +
>> +            /* the "destination port" reported is the pcb's local port */
>> +            opt = sbcreatecontrol(&inp->inp_lport, sizeof(inp->inp_lport),
>> +                IP_RECVDSTPORT, IPPROTO_IP);
>> +            if (opt != NULL) {
>> +                    opt->m_next = opts;
>> +                    opts = opt;
>> +            }
>> +    }
>> +
>> +    gre_sbappend(gpcb, sintosa(&sin), m, opts, hlen, seqno);
>> +}
>> +
>> +/*
>> + * IPv4 protocol input for IPPROTO_GRE.
>> + *
>> + * The packet is offered to three consumers in turn: gre_if4_input(),
>> + * then the datagram socket code in gre_ip_input(), and finally the raw
>> + * socket input rip_input().  The first two return NULL once they have
>> + * taken the mbuf, in which case IPPROTO_DONE is returned.
>> + */
>> +int
>> +gre_ip4_input(struct mbuf **mp, int *offp, int proto, int af)
>> +{
>> +    struct mbuf *m = *mp;
>> +    struct gre_pcb_key gk;
>> +    struct ip *ip;
>> +
>> +    m = gre_if4_input(m, *offp);
>> +    if (m == NULL)
>> +            return (IPPROTO_DONE);
>> +
>> +    ip = mtod(m, struct ip *);
>> +
>> +    /*
>> +     * Only the family and addresses are filled in here; the rest of
>> +     * the on-stack key is presumably completed by gre_ip_input().
>> +     * Note the swap: the packet's destination is our local address.
>> +     */
>> +    gk.gk_family = AF_INET;
>> +    gk.gk_laddr4 = ip->ip_dst;
>> +    gk.gk_faddr4 = ip->ip_src;
>> +
>> +    m = gre_ip_input(&gre_ip4_ops, m, *offp, ip->ip_ttl, &gk);
>> +    if (m == NULL)
>> +            return (IPPROTO_DONE);
>> +
>> +    /* m may differ from *mp by now, so hand the current one on */
>> +    *mp = m;
>> +    return (rip_input(mp, offp, proto, af));
>> +}
>> +
>> +#if INET6
>> +#include <netinet6/ip6_var.h>
>> +#include <netinet6/in6_var.h>
>> +
>> +static int  gre_ip6_nametosa(struct inpcb *, struct mbuf *,
>> +                struct sockaddr **);
>> +static int  gre_ip6_is_wildcard(struct sockaddr *);
>> +static int  gre_ip6_is_multicast(struct sockaddr *);
>> +static int  gre_ip6_is_broadcast(unsigned int, struct sockaddr *);
>> +static int  gre_ip6_is_local(unsigned int, struct sockaddr *);
>> +static uint16_t     gre_ip6_proto(struct sockaddr *);
>> +static void gre_ip6_addr(union inpaddru *, struct sockaddr *);
>> +static int  gre_ip6_selsrc(struct inpcb *, struct sockaddr *,
>> +                union inpaddru *, void *);
>> +static void gre_ip6_sbappend(struct gre_pcb *, struct mbuf *, int,
>> +                uint32_t);
>> +static int  gre_ip6_send(const struct gre_ops *, struct gre_pcb *,
>> +                struct mbuf *, struct mbuf *, struct mbuf *);
>> +static int  gre_ip6_output(struct gre_pcb *, const struct gre_pcb_key *,
>> +                struct mbuf *, void *);
>> +
>> +static const struct gre_ops gre_ip6_ops = {
>> +    .op_nametosa            = gre_ip6_nametosa,
>> +    .op_is_wildcard         = gre_ip6_is_wildcard,
>> +    .op_is_multicast        = gre_ip6_is_multicast,
>> +    .op_is_broadcast        = gre_ip6_is_broadcast,
>> +    .op_is_local            = gre_ip6_is_local,
>> +
>> +    .op_proto               = gre_ip6_proto,
>> +    .op_addr                = gre_ip6_addr,
>> +
>> +    .op_selsrc              = gre_ip6_selsrc,
>> +    .op_control             = in6_control,
>> +    .op_getsockname         = in6_setsockaddr,
>> +    .op_getpeername         = in6_setpeeraddr,
>> +    .op_ctloutput           = ip6_ctloutput,
>> +    .op_sbappend            = gre_ip6_sbappend,
>> +    .op_send                = gre_ip6_send,
>> +    .op_output              = gre_ip6_output,
>> +
>> +    .defttl                 = &ip6_defhlim,
>> +};
>> +
>> +/*
>> + * Turn the name mbuf into an IPv6 sockaddr for GRE use.
>> + *
>> + * IPv4 mapped addresses are refused with EADDRNOTAVAIL and a failure to
>> + * embed the scope is reported as EINVAL.  *sa is only written on
>> + * success.
>> + */
>> +static int
>> +gre_ip6_nametosa(struct inpcb *inp, struct mbuf *addr, struct sockaddr **sa)
>> +{
>> +    struct sockaddr_in6 *dst6;
>> +    int rv;
>> +
>> +    if ((rv = in6_nam2sin6(addr, &dst6)) != 0)
>> +            return (rv);
>> +
>> +    /* reject IPv4 mapped addresses, we have no support for them */
>> +    if (IN6_IS_ADDR_V4MAPPED(&dst6->sin6_addr))
>> +            return (EADDRNOTAVAIL);
>> +
>> +    if (in6_embedscope(&dst6->sin6_addr, dst6, inp) != 0)
>> +            return (EINVAL);
>> +
>> +    *sa = sin6tosa(dst6);
>> +
>> +    return (0);
>> +}
>> +
>> +/* True if the IPv6 address in sa is the unspecified address. */
>> +static int
>> +gre_ip6_is_wildcard(struct sockaddr *sa)
>> +{
>> +    return (IN6_IS_ADDR_UNSPECIFIED(&satosin6(sa)->sin6_addr));
>> +}
>> +
>> +/* True if the IPv6 address in sa is a multicast address. */
>> +static int
>> +gre_ip6_is_multicast(struct sockaddr *sa)
>> +{
>> +    return (IN6_IS_ADDR_MULTICAST(&satosin6(sa)->sin6_addr));
>> +}
>> +
>> +static int
>> +gre_ip6_is_broadcast(unsigned int rtableid, struct sockaddr *sa)
>> +{
>> +    return (0);
>> +}
>> +
>> +/*
>> + * Check that the IPv6 address in sa is configured on an interface in
>> + * rtableid and is usable as a local address.
>> + *
>> + * Despite the name this does not return a boolean: it returns 0 when
>> + * the address is acceptable and EADDRNOTAVAIL when it is not found or
>> + * is anycast, tentative, duplicated or detached.
>> + */
>> +static int
>> +gre_ip6_is_local(unsigned int rtableid, struct sockaddr *sa)
>> +{
>> +    struct sockaddr_in6 *sin6 = satosin6(sa);
>> +    /* only len, family and address are copied, the rest stays zero */
>> +    struct sockaddr_in6 needle = {
>> +            .sin6_len = sin6->sin6_len,
>> +            .sin6_family = sin6->sin6_family,
>> +            .sin6_addr = sin6->sin6_addr,
>> +    };
>> +    struct ifaddr *ifa;
>> +
>> +    ifa = ifa_ifwithaddr(sin6tosa(&needle), rtableid);
>> +    if (ifa == NULL)
>> +            return (EADDRNOTAVAIL);
>> +
>> +    /*
>> +     * Binding to an anycast address might accidentally
>> +     * cause a packet to be sent with an anycast source
>> +     * address, so we forbid it.
>> +     *
>> +     * We should allow binding to a deprecated address,
>> +     * since the application dares to use it.
>> +     * But can we assume that applications are careful enough
>> +     * to check whether the address is deprecated or not?
>> +     * Maybe, as a safeguard, we should have a setsockopt
>> +     * flag to control the bind(2) behavior against
>> +     * deprecated addresses (default: forbid bind(2)).
>> +     */
>> +    if (ISSET(ifatoia6(ifa)->ia6_flags, IN6_IFF_ANYCAST|
>> +        IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED|IN6_IFF_DETACHED))
>> +            return (EADDRNOTAVAIL);
>> +
>> +    return (0);
>> +}
>> +
>> +/* The GRE protocol is carried in sin6_port; return it unmodified. */
>> +static uint16_t
>> +gre_ip6_proto(struct sockaddr *sa)
>> +{
>> +    return (satosin6(sa)->sin6_port);
>> +}
>> +
>> +/* Copy the IPv6 address out of sa into the IPv6 slot of addr. */
>> +static void
>> +gre_ip6_addr(union inpaddru *addr, struct sockaddr *sa)
>> +{
>> +    addr->iau_addr6 = satosin6(sa)->sin6_addr;
>> +}
>> +
>> +/*
>> + * Pick the IPv6 source address for a packet going to sa and store it in
>> + * laddr.  opts are the packet options for this send; if NULL, the
>> + * options stored on the pcb are used.  Returns 0 or the error from
>> + * in6_pcbselsrc().
>> + */
>> +static int
>> +gre_ip6_selsrc(struct inpcb *inp, struct sockaddr *sa,
>> +    union inpaddru *laddr, void *opts)
>> +{
>> +    struct in6_addr *src6;
>> +    int rv;
>> +
>> +    if (opts == NULL)
>> +            opts = inp->inp_outputopts6;
>> +
>> +    rv = in6_pcbselsrc(&src6, satosin6(sa), inp, opts);
>> +    if (rv == 0)
>> +            laddr->iau_addr6 = *src6;
>> +
>> +    return (rv);
>> +}
>> +
>> +int
>> +gre_ip6_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf 
>> *addr,
>> +    struct mbuf *control, struct proc *p)
>> +{
>> +    return (gre_usrreq(&gre_ip6_ops, so, req, m, addr, control, p));
>> +}
>> +
>> +int
>> +gre_ip6_ctloutput(int op, struct socket *so, int level, int optname,
>> +    struct mbuf *m)
>> +{
>> +    return (gre_ctloutput(&gre_ip6_ops, op, so, level, optname, m));
>> +}
>> +
>> +/*
>> + * IPv6 receive path: build the source sockaddr and any requested
>> + * control messages for a packet, then queue it with gre_sbappend().
>> + *
>> + * m still starts with the IPv6 header here; the source address is read
>> + * from it, and the sockaddr port carries the pcb's local port, i.e. the
>> + * GRE protocol.  hlen and seqno are passed through to gre_sbappend().
>> + */
>> +static void
>> +gre_ip6_sbappend(struct gre_pcb *gpcb, struct mbuf *m, int hlen,
>> +    uint32_t seqno)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct socket *so = gpcb_so(gpcb);
>> +    struct mbuf *opts = NULL;
>> +    struct sockaddr_in6 sin6 = {
>> +            .sin6_len = sizeof(sin6),
>> +            .sin6_family = AF_INET6,
>> +            .sin6_port = inp->inp_lport,
>> +    };
>> +
>> +    in6_recoverscope(&sin6, &mtod(m, struct ip6_hdr *)->ip6_src);
>> +
>> +    if (ISSET(inp->inp_flags, IN6P_CONTROLOPTS) ||
>> +        ISSET(so->so_options, SO_TIMESTAMP))
>> +            ip6_savecontrol(inp, m, &opts);
>> +
>> +    if (ISSET(inp->inp_flags, IN6P_RECVDSTPORT)) {
>> +            struct mbuf *opt;
>> +
>> +            /*
>> +             * The destination "port" of a received packet is our
>> +             * local port, matching the IPv4 path and sin6_port above.
>> +             */
>> +            opt = sbcreatecontrol(&inp->inp_lport, sizeof(inp->inp_lport),
>> +                IPV6_RECVDSTPORT, IPPROTO_IPV6);
>> +            if (opt != NULL) {
>> +                    opt->m_next = opts;
>> +                    opts = opt;
>> +            }
>> +    }
>> +
>> +    gre_sbappend(gpcb, sin6tosa(&sin6), m, opts, hlen, seqno);
>> +}
>> +
>> +/*
>> + * IPv6 "pre" phase of PRU_SEND: turn the control mbuf into per-packet
>> + * ip6_pktopts (layered over the pcb's sticky options) before calling
>> + * gre_output(), and release them again afterwards.
>> + *
>> + * Without control data the pcb's own options are used as they are.
>> + * control itself is not freed here; m is consumed on every path.
>> + */
>> +static int
>> +gre_ip6_send(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *m, struct mbuf *addr, struct mbuf *control)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct ip6_pktopts *opts = inp->inp_outputopts6;
>> +    struct ip6_pktopts opt;
>> +    int error;
>> +
>> +    if (control != NULL) {
>> +            error = ip6_setpktopts(control, &opt, opts, /* priv */ 1,
>> +                IPPROTO_GRE);
>> +            if (error != 0) {
>> +                    /*
>> +                     * opt may already hold options copied before the
>> +                     * failure; release them so they do not leak.
>> +                     */
>> +                    ip6_clearpktopts(&opt, -1);
>> +                    m_freem(m);
>> +                    return (error);
>> +            }
>> +            opts = &opt;
>> +    }
>> +
>> +    error = gre_output(ops, gpcb, m, addr, control, opts);
>> +    if (control != NULL)
>> +            ip6_clearpktopts(&opt, -1);
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Prepend an IPv6 header to a GRE packet and hand it to ip6_output().
>> + *
>> + * gk supplies the local and foreign addresses; opts is passed through
>> + * to ip6_output() unchanged.
>> + *
>> + * Returns EMSGSIZE (after freeing m) if the payload is larger than
>> + * IP_MAXPACKET, ENOBUFS if the header could not be prepended, or the
>> + * result of ip6_output().
>> + */
>> +static int
>> +gre_ip6_output(struct gre_pcb *gpcb, const struct gre_pcb_key *gk,
>> +    struct mbuf *m, void *opts)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    /*
>> +     * Keep the full width of the packet length.  Storing it in a
>> +     * uint16_t would truncate it before the size check below, which
>> +     * could then never fail.
>> +     */
>> +    int len = m->m_pkthdr.len;
>> +    struct ip6_hdr *ip6;
>> +    int flags = 0;
>> +    int error;
>> +
>> +    if (len > IP_MAXPACKET) {
>> +            error = EMSGSIZE;
>> +            goto drop;
>> +    }
>> +
>> +    m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
>> +    if (m == NULL) {
>> +            error = ENOBUFS;
>> +            goto dropped;
>> +    }
>> +
>> +    ip6 = mtod(m, struct ip6_hdr *);
>> +    /*
>> +     * The prepended space is not zeroed: clear the whole flow word
>> +     * before setting the version so that the traffic class and flow
>> +     * label do not carry stale mbuf contents.
>> +     */
>> +    ip6->ip6_flow = 0;
>> +    ip6->ip6_vfc = IPV6_VERSION;
>> +    ip6->ip6_plen = htons(len);
>> +    ip6->ip6_nxt = IPPROTO_GRE;
>> +    ip6->ip6_hlim = gpcb->gpcb_ttl;
>> +    ip6->ip6_src = gk->gk_laddr6;
>> +    ip6->ip6_dst = gk->gk_faddr6;
>> +
>> +    if (ISSET(inp->inp_flags, IN6P_MINMTU)) /* wtf */
>> +            flags |= IPV6_MINMTU;
>> +
>> +    error = ip6_output(m, opts, &inp->inp_route6,
>> +        flags, inp->inp_moptions6, inp);
>> +
>> +    return (error);
>> +
>> +drop:
>> +    m_freem(m);
>> +dropped:
>> +    return (error);
>> +}
>> +
>> +/*
>> + * IPv6 protocol input for GRE.  The packet is offered to
>> + * gre_if6_input() first, then to the GRE sockets via gre_ip_input();
>> + * whatever neither of them takes is passed on to rip6_input().
>> + */
>> +int
>> +gre_ip6_input(struct mbuf **mp, int *offp, int proto, int af)
>> +{
>> +    struct gre_pcb_key gk;
>> +    struct ip6_hdr *ip6;
>> +    struct mbuf *m;
>> +
>> +    m = gre_if6_input(*mp, *offp);
>> +    if (m == NULL)
>> +            return (IPPROTO_DONE);
>> +
>> +    /* the lookup key is seen from our side: local is the packet's dst */
>> +    ip6 = mtod(m, struct ip6_hdr *);
>> +    gk.gk_family = AF_INET6;
>> +    gk.gk_faddr6 = ip6->ip6_src;
>> +    gk.gk_laddr6 = ip6->ip6_dst;
>> +
>> +    m = gre_ip_input(&gre_ip6_ops, m, *offp, ip6->ip6_hlim, &gk);
>> +    if (m != NULL) {
>> +            *mp = m;
>> +            return (rip6_input(mp, offp, proto, af));
>> +    }
>> +
>> +    return (IPPROTO_DONE);
>> +}
>> +#endif /* INET6 */
>> +
>> +/*
>> + * generic GRE protocol handling
>> + */
>> +
>> +/*
>> + * Set up the pools that back GRE lookup keys and GRE pcbs.
>> + */
>> +void
>> +gre_init(void)
>> +{
>> +    /* lookup keys */
>> +    pool_init(&gre_pcb_key_pool, sizeof(struct gre_pcb_key), 0, IPL_NONE,
>> +        PR_WAITOK, "grekey", NULL);
>> +
>> +    /* protocol control blocks */
>> +    pool_init(&gre_pcb_pool, sizeof(struct gre_pcb), 0, IPL_NONE,
>> +        PR_WAITOK, "grepcb", NULL);
>> +}
>> +
>> +/*
>> + * Attach a freshly allocated GRE pcb to a socket.
>> + *
>> + * Fails with EINVAL if the socket already has a pcb, with the error
>> + * from suser() or soreserve(), or with ENOBUFS if no pcb could be
>> + * allocated.
>> + */
>> +int
>> +gre_attach(struct socket *so, int proto)
>> +{
>> +    const struct gre_ops *ops;
>> +    struct gre_pcb *gpcb;
>> +    struct inpcb *inp;
>> +    int inpflags;
>> +    int error;
>> +
>> +    if (so->so_pcb != NULL)
>> +            return (EINVAL);
>> +
>> +    if ((error = suser(curproc)) != 0)
>> +            return (error);
>> +
>> +    if ((error = soreserve(so, gre_sendspace, gre_recvspace)) != 0)
>> +            return (error);
>> +
>> +    /* pick the address family specific ops and inpcb flags */
>> +    ops = &gre_ip4_ops;
>> +    inpflags = 0;
>> +#ifdef INET6
>> +    if (sotopf(so) == PF_INET6) {
>> +            ops = &gre_ip6_ops;
>> +            inpflags = INP_IPV6;
>> +    }
>> +#endif
>> +
>> +    gpcb = pool_get(&gre_pcb_pool, PR_NOWAIT | PR_ZERO);
>> +    if (gpcb == NULL)
>> +            return (ENOBUFS);
>> +
>> +    inp = gpcb_inp(gpcb);
>> +    inp->inp_ppcb = (caddr_t)gpcb;
>> +    inp->inp_socket = so;
>> +    refcnt_init(&inp->inp_refcnt);
>> +
>> +    inp->inp_flags |= inpflags;
>> +    inp->inp_hops = *ops->defttl;
>> +    inp->inp_ip_minttl = 0;
>> +    inp->inp_rtableid = curproc->p_p->ps_rtableid;
>> +
>> +    inp->inp_seclevel[SL_AUTH] = IPSEC_AUTH_LEVEL_DEFAULT;
>> +    inp->inp_seclevel[SL_ESP_TRANS] = IPSEC_ESP_TRANS_LEVEL_DEFAULT;
>> +    inp->inp_seclevel[SL_ESP_NETWORK] = IPSEC_ESP_NETWORK_LEVEL_DEFAULT;
>> +    inp->inp_seclevel[SL_IPCOMP] = IPSEC_IPCOMP_LEVEL_DEFAULT;
>> +
>> +    so->so_pcb = inp;
>> +
>> +    return (0);
>> +}
>> +
>> +/*
>> + * Tear down a GRE pcb: unhook it from its socket, release the options
>> + * and route hanging off the inpcb, drop its lookup key and return the
>> + * pcb to the pool.
>> + */
>> +static void
>> +gre_inpdetach(struct gre_pcb *gpcb)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct socket *so = gpcb_so(gpcb);
>> +
>> +    KASSERT(so->so_pcb == inp);
>> +
>> +    so->so_pcb = NULL;
>> +    sofree(so, SL_NOUNLOCK);
>> +
>> +    m_freem(inp->inp_options);
>> +    if (inp->inp_route.ro_rt != NULL) {
>> +            rtfree(inp->inp_route.ro_rt);
>> +            inp->inp_route.ro_rt = NULL;
>> +    }
>> +
>> +    if (ISSET(inp->inp_flags, INP_IPV6)) {
>> +#ifdef INET6
>> +            ip6_freepcbopts(inp->inp_outputopts6);
>> +            ip6_freemoptions(inp->inp_moptions6);
>> +#endif
>> +    } else {
>> +            ip_freemoptions(inp->inp_moptions);
>> +    }
>> +
>> +    KASSERT((struct gre_pcb *)inp->inp_ppcb == (struct gre_pcb *)inp);
>> +
>> +    /* a pcb that was never bound or connected has no key; that is fine */
>> +    (void)gre_disconnect(gpcb);
>> +    pool_put(&gre_pcb_pool, gpcb);
>> +}
>> +
>> +/*
>> + * Detach the GRE pcb from a socket.  The socket must be locked.
>> + * Returns EINVAL if the socket has no pcb.
>> + */
>> +int
>> +gre_detach(struct socket *so)
>> +{
>> +    struct inpcb *inp;
>> +
>> +    soassertlocked(so);
>> +
>> +    if ((inp = sotoinpcb(so)) == NULL)
>> +            return (EINVAL);
>> +
>> +    gre_inpdetach(inp_gpcb(inp));
>> +    return (0);
>> +}
>> +/*
>> + * Bind a GRE socket to a local address and GRE protocol.
>> + *
>> + * The sockaddr in addr is decoded with op_nametosa.  A wildcard
>> + * address selects the GRE_S_WILDCARD state, anything else stays
>> + * GRE_S_BOUND; a multicast address only widens the reuse flags.
>> + * A lookup key is then allocated, filled in and inserted.  When
>> + * gre_pcb_key_insert() hands back a key (treated here as "an equal
>> + * key already exists") the new key is released and the existing one
>> + * is shared, provided the reuse flags match the options of the first
>> + * socket already on that key.
>> + *
>> + * Returns 0 on success, EISCONN if the pcb already has a key, ENOMEM
>> + * if no key could be allocated, EADDRINUSE if the key is taken and
>> + * reuse does not apply, or the error from op_nametosa/op_is_local.
>> + */
>> +static int
>> +gre_bind(const struct gre_ops *ops, struct gre_pcb *gpcb, struct mbuf *addr)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct socket *so = gpcb_so(gpcb);
>> +    struct sockaddr *sa;
>> +    unsigned int state = GRE_S_BOUND;
>> +    struct gre_pcb_key *gk, *ogk;
>> +    int reuse = ISSET(so->so_options, SO_REUSEPORT);
>> +    int error;
>> +
>> +    if (gpcb->gpcb_pcb_key != NULL)
>> +            return (EISCONN);
>> +
>> +    error = (*ops->op_nametosa)(inp, addr, &sa);
>> +    if (error != 0)
>> +            return (error);
>> +
>> +    if ((*ops->op_is_wildcard)(sa)) {
>> +            state = GRE_S_WILDCARD;
>> +    } else if ((*ops->op_is_multicast)(sa)) {
>> +            /*
>> +             * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
>> +             * allow complete duplication of binding if
>> +             * SO_REUSEPORT is set, or if SO_REUSEADDR is set
>> +             * and a multicast address is bound on both
>> +             * new and duplicated sockets.
>> +             */
>> +            if (ISSET(so->so_options, SO_REUSEADDR|SO_REUSEPORT))
>> +                    reuse = SO_REUSEADDR|SO_REUSEPORT;
>> +    } else if (ISSET(so->so_options, SO_BINDANY) ||
>> +        (*ops->op_is_broadcast)(inp->inp_rtableid, sa) == 0) {
>> +            /*
>> +             * we must check that we are binding to an address we
>> +             * own except when:
>> +             * - SO_BINDANY is set or
>> +             * - op_is_broadcast() returns 0 for the address
>> +             *
>> +             * NOTE(review): this wording was inherited from the
>> +             * UDP bind code; presumably op_is_broadcast() returns
>> +             * 0 when sa is one of our broadcast addresses --
>> +             * confirm against the ops implementations.
>> +             */
>> +            ;
>> +    } else {
>> +            error = (*ops->op_is_local)(inp->inp_rtableid, sa);
>> +            if (error != 0)
>> +                    return (error);
>> +    }
>> +
>> +    gk = gre_pcb_key_get(gpcb);
>> +    if (gk == NULL)
>> +            return (ENOMEM);
>> +
>> +    gk->gk_family = sa->sa_family;
>> +    (*ops->op_addr)(&gk->gk_laddr, sa);
>> +    gk->gk_proto = (*ops->op_proto)(sa);
>> +
>> +    ogk = gre_pcb_key_insert(state, gk);
>> +    if (ogk != NULL) {
>> +            struct gre_pcb *ogpcb;
>> +
>> +            gre_pcb_key_put(gk);
>> +
>> +            ogpcb = gre_pcb_first(&ogk->gk_pcbs);
>> +            if (ogpcb != NULL) {
>> +                    struct socket *oso = gpcb_so(ogpcb);
>> +                    if (!ISSET(reuse, oso->so_options))
>> +                            return (EADDRINUSE);
>> +            }
>> +
>> +            gk = ogk;
>> +    }
>> +
>> +    /* commit */
>> +
>> +    gpcb->gpcb_pcb_key = gk;
>> +    gre_pcb_insert(&gk->gk_pcbs, gpcb);
>> +
>> +    inp->inp_laddru = gk->gk_laddr;
>> +    inp->inp_lport = gk->gk_proto;
>> +
>> +    gpcb->gpcb_reuse = reuse;
>> +
>> +    return (0);
>> +}
>> +/*
>> + * Connect a GRE socket to a foreign address.
>> + *
>> + * If the pcb has no key yet, or only a wildcard one, the local
>> + * address is chosen with op_selsrc and the protocol is taken from the
>> + * destination sockaddr.  Otherwise the local address and protocol of
>> + * the existing (bound) key are kept, and the destination must name
>> + * the same protocol.  The new connected key replaces the pcb's
>> + * previous key only after it has been inserted successfully.
>> + *
>> + * Returns 0 on success, EISCONN if already connected, EADDRNOTAVAIL
>> + * for a wildcard destination or a protocol mismatch, ENOBUFS if no
>> + * key could be allocated, EADDRINUSE if the key is taken and reuse
>> + * does not apply, or the error from op_nametosa/op_selsrc.
>> + */
>> +static int
>> +gre_connect(const struct gre_ops *ops, struct gre_pcb *gpcb, struct mbuf 
>> *addr)
>> +{
>> +    struct inpcb *inp = gpcb_inp(gpcb);
>> +    struct socket *so = gpcb_so(gpcb);
>> +    struct gre_pcb_key *bgk = gpcb->gpcb_pcb_key;
>> +    struct gre_pcb_key *gk, *ogk;
>> +    struct sockaddr *sa;
>> +    int reuse = ISSET(so->so_options, SO_REUSEPORT);
>> +    int error;
>> +
>> +    if (bgk != NULL && bgk->gk_state == GRE_S_CONNECTED)
>> +            return (EISCONN);
>> +
>> +    error = (*ops->op_nametosa)(inp, addr, &sa);
>> +    if (error != 0)
>> +            return (error);
>> +
>> +    /* don't allow connections to wildcard addresses */
>> +    if ((*ops->op_is_wildcard)(sa))
>> +            return (EADDRNOTAVAIL);
>> +
>> +    gk = gre_pcb_key_get(gpcb);
>> +    if (gk == NULL)
>> +            return (ENOBUFS);
>> +
>> +    if (bgk == NULL || bgk->gk_state == GRE_S_WILDCARD) {
>> +            error = (*ops->op_selsrc)(inp, sa, &gk->gk_laddr, NULL);
>> +            if (error != 0)
>> +                    goto put;
>> +
>> +            gk->gk_proto = (*ops->op_proto)(sa);
>> +    } else {
>> +            if (bgk->gk_proto != (*ops->op_proto)(sa)) {
>> +                    error = EADDRNOTAVAIL;
>> +                    goto put;
>> +            }
>> +
>> +            gk->gk_laddr = bgk->gk_laddr;
>> +            gk->gk_proto = bgk->gk_proto;
>> +            reuse |= gpcb->gpcb_reuse;
>> +    }
>> +
>> +    gk->gk_family = sa->sa_family;
>> +    (*ops->op_addr)(&gk->gk_faddr, sa);
>> +
>> +    ogk = gre_pcb_key_insert(GRE_S_CONNECTED, gk);
>> +    if (ogk != NULL) {
>> +            struct gre_pcb *ogpcb;
>> +
>> +            gre_pcb_key_put(gk);
>> +
>> +            ogpcb = gre_pcb_first(&ogk->gk_pcbs);
>> +            if (ogpcb != NULL) {
>> +                    struct socket *oso = gpcb_so(ogpcb);
>> +                    KASSERTMSG(oso != NULL, "ogk %p ogpcb %p oso %p",
>> +                        ogk, ogpcb, oso);
>> +                    if (!ISSET(reuse, oso->so_options))
>> +                            return (EADDRINUSE);
>> +            }
>> +
>> +            gk = ogk;
>> +    }
>> +
>> +    /* commit */
>> +
>> +    gre_disconnect(gpcb);
>> +    gpcb->gpcb_pcb_key = gk;
>> +    gre_pcb_insert(&gk->gk_pcbs, gpcb);
>> +
>> +    inp->inp_laddru = gk->gk_laddr;
>> +    inp->inp_faddru = gk->gk_faddr;
>> +    inp->inp_lport = inp->inp_fport = gk->gk_proto;
>> +
>> +    gpcb->gpcb_reuse = reuse;
>> +    soisconnected(so);
>> +    return (0);
>> +
>> +put:
>> +    gre_pcb_key_put(gk);
>> +    return (error);
>> +}
>> +
>> +
>> +/*
>> + * Report the local address of a socket through op_getsockname.
>> + * Returns ENOTCONN while the pcb has no lookup key.
>> + */
>> +static int
>> +gre_getsockname(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *addr)
>> +{
>> +    if (gpcb->gpcb_pcb_key == NULL)
>> +            return (ENOTCONN);
>> +
>> +    (*ops->op_getsockname)(gpcb_inp(gpcb), addr);
>> +    return (0);
>> +}
>> +
>> +/*
>> + * Report the foreign address of a socket through op_getpeername.
>> + * Returns ENOTCONN unless the pcb holds a key in the connected state.
>> + */
>> +static int
>> +gre_getpeername(const struct gre_ops *ops, struct gre_pcb *gpcb,
>> +    struct mbuf *addr)
>> +{
>> +    const struct gre_pcb_key *key = gpcb->gpcb_pcb_key;
>> +    int connected;
>> +
>> +    connected = (key != NULL && key->gk_state == GRE_S_CONNECTED);
>> +    if (!connected)
>> +            return (ENOTCONN);
>> +
>> +    (*ops->op_getpeername)(gpcb_inp(gpcb), addr);
>> +    return (0);
>> +}
>> +
>> +/*
>> + * Drop the pcb's lookup key.  The pcb is taken off the key's list;
>> + * once that list is empty the key is removed from the tree matching
>> + * its state and returned to the pool.
>> + *
>> + * Returns ENOTCONN if the pcb has no key, 0 otherwise.
>> + */
>> +static int
>> +gre_disconnect(struct gre_pcb *gpcb)
>> +{
>> +    struct gre_pcb_key *key = gpcb->gpcb_pcb_key;
>> +
>> +    if (key == NULL)
>> +            return (ENOTCONN);
>> +
>> +    gre_pcb_remove(&key->gk_pcbs, gpcb);
>> +    if (gre_pcb_empty(&key->gk_pcbs)) {
>> +            /* we were the last user of this key */
>> +            if (key->gk_state == GRE_S_WILDCARD)
>> +                    RBT_REMOVE(gre_tree_wildcards, &gre_wildcards, key);
>> +            else if (key->gk_state == GRE_S_BOUND)
>> +                    RBT_REMOVE(gre_tree_bound, &gre_bound, key);
>> +            else if (key->gk_state == GRE_S_CONNECTED)
>> +                    RBT_REMOVE(gre_tree_connected, &gre_connected, key);
>> +
>> +            pool_put(&gre_pcb_key_pool, key);
>> +    }
>> +
>> +    gpcb->gpcb_pcb_key = NULL;
>> +
>> +    return (0);
>> +}
>> +/*
>> + * Generic user request handler shared by the IPv4 and IPv6 wrappers.
>> + *
>> + * PRU_CONTROL is handed to op_control with the arguments recast and
>> + * returns before the socket lock assertion.  PRU_SEND, PRU_SOCKADDR
>> + * and PRU_PEERADDR return the result of their helper directly and do
>> + * not pass through the release path.  Every other request ends at
>> + * the release label, where control and m are freed unless the request
>> + * is PRU_RCVD, PRU_RCVOOB or PRU_SENSE.
>> + *
>> + * Returns EINVAL if the socket has no pcb, EOPNOTSUPP for requests
>> + * that are not supported, and panics on an unknown request.
>> + */
>> +static int
>> +gre_usrreq(const struct gre_ops *ops, struct socket *so, int req,
>> +    struct mbuf *m, struct mbuf *addr, struct mbuf *control, struct proc *p)
>> +{
>> +    struct inpcb *inp;
>> +    struct gre_pcb *gpcb;
>> +    int error = 0;
>> +
>> +    if (req == PRU_CONTROL) {
>> +            return ((*ops->op_control)(so, (u_long)m, (caddr_t)addr,
>> +                (struct ifnet *)control));
>> +    }
>> +
>> +    soassertlocked(so);
>> +
>> +    inp = sotoinpcb(so);
>> +    if (inp == NULL) {
>> +            error = EINVAL;
>> +            goto release;
>> +    }
>> +    gpcb = inp_gpcb(inp);
>> +
>> +    switch (req) {
>> +    case PRU_BIND:
>> +            error = gre_bind(ops, gpcb, addr);
>> +            break;
>> +
>> +    case PRU_LISTEN:
>> +            error = EOPNOTSUPP;
>> +            break;
>> +
>> +    case PRU_CONNECT:
>> +            error = gre_connect(ops, gpcb, addr);
>> +            break;
>> +
>> +    case PRU_CONNECT2:
>> +            error = EOPNOTSUPP;
>> +            break;
>> +
>> +    case PRU_ACCEPT:
>> +            error = EOPNOTSUPP;
>> +            break;
>> +
>> +    case PRU_DISCONNECT:
>> +            error = gre_disconnect(gpcb);
>> +            if (error != 0)
>> +                    break;
>> +
>> +            gpcb->gpcb_reuse = 0;
>> +            CLR(so->so_state, SS_ISCONNECTED); /* XXX cos udp_usrreq.c */
>> +            memset(&inp->inp_laddru, 0, sizeof(inp->inp_laddru));
>> +            memset(&inp->inp_faddru, 0, sizeof(inp->inp_faddru));
>> +            inp->inp_lport = inp->inp_fport = 0;
>> +            break;
>> +
>> +    case PRU_SHUTDOWN:
>> +            socantsendmore(so);
>> +            break;
>> +
>> +    case PRU_SEND:
>> +            return (gre_send(ops, gpcb, m, addr, control));
>> +
>> +    case PRU_ABORT:
>> +            soisdisconnected(so);
>> +            gre_inpdetach(gpcb);
>> +            break;
>> +
>> +    case PRU_SOCKADDR:
>> +            return (gre_getsockname(ops, gpcb, addr));
>> +            break;
>> +    case PRU_PEERADDR:
>> +            return (gre_getpeername(ops, gpcb, addr));
>> +            break;
>> +
>> +    case PRU_SENSE:
>> +            /* stat: don't bother with a block size. */
>> +            break;
>> +
>> +    case PRU_SENDOOB:
>> +    case PRU_FASTTIMO:
>> +    case PRU_SLOWTIMO:
>> +    case PRU_PROTORCV:
>> +    case PRU_PROTOSEND:
>> +    case PRU_RCVD:
>> +    case PRU_RCVOOB:
>> +            error = EOPNOTSUPP;
>> +            break;
>> +
>> +    default:
>> +            panic("%s req %d", __func__, req);
>> +    }
>> +
>> +release:
>> +    switch (req) {
>> +    case PRU_RCVD:
>> +    case PRU_RCVOOB:
>> +    case PRU_SENSE:
>> +            break;
>> +    default:
>> +            m_freem(control);
>> +            m_freem(m);
>> +            break;
>> +    }
>> +
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Set a GRE level socket option.
>> + *
>> + * GRE_CKSUM and GRE_RECVSEQ take an int used as a boolean.  GRE_KEY
>> + * and GRE_SEQ take a 32 bit value; a missing or empty mbuf turns the
>> + * respective header field off again.
>> + *
>> + * Returns EISCONN, EINVAL or ENOPROTOOPT on failure, 0 on success.
>> + */
>> +static int
>> +gre_setopt(struct gre_pcb *gpcb, int optname, struct mbuf *m)
>> +{
>> +    /*
>> +     * the options that end up in the lookup key can only be changed
>> +     * while the socket is completely disconnected. the amount of
>> +     * code needed to try changing the gre_pcb_key was "quite large"
>> +     * and arguably not worth it.
>> +     */
>> +    if (optname == GRE_CKSUM || optname == GRE_KEY ||
>> +        optname == GRE_SEQ) {
>> +            if (gpcb->gpcb_pcb_key != NULL)
>> +                    return (EISCONN);
>> +    }
>> +
>> +    switch (optname) {
>> +    case GRE_CKSUM:
>> +            if (m == NULL || m->m_len != sizeof(int))
>> +                    return (EINVAL);
>> +
>> +            if (*mtod(m, int *) != 0)
>> +                    SET(gpcb->gpcb_flags, htons(GRE_CP));
>> +            else
>> +                    CLR(gpcb->gpcb_flags, htons(GRE_CP));
>> +            return (0);
>> +
>> +    case GRE_KEY:
>> +            if (m == NULL || m->m_len == 0) {
>> +                    /* no value: stop sending a key */
>> +                    CLR(gpcb->gpcb_flags, htons(GRE_KP));
>> +                    gpcb->gpcb_key = htonl(0);
>> +                    return (0);
>> +            }
>> +            if (m->m_len != sizeof(gpcb->gpcb_key))
>> +                    return (EINVAL);
>> +
>> +            SET(gpcb->gpcb_flags, htons(GRE_KP));
>> +            htobem32(&gpcb->gpcb_key, *mtod(m, uint32_t *));
>> +            return (0);
>> +
>> +    case GRE_SEQ:
>> +            if (m == NULL || m->m_len == 0) {
>> +                    /* no value: stop sending sequence numbers */
>> +                    CLR(gpcb->gpcb_flags, htons(GRE_SP));
>> +                    gpcb->gpcb_seq = 0;
>> +                    return (0);
>> +            }
>> +            if (m->m_len != sizeof(gpcb->gpcb_seq))
>> +                    return (EINVAL);
>> +
>> +            SET(gpcb->gpcb_flags, htons(GRE_SP));
>> +            gpcb->gpcb_seq = *mtod(m, uint32_t *);
>> +            return (0);
>> +
>> +    case GRE_RECVSEQ:
>> +            if (m == NULL || m->m_len != sizeof(int))
>> +                    return (EINVAL);
>> +
>> +            if (*mtod(m, int *) != 0)
>> +                    SET(gpcb->gpcb_pflags, GREPCB_RECVSEQ);
>> +            else
>> +                    CLR(gpcb->gpcb_pflags, GREPCB_RECVSEQ);
>> +            return (0);
>> +
>> +    default:
>> +            return (ENOPROTOOPT);
>> +    }
>> +}
>> +
>> +/*
>> + * Read a GRE level socket option into m.
>> + *
>> + * GRE_KEY and GRE_SEQ return ENOTCONN while the respective header
>> + * field is not enabled on the pcb; GRE_CKSUM reports 0 or 1.
>> + * Unknown options return ENOPROTOOPT.
>> + */
>> +static int
>> +gre_getopt(struct gre_pcb *gpcb, int optname, struct mbuf *m)
>> +{
>> +    switch (optname) {
>> +    case GRE_KEY:
>> +            if (!ISSET(gpcb->gpcb_flags, htons(GRE_KP)))
>> +                    return (ENOTCONN);
>> +
>> +            /* the key is stored big endian, hand it back in host order */
>> +            m->m_len = sizeof(gpcb->gpcb_key);
>> +            *mtod(m, uint32_t *) = bemtoh32(&gpcb->gpcb_key);
>> +            break;
>> +    case GRE_CKSUM:
>> +            m->m_len = sizeof(int);
>> +            *mtod(m, int *) = !!ISSET(gpcb->gpcb_flags, htons(GRE_CP));
>> +            break;
>> +    case GRE_SEQ:
>> +            if (!ISSET(gpcb->gpcb_flags, htons(GRE_SP)))
>> +                    return (ENOTCONN);
>> +
>> +            /* report the sequence number, not the key */
>> +            m->m_len = sizeof(gpcb->gpcb_seq);
>> +            *mtod(m, uint32_t *) = gpcb->gpcb_seq;
>> +            break;
>> +    default:
>> +            return (ENOPROTOOPT);
>> +    }
>> +
>> +    return (0);
>> +}
>> +
>> +/*
>> + * Socket option processing.
>> + *
>> + * Options at the IPPROTO_GRE level are handled here by gre_setopt()
>> + * and gre_getopt(); every other level is passed on to the address
>> + * family's op_ctloutput.  Returns ECONNRESET if the socket has no pcb
>> + * and panics on an unknown op.
>> + */
>> +static int
>> +gre_ctloutput(const struct gre_ops *ops, int op, struct socket *so,
>> +    int level, int optname, struct mbuf *m)
>> +{
>> +    struct inpcb *inp;
>> +    struct gre_pcb *gpcb;
>> +    int error;
>> +
>> +    inp = sotoinpcb(so);
>> +    if (inp == NULL)
>> +            return (ECONNRESET);
>> +    if (level != IPPROTO_GRE) {
>> +            /* the first argument is the operation, not the level */
>> +            return ((*ops->op_ctloutput)(op, so, level, optname, m));
>> +    }
>> +
>> +    gpcb = inp_gpcb(inp);
>> +
>> +    switch (op) {
>> +    case PRCO_SETOPT:
>> +            error = gre_setopt(gpcb, optname, m);
>> +            break;
>> +    case PRCO_GETOPT:
>> +            error = gre_getopt(gpcb, optname, m);
>> +            break;
>> +    default:
>> +            panic("%s op %d", __func__, op);
>> +    }
>> +
>> +    return (error);
>> +}
>> +
>> +/*
>> + * Make the len bytes that start at *offp contiguous and return a
>> + * pointer to them, advancing *offp past them.
>> + *
>> + * Returns NULL with *mp untouched if the packet is too short, and
>> + * NULL with *mp set to NULL if m_pullup() failed.
>> + */
>> +static void *
>> +gre_pullup(struct mbuf **mp, int *offp, int len)
>> +{
>> +    int start = *offp;
>> +    int end = start + len;
>> +    struct mbuf *m = *mp;
>> +
>> +    if (m->m_pkthdr.len < end)
>> +            return (NULL); /* decline */
>> +
>> +    m = m_pullup(m, end);
>> +    *mp = m;
>> +    if (m == NULL)
>> +            return (NULL);
>> +
>> +    *offp = end;
>> +
>> +    return (mtod(m, caddr_t) + start);
>> +}
>> +
>> +/*
>> + * Decide whether a packet with the given ttl may be queued on a pcb:
>> + * the socket must still be able to receive and the ttl must not be
>> + * below the pcb's minimum.  Returns 1 if so, 0 otherwise.
>> + */
>> +static int
>> +gre_candeliver(const struct gre_pcb *gpcb, uint8_t ttl)
>> +{
>> +    const struct inpcb *inp = gpcb_inp(gpcb);
>> +
>> +    return (!ISSET(inp->inp_socket->so_state, SS_CANTRCVMORE) &&
>> +        inp->inp_ip_minttl <= ttl);
>> +}
>> +/*
>> + * Try to deliver a received GRE packet to matching GRE sockets.
>> + *
>> + * The GRE header following the iphlen bytes of IP header is parsed,
>> + * including the optional checksum, key and sequence number fields,
>> + * and the flags, protocol, key and routing table are stored in *key.
>> + * The key is looked up in the connected, bound and wildcard trees,
>> + * in that order.
>> + *
>> + * Returns the mbuf when the packet is declined (wrong version, flags
>> + * outside GRE_VALID_MASK, truncated header, or no matching key) so
>> + * the caller can pass it on.  Returns NULL when the packet was
>> + * queued or dropped; a failed m_pullup() in gre_pullup() also leaves
>> + * m NULL, so those paths return NULL as well.
>> + *
>> + * On a match the packet goes to every pcb on the key that
>> + * gre_candeliver() accepts: copies are made with m_dup_pkt() and the
>> + * original mbuf is queued last.  If a copy cannot be allocated the
>> + * remaining pcbs are skipped.
>> + *
>> + * NOTE(review): the checksum field is read but never verified (see
>> + * the XXX below), and the first gre_candeliver() call assumes the
>> + * key's pcb list is not empty -- confirm that keys with an empty
>> + * list never stay in the trees.
>> + */
>> +static struct mbuf *
>> +gre_ip_input(const struct gre_ops *ops, struct mbuf *m, int iphlen,
>> +    uint8_t ttl, struct gre_pcb_key *key)
>> +{
>> +    int hlen = iphlen;
>> +    struct gre_header *gh;
>> +    struct gre_pcb_key *gk;
>> +    struct gre_pcb *gpcb, *ngpcb;
>> +    uint16_t cksum = 0;
>> +    uint32_t seqno = 0;
>> +
>> +    gh = gre_pullup(&m, &hlen, sizeof(*gh));
>> +    if (gh == NULL)
>> +            return (m);
>> +
>> +    if ((gh->gre_flags & htons(GRE_VERS_MASK)) != htons(GRE_VERS_0))
>> +            return (m); /* decline */
>> +
>> +    if (ISSET(gh->gre_flags, ~htons(GRE_VALID_MASK)))
>> +            return (m); /* decline */
>> +
>> +    key->gk_flags = gh->gre_flags;
>> +    key->gk_proto = gh->gre_proto;
>> +
>> +    if (ISSET(key->gk_flags, htons(GRE_CP))) {
>> +            struct gre_h_cksum *gch;
>> +
>> +            gch = gre_pullup(&m, &hlen, sizeof(*gch));
>> +            if (gch == NULL)
>> +                    return (m);
>> +
>> +            cksum = gch->gre_cksum;
>> +
>> +            /* XXX ignore Reserved (Offset) field */
>> +    }
>> +
>> +    if (ISSET(key->gk_flags, htons(GRE_KP))) {
>> +            struct gre_h_key *gkh;
>> +
>> +            gkh = gre_pullup(&m, &hlen, sizeof(*gkh));
>> +            if (gkh == NULL)
>> +                    return (m);
>> +
>> +            key->gk_key = gkh->gre_key;
>> +    }
>> +
>> +    if (ISSET(key->gk_flags, htons(GRE_SP))) {
>> +            struct gre_h_seq *gsh;
>> +
>> +            gsh = gre_pullup(&m, &hlen, sizeof(*gsh));
>> +            if (gsh == NULL)
>> +                    return (m);
>> +
>> +            seqno = bemtoh32(&gsh->gre_seq);
>> +    }
>> +
>> +    key->gk_rtableid = m->m_pkthdr.ph_rtableid;
>> +
>> +    gk = RBT_FIND(gre_tree_connected, &gre_connected, key);
>> +    if (gk == NULL) {
>> +            gk = RBT_FIND(gre_tree_bound, &gre_bound, key);
>> +            if (gk == NULL) {
>> +                    gk = RBT_FIND(gre_tree_wildcards, &gre_wildcards, key);
>> +                    if (gk == NULL)
>> +                            return (m); /* decline */
>> +            }
>> +    }
>> +
>> +    /* it's ours now */
>> +
>> +    if (ISSET(key->gk_flags, htons(GRE_CP))) {
>> +            /* XXX actually do the checksum calc */
>> +    }
>> +
>> +    gpcb = gre_pcb_first(&gk->gk_pcbs);
>> +    while (!gre_candeliver(gpcb, ttl)) {
>> +            gpcb = gre_pcb_next(gpcb);
>> +            if (gpcb == NULL)
>> +                    goto drop;
>> +    }
>> +
>> +    ngpcb = gpcb;
>> +    for (;;) {
>> +            struct mbuf *mm;
>> +
>> +            ngpcb = gre_pcb_next(ngpcb);
>> +            if (ngpcb == NULL)
>> +                    break;
>> +
>> +            if (!gre_candeliver(ngpcb, ttl))
>> +                    continue;
>> +
>> +            mm = m_dup_pkt(m, 0, M_DONTWAIT);
>> +            if (mm == NULL) {
>> +                    /* assume further copies will also fail */
>> +                    break;
>> +            }
>> +
>> +            (*ops->op_sbappend)(gpcb, mm, hlen, seqno);
>> +            gpcb = ngpcb;
>> +    }
>> +
>> +    (*ops->op_sbappend)(gpcb, m, hlen, seqno);
>> +
>> +    return (NULL);
>> +
>> +drop:
>> +    m_freem(m);
>> +    return (NULL);
>> +}
>> +
>> +/*
>> + * Order two keys by the fields every lookup state has in common:
>> + * protocol, flags, key (only when the first key has GRE_KP set),
>> + * routing table and address family.  Returns -1, 0 or 1.
>> + */
>> +static int
>> +gre_pcb_key_cmp_wildcard(const struct gre_pcb_key *a,
>> +    const struct gre_pcb_key *b)
>> +{
>> +    if (a->gk_proto != b->gk_proto)
>> +            return (a->gk_proto > b->gk_proto ? 1 : -1);
>> +
>> +    if (a->gk_flags != b->gk_flags)
>> +            return (a->gk_flags > b->gk_flags ? 1 : -1);
>> +
>> +    /* the flags are equal here, so b carries a key whenever a does */
>> +    if (ISSET(a->gk_flags, htons(GRE_KP)) && a->gk_key != b->gk_key)
>> +            return (a->gk_key > b->gk_key ? 1 : -1);
>> +
>> +    if (a->gk_rtableid != b->gk_rtableid)
>> +            return (a->gk_rtableid > b->gk_rtableid ? 1 : -1);
>> +
>> +    if (a->gk_family != b->gk_family)
>> +            return (a->gk_family > b->gk_family ? 1 : -1);
>> +
>> +    return (0);
>> +}
>> +
>> +RBT_GENERATE(gre_tree_wildcards, gre_pcb_key, gk_entry,
>> +    gre_pcb_key_cmp_wildcard);
>> +
>> +/*
>> + * Order two keys for the bound tree: the common fields compared by
>> + * gre_pcb_key_cmp_wildcard() first, then the local address of the
>> + * key's address family.  An unknown family ends in unhandled_af().
>> + */
>> +static int
>> +gre_pcb_key_cmp_bound(const struct gre_pcb_key *a,
>> +    const struct gre_pcb_key *b)
>> +{
>> +    int rv;
>> +
>> +    rv = gre_pcb_key_cmp_wildcard(a, b);
>> +    if (rv != 0)
>> +            return (rv);
>> +
>> +    /* the families are equal at this point, so a's decides */
>> +    switch (a->gk_family) {
>> +    case AF_INET:
>> +            rv = memcmp(&a->gk_laddr4, &b->gk_laddr4, sizeof(a->gk_laddr4));
>> +            break;
>> +#ifdef INET6
>> +    case AF_INET6:
>> +            rv = memcmp(&a->gk_laddr6, &b->gk_laddr6, sizeof(a->gk_laddr6));
>> +            break;
>> +#endif
>> +    default:
>> +            unhandled_af(a->gk_family);
>> +    }
>> +
>> +    return (rv);
>> +}
>> +
>> +RBT_GENERATE(gre_tree_bound, gre_pcb_key, gk_entry, gre_pcb_key_cmp_bound);
>> +
>> +/*
>> + * Order two keys for the connected tree: everything compared by
>> + * gre_pcb_key_cmp_bound() first, then the foreign address of the
>> + * key's address family.  An unknown family ends in unhandled_af().
>> + */
>> +static int
>> +gre_pcb_key_cmp_connected(const struct gre_pcb_key *a,
>> +    const struct gre_pcb_key *b)
>> +{
>> +    int rv;
>> +
>> +    rv = gre_pcb_key_cmp_bound(a, b);
>> +    if (rv != 0)
>> +            return (rv);
>> +
>> +    /* the families are equal at this point, so a's decides */
>> +    switch (a->gk_family) {
>> +    case AF_INET:
>> +            rv = memcmp(&a->gk_faddr4, &b->gk_faddr4, sizeof(a->gk_faddr4));
>> +            break;
>> +#ifdef INET6
>> +    case AF_INET6:
>> +            rv = memcmp(&a->gk_faddr6, &b->gk_faddr6, sizeof(a->gk_faddr6));
>> +            break;
>> +#endif
>> +    default:
>> +            unhandled_af(a->gk_family);
>> +    }
>> +
>> +    return (rv);
>> +}
>> +
>> +RBT_GENERATE(gre_tree_connected, gre_pcb_key, gk_entry,
>> +    gre_pcb_key_cmp_connected);
>> Index: sys/netinet6/in6_proto.c
>> ===================================================================
>> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
>> retrieving revision 1.104
>> diff -u -p -r1.104 in6_proto.c
>> --- sys/netinet6/in6_proto.c 13 Jun 2019 08:12:11 -0000      1.104
>> +++ sys/netinet6/in6_proto.c 29 Oct 2019 07:57:58 -0000
>> @@ -117,7 +117,7 @@
>>  
>>  #include "gre.h"
>>  #if NGRE > 0
>> -#include <net/if_gre.h>
>> +#include <netinet/gre_var.h>
>>  #endif
>>  
>>  /*
>> @@ -340,11 +340,22 @@ const struct protosw inet6sw[] = {
>>    .pr_domain        = &inet6domain,
>>    .pr_protocol      = IPPROTO_GRE,
>>    .pr_flags = PR_ATOMIC|PR_ADDR,
>> -  .pr_input = gre_input6,
>> +  .pr_input = gre_ip6_input,
>>    .pr_ctloutput     = rip6_ctloutput,
>>    .pr_usrreq        = rip6_usrreq,
>>    .pr_attach        = rip6_attach,
>>    .pr_detach        = rip6_detach,
>> +},
>> +{
>> +  .pr_type  = SOCK_DGRAM,
>> +  .pr_domain        = &inet6domain,
>> +  .pr_protocol      = IPPROTO_GRE,
>> +  .pr_flags = PR_ATOMIC|PR_ADDR,
>> +  .pr_input = gre_ip6_input,
>> +  .pr_ctloutput     = gre_ip6_ctloutput,
>> +  .pr_usrreq        = gre_ip6_usrreq,
>> +  .pr_attach        = gre_attach,
>> +  .pr_detach        = gre_detach,
>>  },
>>  #endif /* NGRE */
> 

Reply via email to