Module Name: src Committed By: knakahara Date: Wed Jan 10 10:56:31 UTC 2018
Modified Files: src/distrib/sets/lists/comp: mi src/sys/conf: files src/sys/net: Makefile files.net if_types.h src/sys/netinet: in.c in.h ip_var.h src/sys/netinet6: in6.c in6.h ip6_var.h src/sys/netipsec: Makefile files.netipsec ipsec.h key.c key.h Added Files: src/sys/net: if_ipsec.c if_ipsec.h src/sys/netipsec: ipsecif.c ipsecif.h Log Message: add ipsec(4) interface, which is used for route-based VPN. man and ATF are added later, please see man for details. reviewed by christos@n.o, joerg@n.o and ozaki-r@n.o, thanks. https://mail-index.netbsd.org/tech-net/2017/12/18/msg006557.html To generate a diff of this commit: cvs rdiff -u -r1.2167 -r1.2168 src/distrib/sets/lists/comp/mi cvs rdiff -u -r1.1190 -r1.1191 src/sys/conf/files cvs rdiff -u -r1.33 -r1.34 src/sys/net/Makefile cvs rdiff -u -r1.13 -r1.14 src/sys/net/files.net cvs rdiff -u -r0 -r1.1 src/sys/net/if_ipsec.c src/sys/net/if_ipsec.h cvs rdiff -u -r1.27 -r1.28 src/sys/net/if_types.h cvs rdiff -u -r1.213 -r1.214 src/sys/netinet/in.c cvs rdiff -u -r1.102 -r1.103 src/sys/netinet/in.h cvs rdiff -u -r1.121 -r1.122 src/sys/netinet/ip_var.h cvs rdiff -u -r1.256 -r1.257 src/sys/netinet6/in6.c cvs rdiff -u -r1.87 -r1.88 src/sys/netinet6/in6.h cvs rdiff -u -r1.74 -r1.75 src/sys/netinet6/ip6_var.h cvs rdiff -u -r1.5 -r1.6 src/sys/netipsec/Makefile cvs rdiff -u -r1.12 -r1.13 src/sys/netipsec/files.netipsec cvs rdiff -u -r1.61 -r1.62 src/sys/netipsec/ipsec.h cvs rdiff -u -r0 -r1.1 src/sys/netipsec/ipsecif.c src/sys/netipsec/ipsecif.h cvs rdiff -u -r1.246 -r1.247 src/sys/netipsec/key.c cvs rdiff -u -r1.33 -r1.34 src/sys/netipsec/key.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/distrib/sets/lists/comp/mi diff -u src/distrib/sets/lists/comp/mi:1.2167 src/distrib/sets/lists/comp/mi:1.2168 --- src/distrib/sets/lists/comp/mi:1.2167 Tue Jan 9 03:31:13 2018 +++ src/distrib/sets/lists/comp/mi Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.2167 2018/01/09 03:31:13 christos Exp $ +# $NetBSD: mi,v 1.2168 2018/01/10 10:56:30 knakahara Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. ./etc/mtree/set.comp comp-sys-root @@ -2224,6 +2224,7 @@ ./usr/include/net/if_hippi.h comp-c-include ./usr/include/net/if_ieee1394.h comp-c-include ./usr/include/net/if_ieee80211.h comp-obsolete obsolete +./usr/include/net/if_ipsec.h comp-c-include ./usr/include/net/if_l2tp.h comp-c-include ./usr/include/net/if_llc.h comp-c-include ./usr/include/net/if_media.h comp-c-include @@ -2382,6 +2383,7 @@ ./usr/include/netipsec/ipcomp_var.h comp-c-include ./usr/include/netipsec/ipip_var.h comp-c-include ./usr/include/netipsec/ipsec.h comp-c-include +./usr/include/netipsec/ipsecif.h comp-c-include ./usr/include/netipsec/ipsec_var.h comp-c-include ./usr/include/netipsec/keydb.h comp-obsolete obsolete ./usr/include/netipsec/keysock.h comp-c-include Index: src/sys/conf/files diff -u src/sys/conf/files:1.1190 src/sys/conf/files:1.1191 --- src/sys/conf/files:1.1190 Tue Jan 9 03:31:12 2018 +++ src/sys/conf/files Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -# $NetBSD: files,v 1.1190 2018/01/09 03:31:12 christos Exp $ +# $NetBSD: files,v 1.1191 2018/01/10 10:56:30 knakahara Exp $ # @(#)files.newconf 7.5 (Berkeley) 5/10/93 version 20171118 @@ -1463,6 +1463,7 @@ defpseudo carp: ifnet, ether, arp defpseudodev etherip: ifnet, ether, arp defpseudodev l2tp: ifnet, ether, arp defpseudo canloop: ifnet +defpseudo ipsecif: ifnet # avoid to confuse ipsec itself option defpseudo sequencer defpseudo clockctl Index: src/sys/net/Makefile diff -u src/sys/net/Makefile:1.33 src/sys/net/Makefile:1.34 --- src/sys/net/Makefile:1.33 Thu Feb 
16 08:12:44 2017 +++ src/sys/net/Makefile Wed Jan 10 10:56:30 2018 @@ -1,10 +1,10 @@ -# $NetBSD: Makefile,v 1.33 2017/02/16 08:12:44 knakahara Exp $ +# $NetBSD: Makefile,v 1.34 2018/01/10 10:56:30 knakahara Exp $ INCSDIR= /usr/include/net INCS= bpf.h bpfjit.h bpfdesc.h dlt.h ethertypes.h if.h if_arc.h if_arp.h \ if_atm.h if_bridgevar.h if_dl.h if_ether.h if_etherip.h if_fddi.h if_gif.h \ - if_gre.h if_hippi.h if_ieee1394.h if_llc.h if_media.h if_mpls.h \ + if_gre.h if_hippi.h if_ieee1394.h if_ipsec.h if_llc.h if_media.h if_mpls.h \ if_pflog.h if_ppp.h if_pppoe.h if_l2tp.h if_sppp.h if_srt.h if_stf.h \ if_tap.h if_token.h if_tun.h if_types.h if_vlanvar.h net_stats.h \ netisr.h pfil.h pfkeyv2.h pfvar.h ppp-comp.h ppp_defs.h radix.h \ Index: src/sys/net/files.net diff -u src/sys/net/files.net:1.13 src/sys/net/files.net:1.14 --- src/sys/net/files.net:1.13 Thu Feb 16 08:12:44 2017 +++ src/sys/net/files.net Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -# $NetBSD: files.net,v 1.13 2017/02/16 08:12:44 knakahara Exp $ +# $NetBSD: files.net,v 1.14 2018/01/10 10:56:30 knakahara Exp $ # XXX CLEANUP define net @@ -22,6 +22,7 @@ file net/if_gif.c gif needs-flag file net/if_gre.c gre needs-flag file net/if_hippisubr.c hippi needs-flag file net/if_ieee1394subr.c ieee1394 +file net/if_ipsec.c ipsec & ipsecif file net/if_llatbl.c inet | inet6 file net/if_l2tp.c l2tp needs-flag file net/if_loop.c loop Index: src/sys/net/if_types.h diff -u src/sys/net/if_types.h:1.27 src/sys/net/if_types.h:1.28 --- src/sys/net/if_types.h:1.27 Thu Feb 16 08:12:44 2017 +++ src/sys/net/if_types.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: if_types.h,v 1.27 2017/02/16 08:12:44 knakahara Exp $ */ +/* $NetBSD: if_types.h,v 1.28 2018/01/10 10:56:30 knakahara Exp $ */ /* * Copyright (c) 1989, 1993, 1994 @@ -265,5 +265,6 @@ #define IFT_PFSYNC 0xf6 /* Packet filter state syncing */ #define IFT_L2TP 0xf7 /* L2TPv3 I/F */ #define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */ +#define IFT_IPSEC 
0xf9 /* IPsec I/F */ #endif /* !_NET_IF_TYPES_H_ */ Index: src/sys/netinet/in.c diff -u src/sys/netinet/in.c:1.213 src/sys/netinet/in.c:1.214 --- src/sys/netinet/in.c:1.213 Wed Dec 27 08:35:20 2017 +++ src/sys/netinet/in.c Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $ */ +/* $NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $"); #include "arp.h" @@ -1877,6 +1877,44 @@ out: return ia; } +int +in_tunnel_validate(const struct ip *ip, struct in_addr src, struct in_addr dst) +{ + struct in_ifaddr *ia4; + int s; + + /* check for address match */ + if (src.s_addr != ip->ip_dst.s_addr || + dst.s_addr != ip->ip_src.s_addr) + return 0; + + /* martian filters on outer source - NOT done in ip_input! 
*/ + if (IN_MULTICAST(ip->ip_src.s_addr)) + return 0; + switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) { + case 0: + case 127: + case 255: + return 0; + } + /* reject packets with broadcast on source */ + s = pserialize_read_enter(); + IN_ADDRLIST_READER_FOREACH(ia4) { + if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0) + continue; + if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) { + pserialize_read_exit(s); + return 0; + } + } + pserialize_read_exit(s); + + /* NOTE: packet may dropped by uRPF */ + + /* return valid bytes length */ + return sizeof(src) + sizeof(dst); +} + #if NARP > 0 struct in_llentry { Index: src/sys/netinet/in.h diff -u src/sys/netinet/in.h:1.102 src/sys/netinet/in.h:1.103 --- src/sys/netinet/in.h:1.102 Mon Jan 1 00:51:36 2018 +++ src/sys/netinet/in.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: in.h,v 1.102 2018/01/01 00:51:36 christos Exp $ */ +/* $NetBSD: in.h,v 1.103 2018/01/10 10:56:30 knakahara Exp $ */ /* * Copyright (c) 1982, 1986, 1990, 1993 @@ -587,6 +587,9 @@ struct ip_moptions; struct in_ifaddr *in_selectsrc(struct sockaddr_in *, struct route *, int, struct ip_moptions *, int *, struct psref *); +struct ip; +int in_tunnel_validate(const struct ip *, struct in_addr, struct in_addr); + #define in_hosteq(s,t) ((s).s_addr == (t).s_addr) #define in_nullhost(x) ((x).s_addr == INADDR_ANY) Index: src/sys/netinet/ip_var.h diff -u src/sys/netinet/ip_var.h:1.121 src/sys/netinet/ip_var.h:1.122 --- src/sys/netinet/ip_var.h:1.121 Mon Dec 11 05:47:18 2017 +++ src/sys/netinet/ip_var.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_var.h,v 1.121 2017/12/11 05:47:18 ryo Exp $ */ +/* $NetBSD: ip_var.h,v 1.122 2018/01/10 10:56:30 knakahara Exp $ */ /* * Copyright (c) 1982, 1986, 1993 @@ -164,8 +164,9 @@ struct ip_pktopts { #define IP_STAT_NOGIF 28 /* no match gif found */ #define IP_STAT_BADADDR 29 /* invalid address on header */ #define IP_STAT_NOL2TP 30 /* no match l2tp found */ +#define IP_STAT_NOIPSEC 
31 /* no match ipsec(4) found */ -#define IP_NSTATS 31 +#define IP_NSTATS 32 #ifdef _KERNEL Index: src/sys/netinet6/in6.c diff -u src/sys/netinet6/in6.c:1.256 src/sys/netinet6/in6.c:1.257 --- src/sys/netinet6/in6.c:1.256 Mon Dec 25 04:41:49 2017 +++ src/sys/netinet6/in6.c Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $ */ +/* $NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $ */ /* $KAME: in6.c,v 1.198 2001/07/18 09:12:38 itojun Exp $ */ /* @@ -62,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" @@ -2324,6 +2324,24 @@ in6_setmaxmtu(void) in6_maxmtu = maxmtu; } +int +in6_tunnel_validate(const struct ip6_hdr *ip6, const struct in6_addr *src, + const struct in6_addr *dst) +{ + + /* check for address match */ + if (!IN6_ARE_ADDR_EQUAL(src, &ip6->ip6_dst) || + !IN6_ARE_ADDR_EQUAL(dst, &ip6->ip6_src)) + return 0; + + /* martian filters on outer source - done in ip6_input */ + + /* NOTE: the pakcet may be dropped by uRPF. */ + + /* return valid bytes length */ + return sizeof(*src) + sizeof(*dst); +} + /* * Provide the length of interface identifiers to be used for the link attached * to the given interface. 
The length should be defined in "IPv6 over Index: src/sys/netinet6/in6.h diff -u src/sys/netinet6/in6.h:1.87 src/sys/netinet6/in6.h:1.88 --- src/sys/netinet6/in6.h:1.87 Mon Feb 15 14:59:03 2016 +++ src/sys/netinet6/in6.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: in6.h,v 1.87 2016/02/15 14:59:03 rtr Exp $ */ +/* $NetBSD: in6.h,v 1.88 2018/01/10 10:56:30 knakahara Exp $ */ /* $KAME: in6.h,v 1.83 2001/03/29 02:55:07 jinmei Exp $ */ /* @@ -712,6 +712,10 @@ extern void in6_if_down(struct ifnet *); extern void addrsel_policy_init(void); extern u_char ip6_protox[]; +struct ip6_hdr; +int in6_tunnel_validate(const struct ip6_hdr *, const struct in6_addr *, + const struct in6_addr *); + #define satosin6(sa) ((struct sockaddr_in6 *)(sa)) #define satocsin6(sa) ((const struct sockaddr_in6 *)(sa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) Index: src/sys/netinet6/ip6_var.h diff -u src/sys/netinet6/ip6_var.h:1.74 src/sys/netinet6/ip6_var.h:1.75 --- src/sys/netinet6/ip6_var.h:1.74 Fri Mar 3 07:13:06 2017 +++ src/sys/netinet6/ip6_var.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ip6_var.h,v 1.74 2017/03/03 07:13:06 ozaki-r Exp $ */ +/* $NetBSD: ip6_var.h,v 1.75 2018/01/10 10:56:30 knakahara Exp $ */ /* $KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $ */ /* @@ -226,8 +226,9 @@ struct ip6_pktopts { #define IP6_STAT_FORWARD_CACHEMISS 399 #define IP6_STAT_FASTFORWARD 400 /* packets fast forwarded */ #define IP6_STAT_FASTFORWARDFLOWS 401 /* number of fast forward flows */ +#define IP6_STAT_NOIPSEC 402 /* no match ipsec(4) found */ -#define IP6_NSTATS 402 +#define IP6_NSTATS 403 #define IP6FLOW_HASHBITS 6 /* should not be a multiple of 8 */ Index: src/sys/netipsec/Makefile diff -u src/sys/netipsec/Makefile:1.5 src/sys/netipsec/Makefile:1.6 --- src/sys/netipsec/Makefile:1.5 Fri Jan 6 14:17:11 2012 +++ src/sys/netipsec/Makefile Wed Jan 10 10:56:30 2018 @@ -1,9 +1,10 @@ -# $NetBSD: Makefile,v 1.5 2012/01/06 14:17:11 drochner Exp $ +# $NetBSD: Makefile,v 
1.6 2018/01/10 10:56:30 knakahara Exp $ INCSDIR= /usr/include/netipsec INCS= ah_var.h esp_var.h ipcomp_var.h ipip_var.h ipsec_var.h \ keysock.h INCS+= ipsec.h +INCS+= ipsecif.h .include <bsd.kinc.mk> Index: src/sys/netipsec/files.netipsec diff -u src/sys/netipsec/files.netipsec:1.12 src/sys/netipsec/files.netipsec:1.13 --- src/sys/netipsec/files.netipsec:1.12 Wed Jun 5 19:01:26 2013 +++ src/sys/netipsec/files.netipsec Wed Jan 10 10:56:30 2018 @@ -1,8 +1,9 @@ -# $Id: files.netipsec,v 1.12 2013/06/05 19:01:26 christos Exp $ +# $Id: files.netipsec,v 1.13 2018/01/10 10:56:30 knakahara Exp $ # # defflag opt_ipsec.h IPSEC: opencrypto defflag opt_ipsec.h IPSEC_DEBUG +defflag opt_ipsec.h IPSEC_TX_TOS_CLEAR file netipsec/ipsec.c ipsec needs-flag file netipsec/ipsec_input.c ipsec @@ -19,3 +20,5 @@ file netipsec/key_debug.c ipsec file netipsec/keysock.c ipsec file netipsec/xform_tcp.c ipsec & tcp_signature + +file netipsec/ipsecif.c ipsec & ipsecif Index: src/sys/netipsec/ipsec.h diff -u src/sys/netipsec/ipsec.h:1.61 src/sys/netipsec/ipsec.h:1.62 --- src/sys/netipsec/ipsec.h:1.61 Tue Oct 3 08:56:52 2017 +++ src/sys/netipsec/ipsec.h Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ipsec.h,v 1.61 2017/10/03 08:56:52 ozaki-r Exp $ */ +/* $NetBSD: ipsec.h,v 1.62 2018/01/10 10:56:30 knakahara Exp $ */ /* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netipsec/ipsec.h,v 1.2.4.2 2004/02/14 22:23:23 bms Exp $ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ @@ -84,6 +84,10 @@ struct secpolicy { #define IPSEC_SPSTATE_DEAD 0 #define IPSEC_SPSTATE_ALIVE 1 + u_int origin; /* who generate this SP. 
*/ +#define IPSEC_SPORIGIN_USER 0 +#define IPSEC_SPORIGIN_KERNEL 1 + u_int policy; /* DISCARD, NONE or IPSEC, see keyv2.h */ struct ipsecrequest *req; /* pointer to the ipsec request tree, */ Index: src/sys/netipsec/key.c diff -u src/sys/netipsec/key.c:1.246 src/sys/netipsec/key.c:1.247 --- src/sys/netipsec/key.c:1.246 Fri Dec 1 06:34:14 2017 +++ src/sys/netipsec/key.c Wed Jan 10 10:56:30 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $ */ +/* $NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $ */ /* $FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $ */ /* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */ @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $"); /* * This code is referred to RFC 2367 @@ -196,6 +196,10 @@ static u_int32_t acq_seq = 0; * however, a socket can be destroyed in softint so we cannot destroy * it directly instead we just mark it DEAD and delay the destruction * until GC by the timer + * - SP origin + * - SPs can be created by both userland programs and kernel components. + * The SPs created in kernel must not be removed by userland programs, + * although the SPs can be read by userland programs. 
*/ /* * Locking notes on SAD: @@ -584,13 +588,6 @@ struct _keystat { u_long getspi_count; /* the avarage of count to try to get new SPI */ } keystat; -struct sadb_msghdr { - struct sadb_msg *msg; - void *ext[SADB_EXT_MAX + 1]; - int extoff[SADB_EXT_MAX + 1]; - int extlen[SADB_EXT_MAX + 1]; -}; - static void key_init_spidx_bymsghdr(struct secpolicyindex *, const struct sadb_msghdr *); @@ -621,10 +618,9 @@ static void key_freesp_so(struct secpoli #endif static struct secpolicy *key_getsp (const struct secpolicyindex *); static struct secpolicy *key_getspbyid (u_int32_t); -static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *); -static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t); +static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *, bool); +static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t, bool); static void key_destroy_sp(struct secpolicy *); -static u_int16_t key_newreqid (void); static struct mbuf *key_gather_mbuf (struct mbuf *, const struct sadb_msghdr *, int, int, ...); static int key_api_spdadd(struct socket *, struct mbuf *, @@ -1642,14 +1638,19 @@ key_getsp(const struct secpolicyindex *s * others : found, pointer to a SP. */ static struct secpolicy * -key_lookup_and_remove_sp(const struct secpolicyindex *spidx) +key_lookup_and_remove_sp(const struct secpolicyindex *spidx, bool from_kernel) { struct secpolicy *sp = NULL; mutex_enter(&key_spd.lock); SPLIST_WRITER_FOREACH(sp, spidx->dir) { KASSERT(sp->state != IPSEC_SPSTATE_DEAD); - + /* + * SPs created in kernel(e.g. ipsec(4) I/F) must not be + * removed by userland programs. + */ + if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL) + continue; if (key_spidx_match_exactly(spidx, &sp->spidx)) { key_unlink_sp(sp); goto out; @@ -1702,19 +1703,31 @@ out: * others : found, pointer to a SP. 
*/ static struct secpolicy * -key_lookupbyid_and_remove_sp(u_int32_t id) +key_lookupbyid_and_remove_sp(u_int32_t id, bool from_kernel) { struct secpolicy *sp; mutex_enter(&key_spd.lock); SPLIST_READER_FOREACH(sp, IPSEC_DIR_INBOUND) { KASSERT(sp->state != IPSEC_SPSTATE_DEAD); + /* + * SPs created in kernel(e.g. ipsec(4) I/F) must not be + * removed by userland programs. + */ + if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL) + continue; if (sp->id == id) goto out; } SPLIST_READER_FOREACH(sp, IPSEC_DIR_OUTBOUND) { KASSERT(sp->state != IPSEC_SPSTATE_DEAD); + /* + * SPs created in kernel(e.g. ipsec(4) I/F) must not be + * removed by userland programs. + */ + if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL) + continue; if (sp->id == id) goto out; } @@ -1742,8 +1755,9 @@ key_newsp(const char* where, int tag) * NOTE: `state', `secpolicyindex' in secpolicy structure are not set, * so must be set properly later. */ -struct secpolicy * -key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error) +static struct secpolicy * +_key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error, + bool from_kernel) { struct secpolicy *newsp; @@ -1852,10 +1866,21 @@ key_msg2sp(const struct sadb_x_policy *x xisr_reqid = xisr->sadb_x_ipsecrequest_reqid; /* validity check */ /* + * case 1) from_kernel == false + * That means the request comes from userland. * If range violation of reqid, kernel will * update it, don't refuse it. + * + * case 2) from_kernel == true + * That means the request comes from kernel + * (e.g. ipsec(4) I/F). + * Use thre requested reqid to avoid inconsistency + * between kernel's reqid and the reqid in pf_key + * message sent to userland. The pf_key message is + * built by diverting request mbuf. 
*/ - if (xisr_reqid > IPSEC_MANUAL_REQID_MAX) { + if (!from_kernel && + xisr_reqid > IPSEC_MANUAL_REQID_MAX) { IPSECLOG(LOG_DEBUG, "reqid=%d range " "violation, updated by kernel.\n", @@ -1939,7 +1964,14 @@ free_exit: return NULL; } -static u_int16_t +struct secpolicy * +key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error) +{ + + return _key_msg2sp(xpl0, len, error, false); +} + +u_int16_t key_newreqid(void) { static u_int16_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; @@ -2086,24 +2118,13 @@ key_gather_mbuf(struct mbuf *m, const st } /* - * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing - * add an entry to SP database, when received - * <base, address(SD), (lifetime(H),) policy> - * from the user(?). - * Adding to SP database, - * and send - * <base, address(SD), (lifetime(H),) policy> - * to the socket which was send. - * - * SPDADD set a unique policy entry. - * SPDSETIDX like SPDADD without a part of policy requests. - * SPDUPDATE replace a unique policy entry. - * - * m will always be freed. + * The argument _sp must not overwrite until SP is created and registered + * successfully. 
*/ static int -key_api_spdadd(struct socket *so, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_spdadd(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, struct secpolicy **_sp, + bool from_kernel) { const struct sockaddr *src, *dst; const struct sadb_x_policy *xpl0; @@ -2184,7 +2205,7 @@ key_api_spdadd(struct socket *so, struct struct secpolicy *sp; if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - sp = key_lookup_and_remove_sp(&spidx); + sp = key_lookup_and_remove_sp(&spidx, from_kernel); if (sp != NULL) key_destroy_sp(sp); } else { @@ -2198,7 +2219,7 @@ key_api_spdadd(struct socket *so, struct } /* allocation new SP entry */ - newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error); + newsp = _key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error, from_kernel); if (newsp == NULL) { return key_senderror(so, m, error); } @@ -2214,11 +2235,20 @@ key_api_spdadd(struct socket *so, struct newsp->lastused = newsp->created; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; + if (from_kernel) + newsp->origin = IPSEC_SPORIGIN_KERNEL; + else + newsp->origin = IPSEC_SPORIGIN_USER; key_init_sp(newsp); + if (from_kernel) + KEY_SP_REF(newsp); sadb_x_policy_id = newsp->id; + if (_sp != NULL) + *_sp = newsp; + mutex_enter(&key_spd.lock); SPLIST_WRITER_INSERT_TAIL(newsp->spidx.dir, newsp); mutex_exit(&key_spd.lock); @@ -2275,13 +2305,19 @@ key_api_spdadd(struct socket *so, struct sizeof(*xpl), &off); if (mpolicy == NULL) { /* n is already freed */ + /* + * valid sp has been created, so we does not overwrite _sp + * NULL here. let caller decide to use the sp or not. 
+ */ return key_senderror(so, m, ENOBUFS); } xpl = (struct sadb_x_policy *)(mtod(mpolicy, char *) + off); if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) { m_freem(n); + /* ditto */ return key_senderror(so, m, EINVAL); } + xpl->sadb_x_policy_id = sadb_x_policy_id; m_freem(m); @@ -2290,6 +2326,55 @@ key_api_spdadd(struct socket *so, struct } /* + * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing + * add an entry to SP database, when received + * <base, address(SD), (lifetime(H),) policy> + * from the user(?). + * Adding to SP database, + * and send + * <base, address(SD), (lifetime(H),) policy> + * to the socket which was send. + * + * SPDADD set a unique policy entry. + * SPDSETIDX like SPDADD without a part of policy requests. + * SPDUPDATE replace a unique policy entry. + * + * m will always be freed. + */ +static int +key_api_spdadd(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + + return key_spdadd(so, m, mhp, NULL, false); +} + +struct secpolicy * +key_kpi_spdadd(struct mbuf *m) +{ + struct sadb_msghdr mh; + int error; + struct secpolicy *sp = NULL; + + error = key_align(m, &mh); + if (error) + return NULL; + + error = key_spdadd(NULL, m, &mh, &sp, true); + if (error) { + /* + * Currently, when key_spdadd() cannot send a PFKEY message + * which means SP has been created, key_spdadd() returns error + * although SP is created successfully. + * Kernel components would not care PFKEY messages, so return + * the "sp" regardless of error code. key_spdadd() overwrites + * the argument only if SP is created successfully. + */ + } + return sp; +} + +/* * get new policy id. * OUT: * 0: failure. @@ -2370,7 +2455,7 @@ key_api_spddelete(struct socket *so, str key_init_spidx_bymsghdr(&spidx, mhp); /* Is there SP in SPD ? 
*/ - sp = key_lookup_and_remove_sp(&spidx); + sp = key_lookup_and_remove_sp(&spidx, false); if (sp == NULL) { IPSECLOG(LOG_DEBUG, "no SP found.\n"); return key_senderror(so, m, EINVAL); @@ -2426,8 +2511,8 @@ key_alloc_mbuf_simple(int len, int mflag * m will always be freed. */ static int -key_api_spddelete2(struct socket *so, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_spddelete2(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, bool from_kernel) { u_int32_t id; struct secpolicy *sp; @@ -2443,7 +2528,7 @@ key_api_spddelete2(struct socket *so, st id = xpl->sadb_x_policy_id; /* Is there SP in SPD ? */ - sp = key_lookupbyid_and_remove_sp(id); + sp = key_lookupbyid_and_remove_sp(id, from_kernel); if (sp == NULL) { IPSECLOG(LOG_DEBUG, "no SP found id:%u.\n", id); return key_senderror(so, m, EINVAL); @@ -2486,6 +2571,39 @@ key_api_spddelete2(struct socket *so, st } /* + * SADB_SPDDELETE2 processing + * receive + * <base, policy(*)> + * from the user(?), and set SADB_SASTATE_DEAD, + * and send, + * <base, policy(*)> + * to the ikmpd. + * policy(*) including direction of policy. + * + * m will always be freed. + */ +static int +key_api_spddelete2(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp) +{ + + return key_spddelete2(so, m, mhp, false); +} + +int +key_kpi_spddelete2(struct mbuf *m) +{ + struct sadb_msghdr mh; + int error; + + error = key_align(m, &mh); + if (error) + return EINVAL; + + return key_spddelete2(NULL, m, &mh, true); +} + +/* * SADB_X_GET processing * receive * <base, policy(*)> @@ -2630,10 +2748,17 @@ key_api_spdflush(struct socket *so, stru mutex_enter(&key_spd.lock); SPLIST_WRITER_FOREACH(sp, dir) { KASSERT(sp->state != IPSEC_SPSTATE_DEAD); - key_unlink_sp(sp); - mutex_exit(&key_spd.lock); - key_destroy_sp(sp); - goto retry; + /* + * Userlang programs can remove SPs created by userland + * probrams only, that is, they cannot remove SPs + * created in kernel(e.g. ipsec(4) I/F). 
+ */ + if (sp->origin == IPSEC_SPORIGIN_USER) { + key_unlink_sp(sp); + mutex_exit(&key_spd.lock); + key_destroy_sp(sp); + goto retry; + } } mutex_exit(&key_spd.lock); } @@ -7696,6 +7821,16 @@ key_senderror(struct socket *so, struct KASSERT(m->m_len >= sizeof(struct sadb_msg)); + if (so == NULL) { + /* + * This means the request comes from kernel. + * As the request comes from kernel, it is unnecessary to + * send message to userland. Just return errcode directly. + */ + m_freem(m); + return code; + } + msg = mtod(m, struct sadb_msg *); msg->sadb_msg_errno = code; return key_sendup_mbuf(so, m, KEY_SENDUP_ONE); Index: src/sys/netipsec/key.h diff -u src/sys/netipsec/key.h:1.33 src/sys/netipsec/key.h:1.34 --- src/sys/netipsec/key.h:1.33 Tue Nov 21 07:03:08 2017 +++ src/sys/netipsec/key.h Wed Jan 10 10:56:31 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: key.h,v 1.33 2017/11/21 07:03:08 ozaki-r Exp $ */ +/* $NetBSD: key.h,v 1.34 2018/01/10 10:56:31 knakahara Exp $ */ /* $FreeBSD: src/sys/netipsec/key.h,v 1.1.4.1 2003/01/24 05:11:36 sam Exp $ */ /* $KAME: key.h,v 1.21 2001/07/27 03:51:30 itojun Exp $ */ @@ -42,11 +42,18 @@ struct ipsecrequest; struct secasvar; struct sockaddr; struct socket; -struct sadb_msg; -struct sadb_x_policy; struct secasindex; union sockaddr_union; +#include <net/pfkeyv2.h> + +struct sadb_msghdr { + struct sadb_msg *msg; + void *ext[SADB_EXT_MAX + 1]; + int extoff[SADB_EXT_MAX + 1]; + int extlen[SADB_EXT_MAX + 1]; +}; + int key_havesp(u_int dir); struct secpolicy *key_lookup_sp_byspidx(const struct secpolicyindex *, u_int, const char*, int); @@ -116,7 +123,10 @@ int key_get_used(void); u_int16_t key_portfromsaddr (const union sockaddr_union *); - +/* for ipsec(4) */ +struct secpolicy *key_kpi_spdadd(struct mbuf *); +int key_kpi_spddelete2(struct mbuf *); +u_int16_t key_newreqid(void); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_SECA); Added files: Index: src/sys/net/if_ipsec.c diff -u /dev/null src/sys/net/if_ipsec.c:1.1 --- /dev/null Wed Jan 10 10:56:31 2018 +++ 
src/sys/net/if_ipsec.c Wed Jan 10 10:56:30 2018 @@ -0,0 +1,1736 @@ +/* $NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */ + +/* + * Copyright (c) 2017 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $"); + +#ifdef _KERNEL_OPT +#include "opt_inet.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/errno.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/syslog.h> +#include <sys/cpu.h> +#include <sys/kmem.h> +#include <sys/mutex.h> +#include <sys/pserialize.h> +#include <sys/psref.h> + +#include <net/if.h> +#include <net/if_types.h> +#include <net/route.h> +#include <net/bpf.h> +#include <net/pfkeyv2.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#ifdef INET +#include <netinet/in_var.h> +#endif /* INET */ + +#ifdef INET6 +#include <netinet6/in6_var.h> +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#endif /* INET6 */ + +#include <netinet/ip_encap.h> + +#include <net/if_ipsec.h> + +#include <net/raw_cb.h> +#include <net/pfkeyv2.h> + +#include <netipsec/key.h> +#include <netipsec/ipsec.h> +#include <netipsec/ipsecif.h> + +static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *); +static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *); + +static int if_ipsec_clone_create(struct if_clone *, int); +static int if_ipsec_clone_destroy(struct ifnet *); + +static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int); +static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *); + +static int if_ipsec_encap_attach(struct ipsec_variant *); +static int if_ipsec_encap_detach(struct ipsec_variant *); +static int if_ipsec_set_tunnel(struct ifnet *, + struct sockaddr *, struct sockaddr *); +static void if_ipsec_delete_tunnel(struct ifnet *); +static int if_ipsec_ensure_flags(struct ifnet *, short); +static void if_ipsec_attach0(struct ipsec_softc *); + +static int if_ipsec_update_variant(struct ipsec_softc *, + struct 
ipsec_variant *, struct ipsec_variant *); + +/* sadb_msg */ +static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t); +static inline void if_ipsec_add_pad(struct mbuf *, size_t); +static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *, + struct sockaddr *, int, uint16_t); +static inline size_t if_ipsec_set_sadb_src(struct sadb_address *, + struct sockaddr *, int); +static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *, + struct sockaddr *, int); +static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *, + struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t); +static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t); +static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t); +static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t); +/* SPD */ +static int if_ipsec_share_sp(struct ipsec_variant *); +static int if_ipsec_unshare_sp(struct ipsec_variant *); +static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *, + in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int); +static inline int if_ipsec_del_sp0(struct secpolicy *); +static int if_ipsec_add_sp(struct ipsec_variant *, + struct sockaddr *, in_port_t, struct sockaddr *, in_port_t); +static void if_ipsec_del_sp(struct ipsec_variant *); +static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *, + struct ipsec_variant *); + +static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *, + in_port_t); +#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target) \ + if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport) +#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target) \ + if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport) + +/* + * ipsec global variable definitions + */ + +/* This list is used in ioctl context only. 
*/ +LIST_HEAD(ipsec_sclist, ipsec_softc); +static struct { + struct ipsec_sclist list; + kmutex_t lock; +} ipsec_softcs __cacheline_aligned; + +pserialize_t ipsec_psz __read_mostly; +struct psref_class *iv_psref_class __read_mostly; + +struct if_clone ipsec_cloner = + IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy); +static int max_ipsec_nesting = MAX_IPSEC_NEST; + +/* ARGSUSED */ +void +ipsecifattach(int count) +{ + + mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE); + LIST_INIT(&ipsec_softcs.list); + + ipsec_psz = pserialize_create(); + iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET); + + if_clone_attach(&ipsec_cloner); +} + +static int +if_ipsec_clone_create(struct if_clone *ifc, int unit) +{ + struct ipsec_softc *sc; + struct ipsec_variant *var; + + sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); + + if_initname(&sc->ipsec_if, ifc->ifc_name, unit); + + if_ipsec_attach0(sc); + + var = kmem_zalloc(sizeof(*var), KM_SLEEP); + var->iv_softc = sc; + psref_target_init(&var->iv_psref, iv_psref_class); + + sc->ipsec_var = var; + mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE); + sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro)); + percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL); + + mutex_enter(&ipsec_softcs.lock); + LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list); + mutex_exit(&ipsec_softcs.lock); + return 0; +} + +static void +if_ipsec_attach0(struct ipsec_softc *sc) +{ + + sc->ipsec_if.if_addrlen = 0; + sc->ipsec_if.if_mtu = IPSEC_MTU; + sc->ipsec_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + /* set ipsec(4) specific default flags. 
*/ + sc->ipsec_if.if_flags |= IFF_FWD_IPV6; + sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE; + sc->ipsec_if.if_ioctl = if_ipsec_ioctl; + sc->ipsec_if.if_output = if_ipsec_output; + sc->ipsec_if.if_type = IFT_IPSEC; + sc->ipsec_if.if_dlt = DLT_NULL; + sc->ipsec_if.if_softc = sc; + IFQ_SET_READY(&sc->ipsec_if.if_snd); + if_initialize(&sc->ipsec_if); + if_alloc_sadl(&sc->ipsec_if); + bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int)); + if_register(&sc->ipsec_if); +} + +static void +if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused) +{ + struct ipsec_ro *iro = p; + + mutex_init(&iro->ir_lock, MUTEX_DEFAULT, IPL_NONE); +} + +static void +if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused) +{ + struct ipsec_ro *iro = p; + + rtcache_free(&iro->ir_ro); + + mutex_destroy(&iro->ir_lock); +} + +static int +if_ipsec_clone_destroy(struct ifnet *ifp) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_variant *var; + int bound; + + mutex_enter(&ipsec_softcs.lock); + LIST_REMOVE(sc, ipsec_list); + mutex_exit(&ipsec_softcs.lock); + + bound = curlwp_bind(); + if_ipsec_delete_tunnel(&sc->ipsec_if); + curlwp_bindx(bound); + + bpf_detach(ifp); + if_detach(ifp); + + percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL); + percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro)); + + mutex_destroy(&sc->ipsec_lock); + + var = sc->ipsec_var; + kmem_free(var, sizeof(*var)); + kmem_free(sc, sizeof(*sc)); + + return 0; +} + +static inline bool +if_ipsec_nat_t(struct ipsec_softc *sc) +{ + + return (sc->ipsec_if.if_flags & IFF_NAT_T) != 0; +} + +static inline bool +if_ipsec_fwd_ipv6(struct ipsec_softc *sc) +{ + + return (sc->ipsec_if.if_flags & IFF_FWD_IPV6) != 0; +} + +int +if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg) +{ + struct ip ip; + struct ipsec_softc *sc; + struct ipsec_variant *var = NULL; + struct psref psref; + int ret = 0; + + sc = arg; + KASSERT(sc != NULL); + + if 
((sc->ipsec_if.if_flags & IFF_UP) == 0) + goto out; + + var = if_ipsec_getref_variant(sc, &psref); + if (if_ipsec_variant_is_unconfigured(var)) + goto out; + + switch (proto) { + case IPPROTO_IPV4: + case IPPROTO_IPV6: + break; + default: + goto out; + } + + if (m->m_pkthdr.len < sizeof(ip)) + goto out; + + m_copydata(m, 0, sizeof(ip), &ip); + switch (ip.ip_v) { +#ifdef INET + case IPVERSION: + if (var->iv_psrc->sa_family != AF_INET || + var->iv_pdst->sa_family != AF_INET) + goto out; + ret = ipsecif4_encap_func(m, &ip, var); + break; +#endif + default: + goto out; + } + +out: + if (var != NULL) + if_ipsec_putref_variant(var, &psref); + return ret; +} + +/* + * ipsec(4) I/F may cause infinite recursion calls when misconfigured. + * We'll prevent this by introducing upper limit. + */ +static int +if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + + return if_tunnel_check_nesting(ifp, m, max_ipsec_nesting); +} + +int +if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + const struct rtentry *rt) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_variant *var; + struct psref psref; + int error; + int bound; + + IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family); + + error = if_ipsec_check_nesting(ifp, m); + if (error) { + m_freem(m); + goto noref_end; + } + + if ((ifp->if_flags & IFF_UP) == 0) { + m_freem(m); + error = ENETDOWN; + goto noref_end; + } + + + bound = curlwp_bind(); + var = if_ipsec_getref_variant(sc, &psref); + if (if_ipsec_variant_is_unconfigured(var)) { + m_freem(m); + error = ENETDOWN; + goto end; + } + + m->m_flags &= ~(M_BCAST|M_MCAST); + + /* use DLT_NULL encapsulation here to pass inner af type */ + M_PREPEND(m, sizeof(int), M_DONTWAIT); + if (!m) { + error = ENOBUFS; + goto end; + } + *mtod(m, int *) = dst->sa_family; + +#if INET6 + /* drop IPv6 packet if IFF_FWD_IPV6 is not set */ + if (dst->sa_family == AF_INET6 && + !if_ipsec_fwd_ipv6(sc)) { + /* + * IPv6 packet is not allowed to forward,that is not 
error. + */ + error = 0; + IF_DROP(&ifp->if_snd); + m_freem(m); + goto end; + } +#endif + + error = if_ipsec_out_direct(var, m, dst->sa_family); + +end: + if_ipsec_putref_variant(var, &psref); + curlwp_bindx(bound); +noref_end: + if (error) + ifp->if_oerrors++; + + return error; +} + +static inline int +if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family) +{ + struct ifnet *ifp = &var->iv_softc->ipsec_if; + int error; + int len; + + KASSERT(if_ipsec_heldref_variant(var)); + KASSERT(var->iv_output != NULL); + + len = m->m_pkthdr.len; + + /* input DLT_NULL frame to BPF */ + bpf_mtap(ifp, m); + + /* grab and chop off inner af type */ + /* XXX need pullup? */ + m_adj(m, sizeof(int)); + + error = var->iv_output(var, family, m); + if (error) + return error; + + ifp->if_opackets++; + ifp->if_obytes += len; + + return 0; +} + +void +if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp) +{ + + KASSERT(ifp != NULL); + + m_set_rcvif(m, ifp); + + bpf_mtap_af(ifp, af, m); + + if_ipsec_in_enqueue(m, af, ifp); + + return; +} + +static inline void +if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp) +{ + pktqueue_t *pktq; + int pktlen; + + /* + * Put the packet to the network layer input queue according to the + * specified address family. 
+ */ + switch (af) { +#ifdef INET + case AF_INET: + pktq = ip_pktq; + break; +#endif +#ifdef INET6 + case AF_INET6: + pktq = ip6_pktq; + break; +#endif + default: + ifp->if_ierrors++; + m_freem(m); + return; + } + +#if 1 + const u_int h = curcpu()->ci_index; +#else + const uint32_t h = pktq_rps_hash(m); +#endif + pktlen = m->m_pkthdr.len; + if (__predict_true(pktq_enqueue(pktq, m, h))) { + ifp->if_ibytes += pktlen; + ifp->if_ipackets++; + } else { + m_freem(m); + } + + return; +} + +static inline int +if_ipsec_check_salen(struct sockaddr *addr) +{ + + switch (addr->sa_family) { +#ifdef INET + case AF_INET: + if (addr->sa_len != sizeof(struct sockaddr_in)) + return EINVAL; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (addr->sa_len != sizeof(struct sockaddr_in6)) + return EINVAL; + break; +#endif /* INET6 */ + default: + return EAFNOSUPPORT; + } + + return 0; +} + +/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +int +if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_variant *var = NULL; + struct ifreq *ifr = (struct ifreq*)data; + struct ifaddr *ifa = (struct ifaddr*)data; + int error = 0, size; + struct sockaddr *dst, *src; + u_long mtu; + short oflags = ifp->if_flags; + int bound; + struct psref psref; + + switch (cmd) { + case SIOCINITIFADDR: + ifp->if_flags |= IFF_UP; + ifa->ifa_rtrequest = p2p_rtrequest; + break; + + case SIOCSIFDSTADDR: + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + switch (ifr->ifr_addr.sa_family) { +#ifdef INET + case AF_INET: /* IP supports Multicast */ + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: /* IP6 supports Multicast */ + break; +#endif /* INET6 */ + default: /* Other protocols doesn't support Multicast */ + error = EAFNOSUPPORT; + break; + } + break; + + case SIOCSIFMTU: + mtu = ifr->ifr_mtu; + if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX) + return EINVAL; + else if ((error = ifioctl_common(ifp, cmd, data)) 
== ENETRESET) + error = 0; + break; + +#ifdef INET + case SIOCSIFPHYADDR: +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif /* INET6 */ + case SIOCSLIFPHYADDR: + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + break; +#endif /* INET */ +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); + break; +#endif /* INET6 */ + case SIOCSLIFPHYADDR: + src = (struct sockaddr *) + &(((struct if_laddrreq *)data)->addr); + dst = (struct sockaddr *) + &(((struct if_laddrreq *)data)->dstaddr); + break; + default: + return EINVAL; + } + + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family) + return EINVAL; + + error = if_ipsec_check_salen(src); + if (error) + return error; + error = if_ipsec_check_salen(dst); + if (error) + return error; + + /* check sa_family looks sane for the cmd */ + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + return EAFNOSUPPORT; +#endif /* INET */ +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + return EAFNOSUPPORT; +#endif /* INET6 */ + case SIOCSLIFPHYADDR: + /* checks done in the above */ + break; + } + /* + * calls if_ipsec_getref_variant() for other softcs to check + * address pair duplicattion + */ + bound = curlwp_bind(); + error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst); + if (error) + goto bad; + break; + + case SIOCDIFPHYADDR: + bound = curlwp_bind(); + if_ipsec_delete_tunnel(&sc->ipsec_if); + curlwp_bindx(bound); + break; + + case SIOCGIFPSRCADDR: +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: +#endif /* INET6 */ + bound = curlwp_bind(); + var = if_ipsec_getref_variant(sc, &psref); + if (var->iv_psrc == NULL) { + error = EADDRNOTAVAIL; + goto 
bad; + } + src = var->iv_psrc; + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + dst = &ifr->ifr_addr; + size = sizeof(ifr->ifr_addr); + break; +#endif /* INET */ +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + dst = (struct sockaddr *) + &(((struct in6_ifreq *)data)->ifr_addr); + size = sizeof(((struct in6_ifreq *)data)->ifr_addr); + break; +#endif /* INET6 */ + default: + error = EADDRNOTAVAIL; + goto bad; + } + if (src->sa_len > size) { + error = EINVAL; + goto bad; + } + error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst); + if (error) + goto bad; + if_ipsec_putref_variant(var, &psref); + curlwp_bindx(bound); + break; + + case SIOCGIFPDSTADDR: +#ifdef INET6 + case SIOCGIFPDSTADDR_IN6: +#endif /* INET6 */ + bound = curlwp_bind(); + var = if_ipsec_getref_variant(sc, &psref); + if (var->iv_pdst == NULL) { + error = EADDRNOTAVAIL; + goto bad; + } + src = var->iv_pdst; + switch (cmd) { +#ifdef INET + case SIOCGIFPDSTADDR: + dst = &ifr->ifr_addr; + size = sizeof(ifr->ifr_addr); + break; +#endif /* INET */ +#ifdef INET6 + case SIOCGIFPDSTADDR_IN6: + dst = (struct sockaddr *) + &(((struct in6_ifreq *)data)->ifr_addr); + size = sizeof(((struct in6_ifreq *)data)->ifr_addr); + break; +#endif /* INET6 */ + default: + error = EADDRNOTAVAIL; + goto bad; + } + if (src->sa_len > size) { + error = EINVAL; + goto bad; + } + error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst); + if (error) + goto bad; + if_ipsec_putref_variant(var, &psref); + curlwp_bindx(bound); + break; + + case SIOCGLIFPHYADDR: + bound = curlwp_bind(); + var = if_ipsec_getref_variant(sc, &psref); + if (if_ipsec_variant_is_unconfigured(var)) { + error = EADDRNOTAVAIL; + goto bad; + } + + /* copy src */ + src = var->iv_psrc; + dst = (struct sockaddr *) + &(((struct if_laddrreq *)data)->addr); + size = sizeof(((struct if_laddrreq *)data)->addr); + if (src->sa_len > size) { + error = EINVAL; + goto bad; + } + error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst); + if (error) + goto bad; + + /* copy dst */ + src = 
var->iv_pdst; + dst = (struct sockaddr *) + &(((struct if_laddrreq *)data)->dstaddr); + size = sizeof(((struct if_laddrreq *)data)->dstaddr); + if (src->sa_len > size) { + error = EINVAL; + goto bad; + } + error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst); + if (error) + goto bad; + if_ipsec_putref_variant(var, &psref); + curlwp_bindx(bound); + break; + + default: + error = ifioctl_common(ifp, cmd, data); + if (!error) { + bound = curlwp_bind(); + error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags); + if (error) + goto bad; + } + break; + } + return error; + +bad: + if (var != NULL) + if_ipsec_putref_variant(var, &psref); + curlwp_bindx(bound); + + return error; +} + +struct encap_funcs { + int (*ef_inet)(struct ipsec_variant *); + int (*ef_inet6)(struct ipsec_variant *); +}; + +static struct encap_funcs ipsec_encap_attach = { + .ef_inet = ipsecif4_attach, + .ef_inet6 = &ipsecif6_attach, +}; + +static struct encap_funcs ipsec_encap_detach = { + .ef_inet = ipsecif4_detach, + .ef_inet6 = &ipsecif6_detach, +}; + +static int +if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs) +{ + int error; + + KASSERT(var != NULL); + KASSERT(if_ipsec_variant_is_configured(var)); + + switch (var->iv_psrc->sa_family) { +#ifdef INET + case AF_INET: + error = (funcs->ef_inet)(var); + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + error = (funcs->ef_inet6)(var); + break; +#endif /* INET6 */ + default: + error = EINVAL; + break; + } + + return error; +} + +static int +if_ipsec_encap_attach(struct ipsec_variant *var) +{ + + return if_ipsec_encap_common(var, &ipsec_encap_attach); +} + +static int +if_ipsec_encap_detach(struct ipsec_variant *var) +{ + + return if_ipsec_encap_common(var, &ipsec_encap_detach); +} + +/* + * Validate and set ipsec(4) I/F configurations. + * (1) validate + * (1-1) Check the argument src and dst address pair will change + * configuration from current src and dst address pair. 
 *   (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
 *         with argument src and dst address pair, except for NAT-T shared
 *         tunnels.
 *  (2) set
 *   (2-1) Create variant for new configuration.
 *   (2-2) Create temporary "null" variant used to avoid to access
 *         dangling variant while SPs are deleted and added.
 *   (2-3) Swap variant include its SPs.
 *   (2-4) Cleanup last configurations.
 *
 * Arguments:
 *   ifp - ipsec(4) interface; its softc is taken from ifp->if_softc.
 *   src - requested outer source address; a non-zero port is recorded
 *         separately in the new variant.
 *   dst - requested outer destination address; handled like src.
 *
 * Returns 0 on success and also when the requested pair equals the current
 * one.  Otherwise returns the error from encap_lock_enter(), EINVAL for an
 * address family that is not compiled in, EADDRNOTAVAIL when another
 * ipsec(4) I/F already uses the pair, or the error reported by
 * if_ipsec_encap_attach() / if_ipsec_update_variant().
 */
static int
if_ipsec_set_tunnel(struct ifnet *ifp,
    struct sockaddr *src, struct sockaddr *dst)
{
	struct ipsec_softc *sc = ifp->if_softc;
	struct ipsec_softc *sc2;
	struct ipsec_variant *ovar, *nvar, *nullvar;
	struct sockaddr *osrc, *odst;
	struct sockaddr *nsrc, *ndst;
	in_port_t nsport = 0, ndport = 0;
	int error;

	error = encap_lock_enter();
	if (error)
		return error;

	/*
	 * Allocate everything before sc->ipsec_lock is taken; all four
	 * objects are released again at "out" on every failure path.
	 */
	nsrc = sockaddr_dup(src, M_WAITOK);
	ndst = sockaddr_dup(dst, M_WAITOK);
	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);

	mutex_enter(&sc->ipsec_lock);

	ovar = sc->ipsec_var;

	/*
	 * Split the ports off the caller's addresses: the host-order port
	 * goes to nsport/ndport and the duplicated addresses are stored
	 * with port 0.
	 */
	switch(nsrc->sa_family) {
#ifdef INET
	case AF_INET:
		nsport = ntohs(satosin(src)->sin_port);
		/*
		 * Avoid confusing the SP when NAT-T is disabled,
		 * e.g.
		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
		 */
		satosin(nsrc)->sin_port = 0;
		ndport = ntohs(satosin(dst)->sin_port);
		satosin(ndst)->sin_port = 0;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		nsport = ntohs(satosin6(src)->sin6_port);
		satosin6(nsrc)->sin6_port = 0;
		ndport = ntohs(satosin6(dst)->sin6_port);
		satosin6(ndst)->sin6_port = 0;
		break;
#endif /* INET6 */
	default:
		log(LOG_DEBUG,
		    "%s: Invalid address family: %d.\n",
		    __func__, src->sa_family);
		error = EINVAL;
		goto out;
	}

	/*
	 * (1-1) Check the argument src and dst address pair will change
	 *       configuration from current src and dst address pair.
	 */
	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
		/* address and port pair not changed. */
		error = 0;
		goto out;
	}

	/*
	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
	 *       with argument src and dst address pair, except for NAT-T shared
	 *       tunnels.
	 */
	mutex_enter(&ipsec_softcs.lock);
	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
		struct ipsec_variant *var2;
		struct psref psref;

		if (sc2 == sc)
			continue;
		/* Every path out of this iteration drops the reference. */
		var2 = if_ipsec_getref_variant(sc2, &psref);
		if (if_ipsec_variant_is_unconfigured(var2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue;
		}
		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
			if_ipsec_putref_variant(var2, &psref);
			continue; /* NAT-T shared tunnel */
		}
		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
			if_ipsec_putref_variant(var2, &psref);
			mutex_exit(&ipsec_softcs.lock);
			error = EADDRNOTAVAIL;
			goto out;
		}

		if_ipsec_putref_variant(var2, &psref);
		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
	}
	mutex_exit(&ipsec_softcs.lock);


	/* Remember the old addresses so they can be freed in (2-4). */
	osrc = ovar->iv_psrc;
	odst = ovar->iv_pdst;

	/*
	 * (2-1) Create ipsec_variant for new configuration.
	 */
	if_ipsec_copy_variant(nvar, ovar);
	nvar->iv_psrc = nsrc;
	nvar->iv_pdst = ndst;
	nvar->iv_sport = nsport;
	nvar->iv_dport = ndport;
	nvar->iv_encap_cookie4 = NULL;
	nvar->iv_encap_cookie6 = NULL;
	psref_target_init(&nvar->iv_psref, iv_psref_class);
	error = if_ipsec_encap_attach(nvar);
	if (error)
		goto out;

	/*
	 * (2-2) Create temporary "null" variant.
	 */
	if_ipsec_copy_variant(nullvar, ovar);
	if_ipsec_clear_config(nullvar);
	psref_target_init(&nullvar->iv_psref, iv_psref_class);
	membar_producer();
	/*
	 * (2-3) Swap variant include its SPs.
	 */
	error = if_ipsec_update_variant(sc, nvar, nullvar);
	if (error) {
		/* Undo the attach from (2-1) before nvar is freed at "out". */
		if_ipsec_encap_detach(nvar);
		goto out;
	}

	mutex_exit(&sc->ipsec_lock);

	/*
	 * (2-4) Cleanup last configurations.
	 */
	if (if_ipsec_variant_is_configured(ovar))
		if_ipsec_encap_detach(ovar);
	encap_lock_exit();

	if (osrc != NULL)
		sockaddr_free(osrc);
	if (odst != NULL)
		sockaddr_free(odst);
	kmem_free(ovar, sizeof(*ovar));
	kmem_free(nullvar, sizeof(*nullvar));

	return 0;

out:
	/* Common failure/no-change exit: drop both locks, free new objects. */
	mutex_exit(&sc->ipsec_lock);
	encap_lock_exit();

	sockaddr_free(nsrc);
	sockaddr_free(ndst);
	kmem_free(nvar, sizeof(*nvar));
	kmem_free(nullvar, sizeof(*nullvar));

	return error;
}

/*
 * Validate and delete ipsec(4) I/F configurations.
 *  (1) validate
 *   (1-1) Check current src and dst address pair are null,
 *         which means the ipsec(4) I/F is already done deletetunnel.
 *  (2) delete
 *   (2-1) Create variant for deleted status.
 *   (2-2) Create temporary "null" variant used to avoid to access
 *         dangling variant while SPs are deleted and added.
 *         NOTE:
 *           The contents of temporary "null" variant equal to the variant
 *           of (2-1), however two psref_target_destroy() synchronization
 *           points are necessary to avoid to access dangling variant
 *           while SPs are deleted and added. To implement that simply,
 *           we use the same manner as if_ipsec_set_tunnel(), that is,
 *           create extra "null" variant and use it temporarily.
 *   (2-3) Swap variant include its SPs.
 *   (2-4) Cleanup last configurations.
+ */ +static void +if_ipsec_delete_tunnel(struct ifnet *ifp) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_variant *ovar, *nvar, *nullvar; + struct sockaddr *osrc, *odst; + int error; + + error = encap_lock_enter(); + if (error) + return; + + nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP); + nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP); + + mutex_enter(&sc->ipsec_lock); + + ovar = sc->ipsec_var; + osrc = ovar->iv_psrc; + odst = ovar->iv_pdst; + /* + * (1-1) Check current src and dst address pair are null, + * which means the ipsec(4) I/F is already done deletetunnel. + */ + if (osrc == NULL || odst == NULL) { + /* address pair not changed. */ + mutex_exit(&sc->ipsec_lock); + encap_lock_exit(); + kmem_free(nvar, sizeof(*nvar)); + return; + } + + /* + * (2-1) Create variant for deleted status. + */ + if_ipsec_copy_variant(nvar, ovar); + if_ipsec_clear_config(nvar); + psref_target_init(&nvar->iv_psref, iv_psref_class); + + /* + * (2-2) Create temporary "null" variant used to avoid to access + * dangling variant while SPs are deleted and added. + */ + if_ipsec_copy_variant(nullvar, ovar); + if_ipsec_clear_config(nullvar); + psref_target_init(&nullvar->iv_psref, iv_psref_class); + membar_producer(); + /* + * (2-3) Swap variant include its SPs. + */ + /* if_ipsec_update_variant() does not fail when delete SP only. */ + (void)if_ipsec_update_variant(sc, nvar, nullvar); + + mutex_exit(&sc->ipsec_lock); + + /* + * (2-4) Cleanup last configurations. + */ + if (if_ipsec_variant_is_configured(ovar)) + if_ipsec_encap_detach(ovar); + encap_lock_exit(); + + sockaddr_free(osrc); + sockaddr_free(odst); + kmem_free(ovar, sizeof(*ovar)); + kmem_free(nullvar, sizeof(*nullvar)); +} + +/* + * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed. + * (1) check + * (1-1) Check flags are changed. + * (1-2) Check current src and dst address pair. If they are null, + * that means the ipsec(4) I/F is deletetunnel'ed, so it is + * not needed to update. 
+ * (2) update + * (2-1) Create variant for new SPs. + * (2-2) Create temporary "null" variant used to avoid to access + * dangling variant while SPs are deleted and added. + * NOTE: + * There is the same problem as if_ipsec_delete_tunnel(). + * (2-3) Swap variant include its SPs. + * (2-4) Cleanup unused configurations. + * NOTE: use the same encap_cookies. + */ +static int +if_ipsec_ensure_flags(struct ifnet *ifp, short oflags) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_variant *ovar, *nvar, *nullvar; + int error; + + /* + * (1) Check flags are changed. + */ + if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) == + (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6))) + return 0; /* flags not changed. */ + + error = encap_lock_enter(); + if (error) + return error; + + nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP); + nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP); + + mutex_enter(&sc->ipsec_lock); + + ovar = sc->ipsec_var; + /* + * (1-2) Check current src and dst address pair. + */ + if (if_ipsec_variant_is_unconfigured(ovar)) { + /* nothing to do */ + mutex_exit(&sc->ipsec_lock); + return 0; + } + + /* + * (2-1) Create variant for new SPs. + */ + if_ipsec_copy_variant(nvar, ovar); + psref_target_init(&nvar->iv_psref, iv_psref_class); + /* + * (2-2) Create temporary "null" variant used to avoid to access + * dangling variant while SPs are deleted and added. + */ + if_ipsec_copy_variant(nullvar, ovar); + if_ipsec_clear_config(nullvar); + psref_target_init(&nullvar->iv_psref, iv_psref_class); + membar_producer(); + /* + * (2-3) Swap variant include its SPs. + */ + error = if_ipsec_update_variant(sc, nvar, nullvar); + + mutex_exit(&sc->ipsec_lock); + encap_lock_exit(); + + /* + * (2-4) Cleanup unused configurations. + */ + if (!error) + kmem_free(ovar, sizeof(*ovar)); + else + kmem_free(nvar, sizeof(*ovar)); + kmem_free(nullvar, sizeof(*nullvar)); + + return error; +} + +/* + * SPD management + */ + +/* + * Share SP set with other NAT-T ipsec(4) I/F(s). 
+ * Return 1, when "var" shares SP set. + * Return 0, when "var" cannot share SP set. + * + * NOTE: + * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock + * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0 + * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's + * set_tunnel causes race. + * Currently, (fortunately) encap_lock works as this global lock. + */ +static int +if_ipsec_share_sp(struct ipsec_variant *var) +{ + struct ipsec_softc *sc = var->iv_softc; + struct ipsec_softc *sc2; + struct ipsec_variant *var2; + struct psref psref; + + KASSERT(encap_lock_held()); + KASSERT(var->iv_pdst != NULL && var->iv_pdst != NULL); + + mutex_enter(&ipsec_softcs.lock); + LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) { + if (sc2 == sc) + continue; + var2 = if_ipsec_getref_variant(sc2, &psref); + if (if_ipsec_variant_is_unconfigured(var2)) { + if_ipsec_putref_variant(var2, &psref); + continue; + } + if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 || + sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) { + if_ipsec_putref_variant(var2, &psref); + continue; + } + + break; + } + mutex_exit(&ipsec_softcs.lock); + if (sc2 == NULL) + return 0; /* not shared */ + + IV_SP_IN(var) = IV_SP_IN(var2); + IV_SP_IN6(var) = IV_SP_IN6(var2); + IV_SP_OUT(var) = IV_SP_OUT(var2); + IV_SP_OUT6(var) = IV_SP_OUT6(var2); + + if_ipsec_putref_variant(var2, &psref); + return 1; /* shared */ +} + +/* + * Unshare SP set with other NAT-T ipsec(4) I/F(s). + * Return 1, when "var" shared SP set, and then unshare them. + * Return 0, when "var" did not share SP set. + * + * NOTE: + * See if_ipsec_share_sp()'s note. 
+ */ +static int +if_ipsec_unshare_sp(struct ipsec_variant *var) +{ + struct ipsec_softc *sc = var->iv_softc; + struct ipsec_softc *sc2; + struct ipsec_variant *var2; + struct psref psref; + + KASSERT(encap_lock_held()); + + if (!var->iv_pdst || !var->iv_psrc) + return 0; + + mutex_enter(&ipsec_softcs.lock); + LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) { + if (sc2 == sc) + continue; + var2 = if_ipsec_getref_variant(sc2, &psref); + if (!var2->iv_pdst || !var2->iv_psrc) { + if_ipsec_putref_variant(var2, &psref); + continue; + } + if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 || + sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) { + if_ipsec_putref_variant(var2, &psref); + continue; + } + + break; + } + mutex_exit(&ipsec_softcs.lock); + if (sc2 == NULL) + return 0; /* not shared */ + + IV_SP_IN(var) = NULL; + IV_SP_IN6(var) = NULL; + IV_SP_OUT(var) = NULL; + IV_SP_OUT6(var) = NULL; + if_ipsec_putref_variant(var2, &psref); + return 1; /* shared */ +} + +static inline void +if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len) +{ + struct mbuf *m; + + MGET(m, M_WAITOK | M_ZERO, MT_DATA); + m->m_len = PFKEY_ALIGN8(len); + m_copyback(m, 0, len, data); + m_cat(m0, m); +} + +static inline void +if_ipsec_add_pad(struct mbuf *m0, size_t len) +{ + struct mbuf *m; + + if (len == 0) + return; + + MGET(m, M_WAITOK | M_ZERO, MT_DATA); + m->m_len = len; + m_cat(m0, m); +} + +static inline size_t +if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr, + int proto, uint16_t exttype) +{ + size_t size; + + KASSERT(saaddr != NULL); + KASSERT(addr != NULL); + + size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len); + saaddr->sadb_address_len = PFKEY_UNIT64(size); + saaddr->sadb_address_exttype = exttype; + saaddr->sadb_address_proto = proto; + switch (addr->sa_family) { +#ifdef INET + case AF_INET: + saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3; + break; +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + saaddr->sadb_address_prefixlen = 
sizeof(struct in6_addr) << 3; + break; +#endif /* INET6 */ + default: + log(LOG_DEBUG, + "%s: Invalid address family: %d.\n", + __func__, addr->sa_family); + break; + } + saaddr->sadb_address_reserved = 0; + + return size; +} + +static inline size_t +if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src, + int proto) +{ + + return if_ipsec_set_sadb_addr(sasrc, src, proto, + SADB_EXT_ADDRESS_SRC); +} + +static inline size_t +if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst, + int proto) +{ + + return if_ipsec_set_sadb_addr(sadst, dst, proto, + SADB_EXT_ADDRESS_DST); +} + +static inline size_t +if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl, + struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id, + uint8_t level) +{ + size_t size; + + KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL); + + size = sizeof(*xpl); + if (policy == IPSEC_POLICY_IPSEC) { + size += PFKEY_ALIGN8(sizeof(*xisr)); + } + xpl->sadb_x_policy_len = PFKEY_UNIT64(size); + xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + xpl->sadb_x_policy_type = policy; + xpl->sadb_x_policy_dir = dir; + xpl->sadb_x_policy_reserved = 0; + xpl->sadb_x_policy_id = id; + xpl->sadb_x_policy_reserved2 = 0; + + if (policy == IPSEC_POLICY_IPSEC) { + xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr)); + xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP; + xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT; + xisr->sadb_x_ipsecrequest_level = level; + xisr->sadb_x_ipsecrequest_reqid = key_newreqid(); + } + + return size; +} + +static inline void +if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype) +{ + + KASSERT(msg != NULL); + + msg->sadb_msg_version = PF_KEY_V2; + msg->sadb_msg_type = msgtype; + msg->sadb_msg_errno = 0; + msg->sadb_msg_satype = SADB_SATYPE_UNSPEC; + msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen; + msg->sadb_msg_reserved = 0; + msg->sadb_msg_seq = 0; /* XXXX */ + msg->sadb_msg_pid = 0; /* XXXX */ 
+} + +static inline void +if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen) +{ + + if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD); +} + +static inline void +if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen) +{ + + if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2); +} + +static int +if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr, + in_port_t port) +{ + int error = 0; + + sockaddr_copy(addrport, addr->sa_len, addr); + + switch (addr->sa_family) { +#ifdef INET + case AF_INET: { + struct sockaddr_in *sin = satosin(addrport); + sin->sin_port = htons(port); + break; + } +#endif /* INET */ +#ifdef INET6 + case AF_INET6: { + struct sockaddr_in6 *sin6 = satosin6(addrport); + sin6->sin6_port = htons(port); + break; + } +#endif /* INET6 */ + default: + log(LOG_DEBUG, + "%s: Invalid address family: %d.\n", + __func__, addr->sa_family); + error = EINVAL; + } + + return error; +} + +static struct secpolicy * +if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport, + struct sockaddr *dst, in_port_t dport, + int dir, int proto, int level, u_int policy) +{ + struct sadb_msg msg; + struct sadb_address xsrc, xdst; + struct sadb_x_policy xpl; + struct sadb_x_ipsecrequest xisr; + size_t size; + size_t padlen; + uint16_t ext_msg_len = 0; + struct mbuf *m; + + memset(&msg, 0, sizeof(msg)); + memset(&xsrc, 0, sizeof(xsrc)); + memset(&xdst, 0, sizeof(xdst)); + memset(&xpl, 0, sizeof(xpl)); + memset(&xisr, 0, sizeof(xisr)); + + MGETHDR(m, M_WAITOK, MT_DATA); + + size = if_ipsec_set_sadb_src(&xsrc, src, proto); + ext_msg_len += PFKEY_UNIT64(size); + size = if_ipsec_set_sadb_dst(&xdst, dst, proto); + ext_msg_len += PFKEY_UNIT64(size); + size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level); + ext_msg_len += PFKEY_UNIT64(size); + if_ipsec_set_sadb_msg_add(&msg, ext_msg_len); + + /* build PF_KEY message */ + + m->m_len = sizeof(msg); + m_copyback(m, 0, sizeof(msg), &msg); + + if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc)); 
+ if (sport == 0) { + if_ipsec_add_mbuf(m, src, src->sa_len); + } else { + struct sockaddr addrport; + + if_ipsec_set_addr_port(&addrport, src, sport); + if_ipsec_add_mbuf(m, &addrport, addrport.sa_len); + } + padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len) + - (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len)); + if_ipsec_add_pad(m, padlen); + + if_ipsec_add_mbuf(m, &xdst, sizeof(xdst)); + if (dport == 0) { + if_ipsec_add_mbuf(m, dst, dst->sa_len); + } else { + struct sockaddr addrport; + + if_ipsec_set_addr_port(&addrport, dst, dport); + if_ipsec_add_mbuf(m, &addrport, addrport.sa_len); + } + padlen = PFKEY_UNUNIT64(xdst.sadb_address_len) + - (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len)); + if_ipsec_add_pad(m, padlen); + + if_ipsec_add_mbuf(m, &xpl, sizeof(xpl)); + if (policy == IPSEC_POLICY_IPSEC) + if_ipsec_add_mbuf(m, &xisr, sizeof(xisr)); + + /* key_kpi_spdadd() has already done KEY_SP_REF(). */ + return key_kpi_spdadd(m); +} + +static int +if_ipsec_add_sp(struct ipsec_variant *var, + struct sockaddr *src, in_port_t sport, + struct sockaddr *dst, in_port_t dport) +{ + struct ipsec_softc *sc = var->iv_softc; + int level; + u_int v6policy; + + /* + * must delete sp before add it. + */ + KASSERT(IV_SP_IN(var) == NULL); + KASSERT(IV_SP_OUT(var) == NULL); + KASSERT(IV_SP_IN6(var) == NULL); + KASSERT(IV_SP_OUT6(var) == NULL); + + /* + * can be shared? 
+ */ + if (if_ipsec_share_sp(var)) + return 0; + + if (if_ipsec_nat_t(sc)) + level = IPSEC_LEVEL_REQUIRE; + else + level = IPSEC_LEVEL_UNIQUE; + + if (if_ipsec_fwd_ipv6(sc)) + v6policy = IPSEC_POLICY_IPSEC; + else + v6policy = IPSEC_POLICY_DISCARD; + + IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport, + IPSEC_DIR_INBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC); + if (IV_SP_IN(var) == NULL) + goto fail; + IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport, + IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level, IPSEC_POLICY_IPSEC); + if (IV_SP_OUT(var) == NULL) + goto fail; + IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport, + IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy); + if (IV_SP_IN6(var) == NULL) + goto fail; + IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport, + IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy); + if (IV_SP_OUT6(var) == NULL) + goto fail; + + return 0; + +fail: + if (IV_SP_IN6(var) != NULL) { + if_ipsec_del_sp0(IV_SP_IN6(var)); + IV_SP_IN6(var) = NULL; + } + if (IV_SP_OUT(var) != NULL) { + if_ipsec_del_sp0(IV_SP_OUT(var)); + IV_SP_OUT(var) = NULL; + } + if (IV_SP_IN(var) != NULL) { + if_ipsec_del_sp0(IV_SP_IN(var)); + IV_SP_IN(var) = NULL; + } + + return EEXIST; +} + +static int +if_ipsec_del_sp0(struct secpolicy *sp) +{ + struct sadb_msg msg; + struct sadb_x_policy xpl; + size_t size; + uint16_t ext_msg_len = 0; + int error; + struct mbuf *m; + + if (sp == NULL) + return 0; + + memset(&msg, 0, sizeof(msg)); + memset(&xpl, 0, sizeof(xpl)); + + MGETHDR(m, M_WAITOK, MT_DATA); + + size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, sp->id, 0); + ext_msg_len += PFKEY_UNIT64(size); + + if_ipsec_set_sadb_msg_del(&msg, ext_msg_len); + + m->m_len = sizeof(msg); + m_copyback(m, 0, sizeof(msg), &msg); + + if_ipsec_add_mbuf(m, &xpl, sizeof(xpl)); + + /* unreference correspond to key_kpi_spdadd(). 
*/ + KEY_SP_UNREF(&sp); + error = key_kpi_spddelete2(m); + if (error != 0) { + log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n", + __func__, sp->id, error); + } + return error; +} + +static void +if_ipsec_del_sp(struct ipsec_variant *var) +{ + + /* are the SPs shared? */ + if (if_ipsec_unshare_sp(var)) + return; + + (void)if_ipsec_del_sp0(IV_SP_OUT(var)); + (void)if_ipsec_del_sp0(IV_SP_IN(var)); + (void)if_ipsec_del_sp0(IV_SP_OUT6(var)); + (void)if_ipsec_del_sp0(IV_SP_IN6(var)); + IV_SP_IN(var) = NULL; + IV_SP_IN6(var) = NULL; + IV_SP_OUT(var) = NULL; + IV_SP_OUT6(var) = NULL; +} + +static int +if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar, + struct ipsec_variant *nvar) +{ + in_port_t src_port = 0; + in_port_t dst_port = 0; + struct sockaddr *src; + struct sockaddr *dst; + int error = 0; + + KASSERT(mutex_owned(&sc->ipsec_lock)); + + if_ipsec_del_sp(ovar); + + src = nvar->iv_psrc; + dst = nvar->iv_pdst; + if (if_ipsec_nat_t(sc)) { + /* NAT-T enabled */ + src_port = nvar->iv_sport; + dst_port = nvar->iv_dport; + } + if (src && dst) + error = if_ipsec_add_sp(nvar, src, src_port, dst, dst_port); + + return error; +} + +/* + * ipsec_variant and its SPs update API. + * + * Assumption: + * reader side dereferences sc->ipsec_var in reader critical section only, + * that is, all of reader sides do not reader the sc->ipsec_var after + * pserialize_perform(). + */ +static int +if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar, + struct ipsec_variant *nullvar) +{ + struct ifnet *ifp = &sc->ipsec_if; + struct ipsec_variant *ovar = sc->ipsec_var; + int error; + + KASSERT(mutex_owned(&sc->ipsec_lock)); + + /* + * To keep consistency between ipsec(4) I/F settings and SPs, + * we stop packet processing while replacing SPs, that is, we set + * "null" config variant to sc->ipsec_var. 
+ */ + sc->ipsec_var = nullvar; + pserialize_perform(ipsec_psz); + psref_target_destroy(&ovar->iv_psref, iv_psref_class); + + error = if_ipsec_replace_sp(sc, ovar, nvar); + if (!error) + sc->ipsec_var = nvar; + else { + sc->ipsec_var = ovar; /* rollback */ + psref_target_init(&ovar->iv_psref, iv_psref_class); + } + + pserialize_perform(ipsec_psz); + psref_target_destroy(&nullvar->iv_psref, iv_psref_class); + + if (if_ipsec_variant_is_configured(sc->ipsec_var)) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + + return error; +} Index: src/sys/net/if_ipsec.h diff -u /dev/null src/sys/net/if_ipsec.h:1.1 --- /dev/null Wed Jan 10 10:56:31 2018 +++ src/sys/net/if_ipsec.h Wed Jan 10 10:56:30 2018 @@ -0,0 +1,231 @@ +/* $NetBSD: if_ipsec.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */ + +/* + * Copyright (c) 2017 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * if_ipsec.h + */ + +#ifndef _NET_IF_IPSEC_H_ +#define _NET_IF_IPSEC_H_ + +#include <sys/queue.h> +#ifdef _KERNEL +#include <sys/psref.h> +#endif + +#ifdef _KERNEL_OPT +#include "opt_inet.h" +#endif + +#include <netinet/in.h> +#include <netipsec/ipsec.h> + +#ifdef _KERNEL +/* + * This macro controls the upper limitation on nesting of ipsec tunnels. + * Since, setting a large value to this macro with a careless configuration + * may introduce system crash, we don't allow any nestings by default. + * If you need to configure nested ipsec tunnels, you can define this macro + * in your kernel configuration file. However, if you do so, please be + * careful to configure the tunnels so that it won't make a loop. + */ +#ifndef MAX_IPSEC_NEST +#define MAX_IPSEC_NEST 1 +#endif + +#define IFF_NAT_T IFF_LINK0 /* enable NAT-T */ +#define IFF_ECN IFF_LINK1 /* enable ECN */ +#define IFF_FWD_IPV6 IFF_LINK2 /* foward IPv6 packet */ + +extern struct psref_class *iv_psref_class; + +struct ipsec_variant { + struct ipsec_softc *iv_softc; + + struct sockaddr *iv_psrc; /* Physical src addr */ + struct sockaddr *iv_pdst; /* Physical dst addr */ + const struct encaptab *iv_encap_cookie4; + const struct encaptab *iv_encap_cookie6; + int (*iv_output)(struct ipsec_variant *, int, struct mbuf *); + in_port_t iv_sport; + in_port_t iv_dport; + + /* + * IPsec SPs + * Don't change directly, use if_ipsec_replace_sp(). 
+ */ + struct secpolicy *iv_sp[IPSEC_DIR_MAX]; + struct secpolicy *iv_sp6[IPSEC_DIR_MAX]; + + struct psref_target iv_psref; +}; + +struct ipsec_ro { + struct route ir_ro; + kmutex_t ir_lock; +}; + +struct ipsec_softc { + struct ifnet ipsec_if; /* common area - must be at the top */ + percpu_t *ipsec_ro_percpu; /* struct ipsec_ro */ + struct ipsec_variant *ipsec_var; /* + * reader must use ipsec_getref_variant() + * instead of direct dereference. + */ + kmutex_t ipsec_lock; /* writer lock for ipsec_var */ + + LIST_ENTRY(ipsec_softc) ipsec_list; /* list of all gifs */ +}; + +#define IPSEC_MTU (1280) /* Default MTU */ +#define IPSEC_MTU_MIN (1280) /* Minimum MTU */ +#define IPSEC_MTU_MAX (8192) /* Maximum MTU */ + +#define IV_SP_IN(x) ((x)->iv_sp[IPSEC_DIR_INBOUND]) +#define IV_SP_IN6(x) ((x)->iv_sp6[IPSEC_DIR_INBOUND]) +#define IV_SP_OUT(x) ((x)->iv_sp[IPSEC_DIR_OUTBOUND]) +#define IV_SP_OUT6(x) ((x)->iv_sp6[IPSEC_DIR_OUTBOUND]) + +static inline bool +if_ipsec_variant_is_configured(struct ipsec_variant *var) +{ + + return (var->iv_psrc != NULL && var->iv_pdst != NULL); +} + +static inline bool +if_ipsec_variant_is_unconfigured(struct ipsec_variant *var) +{ + + return (var->iv_psrc == NULL || var->iv_pdst == NULL); +} + +static inline void +if_ipsec_copy_variant(struct ipsec_variant *dst, struct ipsec_variant *src) +{ + + dst->iv_softc = src->iv_softc; + dst->iv_psrc = src->iv_psrc; + dst->iv_pdst = src->iv_pdst; + dst->iv_encap_cookie4 = src->iv_encap_cookie4; + dst->iv_encap_cookie6 = src->iv_encap_cookie6; + dst->iv_output = src->iv_output; + dst->iv_sport = src->iv_sport; + dst->iv_dport = src->iv_dport; +} + +static inline void +if_ipsec_clear_config(struct ipsec_variant *var) +{ + + var->iv_psrc = NULL; + var->iv_pdst = NULL; + var->iv_encap_cookie4 = NULL; + var->iv_encap_cookie6 = NULL; + var->iv_output = NULL; + var->iv_sport = 0; + var->iv_dport = 0; +} + +/* + * Get ipsec_variant from ipsec_softc. + * + * Never return NULL by contract. 
+ * ipsec_variant itself is protected not to be freed by iv_psref. + * Once a reader dereferences sc->ipsec_var by this API, the reader must not + * re-dereference from sc->ipsec_var. + */ +static inline struct ipsec_variant * +if_ipsec_getref_variant(struct ipsec_softc *sc, struct psref *psref) +{ + struct ipsec_variant *var; + int s; + + s = pserialize_read_enter(); + var = sc->ipsec_var; + KASSERT(var != NULL); + membar_datadep_consumer(); + psref_acquire(psref, &var->iv_psref, iv_psref_class); + pserialize_read_exit(s); + + return var; +} + +static inline void +if_ipsec_putref_variant(struct ipsec_variant *var, struct psref *psref) +{ + + KASSERT(var != NULL); + psref_release(psref, &var->iv_psref, iv_psref_class); +} + +static inline bool +if_ipsec_heldref_variant(struct ipsec_variant *var) +{ + + return psref_held(&var->iv_psref, iv_psref_class); +} + +void ipsecifattach(int); +int if_ipsec_encap_func(struct mbuf *, int, int, void *); +void if_ipsec_input(struct mbuf *, int, struct ifnet *); +int if_ipsec_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, const struct rtentry *); +int if_ipsec_ioctl(struct ifnet *, u_long, void *); +#endif /* _KERNEL */ + +/* + * sharing SP note: + * When ipsec(4) I/Fs use NAT-T, they can use the same src and dst address pair + * as long as they use different ports. However, SPD cannot have the SPs which + * use the same src and dst address pair and the same policy. So, such ipsec(4) + * I/Fs share the same SPs. + * To avoid race between ipsec0 set_tunnel/delete_tunnel and ipsec1 + * set_tunnel/delete_tunnel, a global lock is needed. See also the following + * locking notes. + * + * Locking notes: + * + ipsec_softcs.list is protected by ipsec_softcs.lock (an adaptive mutex) + * ipsec_softc_list is list of all ipsec_softcs. It is used by ioctl + * context only. 
+ * + ipsec_softc->ipsec_var is protected by + * - ipsec_softc->ipsec_lock (an adaptive mutex) for writer + * - ipsec_var->iv_psref for reader + * ipsec_softc->ipsec_var is used for variant values while the ipsec tunnel + * exists. + * + struct ipsec_ro->ir_ro is protected by struct ipsec_ro->ir_lock. + * This lock is required to exclude softnet/0 lwp(such as output + * processing softint) and processing lwp(such as DAD timer processing). + * + if_ipsec_share_sp() and if_ipsec_unshare_sp() operations are serialized by + * encap_lock + * This only need to be global lock, need not to be encap_lock. + * + * Locking order: + * - encap_lock => ipsec_softc->ipsec_lock => ipsec_softcs.lock + */ +#endif /* _NET_IF_IPSEC_H_ */ Index: src/sys/netipsec/ipsecif.c diff -u /dev/null src/sys/netipsec/ipsecif.c:1.1 --- /dev/null Wed Jan 10 10:56:31 2018 +++ src/sys/netipsec/ipsecif.c Wed Jan 10 10:56:30 2018 @@ -0,0 +1,933 @@ +/* $NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */ + +/* + * Copyright (c) 2017 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $"); + +#ifdef _KERNEL_OPT +#include "opt_inet.h" +#include "opt_ipsec.h" +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/mbuf.h> +#include <sys/errno.h> +#include <sys/ioctl.h> +#include <sys/syslog.h> +#include <sys/kernel.h> + +#include <net/if.h> +#include <net/route.h> + +#include <netinet/in.h> +#include <netinet/in_systm.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/in_var.h> +#include <netinet/ip_encap.h> +#include <netinet/ip_ecn.h> +#include <netinet/ip_private.h> +#include <netinet/udp.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/ip6_private.h> +#include <netinet6/in6_var.h> +#include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */ +#include <netinet/ip_ecn.h> +#endif + +#include <netipsec/key.h> +#include <netipsec/ipsecif.h> + +#include <net/if_ipsec.h> + +static void ipsecif4_input(struct mbuf *, int, int, void *); +static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *); +static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *, + struct ifnet *); + +#ifdef INET6 +static int ipsecif6_input(struct mbuf **, int *, int, void *); +static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *); +static int 
ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *, + struct ifnet *); +#endif + +static int ip_ipsec_ttl = IPSEC_TTL; +static int ip_ipsec_copy_tos = 0; +#ifdef INET6 +static int ip6_ipsec_hlim = IPSEC_HLIM; +static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */ +static int ip6_ipsec_copy_tos = 0; +#endif + +struct encapsw ipsecif4_encapsw = { + .encapsw4 = { + .pr_input = ipsecif4_input, + .pr_ctlinput = NULL, + } +}; + +#ifdef INET6 +static const struct encapsw ipsecif6_encapsw; +#endif + +/* + * Prepend the outer IPv4 header for the tunnel described by var. + * The mbuf is freed and NULL is returned when either tunnel endpoint is a + * null-host, broadcast or multicast address; NULL is also returned when + * M_PREPEND()/m_pullup() leave no mbuf. + */ +static struct mbuf * +ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m, + uint8_t proto, uint8_t tos) +{ + struct ip *ip; + struct sockaddr_in *src, *dst; + + src = satosin(var->iv_psrc); + dst = satosin(var->iv_pdst); + + /* reject a null-host or broadcast address on either endpoint */ + if (in_nullhost(src->sin_addr) || in_nullhost(dst->sin_addr) || + src->sin_addr.s_addr == INADDR_BROADCAST || + dst->sin_addr.s_addr == INADDR_BROADCAST) { + m_freem(m); + return NULL; + } + m->m_flags &= ~M_BCAST; + + if (IN_MULTICAST(src->sin_addr.s_addr) || + IN_MULTICAST(dst->sin_addr.s_addr)) { + m_freem(m); + return NULL; + } + + M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); + if (m && M_UNWRITABLE(m, sizeof(struct ip))) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) + return NULL; + + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_off = htons(0); + ip->ip_id = 0; + ip->ip_hl = sizeof(*ip) >> 2; + if (ip_ipsec_copy_tos) + ip->ip_tos = tos; + else + ip->ip_tos = 0; + ip->ip_sum = 0; + ip->ip_src = src->sin_addr; + ip->ip_dst = dst->sin_addr; + ip->ip_p = proto; + ip->ip_ttl = ip_ipsec_ttl; + ip->ip_len = htons(m->m_pkthdr.len); +#ifndef IPSEC_TX_TOS_CLEAR + struct ifnet *ifp = &var->iv_softc->ipsec_if; + if (ifp->if_flags & IFF_ECN) + ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos); + else + ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos); +#endif + + return m; +} + +static int +ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr) +{ + struct ip ip0; + struct ip *ip; + int mtu; + 
struct secasvar *sav; + + sav = key_lookup_sa_bysaidx(&isr->saidx); + if (sav == NULL) + return 0; + + if (!(sav->natt_type & UDP_ENCAP_ESPINUDP) && + !(sav->natt_type & UDP_ENCAP_ESPINUDP_NON_IKE)) { + mtu = 0; + goto out; + } + + if (m->m_len < sizeof(struct ip)) { + m_copydata(m, 0, sizeof(ip0), &ip0); + ip = &ip0; + + } else { + ip = mtod(m, struct ip *); + } + mtu = sav->esp_frag; + if (ntohs(ip->ip_len) <= mtu) + mtu = 0; + +out: + KEY_SA_UNREF(&sav); + return mtu; +} + +static struct mbuf * +ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0) +{ + const struct ip *ip; + int proto; + int tos; + + KASSERT(proto0 != NULL); + KASSERT(tos0 != NULL); + + switch (family) { + case AF_INET: + proto = IPPROTO_IPV4; + if (m->m_len < sizeof(*ip)) { + m = m_pullup(m, sizeof(*ip)); + if (!m) { + *tos0 = 0; + *proto0 = 0; + return NULL; + } + } + ip = mtod(m, const struct ip *); + tos = ip->ip_tos; + /* TODO: support ALTQ for innner packet */ + break; +#ifdef INET6 + case AF_INET6: { + const struct ip6_hdr *ip6; + proto = IPPROTO_IPV6; + if (m->m_len < sizeof(*ip6)) { + m = m_pullup(m, sizeof(*ip6)); + if (!m) { + *tos0 = 0; + *proto0 = 0; + return NULL; + } + } + ip6 = mtod(m, const struct ip6_hdr *); + tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + /* TODO: support ALTQ for innner packet */ + break; + } +#endif /* INET6 */ + default: + *tos0 = 0; + *proto0 = 0; + return NULL; + } + + *proto0 = proto; + *tos0 = tos; + return m; +} + +static int +ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu) +{ + struct ifnet *ifp = &var->iv_softc->ipsec_if; + struct mbuf *next; + struct m_tag *mtag; + int error; + + KASSERT(if_ipsec_heldref_variant(var)); + + mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL); + if (mtag) + m_tag_delete(m, mtag); + + error = ip_fragment(m, ifp, mtu); + if (error) + return error; + + for (error = 0; m; m = next) { + next = m->m_nextpkt; + m->m_next = NULL; + if (error) { + m_freem(m); + 
continue; + } + + error = ipsecif4_output(var, family, m); + } + if (error == 0) + IP_STATINC(IP_STAT_FRAGMENTED); + + return error; +} + +int +ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var) +{ + struct m_tag *mtag; + struct sockaddr_in *src, *dst; + u_int16_t src_port = 0; + u_int16_t dst_port = 0; + + KASSERT(var != NULL); + + src = satosin(var->iv_psrc); + dst = satosin(var->iv_pdst); + mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL); + if (mtag) { + u_int16_t *ports; + + ports = (u_int16_t *)(mtag + 1); + src_port = ports[0]; + dst_port = ports[1]; + } + + /* address match */ + if (src->sin_addr.s_addr != ip->ip_dst.s_addr || + dst->sin_addr.s_addr != ip->ip_src.s_addr) + return 0; + + /* UDP encap? */ + if (mtag == NULL && var->iv_sport == 0 && var->iv_dport == 0) + goto match; + + /* port match */ + if (src_port != var->iv_dport || + dst_port != var->iv_sport) { +#ifdef DEBUG + printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n", + __func__, ntohs(src_port), ntohs(dst_port), + ntohs(var->iv_sport), ntohs(var->iv_dport)); +#endif + return 0; + } + +match: + /* + * hide NAT-T information from encapsulated traffics. + * they don't know about IPsec. + */ + if (mtag) + m_tag_delete(m, mtag); + return sizeof(src->sin_addr) + sizeof(dst->sin_addr); +} + +static int +ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m) +{ + struct secpolicy *sp = NULL; + u_int8_t tos; + int proto; + int error; + int mtu; + u_long sa_mtu = 0; + + KASSERT(if_ipsec_heldref_variant(var)); + KASSERT(if_ipsec_variant_is_configured(var)); + KASSERT(var->iv_psrc->sa_family == AF_INET); + KASSERT(var->iv_pdst->sa_family == AF_INET); + + sp = IV_SP_OUT(var); + KASSERT(sp != NULL); + /* + * The SPs in ipsec_variant are prevented from freed by + * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here. 
+ */ + + KASSERT(sp->policy != IPSEC_POLICY_NONE); + KASSERT(sp->policy != IPSEC_POLICY_ENTRUST); + KASSERT(sp->policy != IPSEC_POLICY_BYPASS); + if(sp->policy != IPSEC_POLICY_IPSEC) { + struct ifnet *ifp = &var->iv_softc->ipsec_if; + m_freem(m); + IF_DROP(&ifp->if_snd); + return 0; + } + + /* get flowinfo */ + m = ipsecif4_flowinfo(m, family, &proto, &tos); + if (m == NULL) { + error = ENETUNREACH; + goto done; + } + + /* prepend new IP header */ + m = ipsecif4_prepend_hdr(var, m, proto, tos); + if (m == NULL) { + error = ENETUNREACH; + goto done; + } + + /* + * Normal netipsec's NAT-T fragmentation is done in ip_output(). + * See "natt_frag" processing. + * However, ipsec(4) interface's one is not done in the same way, + * so we must do NAT-T fragmentation by own code. + */ + /* NAT-T ESP fragmentation */ + mtu = ipsecif4_needfrag(m, sp->req); + if (mtu > 0) + return ipsecif4_fragout(var, family, m, mtu); + + /* IPsec output */ + IP_STATINC(IP_STAT_LOCALOUT); + error = ipsec4_process_packet(m, sp->req, &sa_mtu); + if (error == ENOENT) + error = 0; + /* + * frangmentation is already done in ipsecif4_fragout(), + * so ipsec4_process_packet() must not do fragmentation here. 
+ */ + KASSERT(error != 0 || sa_mtu == 0); + +done: + return error; +} + +#ifdef INET6 +static int +ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m) +{ + struct ifnet *ifp = &var->iv_softc->ipsec_if; + struct ipsec_softc *sc = ifp->if_softc; + struct ipsec_ro *iro; + struct rtentry *rt; + struct sockaddr_in6 *sin6_src; + struct sockaddr_in6 *sin6_dst; + struct ip6_hdr *ip6; + int proto, error; + u_int8_t itos, otos; + union { + struct sockaddr dst; + struct sockaddr_in6 dst6; + } u; + + KASSERT(if_ipsec_heldref_variant(var)); + KASSERT(if_ipsec_variant_is_configured(var)); + + sin6_src = satosin6(var->iv_psrc); + sin6_dst = satosin6(var->iv_pdst); + + KASSERT(sin6_src->sin6_family == AF_INET6); + KASSERT(sin6_dst->sin6_family == AF_INET6); + + switch (family) { +#ifdef INET + case AF_INET: + { + struct ip *ip; + + proto = IPPROTO_IPV4; + if (m->m_len < sizeof(*ip)) { + m = m_pullup(m, sizeof(*ip)); + if (!m) + return ENOBUFS; + } + ip = mtod(m, struct ip *); + itos = ip->ip_tos; + /* + * TODO: + *support ALTQ for innner packet + */ + break; + } +#endif /* INET */ + case AF_INET6: + { + struct ip6_hdr *xip6; + proto = IPPROTO_IPV6; + if (m->m_len < sizeof(*xip6)) { + m = m_pullup(m, sizeof(*xip6)); + if (!m) + return ENOBUFS; + } + xip6 = mtod(m, struct ip6_hdr *); + itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff; + /* TODO: + * support ALTQ for innner packet + */ + break; + } + default: + m_freem(m); + return EAFNOSUPPORT; + } + + /* prepend new IP header */ + M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); + if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr))) + m = m_pullup(m, sizeof(struct ip6_hdr)); + if (m == NULL) + return ENOBUFS; + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_plen = htons((u_short)m->m_pkthdr.len); + ip6->ip6_nxt = proto; + ip6->ip6_hlim = ip6_ipsec_hlim; + ip6->ip6_src = sin6_src->sin6_addr; + /* bidirectional configured tunnel 
mode */ + if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) { + ip6->ip6_dst = sin6_dst->sin6_addr; + } else { + m_freem(m); + return ENETUNREACH; + } +#ifndef IPSEC_TX_TOS_CLEAR + if (ifp->if_flags & IFF_ECN) + ip_ecn_ingress(ECN_ALLOWED, &otos, &itos); + else + ip_ecn_ingress(ECN_NOCARE, &otos, &itos); + + if (!ip6_ipsec_copy_tos) + otos = 0; +#else + if (ip6_ipsec_copy_tos) + otos = itos; + else + otos = 0; +#endif + ip6->ip6_flow &= ~ntohl(0xff00000); + ip6->ip6_flow |= htonl((u_int32_t)otos << 20); + + sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0); + + iro = percpu_getref(sc->ipsec_ro_percpu); + mutex_enter(&iro->ir_lock); + if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) { + mutex_exit(&iro->ir_lock); + percpu_putref(sc->ipsec_ro_percpu); + m_freem(m); + return ENETUNREACH; + } + + if (rt->rt_ifp == ifp) { + rtcache_unref(rt, &iro->ir_ro); + rtcache_free(&iro->ir_ro); + mutex_exit(&iro->ir_lock); + percpu_putref(sc->ipsec_ro_percpu); + m_freem(m); + return ENETUNREACH; + } + rtcache_unref(rt, &iro->ir_ro); + + /* + * force fragmentation to minimum MTU, to avoid path MTU discovery. + * it is too painful to ask for resend of inner packet, to achieve + * path MTU discovery for encapsulated packets. + */ + error = ip6_output(m, 0, &iro->ir_ro, + ip6_ipsec_pmtu ? 
0 : IPV6_MINMTU, 0, NULL, NULL); + if (error) + rtcache_free(&iro->ir_ro); + + mutex_exit(&iro->ir_lock); + percpu_putref(sc->ipsec_ro_percpu); + + return error; +} +#endif /* INET6 */ + +static void +ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg) +{ + struct ifnet *ipsecp; + struct ipsec_softc *sc = eparg; + struct ipsec_variant *var; + const struct ip *ip; + int af; +#ifndef IPSEC_TX_TOS_CLEAR + u_int8_t otos; +#endif + struct psref psref_rcvif; + struct psref psref_var; + struct ifnet *rcvif; + + KASSERT(sc != NULL); + + ipsecp = &sc->ipsec_if; + if ((ipsecp->if_flags & IFF_UP) == 0) { + m_freem(m); + ip_statinc(IP_STAT_NOIPSEC); + return; + } + + var = if_ipsec_getref_variant(sc, &psref_var); + if (if_ipsec_variant_is_unconfigured(var)) { + if_ipsec_putref_variant(var, &psref_var); + m_freem(m); + ip_statinc(IP_STAT_NOIPSEC); + return; + } + + ip = mtod(m, const struct ip *); + + rcvif = m_get_rcvif_psref(m, &psref_rcvif); + if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) { + m_put_rcvif_psref(rcvif, &psref_rcvif); + if_ipsec_putref_variant(var, &psref_var); + m_freem(m); + ip_statinc(IP_STAT_NOIPSEC); + return; + } + m_put_rcvif_psref(rcvif, &psref_rcvif); + if_ipsec_putref_variant(var, &psref_var); +#ifndef IPSEC_TX_TOS_CLEAR + otos = ip->ip_tos; +#endif + m_adj(m, off); + + switch (proto) { + case IPPROTO_IPV4: + { + struct ip *xip; + af = AF_INET; + if (M_UNWRITABLE(m, sizeof(*xip))) { + m = m_pullup(m, sizeof(*xip)); + if (!m) + return; + } + xip = mtod(m, struct ip *); +#ifndef IPSEC_TX_TOS_CLEAR + if (ipsecp->if_flags & IFF_ECN) + ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos); + else + ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos); +#endif + break; + } +#ifdef INET6 + case IPPROTO_IPV6: + { + struct ip6_hdr *ip6; + u_int8_t itos; + af = AF_INET6; + if (M_UNWRITABLE(m, sizeof(*ip6))) { + m = m_pullup(m, sizeof(*ip6)); + if (!m) + return; + } + ip6 = mtod(m, struct ip6_hdr *); + itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 
+#ifndef IPSEC_TX_TOS_CLEAR + if (ipsecp->if_flags & IFF_ECN) + ip_ecn_egress(ECN_ALLOWED, &otos, &itos); + else + ip_ecn_egress(ECN_NOCARE, &otos, &itos); +#endif + ip6->ip6_flow &= ~htonl(0xff << 20); + ip6->ip6_flow |= htonl((u_int32_t)itos << 20); + break; + } +#endif /* INET6 */ + default: + ip_statinc(IP_STAT_NOIPSEC); + m_freem(m); + return; + } + if_ipsec_input(m, af, ipsecp); + + return; +} + +/* + * validate and filter the pakcet + */ +static int +ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var, + struct ifnet *ifp) +{ + struct sockaddr_in *src, *dst; + + src = satosin(var->iv_psrc); + dst = satosin(var->iv_pdst); + + return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr); +} + +#ifdef INET6 +static int +ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg) +{ + struct mbuf *m = *mp; + struct ifnet *ipsecp; + struct ipsec_softc *sc = eparg; + struct ipsec_variant *var; + struct ip6_hdr *ip6; + int af = 0; +#ifndef IPSEC_TX_TOS_CLEAR + u_int32_t otos; +#endif + struct psref psref_rcvif; + struct psref psref_var; + struct ifnet *rcvif; + + KASSERT(eparg != NULL); + + ipsecp = &sc->ipsec_if; + if ((ipsecp->if_flags & IFF_UP) == 0) { + m_freem(m); + IP6_STATINC(IP6_STAT_NOIPSEC); + return IPPROTO_DONE; + } + + var = if_ipsec_getref_variant(sc, &psref_var); + if (if_ipsec_variant_is_unconfigured(var)) { + if_ipsec_putref_variant(var, &psref_var); + m_freem(m); + IP6_STATINC(IP6_STAT_NOIPSEC); + return IPPROTO_DONE; + } + + ip6 = mtod(m, struct ip6_hdr *); + + rcvif = m_get_rcvif_psref(m, &psref_rcvif); + if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) { + m_put_rcvif_psref(rcvif, &psref_rcvif); + if_ipsec_putref_variant(var, &psref_var); + m_freem(m); + IP6_STATINC(IP6_STAT_NOIPSEC); + return IPPROTO_DONE; + } + m_put_rcvif_psref(rcvif, &psref_rcvif); + if_ipsec_putref_variant(var, &psref_var); + +#ifndef IPSEC_TX_TOS_CLEAR + otos = ip6->ip6_flow; +#endif + m_adj(m, *offp); + + switch (proto) { +#ifdef INET + case 
IPPROTO_IPV4: + { + af = AF_INET; +#ifndef IPSEC_TX_TOS_CLEAR + struct ip *ip; + u_int8_t otos8; + otos8 = (ntohl(otos) >> 20) & 0xff; + + if (M_UNWRITABLE(m, sizeof(*ip))) { + m = m_pullup(m, sizeof(*ip)); + if (!m) + return IPPROTO_DONE; + } + ip = mtod(m, struct ip *); + if (ipsecp->if_flags & IFF_ECN) + ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos); + else + ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos); +#endif + break; + } +#endif /* INET */ + case IPPROTO_IPV6: + { + af = AF_INET6; +#ifndef IPSEC_TX_TOS_CLEAR + struct ip6_hdr *xip6; + + if (M_UNWRITABLE(m, sizeof(*xip6))) { + m = m_pullup(m, sizeof(*xip6)); + if (!m) + return IPPROTO_DONE; + } + xip6 = mtod(m, struct ip6_hdr *); + if (ipsecp->if_flags & IFF_ECN) + ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow); + else + ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow); +#endif + /* + * break must stay outside the #ifndef; otherwise an + * IPSEC_TX_TOS_CLEAR build falls through to "default" + * and drops the packet. + */ + break; + } + default: + IP6_STATINC(IP6_STAT_NOIPSEC); + m_freem(m); + return IPPROTO_DONE; + } + + if_ipsec_input(m, af, ipsecp); + return IPPROTO_DONE; +} + +/* + * validate and filter the packet. 
+ */
+static int
+ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
+    struct ifnet *ifp)	/* ifp is not referenced in the body */
+{
+	struct sockaddr_in6 *src, *dst;
+
+	src = satosin6(var->iv_psrc);	/* configured endpoints, read as sockaddr_in6 */
+	dst = satosin6(var->iv_pdst);
+
+	/* The result of in6_tunnel_validate() is returned unchanged. */
+	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
+}
+#endif /* INET6 */
+/*
+ * ipsecif4_attach: register the variant with encap_attach_func() for
+ * AF_INET.
+ *
+ * The variant must already be configured (asserted).  Returns EALREADY
+ * when iv_encap_cookie4 is already set, EEXIST when encap_attach_func()
+ * returns NULL, and 0 after the cookie has been stored and iv_output has
+ * been pointed at ipsecif4_output.  The softc taken from var->iv_softc
+ * is handed to encap_attach_func() as its last argument.
+ */
+int
+ipsecif4_attach(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	if (var->iv_encap_cookie4 != NULL)
+		return EALREADY;
+	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
+	    &ipsecif4_encapsw, sc);
+	if (var->iv_encap_cookie4 == NULL)
+		return EEXIST;
+
+	var->iv_output = ipsecif4_output;
+	return 0;
+}
+/*
+ * ipsecif4_detach: undo ipsecif4_attach().
+ *
+ * A variant without an IPv4 cookie is treated as already detached and
+ * yields 0.  Otherwise iv_output is cleared first, encap_detach() is
+ * called, and its return value is returned; the cookie is reset to NULL
+ * only when encap_detach() returned 0.
+ * NOTE(review): on a non-zero encap_detach() result iv_output stays NULL
+ * while the cookie is kept -- confirm callers expect that state.
+ */
+int
+ipsecif4_detach(struct ipsec_variant *var)
+{
+	int error;
+
+	if (var->iv_encap_cookie4 == NULL)
+		return 0;
+
+	var->iv_output = NULL;
+	error = encap_detach(var->iv_encap_cookie4);
+	if (error == 0)
+		var->iv_encap_cookie4 = NULL;
+
+	return error;
+}
+
+#ifdef INET6
+int
+ipsecif6_attach(struct ipsec_variant *var)
+{
+	struct sockaddr_in6 mask6;
+	struct ipsec_softc *sc = var->iv_softc;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+	/* Unlike ipsecif4_attach(), an existing cookie is asserted, not reported. */
+	KASSERT(var->iv_encap_cookie6 == NULL);
+	/*
+	 * Build a sockaddr_in6 whose address words are all ones; the same
+	 * mask is passed to encap_attach() for both iv_psrc and iv_pdst.
+	 */
+	memset(&mask6, 0, sizeof(mask6));
+	mask6.sin6_len = sizeof(struct sockaddr_in6);
+	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
+	    mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
+
+	var->iv_encap_cookie6 = encap_attach(AF_INET6, -1,
+	    var->iv_psrc, (struct sockaddr *)&mask6,
+	    var->iv_pdst, (struct sockaddr *)&mask6,
+	    &ipsecif6_encapsw, sc);
+	if (var->iv_encap_cookie6 == NULL)
+		return EEXIST;	/* encap_attach() gave no cookie */
+
+	var->iv_output = ipsecif6_output;
+	return 0;
+}
+/*
+ * Callback handed to percpu_foreach() by ipsecif6_detach(): p is a
+ * struct ipsec_ro whose cached route is released with rtcache_free()
+ * while ir_lock is held.  arg and ci are unused.
+ */
+static void
+ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro *iro = p;
+
+	mutex_enter(&iro->ir_lock);
+	rtcache_free(&iro->ir_ro);
+	mutex_exit(&iro->ir_lock);
+}
+/*
+ * ipsecif6_detach: undo ipsecif6_attach().
+ *
+ * Asserts that an IPv6 cookie exists, runs ipsecif6_rtcache_free_pc over
+ * sc->ipsec_ro_percpu via percpu_foreach(), clears iv_output and then
+ * calls encap_detach().  The cookie is reset to NULL only when
+ * encap_detach() returned 0; that return value is the result.
+ */
+int
+ipsecif6_detach(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	int error;
+
+	KASSERT(var->iv_encap_cookie6 != NULL);
+
+	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
+
+	var->iv_output = NULL;
+	error = encap_detach(var->iv_encap_cookie6);
+	if (error == 0)
+		var->iv_encap_cookie6 = NULL;
+	return error;
+}
+/*
+ * ipsecif6_ctlinput: control-input handler; eparg is the ipsec_softc.
+ *
+ * Always returns NULL.  Nothing is done unless sa is an AF_INET6 sockaddr
+ * of exactly sizeof(struct sockaddr_in6), cmd is below PRC_NCMDS with a
+ * non-zero inet6ctlerrmap[] entry, and d supplies a non-NULL ip6c_ip6
+ * header.  PRC_HOSTDEAD resets d to NULL and therefore also ends in the
+ * early return.  Otherwise the struct ipsec_ro obtained from
+ * percpu_getref() has its route cache freed when the cached destination
+ * equals ip6->ip6_dst; the check and the free happen under ir_lock.
+ */
+void *
+ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
+{
+	struct ipsec_softc *sc = eparg;
+	struct ip6ctlparam *ip6cp = NULL;
+	struct ip6_hdr *ip6;
+	const struct sockaddr_in6 *dst6;
+	struct ipsec_ro *iro;
+
+	if (sa->sa_family != AF_INET6 ||
+	    sa->sa_len != sizeof(struct sockaddr_in6))
+		return NULL;
+
+	if ((unsigned)cmd >= PRC_NCMDS)	/* also rejects negative cmd via the cast */
+		return NULL;
+	if (cmd == PRC_HOSTDEAD)
+		d = NULL;
+	else if (inet6ctlerrmap[cmd] == 0)
+		return NULL;
+
+	/* if the parameter is from icmp6, decode it. */
+	if (d != NULL) {
+		ip6cp = (struct ip6ctlparam *)d;
+		ip6 = ip6cp->ip6c_ip6;
+	} else {
+		ip6 = NULL;
+	}
+
+	if (!ip6)
+		return NULL;
+
+	iro = percpu_getref(sc->ipsec_ro_percpu);
+	mutex_enter(&iro->ir_lock);
+	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
+	/* XXX scope */
+	if (dst6 == NULL)
+		;	/* no cached destination: nothing to compare */
+	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
+		/* flush route cache */
+		rtcache_free(&iro->ir_ro);
+
+	mutex_exit(&iro->ir_lock);
+	percpu_putref(sc->ipsec_ro_percpu);
+
+	return NULL;
+}
+/*
+ * ENCAP_PR_WRAP_CTLINPUT() is expanded for ipsecif6_ctlinput, and the
+ * #define that follows makes later uses of that name in this file (the
+ * .pr_ctlinput initializer below) refer to ipsecif6_ctlinput_wrapper.
+ * NOTE(review): the wrapper is presumably generated by the macro --
+ * confirm against its definition.
+ */
+ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
+#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
+/*
+ * IPv6 encap switch; its address is passed to encap_attach() in
+ * ipsecif6_attach().
+ */
+static const struct encapsw ipsecif6_encapsw = {
+	.encapsw6 = {
+		.pr_input = ipsecif6_input,
+		.pr_ctlinput = ipsecif6_ctlinput,
+	}
+};
+#endif /* INET6 */
Index: src/sys/netipsec/ipsecif.h
diff -u /dev/null src/sys/netipsec/ipsecif.h:1.1
--- /dev/null Wed Jan 10 10:56:31 2018
+++ src/sys/netipsec/ipsecif.h Wed Jan 10 10:56:30 2018
@@ -0,0 +1,47 @@
+/* $NetBSD: ipsecif.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETIPSEC_IPSECIF_H_
+#define _NETIPSEC_IPSECIF_H_
+
+#include <net/if_ipsec.h>
+
+#define IPSEC_TTL	64	/* presumably the IPv4 TTL for tunnel packets -- TODO confirm at use site */
+#define IPSEC_HLIM	64	/* presumably the IPv6 hop limit counterpart -- TODO confirm at use site */
+
+#ifdef _KERNEL
+int ipsecif4_encap_func(struct mbuf *, struct ip *, struct ipsec_variant *);
+int ipsecif4_attach(struct ipsec_variant *);	/* 0, EALREADY or EEXIST */
+int ipsecif4_detach(struct ipsec_variant *);	/* 0 if not attached, else encap_detach() result */
+
+int ipsecif6_attach(struct ipsec_variant *);	/* 0 or EEXIST; defined under INET6 only */
+int ipsecif6_detach(struct ipsec_variant *);	/* encap_detach() result; defined under INET6 only */
+void *ipsecif6_ctlinput(int, const struct sockaddr *, void *, void *);	/* always returns NULL */
+#endif
+
+#endif /*_NETIPSEC_IPSECIF_H_*/