Module Name:    src
Committed By:   knakahara
Date:           Wed Jan 10 10:56:31 UTC 2018

Modified Files:
        src/distrib/sets/lists/comp: mi
        src/sys/conf: files
        src/sys/net: Makefile files.net if_types.h
        src/sys/netinet: in.c in.h ip_var.h
        src/sys/netinet6: in6.c in6.h ip6_var.h
        src/sys/netipsec: Makefile files.netipsec ipsec.h key.c key.h
Added Files:
        src/sys/net: if_ipsec.c if_ipsec.h
        src/sys/netipsec: ipsecif.c ipsecif.h

Log Message:
add ipsec(4) interface, which is used for route-based VPN.

The man page and ATF tests will be added later; please see the man page for details.

reviewed by christos@n.o, joerg@n.o and ozaki-r@n.o, thanks.
https://mail-index.netbsd.org/tech-net/2017/12/18/msg006557.html


To generate a diff of this commit:
cvs rdiff -u -r1.2167 -r1.2168 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.1190 -r1.1191 src/sys/conf/files
cvs rdiff -u -r1.33 -r1.34 src/sys/net/Makefile
cvs rdiff -u -r1.13 -r1.14 src/sys/net/files.net
cvs rdiff -u -r0 -r1.1 src/sys/net/if_ipsec.c src/sys/net/if_ipsec.h
cvs rdiff -u -r1.27 -r1.28 src/sys/net/if_types.h
cvs rdiff -u -r1.213 -r1.214 src/sys/netinet/in.c
cvs rdiff -u -r1.102 -r1.103 src/sys/netinet/in.h
cvs rdiff -u -r1.121 -r1.122 src/sys/netinet/ip_var.h
cvs rdiff -u -r1.256 -r1.257 src/sys/netinet6/in6.c
cvs rdiff -u -r1.87 -r1.88 src/sys/netinet6/in6.h
cvs rdiff -u -r1.74 -r1.75 src/sys/netinet6/ip6_var.h
cvs rdiff -u -r1.5 -r1.6 src/sys/netipsec/Makefile
cvs rdiff -u -r1.12 -r1.13 src/sys/netipsec/files.netipsec
cvs rdiff -u -r1.61 -r1.62 src/sys/netipsec/ipsec.h
cvs rdiff -u -r0 -r1.1 src/sys/netipsec/ipsecif.c src/sys/netipsec/ipsecif.h
cvs rdiff -u -r1.246 -r1.247 src/sys/netipsec/key.c
cvs rdiff -u -r1.33 -r1.34 src/sys/netipsec/key.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2167 src/distrib/sets/lists/comp/mi:1.2168
--- src/distrib/sets/lists/comp/mi:1.2167	Tue Jan  9 03:31:13 2018
+++ src/distrib/sets/lists/comp/mi	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.2167 2018/01/09 03:31:13 christos Exp $
+#	$NetBSD: mi,v 1.2168 2018/01/10 10:56:30 knakahara Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp				comp-sys-root
@@ -2224,6 +2224,7 @@
 ./usr/include/net/if_hippi.h			comp-c-include
 ./usr/include/net/if_ieee1394.h			comp-c-include
 ./usr/include/net/if_ieee80211.h		comp-obsolete		obsolete
+./usr/include/net/if_ipsec.h			comp-c-include
 ./usr/include/net/if_l2tp.h			comp-c-include
 ./usr/include/net/if_llc.h			comp-c-include
 ./usr/include/net/if_media.h			comp-c-include
@@ -2382,6 +2383,7 @@
 ./usr/include/netipsec/ipcomp_var.h		comp-c-include
 ./usr/include/netipsec/ipip_var.h		comp-c-include
 ./usr/include/netipsec/ipsec.h			comp-c-include
+./usr/include/netipsec/ipsecif.h		comp-c-include
 ./usr/include/netipsec/ipsec_var.h		comp-c-include
 ./usr/include/netipsec/keydb.h			comp-obsolete		obsolete
 ./usr/include/netipsec/keysock.h		comp-c-include

Index: src/sys/conf/files
diff -u src/sys/conf/files:1.1190 src/sys/conf/files:1.1191
--- src/sys/conf/files:1.1190	Tue Jan  9 03:31:12 2018
+++ src/sys/conf/files	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: files,v 1.1190 2018/01/09 03:31:12 christos Exp $
+#	$NetBSD: files,v 1.1191 2018/01/10 10:56:30 knakahara Exp $
 #	@(#)files.newconf	7.5 (Berkeley) 5/10/93
 
 version 	20171118
@@ -1463,6 +1463,7 @@ defpseudo carp:		ifnet, ether, arp
 defpseudodev etherip:	ifnet, ether, arp
 defpseudodev l2tp:	ifnet, ether, arp
 defpseudo canloop:	ifnet
+defpseudo ipsecif:	ifnet		# named ipsecif to avoid confusion with the ipsec option itself
 
 defpseudo sequencer
 defpseudo clockctl

Index: src/sys/net/Makefile
diff -u src/sys/net/Makefile:1.33 src/sys/net/Makefile:1.34
--- src/sys/net/Makefile:1.33	Thu Feb 16 08:12:44 2017
+++ src/sys/net/Makefile	Wed Jan 10 10:56:30 2018
@@ -1,10 +1,10 @@
-#	$NetBSD: Makefile,v 1.33 2017/02/16 08:12:44 knakahara Exp $
+#	$NetBSD: Makefile,v 1.34 2018/01/10 10:56:30 knakahara Exp $
 
 INCSDIR= /usr/include/net
 
 INCS=	bpf.h bpfjit.h bpfdesc.h dlt.h ethertypes.h if.h if_arc.h if_arp.h \
 	if_atm.h if_bridgevar.h if_dl.h if_ether.h if_etherip.h if_fddi.h if_gif.h \
-	if_gre.h if_hippi.h if_ieee1394.h if_llc.h if_media.h if_mpls.h \
+	if_gre.h if_hippi.h if_ieee1394.h if_ipsec.h if_llc.h if_media.h if_mpls.h \
 	if_pflog.h if_ppp.h if_pppoe.h if_l2tp.h if_sppp.h if_srt.h if_stf.h \
 	if_tap.h if_token.h if_tun.h if_types.h if_vlanvar.h net_stats.h \
 	netisr.h pfil.h pfkeyv2.h pfvar.h ppp-comp.h ppp_defs.h radix.h \

Index: src/sys/net/files.net
diff -u src/sys/net/files.net:1.13 src/sys/net/files.net:1.14
--- src/sys/net/files.net:1.13	Thu Feb 16 08:12:44 2017
+++ src/sys/net/files.net	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: files.net,v 1.13 2017/02/16 08:12:44 knakahara Exp $
+#	$NetBSD: files.net,v 1.14 2018/01/10 10:56:30 knakahara Exp $
 
 # XXX CLEANUP
 define	net
@@ -22,6 +22,7 @@ file	net/if_gif.c			gif			needs-flag
 file	net/if_gre.c			gre			needs-flag
 file	net/if_hippisubr.c		hippi			needs-flag
 file	net/if_ieee1394subr.c		ieee1394
+file	net/if_ipsec.c			ipsec & ipsecif
 file	net/if_llatbl.c			inet | inet6
 file	net/if_l2tp.c			l2tp			needs-flag
 file	net/if_loop.c			loop

Index: src/sys/net/if_types.h
diff -u src/sys/net/if_types.h:1.27 src/sys/net/if_types.h:1.28
--- src/sys/net/if_types.h:1.27	Thu Feb 16 08:12:44 2017
+++ src/sys/net/if_types.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_types.h,v 1.27 2017/02/16 08:12:44 knakahara Exp $	*/
+/*	$NetBSD: if_types.h,v 1.28 2018/01/10 10:56:30 knakahara Exp $	*/
 
 /*
  * Copyright (c) 1989, 1993, 1994
@@ -265,5 +265,6 @@
 #define IFT_PFSYNC	0xf6		/* Packet filter state syncing */
 #define IFT_L2TP	0xf7		/* L2TPv3 I/F */
 #define IFT_CARP	0xf8		/* Common Address Redundancy Protocol */
+#define IFT_IPSEC	0xf9		/* IPsec I/F */
 
 #endif /* !_NET_IF_TYPES_H_ */

Index: src/sys/netinet/in.c
diff -u src/sys/netinet/in.c:1.213 src/sys/netinet/in.c:1.214
--- src/sys/netinet/in.c:1.213	Wed Dec 27 08:35:20 2017
+++ src/sys/netinet/in.c	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $	*/
+/*	$NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $");
 
 #include "arp.h"
 
@@ -1877,6 +1877,44 @@ out:
 	return ia;
 }
 
+int
+in_tunnel_validate(const struct ip *ip, struct in_addr src, struct in_addr dst)
+{
+	struct in_ifaddr *ia4;
+	int s;
+
+	/* check for address match */
+	if (src.s_addr != ip->ip_dst.s_addr ||
+	    dst.s_addr != ip->ip_src.s_addr)
+		return 0;
+
+	/* martian filters on outer source - NOT done in ip_input! */
+	if (IN_MULTICAST(ip->ip_src.s_addr))
+		return 0;
+	switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
+	case 0:
+	case 127:
+	case 255:
+		return 0;
+	}
+	/* reject packets with broadcast on source */
+	s = pserialize_read_enter();
+	IN_ADDRLIST_READER_FOREACH(ia4) {
+		if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
+			continue;
+		if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+			pserialize_read_exit(s);
+			return 0;
+		}
+	}
+	pserialize_read_exit(s);
+
+	/* NOTE: the packet may be dropped by uRPF */
+
+	/* return valid bytes length */
+	return sizeof(src) + sizeof(dst);
+}
+
 #if NARP > 0
 
 struct in_llentry {

Index: src/sys/netinet/in.h
diff -u src/sys/netinet/in.h:1.102 src/sys/netinet/in.h:1.103
--- src/sys/netinet/in.h:1.102	Mon Jan  1 00:51:36 2018
+++ src/sys/netinet/in.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: in.h,v 1.102 2018/01/01 00:51:36 christos Exp $	*/
+/*	$NetBSD: in.h,v 1.103 2018/01/10 10:56:30 knakahara Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1990, 1993
@@ -587,6 +587,9 @@ struct ip_moptions;
 struct in_ifaddr *in_selectsrc(struct sockaddr_in *,
 	struct route *, int, struct ip_moptions *, int *, struct psref *);
 
+struct ip;
+int in_tunnel_validate(const struct ip *, struct in_addr, struct in_addr);
+
 #define	in_hosteq(s,t)	((s).s_addr == (t).s_addr)
 #define	in_nullhost(x)	((x).s_addr == INADDR_ANY)
 

Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.121 src/sys/netinet/ip_var.h:1.122
--- src/sys/netinet/ip_var.h:1.121	Mon Dec 11 05:47:18 2017
+++ src/sys/netinet/ip_var.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_var.h,v 1.121 2017/12/11 05:47:18 ryo Exp $	*/
+/*	$NetBSD: ip_var.h,v 1.122 2018/01/10 10:56:30 knakahara Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -164,8 +164,9 @@ struct ip_pktopts {
 #define	IP_STAT_NOGIF		28	/* no match gif found */
 #define	IP_STAT_BADADDR		29	/* invalid address on header */
 #define	IP_STAT_NOL2TP		30	/* no match l2tp found */
+#define	IP_STAT_NOIPSEC		31	/* no match ipsec(4) found */
 
-#define	IP_NSTATS		31
+#define	IP_NSTATS		32
 
 #ifdef _KERNEL
 

Index: src/sys/netinet6/in6.c
diff -u src/sys/netinet6/in6.c:1.256 src/sys/netinet6/in6.c:1.257
--- src/sys/netinet6/in6.c:1.256	Mon Dec 25 04:41:49 2017
+++ src/sys/netinet6/in6.c	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $	*/
+/*	$NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $	*/
 /*	$KAME: in6.c,v 1.198 2001/07/18 09:12:38 itojun Exp $	*/
 
 /*
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -2324,6 +2324,24 @@ in6_setmaxmtu(void)
 		in6_maxmtu = maxmtu;
 }
 
+int
+in6_tunnel_validate(const struct ip6_hdr *ip6, const struct in6_addr *src,
+    const struct in6_addr *dst)
+{
+
+	/* check for address match */
+	if (!IN6_ARE_ADDR_EQUAL(src, &ip6->ip6_dst) ||
+	    !IN6_ARE_ADDR_EQUAL(dst, &ip6->ip6_src))
+		return 0;
+
+	/* martian filters on outer source - done in ip6_input */
+
+	/* NOTE: the packet may be dropped by uRPF. */
+
+	/* return valid bytes length */
+	return sizeof(*src) + sizeof(*dst);
+}
+
 /*
  * Provide the length of interface identifiers to be used for the link attached
  * to the given interface.  The length should be defined in "IPv6 over

Index: src/sys/netinet6/in6.h
diff -u src/sys/netinet6/in6.h:1.87 src/sys/netinet6/in6.h:1.88
--- src/sys/netinet6/in6.h:1.87	Mon Feb 15 14:59:03 2016
+++ src/sys/netinet6/in6.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: in6.h,v 1.87 2016/02/15 14:59:03 rtr Exp $	*/
+/*	$NetBSD: in6.h,v 1.88 2018/01/10 10:56:30 knakahara Exp $	*/
 /*	$KAME: in6.h,v 1.83 2001/03/29 02:55:07 jinmei Exp $	*/
 
 /*
@@ -712,6 +712,10 @@ extern void in6_if_down(struct ifnet *);
 extern void addrsel_policy_init(void);
 extern	u_char	ip6_protox[];
 
+struct ip6_hdr;
+int in6_tunnel_validate(const struct ip6_hdr *, const struct in6_addr *,
+	const struct in6_addr *);
+
 #define	satosin6(sa)	((struct sockaddr_in6 *)(sa))
 #define	satocsin6(sa)	((const struct sockaddr_in6 *)(sa))
 #define	sin6tosa(sin6)	((struct sockaddr *)(sin6))

Index: src/sys/netinet6/ip6_var.h
diff -u src/sys/netinet6/ip6_var.h:1.74 src/sys/netinet6/ip6_var.h:1.75
--- src/sys/netinet6/ip6_var.h:1.74	Fri Mar  3 07:13:06 2017
+++ src/sys/netinet6/ip6_var.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip6_var.h,v 1.74 2017/03/03 07:13:06 ozaki-r Exp $	*/
+/*	$NetBSD: ip6_var.h,v 1.75 2018/01/10 10:56:30 knakahara Exp $	*/
 /*	$KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $	*/
 
 /*
@@ -226,8 +226,9 @@ struct	ip6_pktopts {
 #define	IP6_STAT_FORWARD_CACHEMISS 399
 #define	IP6_STAT_FASTFORWARD	400	/* packets fast forwarded */
 #define	IP6_STAT_FASTFORWARDFLOWS 401	/* number of fast forward flows */
+#define	IP6_STAT_NOIPSEC	402	/* no match ipsec(4) found */
 
-#define	IP6_NSTATS		402
+#define	IP6_NSTATS		403
 
 #define IP6FLOW_HASHBITS         6 /* should not be a multiple of 8 */
 

Index: src/sys/netipsec/Makefile
diff -u src/sys/netipsec/Makefile:1.5 src/sys/netipsec/Makefile:1.6
--- src/sys/netipsec/Makefile:1.5	Fri Jan  6 14:17:11 2012
+++ src/sys/netipsec/Makefile	Wed Jan 10 10:56:30 2018
@@ -1,9 +1,10 @@
-#	$NetBSD: Makefile,v 1.5 2012/01/06 14:17:11 drochner Exp $
+#	$NetBSD: Makefile,v 1.6 2018/01/10 10:56:30 knakahara Exp $
 
 INCSDIR= /usr/include/netipsec
 
 INCS=	ah_var.h  esp_var.h ipcomp_var.h ipip_var.h ipsec_var.h \
 	keysock.h
 INCS+=	ipsec.h
+INCS+=	ipsecif.h
 
 .include <bsd.kinc.mk>

Index: src/sys/netipsec/files.netipsec
diff -u src/sys/netipsec/files.netipsec:1.12 src/sys/netipsec/files.netipsec:1.13
--- src/sys/netipsec/files.netipsec:1.12	Wed Jun  5 19:01:26 2013
+++ src/sys/netipsec/files.netipsec	Wed Jan 10 10:56:30 2018
@@ -1,8 +1,9 @@
-#	$Id: files.netipsec,v 1.12 2013/06/05 19:01:26 christos Exp $
+#	$Id: files.netipsec,v 1.13 2018/01/10 10:56:30 knakahara Exp $
 #
 #
 defflag opt_ipsec.h	IPSEC:	opencrypto
 defflag opt_ipsec.h	IPSEC_DEBUG
+defflag opt_ipsec.h	IPSEC_TX_TOS_CLEAR
 
 file	netipsec/ipsec.c		ipsec	needs-flag
 file	netipsec/ipsec_input.c		ipsec
@@ -19,3 +20,5 @@ file	netipsec/key_debug.c		ipsec
 file	netipsec/keysock.c		ipsec
 
 file	netipsec/xform_tcp.c		ipsec & tcp_signature
+
+file	netipsec/ipsecif.c		ipsec & ipsecif

Index: src/sys/netipsec/ipsec.h
diff -u src/sys/netipsec/ipsec.h:1.61 src/sys/netipsec/ipsec.h:1.62
--- src/sys/netipsec/ipsec.h:1.61	Tue Oct  3 08:56:52 2017
+++ src/sys/netipsec/ipsec.h	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ipsec.h,v 1.61 2017/10/03 08:56:52 ozaki-r Exp $	*/
+/*	$NetBSD: ipsec.h,v 1.62 2018/01/10 10:56:30 knakahara Exp $	*/
 /*	$FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netipsec/ipsec.h,v 1.2.4.2 2004/02/14 22:23:23 bms Exp $	*/
 /*	$KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $	*/
 
@@ -84,6 +84,10 @@ struct secpolicy {
 #define IPSEC_SPSTATE_DEAD	0
 #define IPSEC_SPSTATE_ALIVE	1
 
+	u_int origin;			/* who generate this SP. */
+#define IPSEC_SPORIGIN_USER	0
+#define IPSEC_SPORIGIN_KERNEL	1
+
 	u_int policy;		/* DISCARD, NONE or IPSEC, see keyv2.h */
 	struct ipsecrequest *req;
 				/* pointer to the ipsec request tree, */

Index: src/sys/netipsec/key.c
diff -u src/sys/netipsec/key.c:1.246 src/sys/netipsec/key.c:1.247
--- src/sys/netipsec/key.c:1.246	Fri Dec  1 06:34:14 2017
+++ src/sys/netipsec/key.c	Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $	*/
+/*	$NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $	*/
 /*	$KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $	*/
 
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $");
 
 /*
  * This code is referred to RFC 2367
@@ -196,6 +196,10 @@ static u_int32_t acq_seq = 0;
  *     however, a socket can be destroyed in softint so we cannot destroy
  *     it directly instead we just mark it DEAD and delay the destruction
  *     until GC by the timer
+ * - SP origin
+ *   - SPs can be created by both userland programs and kernel components.
+ *     The SPs created in kernel must not be removed by userland programs,
+ *     although the SPs can be read by userland programs.
  */
 /*
  * Locking notes on SAD:
@@ -584,13 +588,6 @@ struct _keystat {
 	u_long getspi_count; /* the avarage of count to try to get new SPI */
 } keystat;
 
-struct sadb_msghdr {
-	struct sadb_msg *msg;
-	void *ext[SADB_EXT_MAX + 1];
-	int extoff[SADB_EXT_MAX + 1];
-	int extlen[SADB_EXT_MAX + 1];
-};
-
 static void
 key_init_spidx_bymsghdr(struct secpolicyindex *, const struct sadb_msghdr *);
 
@@ -621,10 +618,9 @@ static void key_freesp_so(struct secpoli
 #endif
 static struct secpolicy *key_getsp (const struct secpolicyindex *);
 static struct secpolicy *key_getspbyid (u_int32_t);
-static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *);
-static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t);
+static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *, bool);
+static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t, bool);
 static void key_destroy_sp(struct secpolicy *);
-static u_int16_t key_newreqid (void);
 static struct mbuf *key_gather_mbuf (struct mbuf *,
 	const struct sadb_msghdr *, int, int, ...);
 static int key_api_spdadd(struct socket *, struct mbuf *,
@@ -1642,14 +1638,19 @@ key_getsp(const struct secpolicyindex *s
  *	others	: found, pointer to a SP.
  */
 static struct secpolicy *
-key_lookup_and_remove_sp(const struct secpolicyindex *spidx)
+key_lookup_and_remove_sp(const struct secpolicyindex *spidx, bool from_kernel)
 {
 	struct secpolicy *sp = NULL;
 
 	mutex_enter(&key_spd.lock);
 	SPLIST_WRITER_FOREACH(sp, spidx->dir) {
 		KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
-
+		/*
+		 * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+		 * removed by userland programs.
+		 */
+		if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+			continue;
 		if (key_spidx_match_exactly(spidx, &sp->spidx)) {
 			key_unlink_sp(sp);
 			goto out;
@@ -1702,19 +1703,31 @@ out:
  *	others	: found, pointer to a SP.
  */
 static struct secpolicy *
-key_lookupbyid_and_remove_sp(u_int32_t id)
+key_lookupbyid_and_remove_sp(u_int32_t id, bool from_kernel)
 {
 	struct secpolicy *sp;
 
 	mutex_enter(&key_spd.lock);
 	SPLIST_READER_FOREACH(sp, IPSEC_DIR_INBOUND) {
 		KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
+		/*
+		 * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+		 * removed by userland programs.
+		 */
+		if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+			continue;
 		if (sp->id == id)
 			goto out;
 	}
 
 	SPLIST_READER_FOREACH(sp, IPSEC_DIR_OUTBOUND) {
 		KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
+		/*
+		 * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+		 * removed by userland programs.
+		 */
+		if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+			continue;
 		if (sp->id == id)
 			goto out;
 	}
@@ -1742,8 +1755,9 @@ key_newsp(const char* where, int tag)
  * NOTE: `state', `secpolicyindex' in secpolicy structure are not set,
  * so must be set properly later.
  */
-struct secpolicy *
-key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error)
+static struct secpolicy *
+_key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error,
+    bool from_kernel)
 {
 	struct secpolicy *newsp;
 
@@ -1852,10 +1866,21 @@ key_msg2sp(const struct sadb_x_policy *x
 			xisr_reqid = xisr->sadb_x_ipsecrequest_reqid;
 			/* validity check */
 			/*
+			 * case 1) from_kernel == false
+			 * That means the request comes from userland.
 			 * If range violation of reqid, kernel will
 			 * update it, don't refuse it.
+			 *
+			 * case 2) from_kernel == true
+			 * That means the request comes from kernel
+			 * (e.g. ipsec(4) I/F).
+			 * Use the requested reqid to avoid inconsistency
+			 * between kernel's reqid and the reqid in pf_key
+			 * message sent to userland. The pf_key message is
+			 * built by diverting request mbuf.
 			 */
-			if (xisr_reqid > IPSEC_MANUAL_REQID_MAX) {
+			if (!from_kernel &&
+			    xisr_reqid > IPSEC_MANUAL_REQID_MAX) {
 				IPSECLOG(LOG_DEBUG,
 				    "reqid=%d range "
 				    "violation, updated by kernel.\n",
@@ -1939,7 +1964,14 @@ free_exit:
 	return NULL;
 }
 
-static u_int16_t
+struct secpolicy *
+key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error)
+{
+
+	return _key_msg2sp(xpl0, len, error, false);
+}
+
+u_int16_t
 key_newreqid(void)
 {
 	static u_int16_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;
@@ -2086,24 +2118,13 @@ key_gather_mbuf(struct mbuf *m, const st
 }
 
 /*
- * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing
- * add an entry to SP database, when received
- *   <base, address(SD), (lifetime(H),) policy>
- * from the user(?).
- * Adding to SP database,
- * and send
- *   <base, address(SD), (lifetime(H),) policy>
- * to the socket which was send.
- *
- * SPDADD set a unique policy entry.
- * SPDSETIDX like SPDADD without a part of policy requests.
- * SPDUPDATE replace a unique policy entry.
- *
- * m will always be freed.
+ * The argument _sp must not be overwritten until the SP is created and
+ * registered successfully.
  */
 static int
-key_api_spdadd(struct socket *so, struct mbuf *m,
-	   const struct sadb_msghdr *mhp)
+key_spdadd(struct socket *so, struct mbuf *m,
+	   const struct sadb_msghdr *mhp, struct secpolicy **_sp,
+	   bool from_kernel)
 {
 	const struct sockaddr *src, *dst;
 	const struct sadb_x_policy *xpl0;
@@ -2184,7 +2205,7 @@ key_api_spdadd(struct socket *so, struct
 	struct secpolicy *sp;
 
 	if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) {
-		sp = key_lookup_and_remove_sp(&spidx);
+		sp = key_lookup_and_remove_sp(&spidx, from_kernel);
 		if (sp != NULL)
 			key_destroy_sp(sp);
 	} else {
@@ -2198,7 +2219,7 @@ key_api_spdadd(struct socket *so, struct
     }
 
 	/* allocation new SP entry */
-	newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error);
+	newsp = _key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error, from_kernel);
 	if (newsp == NULL) {
 		return key_senderror(so, m, error);
 	}
@@ -2214,11 +2235,20 @@ key_api_spdadd(struct socket *so, struct
 	newsp->lastused = newsp->created;
 	newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0;
 	newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0;
+	if (from_kernel)
+		newsp->origin = IPSEC_SPORIGIN_KERNEL;
+	else
+		newsp->origin = IPSEC_SPORIGIN_USER;
 
 	key_init_sp(newsp);
+	if (from_kernel)
+		KEY_SP_REF(newsp);
 
 	sadb_x_policy_id = newsp->id;
 
+	if (_sp != NULL)
+		*_sp = newsp;
+
 	mutex_enter(&key_spd.lock);
 	SPLIST_WRITER_INSERT_TAIL(newsp->spidx.dir, newsp);
 	mutex_exit(&key_spd.lock);
@@ -2275,13 +2305,19 @@ key_api_spdadd(struct socket *so, struct
 	    sizeof(*xpl), &off);
 	if (mpolicy == NULL) {
 		/* n is already freed */
+		/*
+		 * A valid sp has been created, so we do not overwrite _sp
+		 * with NULL here. Let the caller decide whether to use the sp.
+		 */
 		return key_senderror(so, m, ENOBUFS);
 	}
 	xpl = (struct sadb_x_policy *)(mtod(mpolicy, char *) + off);
 	if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) {
 		m_freem(n);
+		/* ditto */
 		return key_senderror(so, m, EINVAL);
 	}
+
 	xpl->sadb_x_policy_id = sadb_x_policy_id;
 
 	m_freem(m);
@@ -2290,6 +2326,55 @@ key_api_spdadd(struct socket *so, struct
 }
 
 /*
+ * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing
+ * add an entry to SP database, when received
+ *   <base, address(SD), (lifetime(H),) policy>
+ * from the user(?).
+ * Adding to SP database,
+ * and send
+ *   <base, address(SD), (lifetime(H),) policy>
+ * to the socket which was send.
+ *
+ * SPDADD set a unique policy entry.
+ * SPDSETIDX like SPDADD without a part of policy requests.
+ * SPDUPDATE replace a unique policy entry.
+ *
+ * m will always be freed.
+ */
+static int
+key_api_spdadd(struct socket *so, struct mbuf *m,
+	       const struct sadb_msghdr *mhp)
+{
+
+	return key_spdadd(so, m, mhp, NULL, false);
+}
+
+struct secpolicy *
+key_kpi_spdadd(struct mbuf *m)
+{
+	struct sadb_msghdr mh;
+	int error;
+	struct secpolicy *sp = NULL;
+
+	error = key_align(m, &mh);
+	if (error)
+		return NULL;
+
+	error = key_spdadd(NULL, m, &mh, &sp, true);
+	if (error) {
+		/*
+		 * Currently, when key_spdadd() fails to send the PFKEY message
+		 * that reports the SP creation, it returns an error even
+		 * though the SP has been created successfully.
+		 * Kernel components do not care about PFKEY messages, so return
+		 * "sp" regardless of the error code. key_spdadd() overwrites
+		 * the argument only if the SP is created successfully.
+		 */
+	}
+	return sp;
+}
+
+/*
  * get new policy id.
  * OUT:
  *	0:	failure.
@@ -2370,7 +2455,7 @@ key_api_spddelete(struct socket *so, str
 	key_init_spidx_bymsghdr(&spidx, mhp);
 
 	/* Is there SP in SPD ? */
-	sp = key_lookup_and_remove_sp(&spidx);
+	sp = key_lookup_and_remove_sp(&spidx, false);
 	if (sp == NULL) {
 		IPSECLOG(LOG_DEBUG, "no SP found.\n");
 		return key_senderror(so, m, EINVAL);
@@ -2426,8 +2511,8 @@ key_alloc_mbuf_simple(int len, int mflag
  * m will always be freed.
  */
 static int
-key_api_spddelete2(struct socket *so, struct mbuf *m,
-	       const struct sadb_msghdr *mhp)
+key_spddelete2(struct socket *so, struct mbuf *m,
+	       const struct sadb_msghdr *mhp, bool from_kernel)
 {
 	u_int32_t id;
 	struct secpolicy *sp;
@@ -2443,7 +2528,7 @@ key_api_spddelete2(struct socket *so, st
 	id = xpl->sadb_x_policy_id;
 
 	/* Is there SP in SPD ? */
-	sp = key_lookupbyid_and_remove_sp(id);
+	sp = key_lookupbyid_and_remove_sp(id, from_kernel);
 	if (sp == NULL) {
 		IPSECLOG(LOG_DEBUG, "no SP found id:%u.\n", id);
 		return key_senderror(so, m, EINVAL);
@@ -2486,6 +2571,39 @@ key_api_spddelete2(struct socket *so, st
 }
 
 /*
+ * SADB_SPDDELETE2 processing
+ * receive
+ *   <base, policy(*)>
+ * from the user(?), and set SADB_SASTATE_DEAD,
+ * and send,
+ *   <base, policy(*)>
+ * to the ikmpd.
+ * policy(*) including direction of policy.
+ *
+ * m will always be freed.
+ */
+static int
+key_api_spddelete2(struct socket *so, struct mbuf *m,
+	       const struct sadb_msghdr *mhp)
+{
+
+	return key_spddelete2(so, m, mhp, false);
+}
+
+int
+key_kpi_spddelete2(struct mbuf *m)
+{
+	struct sadb_msghdr mh;
+	int error;
+
+	error = key_align(m, &mh);
+	if (error)
+		return EINVAL;
+
+	return key_spddelete2(NULL, m, &mh, true);
+}
+
+/*
  * SADB_X_GET processing
  * receive
  *   <base, policy(*)>
@@ -2630,10 +2748,17 @@ key_api_spdflush(struct socket *so, stru
 		mutex_enter(&key_spd.lock);
 		SPLIST_WRITER_FOREACH(sp, dir) {
 			KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
-			key_unlink_sp(sp);
-			mutex_exit(&key_spd.lock);
-			key_destroy_sp(sp);
-			goto retry;
+			/*
+			 * Userland programs can remove only SPs created by
+			 * userland programs; that is, they cannot remove SPs
+			 * created in kernel (e.g. ipsec(4) I/F).
+			 */
+			if (sp->origin == IPSEC_SPORIGIN_USER) {
+				key_unlink_sp(sp);
+				mutex_exit(&key_spd.lock);
+				key_destroy_sp(sp);
+				goto retry;
+			}
 		}
 		mutex_exit(&key_spd.lock);
 	}
@@ -7696,6 +7821,16 @@ key_senderror(struct socket *so, struct 
 
 	KASSERT(m->m_len >= sizeof(struct sadb_msg));
 
+	if (so == NULL) {
+		/*
+		 * This means the request comes from kernel.
+		 * As the request comes from kernel, it is unnecessary to
+		 * send message to userland. Just return errcode directly.
+		 */
+		m_freem(m);
+		return code;
+	}
+
 	msg = mtod(m, struct sadb_msg *);
 	msg->sadb_msg_errno = code;
 	return key_sendup_mbuf(so, m, KEY_SENDUP_ONE);

Index: src/sys/netipsec/key.h
diff -u src/sys/netipsec/key.h:1.33 src/sys/netipsec/key.h:1.34
--- src/sys/netipsec/key.h:1.33	Tue Nov 21 07:03:08 2017
+++ src/sys/netipsec/key.h	Wed Jan 10 10:56:31 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: key.h,v 1.33 2017/11/21 07:03:08 ozaki-r Exp $	*/
+/*	$NetBSD: key.h,v 1.34 2018/01/10 10:56:31 knakahara Exp $	*/
 /*	$FreeBSD: src/sys/netipsec/key.h,v 1.1.4.1 2003/01/24 05:11:36 sam Exp $	*/
 /*	$KAME: key.h,v 1.21 2001/07/27 03:51:30 itojun Exp $	*/
 
@@ -42,11 +42,18 @@ struct ipsecrequest;
 struct secasvar;
 struct sockaddr;
 struct socket;
-struct sadb_msg;
-struct sadb_x_policy;
 struct secasindex;
 union sockaddr_union;
 
+#include <net/pfkeyv2.h>
+
+struct sadb_msghdr {
+	struct sadb_msg *msg;
+	void *ext[SADB_EXT_MAX + 1];
+	int extoff[SADB_EXT_MAX + 1];
+	int extlen[SADB_EXT_MAX + 1];
+};
+
 int key_havesp(u_int dir);
 struct secpolicy *key_lookup_sp_byspidx(const struct secpolicyindex *, u_int,
 	const char*, int);
@@ -116,7 +123,10 @@ int key_get_used(void);
 
 u_int16_t key_portfromsaddr (const union sockaddr_union *);
 
-
+/* for ipsec(4) */
+struct secpolicy *key_kpi_spdadd(struct mbuf *);
+int key_kpi_spddelete2(struct mbuf *);
+u_int16_t key_newreqid(void);
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_SECA);

Added files:

Index: src/sys/net/if_ipsec.c
diff -u /dev/null src/sys/net/if_ipsec.c:1.1
--- /dev/null	Wed Jan 10 10:56:31 2018
+++ src/sys/net/if_ipsec.c	Wed Jan 10 10:56:30 2018
@@ -0,0 +1,1736 @@
+/*	$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $  */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $");
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/syslog.h>
+#include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/pserialize.h>
+#include <sys/psref.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/bpf.h>
+#include <net/pfkeyv2.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef	INET
+#include <netinet/in_var.h>
+#endif	/* INET */
+
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif /* INET6 */
+
+#include <netinet/ip_encap.h>
+
+#include <net/if_ipsec.h>
+
+#include <net/raw_cb.h>
+#include <net/pfkeyv2.h>
+
+#include <netipsec/key.h>
+#include <netipsec/ipsec.h>
+#include <netipsec/ipsecif.h>
+
+static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *);
+static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *);
+
+static int if_ipsec_clone_create(struct if_clone *, int);
+static int if_ipsec_clone_destroy(struct ifnet *);
+
+static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
+static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
+
+static int if_ipsec_encap_attach(struct ipsec_variant *);
+static int if_ipsec_encap_detach(struct ipsec_variant *);
+static int if_ipsec_set_tunnel(struct ifnet *,
+    struct sockaddr *, struct sockaddr *);
+static void if_ipsec_delete_tunnel(struct ifnet *);
+static int if_ipsec_ensure_flags(struct ifnet *, short);
+static void if_ipsec_attach0(struct ipsec_softc *);
+
+static int if_ipsec_update_variant(struct ipsec_softc *,
+    struct ipsec_variant *, struct ipsec_variant *);
+
+/* sadb_msg */
+static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
+static inline void if_ipsec_add_pad(struct mbuf *, size_t);
+static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
+    struct sockaddr *, int, uint16_t);
+static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
+    struct sockaddr *, int);
+static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
+    struct sockaddr *, int);
+static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
+    struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t);
+static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
+static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
+static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
+/* SPD */
+static int if_ipsec_share_sp(struct ipsec_variant *);
+static int if_ipsec_unshare_sp(struct ipsec_variant *);
+static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
+    in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
+static inline int if_ipsec_del_sp0(struct secpolicy *);
+static int if_ipsec_add_sp(struct ipsec_variant *,
+    struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
+static void if_ipsec_del_sp(struct ipsec_variant *);
+static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
+    struct ipsec_variant *);
+
+static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
+    in_port_t);
+/*
+ * Shorthands for if_ipsec_set_addr_port(): "target" is passed through as
+ * the first argument, followed by the variant's physical source
+ * (iv_psrc, iv_sport) or physical destination (iv_pdst, iv_dport).
+ * Both expand to the int returned by if_ipsec_set_addr_port().
+ */
+#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target)			\
+	if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
+#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target)			\
+	if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
+
+/*
+ * ipsec global variable definitions
+ */
+
+/*
+ * This list is used in ioctl context only.
+ * Softcs are inserted by if_ipsec_clone_create(), removed by
+ * if_ipsec_clone_destroy() and walked by the tunnel/SP helpers; every
+ * access in this file is made with ipsec_softcs.lock held.
+ */
+LIST_HEAD(ipsec_sclist, ipsec_softc);
+static struct {
+	struct ipsec_sclist list;
+	kmutex_t lock;
+} ipsec_softcs __cacheline_aligned;
+
+/* Both objects below are created once, in ipsecifattach(). */
+pserialize_t ipsec_psz __read_mostly;
+struct psref_class *iv_psref_class __read_mostly;
+
+struct if_clone ipsec_cloner =
+    IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
+/* Limit handed to if_tunnel_check_nesting() by if_ipsec_check_nesting(). */
+static int max_ipsec_nesting = MAX_IPSEC_NEST;
+
+/*
+ * Pseudo-device attach routine: set up the driver-global state and then
+ * publish the "ipsec" interface cloner.  "count" is not used.
+ */
+/* ARGSUSED */
+void
+ipsecifattach(int count)
+{
+
+	/* Objects shared by every variant of every softc. */
+	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
+	ipsec_psz = pserialize_create();
+
+	/* The softc list must be usable before the cloner is attached. */
+	LIST_INIT(&ipsec_softcs.list);
+	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
+
+	if_clone_attach(&ipsec_cloner);
+}
+
+/*
+ * Cloner "create" callback: allocate a softc for unit "unit", attach the
+ * interface, give it an initial all-zero variant and link it onto the
+ * global softc list.  Always returns 0.
+ *
+ * NOTE(review): if_ipsec_attach0() registers the interface before
+ * ipsec_var, ipsec_lock and the per-CPU route caches are set up --
+ * confirm nothing can reach the softc in that window.
+ */
+static int
+if_ipsec_clone_create(struct if_clone *ifc, int unit)
+{
+	struct ipsec_softc *sc;
+	struct ipsec_variant *var;
+
+	sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
+
+	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
+
+	if_ipsec_attach0(sc);
+
+	/* Zeroed variant: iv_psrc and iv_pdst start out NULL. */
+	var = kmem_zalloc(sizeof(*var), KM_SLEEP);
+	var->iv_softc = sc;
+	psref_target_init(&var->iv_psref, iv_psref_class);
+
+	sc->ipsec_var = var;
+	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
+	/* One struct ipsec_ro per CPU; only its lock needs initializing. */
+	sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro));
+	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL);
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
+	mutex_exit(&ipsec_softcs.lock);
+	return 0;
+}
+
+/*
+ * Fill in the ifnet embedded in "sc" (point-to-point, multicast capable,
+ * IFF_FWD_IPV6 set by default, DLT_NULL framing for bpf) and register it
+ * with the network stack.
+ */
+static void
+if_ipsec_attach0(struct ipsec_softc *sc)
+{
+
+	sc->ipsec_if.if_addrlen = 0;
+	sc->ipsec_if.if_mtu    = IPSEC_MTU;
+	sc->ipsec_if.if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
+	/* set ipsec(4) specific default flags. */
+	sc->ipsec_if.if_flags  |= IFF_FWD_IPV6;
+	sc->ipsec_if.if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
+	sc->ipsec_if.if_ioctl  = if_ipsec_ioctl;
+	sc->ipsec_if.if_output = if_ipsec_output;
+	sc->ipsec_if.if_type   = IFT_IPSEC;
+	sc->ipsec_if.if_dlt    = DLT_NULL;
+	sc->ipsec_if.if_softc  = sc;
+	IFQ_SET_READY(&sc->ipsec_if.if_snd);
+	if_initialize(&sc->ipsec_if);
+	if_alloc_sadl(&sc->ipsec_if);
+	/* Header length matches the int prepended in if_ipsec_output(). */
+	bpf_attach(&sc->ipsec_if, DLT_NULL, sizeof(u_int));
+	if_register(&sc->ipsec_if);
+}
+
+/*
+ * percpu_foreach() callback: initialize the lock of one CPU's
+ * struct ipsec_ro.  "arg" and "ci" are unused.
+ */
+static void
+if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro *ro_slot = (struct ipsec_ro *)p;
+
+	mutex_init(&ro_slot->ir_lock, MUTEX_DEFAULT, IPL_NONE);
+}
+
+/*
+ * percpu_foreach() callback: release one CPU's cached route and then
+ * destroy the lock that guarded it.  "arg" and "ci" are unused.
+ */
+static void
+if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro *ro_slot = (struct ipsec_ro *)p;
+
+	rtcache_free(&ro_slot->ir_ro);
+	mutex_destroy(&ro_slot->ir_lock);
+}
+
+/*
+ * Cloner "destroy" callback: undo if_ipsec_clone_create().  Always
+ * returns 0.
+ */
+static int
+if_ipsec_clone_destroy(struct ifnet *ifp)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *var;
+	int bound;
+
+	/* Unlink first so that list walkers no longer find this softc. */
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_REMOVE(sc, ipsec_list);
+	mutex_exit(&ipsec_softcs.lock);
+
+	/* Drop any configured tunnel while bound to the current CPU. */
+	bound = curlwp_bind();
+	if_ipsec_delete_tunnel(&sc->ipsec_if);
+	curlwp_bindx(bound);
+
+	bpf_detach(ifp);
+	if_detach(ifp);
+
+	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL);
+	percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro));
+
+	mutex_destroy(&sc->ipsec_lock);
+
+	/* Free whichever variant is current at this point, then the softc. */
+	var = sc->ipsec_var;
+	kmem_free(var, sizeof(*var));
+	kmem_free(sc, sizeof(*sc));
+
+	return 0;
+}
+
+/* Tell whether IFF_NAT_T is set on the interface of "sc". */
+static inline bool
+if_ipsec_nat_t(struct ipsec_softc *sc)
+{
+
+	if (sc->ipsec_if.if_flags & IFF_NAT_T)
+		return true;
+	return false;
+}
+
+/* Tell whether IFF_FWD_IPV6 is set on the interface of "sc". */
+static inline bool
+if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
+{
+
+	if (sc->ipsec_if.if_flags & IFF_FWD_IPV6)
+		return true;
+	return false;
+}
+
+/*
+ * Match callback for encapsulated input; "arg" is the ipsec_softc.
+ *
+ * Returns 0 when the interface is down, the variant is unconfigured,
+ * "proto" is neither IPPROTO_IPV4 nor IPPROTO_IPV6, the packet is shorter
+ * than an IPv4 header, or the outer header is not IPv4 on an AF_INET
+ * tunnel.  Otherwise returns whatever ipsecif4_encap_func() returns.
+ * NOTE(review): 0 presumably means "no match" to the encap framework --
+ * confirm against ip_encap.
+ */
+int
+if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
+{
+	struct ip ip;
+	struct ipsec_softc *sc;
+	struct ipsec_variant *var = NULL;
+	struct psref psref;
+	int ret = 0;
+
+	sc = arg;
+	KASSERT(sc != NULL);
+
+	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
+		goto out;
+
+	var = if_ipsec_getref_variant(sc, &psref);
+	if (if_ipsec_variant_is_unconfigured(var))
+		goto out;
+
+	switch (proto) {
+	case IPPROTO_IPV4:
+	case IPPROTO_IPV6:
+		break;
+	default:
+		goto out;
+	}
+
+	if (m->m_pkthdr.len < sizeof(ip))
+		goto out;
+
+	/* Work on a copy of the outer header; the mbuf is left untouched. */
+	m_copydata(m, 0, sizeof(ip), &ip);
+	switch (ip.ip_v) {
+#ifdef INET
+	case IPVERSION:
+		if (var->iv_psrc->sa_family != AF_INET ||
+		    var->iv_pdst->sa_family != AF_INET)
+			goto out;
+		ret = ipsecif4_encap_func(m, &ip, var);
+		break;
+#endif
+	default:
+		goto out;
+	}
+
+out:
+	/* var stays NULL only when we bailed out before taking the ref. */
+	if (var != NULL)
+		if_ipsec_putref_variant(var, &psref);
+	return ret;
+}
+
+/*
+ * A misconfigured ipsec(4) I/F can send packets back into itself and
+ * recurse without end.  Bound the recursion depth by max_ipsec_nesting;
+ * the result of if_tunnel_check_nesting() is returned unchanged.
+ */
+static int
+if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	int error;
+
+	error = if_tunnel_check_nesting(ifp, m, max_ipsec_nesting);
+	return error;
+}
+
+/*
+ * if_output handler for ipsec(4).
+ *
+ * Drops the packet when the nesting limit is hit, when the interface is
+ * down or when no tunnel is configured (ENETDOWN for the latter two).
+ * Otherwise the inner address family is prepended as a DLT_NULL header
+ * and the packet is handed to if_ipsec_out_direct().
+ *
+ * An IPv6 packet arriving while IFF_FWD_IPV6 is clear is dropped silently:
+ * it is counted as a send-queue drop and 0 is returned.
+ *
+ * The mbuf is always consumed.  Returns 0 or an errno value; any non-zero
+ * result also increments if_oerrors.
+ */
+int
+if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    const struct rtentry *rt)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *var;
+	struct psref psref;
+	int error;
+	int bound;
+
+	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
+
+	error = if_ipsec_check_nesting(ifp, m);
+	if (error) {
+		m_freem(m);
+		goto noref_end;
+	}
+
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		error = ENETDOWN;
+		goto noref_end;
+	}
+
+
+	/* The variant reference is taken with the LWP bound to this CPU. */
+	bound = curlwp_bind();
+	var = if_ipsec_getref_variant(sc, &psref);
+	if (if_ipsec_variant_is_unconfigured(var)) {
+		m_freem(m);
+		error = ENETDOWN;
+		goto end;
+	}
+
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+
+	/* use DLT_NULL encapsulation here to pass inner af type */
+	M_PREPEND(m, sizeof(int), M_DONTWAIT);
+	if (!m) {
+		error = ENOBUFS;
+		goto end;
+	}
+	*mtod(m, int *) = dst->sa_family;
+
+	/* Tested with #ifdef, like every other INET6 block in this file. */
+#ifdef INET6
+	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
+	if (dst->sa_family == AF_INET6 &&
+	    !if_ipsec_fwd_ipv6(sc)) {
+		/*
+		 * Forwarding IPv6 is not allowed here; that is not an error.
+		 */
+		error = 0;
+		IF_DROP(&ifp->if_snd);
+		m_freem(m);
+		goto end;
+	}
+#endif
+
+	error = if_ipsec_out_direct(var, m, dst->sa_family);
+
+end:
+	if_ipsec_putref_variant(var, &psref);
+	curlwp_bindx(bound);
+noref_end:
+	if (error)
+		ifp->if_oerrors++;
+
+	return error;
+}
+
+/*
+ * Tap the DLT_NULL framed packet, strip the address-family word again and
+ * pass the payload to the variant's output routine.  The caller must hold
+ * a reference on "var".  Output counters are bumped only on success; the
+ * byte count includes the DLT_NULL word.  Returns iv_output()'s result.
+ */
+static inline int
+if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
+{
+	struct ifnet *tunifp = &var->iv_softc->ipsec_if;
+	int framed_len;
+	int rv;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+	KASSERT(var->iv_output != NULL);
+
+	/* Remember the length now; iv_output() takes the mbuf over. */
+	framed_len = m->m_pkthdr.len;
+
+	/* input DLT_NULL frame to BPF */
+	bpf_mtap(tunifp, m);
+
+	/* grab and chop off inner af type */
+	/* XXX need pullup? */
+	m_adj(m, sizeof(int));
+
+	rv = var->iv_output(var, family, m);
+	if (rv == 0) {
+		tunifp->if_opackets++;
+		tunifp->if_obytes += framed_len;
+	}
+
+	return rv;
+}
+
+/*
+ * Input path: mark "m" as received on "ifp", show it to bpf tagged with
+ * address family "af", then queue it for the network layer.
+ */
+void
+if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
+{
+
+	KASSERT(ifp != NULL);
+
+	m_set_rcvif(m, ifp);
+	bpf_mtap_af(ifp, af, m);
+	if_ipsec_in_enqueue(m, af, ifp);
+}
+
+/*
+ * Hand "m" to the IPv4 or IPv6 input packet queue chosen by "af".
+ * An unsupported family counts as an input error; in that case, and when
+ * the queue refuses the packet, the mbuf is freed here.  Input counters
+ * are bumped only after a successful enqueue.
+ */
+static inline void
+if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
+{
+	pktqueue_t *queue;
+	int nbytes;
+
+	/* Select the input queue that matches the inner address family. */
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		queue = ip_pktq;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		queue = ip6_pktq;
+		break;
+#endif
+	default:
+		ifp->if_ierrors++;
+		m_freem(m);
+		return;
+	}
+
+#if 1
+	const u_int hash = curcpu()->ci_index;
+#else
+	const uint32_t hash = pktq_rps_hash(m);
+#endif
+	/* Read the length first; the mbuf is not ours after the enqueue. */
+	nbytes = m->m_pkthdr.len;
+	if (__predict_false(!pktq_enqueue(queue, m, hash))) {
+		m_freem(m);
+		return;
+	}
+
+	ifp->if_ibytes += nbytes;
+	ifp->if_ipackets++;
+}
+
+/*
+ * Check that sa_len matches the sockaddr type implied by sa_family.
+ * Returns 0 on a match, EINVAL on a length mismatch and EAFNOSUPPORT for
+ * any family other than the compiled-in AF_INET / AF_INET6.
+ */
+static inline int
+if_ipsec_check_salen(struct sockaddr *addr)
+{
+	size_t want;
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		want = sizeof(struct sockaddr_in);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		want = sizeof(struct sockaddr_in6);
+		break;
+#endif /* INET6 */
+	default:
+		return EAFNOSUPPORT;
+	}
+
+	return (addr->sa_len == want) ? 0 : EINVAL;
+}
+
+/*
+ * ioctl handler for ipsec(4) interfaces.
+ *
+ *   SIOCINITIFADDR              mark the I/F up, install p2p_rtrequest
+ *   SIOCSIFDSTADDR              accepted, nothing to do
+ *   SIOC{ADD,DEL}MULTI          accepted for AF_INET / AF_INET6 only
+ *   SIOCSIFMTU                  range-checked against IPSEC_MTU_{MIN,MAX}
+ *   SIOCS[L]IFPHYADDR[_IN6]     validate the pair, then if_ipsec_set_tunnel()
+ *   SIOCDIFPHYADDR              if_ipsec_delete_tunnel()
+ *   SIOCGIFP{SRC,DST}ADDR[_IN6] copy out one tunnel endpoint
+ *   SIOCGLIFPHYADDR             copy out both tunnel endpoints
+ *   anything else               ifioctl_common(), then re-evaluate the
+ *                               IFF_NAT_T / IFF_FWD_IPV6 flags
+ *
+ * Returns 0 or an errno value.
+ *
+ * Every path that calls curlwp_bind() must call curlwp_bindx() before it
+ * returns, on success as well as on failure.  Failures go through the
+ * "bad" label, which also drops the variant reference if one is held.
+ */
+/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
+int
+if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
+{
+	struct ipsec_softc *sc  = ifp->if_softc;
+	struct ipsec_variant *var = NULL;
+	struct ifreq     *ifr = (struct ifreq*)data;
+	struct ifaddr    *ifa = (struct ifaddr*)data;
+	int error = 0, size;
+	struct sockaddr *dst, *src;
+	u_long mtu;
+	/* Flags as they were before ifioctl_common() may change them. */
+	short oflags = ifp->if_flags;
+	int bound;
+	struct psref psref;
+
+	switch (cmd) {
+	case SIOCINITIFADDR:
+		ifp->if_flags |= IFF_UP;
+		ifa->ifa_rtrequest = p2p_rtrequest;
+		break;
+
+	case SIOCSIFDSTADDR:
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+		case AF_INET:	/* IP supports Multicast */
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:	/* IP6 supports Multicast */
+			break;
+#endif /* INET6 */
+		default:  /* Other protocols do not support Multicast */
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCSIFMTU:
+		mtu = ifr->ifr_mtu;
+		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
+			return EINVAL;
+		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
+			error = 0;
+		break;
+
+#ifdef INET
+	case SIOCSIFPHYADDR:
+#endif
+#ifdef INET6
+	case SIOCSIFPHYADDR_IN6:
+#endif /* INET6 */
+	case SIOCSLIFPHYADDR:
+		/* Locate the address pair inside the request structure. */
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			src = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_dstaddr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			src = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
+			break;
+#endif /* INET6 */
+		case SIOCSLIFPHYADDR:
+			src = (struct sockaddr *)
+				&(((struct if_laddrreq *)data)->addr);
+			dst = (struct sockaddr *)
+				&(((struct if_laddrreq *)data)->dstaddr);
+			break;
+		default:
+			return EINVAL;
+		}
+
+		/* sa_family must be equal */
+		if (src->sa_family != dst->sa_family)
+			return EINVAL;
+
+		error = if_ipsec_check_salen(src);
+		if (error)
+			return error;
+		error = if_ipsec_check_salen(dst);
+		if (error)
+			return error;
+
+		/* check sa_family looks sane for the cmd */
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			if (src->sa_family == AF_INET)
+				break;
+			return EAFNOSUPPORT;
+#endif /* INET */
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			if (src->sa_family == AF_INET6)
+				break;
+			return EAFNOSUPPORT;
+#endif /* INET6 */
+		case SIOCSLIFPHYADDR:
+			/* checks done in the above */
+			break;
+		}
+		/*
+		 * calls if_ipsec_getref_variant() for other softcs to check
+		 * address pair duplication
+		 */
+		bound = curlwp_bind();
+		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
+		if (error)
+			goto bad;
+		/* Success must unbind too; "bad" only covers failures. */
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCDIFPHYADDR:
+		bound = curlwp_bind();
+		if_ipsec_delete_tunnel(&sc->ipsec_if);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGIFPSRCADDR:
+#ifdef INET6
+	case SIOCGIFPSRCADDR_IN6:
+#endif /* INET6 */
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (var->iv_psrc == NULL) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		src = var->iv_psrc;
+		/* "dst" is the output buffer inside the request here. */
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPSRCADDR:
+			dst = &ifr->ifr_addr;
+			size = sizeof(ifr->ifr_addr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCGIFPSRCADDR_IN6:
+			dst = (struct sockaddr *)
+				&(((struct in6_ifreq *)data)->ifr_addr);
+			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+			break;
+#endif /* INET6 */
+		default:
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGIFPDSTADDR:
+#ifdef INET6
+	case SIOCGIFPDSTADDR_IN6:
+#endif /* INET6 */
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (var->iv_pdst == NULL) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		src = var->iv_pdst;
+		/* "dst" is the output buffer inside the request here. */
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPDSTADDR:
+			dst = &ifr->ifr_addr;
+			size = sizeof(ifr->ifr_addr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCGIFPDSTADDR_IN6:
+			dst = (struct sockaddr *)
+				&(((struct in6_ifreq *)data)->ifr_addr);
+			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+			break;
+#endif /* INET6 */
+		default:
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGLIFPHYADDR:
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (if_ipsec_variant_is_unconfigured(var)) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+
+		/* copy src */
+		src = var->iv_psrc;
+		dst = (struct sockaddr *)
+			&(((struct if_laddrreq *)data)->addr);
+		size = sizeof(((struct if_laddrreq *)data)->addr);
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+
+		/* copy dst */
+		src = var->iv_pdst;
+		dst = (struct sockaddr *)
+			&(((struct if_laddrreq *)data)->dstaddr);
+		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	default:
+		error = ifioctl_common(ifp, cmd, data);
+		if (!error) {
+			bound = curlwp_bind();
+			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
+			if (error)
+				goto bad;
+			/* Success must unbind too; see the header comment. */
+			curlwp_bindx(bound);
+		}
+		break;
+	}
+	return error;
+
+bad:
+	/* Only reached after curlwp_bind(); var is NULL if no ref is held. */
+	if (var != NULL)
+		if_ipsec_putref_variant(var, &psref);
+	curlwp_bindx(bound);
+
+	return error;
+}
+
+/*
+ * A pair of per-address-family handlers; if_ipsec_encap_common() picks
+ * one according to the family of the variant's physical source address.
+ */
+struct encap_funcs {
+	int (*ef_inet)(struct ipsec_variant *);
+	int (*ef_inet6)(struct ipsec_variant *);
+};
+
+/* Handlers used by if_ipsec_encap_attach(). */
+static struct encap_funcs ipsec_encap_attach = {
+	.ef_inet = ipsecif4_attach,
+	.ef_inet6 = &ipsecif6_attach,
+};
+
+/* Handlers used by if_ipsec_encap_detach(). */
+static struct encap_funcs ipsec_encap_detach = {
+	.ef_inet = ipsecif4_detach,
+	.ef_inet6 = &ipsecif6_detach,
+};
+
+/*
+ * Run the handler from "funcs" that matches the address family of the
+ * variant's physical source address.  "var" must be configured.
+ * Returns the handler's result, or EINVAL for any other family.
+ */
+static int
+if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
+{
+
+	KASSERT(var != NULL);
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	switch (var->iv_psrc->sa_family) {
+#ifdef INET
+	case AF_INET:
+		return (funcs->ef_inet)(var);
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		return (funcs->ef_inet6)(var);
+#endif /* INET6 */
+	default:
+		return EINVAL;
+	}
+}
+
+/* Run the attach handler that matches the address family of "var". */
+static int
+if_ipsec_encap_attach(struct ipsec_variant *var)
+{
+	int error;
+
+	error = if_ipsec_encap_common(var, &ipsec_encap_attach);
+	return error;
+}
+
+/* Run the detach handler that matches the address family of "var". */
+static int
+if_ipsec_encap_detach(struct ipsec_variant *var)
+{
+	int error;
+
+	error = if_ipsec_encap_common(var, &ipsec_encap_detach);
+	return error;
+}
+
+/*
+ * Validate and set ipsec(4) I/F configurations.
+ *     (1) validate
+ *         (1-1) Check the argument src and dst address pair will change
+ *               configuration from current src and dst address pair.
+ *         (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
+ *               with argument src and dst address pair, except for NAT-T shared
+ *               tunnels.
+ *     (2) set
+ *         (2-1) Create variant for new configuration.
+ *         (2-2) Create temporary "null" variant used to avoid accessing a
+ *               dangling variant while SPs are deleted and added.
+ *         (2-3) Swap variant include its SPs.
+ *         (2-4) Cleanup last configurations.
+ *
+ * Returns 0 on success, and also when the requested pair equals the
+ * current one; otherwise an errno value.  On every non-zero return, and
+ * on the "unchanged" return, all objects allocated here are freed again.
+ */
+static int
+if_ipsec_set_tunnel(struct ifnet *ifp,
+    struct sockaddr *src, struct sockaddr *dst)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_softc *sc2;
+	struct ipsec_variant *ovar, *nvar, *nullvar;
+	struct sockaddr *osrc, *odst;
+	struct sockaddr *nsrc, *ndst;
+	in_port_t nsport = 0, ndport = 0;
+	int error;
+
+	error = encap_lock_enter();
+	if (error)
+		return error;
+
+	/* All allocations are made before sc->ipsec_lock is taken. */
+	nsrc = sockaddr_dup(src, M_WAITOK);
+	ndst = sockaddr_dup(dst, M_WAITOK);
+	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+	mutex_enter(&sc->ipsec_lock);
+
+	ovar = sc->ipsec_var;
+
+	/*
+	 * Keep the ports in host byte order in nsport/ndport and clear them
+	 * in the duplicated addresses.
+	 */
+	switch(nsrc->sa_family) {
+#ifdef INET
+	case AF_INET:
+		nsport = ntohs(satosin(src)->sin_port);
+		/*
+		 * avoid confuse SP when NAT-T disabled,
+		 * e.g.
+		 *     expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
+		 *     confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
+		 */
+		satosin(nsrc)->sin_port = 0;
+		ndport = ntohs(satosin(dst)->sin_port);
+		satosin(ndst)->sin_port = 0;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		nsport = ntohs(satosin6(src)->sin6_port);
+		satosin6(nsrc)->sin6_port = 0;
+		ndport = ntohs(satosin6(dst)->sin6_port);
+		satosin6(ndst)->sin6_port = 0;
+		break;
+#endif /* INET6 */
+	default:
+		log(LOG_DEBUG,
+		    "%s: Invalid address family: %d.\n",
+		    __func__, src->sa_family);
+		error = EINVAL;
+		goto out;
+	}
+
+	/*
+	 * (1-1) Check the argument src and dst address pair will change
+	 *       configuration from current src and dst address pair.
+	 *
+	 * NOTE(review): the comparison uses the caller's src/dst, whose ports
+	 * are intact, while iv_psrc/iv_pdst are stored with the port cleared.
+	 * Confirm sockaddr_cmp() still matches when a port was supplied.
+	 */
+	if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
+	    (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
+	    (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
+		/* address and port pair not changed. */
+		error = 0;
+		goto out;
+	}
+
+	/*
+	 * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
+	 *       with argument src and dst address pair, except for NAT-T shared
+	 *       tunnels.
+	 */
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
+		struct ipsec_variant *var2;
+		struct psref psref;
+
+		if (sc2 == sc)
+			continue;
+		var2 = if_ipsec_getref_variant(sc2, &psref);
+		if (if_ipsec_variant_is_unconfigured(var2)) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue;
+		}
+		if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue; /* NAT-T shared tunnel */
+		}
+		if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
+		    sockaddr_cmp(var2->iv_psrc, src) == 0) {
+			if_ipsec_putref_variant(var2, &psref);
+			mutex_exit(&ipsec_softcs.lock);
+			error = EADDRNOTAVAIL;
+			goto out;
+		}
+
+		if_ipsec_putref_variant(var2, &psref);
+		/* XXX both end must be valid? (I mean, not 0.0.0.0) */
+	}
+	mutex_exit(&ipsec_softcs.lock);
+
+
+	/* Remember the old addresses; they are freed in (2-4). */
+	osrc = ovar->iv_psrc;
+	odst = ovar->iv_pdst;
+
+	/*
+	 * (2-1) Create ipsec_variant for new configuration.
+	 */
+	if_ipsec_copy_variant(nvar, ovar);
+	nvar->iv_psrc = nsrc;
+	nvar->iv_pdst = ndst;
+	nvar->iv_sport = nsport;
+	nvar->iv_dport = ndport;
+	nvar->iv_encap_cookie4 = NULL;
+	nvar->iv_encap_cookie6 = NULL;
+	psref_target_init(&nvar->iv_psref, iv_psref_class);
+	error = if_ipsec_encap_attach(nvar);
+	if (error)
+		goto out;
+
+	/*
+	 * (2-2) Create temporary "null" variant.
+	 */
+	if_ipsec_copy_variant(nullvar, ovar);
+	if_ipsec_clear_config(nullvar);
+	psref_target_init(&nullvar->iv_psref, iv_psref_class);
+	membar_producer();
+	/*
+	 * (2-3) Swap variant include its SPs.
+	 */
+	error = if_ipsec_update_variant(sc, nvar, nullvar);
+	if (error) {
+		/* Undo the attach done in (2-1) before bailing out. */
+		if_ipsec_encap_detach(nvar);
+		goto out;
+	}
+
+	mutex_exit(&sc->ipsec_lock);
+
+	/*
+	 * (2-4) Cleanup last configurations.
+	 */
+	if (if_ipsec_variant_is_configured(ovar))
+		if_ipsec_encap_detach(ovar);
+	encap_lock_exit();
+
+	if (osrc != NULL)
+		sockaddr_free(osrc);
+	if (odst != NULL)
+		sockaddr_free(odst);
+	kmem_free(ovar, sizeof(*ovar));
+	kmem_free(nullvar, sizeof(*nullvar));
+
+	return 0;
+
+out:
+	/* Nothing was installed: release the locks and the new objects. */
+	mutex_exit(&sc->ipsec_lock);
+	encap_lock_exit();
+
+	sockaddr_free(nsrc);
+	sockaddr_free(ndst);
+	kmem_free(nvar, sizeof(*nvar));
+	kmem_free(nullvar, sizeof(*nullvar));
+
+	return error;
+}
+
+/*
+ * Validate and delete ipsec(4) I/F configurations.
+ *     (1) validate
+ *         (1-1) Check current src and dst address pair are null,
+ *               which means the ipsec(4) I/F is already done deletetunnel.
+ *     (2) delete
+ *         (2-1) Create variant for deleted status.
+ *         (2-2) Create temporary "null" variant used to avoid accessing a
+ *               dangling variant while SPs are deleted and added.
+ *               NOTE:
+ *               The contents of temporary "null" variant equal to the variant
+ *               of (2-1), however two psref_target_destroy() synchronization
+ *               points are necessary to avoid accessing a dangling variant
+ *               while SPs are deleted and added. To implement that simply,
+ *               we use the same manner as if_ipsec_set_tunnel(), that is,
+ *               create extra "null" variant and use it temporarily.
+ *         (2-3) Swap variant include its SPs.
+ *         (2-4) Cleanup last configurations.
+ *
+ * Nothing is reported to the caller: if encap_lock_enter() fails, the
+ * function returns without changing anything.
+ */
+static void
+if_ipsec_delete_tunnel(struct ifnet *ifp)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *ovar, *nvar, *nullvar;
+	struct sockaddr *osrc, *odst;
+	int error;
+
+	error = encap_lock_enter();
+	if (error)
+		return;
+
+	/* Both variants are allocated before sc->ipsec_lock is taken. */
+	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+	mutex_enter(&sc->ipsec_lock);
+
+	ovar = sc->ipsec_var;
+	osrc = ovar->iv_psrc;
+	odst = ovar->iv_pdst;
+	/*
+	 * (1-1) Check current src and dst address pair are null,
+	 *       which means the ipsec(4) I/F is already done deletetunnel.
+	 */
+	if (osrc == NULL || odst == NULL) {
+		/* address pair not changed. */
+		mutex_exit(&sc->ipsec_lock);
+		encap_lock_exit();
+		/* Neither pre-allocated variant was used: free both. */
+		kmem_free(nvar, sizeof(*nvar));
+		kmem_free(nullvar, sizeof(*nullvar));
+		return;
+	}
+
+	/*
+	 * (2-1) Create variant for deleted status.
+	 */
+	if_ipsec_copy_variant(nvar, ovar);
+	if_ipsec_clear_config(nvar);
+	psref_target_init(&nvar->iv_psref, iv_psref_class);
+
+	/*
+	 * (2-2) Create temporary "null" variant used to avoid accessing a
+	 *       dangling variant while SPs are deleted and added.
+	 */
+	if_ipsec_copy_variant(nullvar, ovar);
+	if_ipsec_clear_config(nullvar);
+	psref_target_init(&nullvar->iv_psref, iv_psref_class);
+	membar_producer();
+	/*
+	 * (2-3) Swap variant include its SPs.
+	 */
+	/* if_ipsec_update_variant() does not fail when delete SP only. */
+	(void)if_ipsec_update_variant(sc, nvar, nullvar);
+
+	mutex_exit(&sc->ipsec_lock);
+
+	/*
+	 * (2-4) Cleanup last configurations.
+	 */
+	if (if_ipsec_variant_is_configured(ovar))
+		if_ipsec_encap_detach(ovar);
+	encap_lock_exit();
+
+	sockaddr_free(osrc);
+	sockaddr_free(odst);
+	kmem_free(ovar, sizeof(*ovar));
+	kmem_free(nullvar, sizeof(*nullvar));
+}
+
+/*
+ * Check IFF_NAT_T and IFF_FWD_IPV6 flags, and update SPs if needed.
+ *     (1) check
+ *         (1-1) Check flags are changed.
+ *         (1-2) Check current src and dst address pair. If they are null,
+ *               that means the ipsec(4) I/F is deletetunnel'ed, so it is
+ *               not needed to update.
+ *     (2) update
+ *         (2-1) Create variant for new SPs.
+ *         (2-2) Create temporary "null" variant used to avoid accessing a
+ *               dangling variant while SPs are deleted and added.
+ *               NOTE:
+ *               There is the same problem as if_ipsec_delete_tunnel().
+ *         (2-3) Swap variant include its SPs.
+ *         (2-4) Cleanup unused configurations.
+ *               NOTE: use the same encap_cookies.
+ *
+ * "oflags" holds the interface flags as they were before the change.
+ * Returns 0 when nothing had to be done or the update succeeded,
+ * otherwise the error from encap_lock_enter() or
+ * if_ipsec_update_variant().
+ *
+ * Every return path taken after encap_lock_enter() has succeeded
+ * releases the encap lock and frees the variants that are no longer
+ * referenced.
+ */
+static int
+if_ipsec_ensure_flags(struct ifnet *ifp, short oflags)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *ovar, *nvar, *nullvar;
+	int error;
+
+	/*
+	 * (1-1) Check flags are changed.
+	 */
+	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
+	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
+		return 0; /* flags not changed. */
+
+	error = encap_lock_enter();
+	if (error)
+		return error;
+
+	/* Both variants are allocated before sc->ipsec_lock is taken. */
+	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+	mutex_enter(&sc->ipsec_lock);
+
+	ovar = sc->ipsec_var;
+	/*
+	 * (1-2) Check current src and dst address pair.
+	 */
+	if (if_ipsec_variant_is_unconfigured(ovar)) {
+		/*
+		 * nothing to do, but the encap lock and both pre-allocated
+		 * variants still have to be released.
+		 */
+		mutex_exit(&sc->ipsec_lock);
+		encap_lock_exit();
+		kmem_free(nvar, sizeof(*nvar));
+		kmem_free(nullvar, sizeof(*nullvar));
+		return 0;
+	}
+
+	/*
+	 * (2-1) Create variant for new SPs.
+	 */
+	if_ipsec_copy_variant(nvar, ovar);
+	psref_target_init(&nvar->iv_psref, iv_psref_class);
+	/*
+	 * (2-2) Create temporary "null" variant used to avoid accessing a
+	 *       dangling variant while SPs are deleted and added.
+	 */
+	if_ipsec_copy_variant(nullvar, ovar);
+	if_ipsec_clear_config(nullvar);
+	psref_target_init(&nullvar->iv_psref, iv_psref_class);
+	membar_producer();
+	/*
+	 * (2-3) Swap variant include its SPs.
+	 */
+	error = if_ipsec_update_variant(sc, nvar, nullvar);
+
+	mutex_exit(&sc->ipsec_lock);
+	encap_lock_exit();
+
+	/*
+	 * (2-4) Cleanup unused configurations.
+	 * On success the old variant is released, on failure the new one.
+	 */
+	if (!error)
+		kmem_free(ovar, sizeof(*ovar));
+	else
+		kmem_free(nvar, sizeof(*nvar));
+	kmem_free(nullvar, sizeof(*nullvar));
+
+	return error;
+}
+
+/*
+ * SPD management
+ */
+
+/*
+ * Share SP set with other NAT-T ipsec(4) I/F(s).
+ *     Return 1, when "var" shares SP set.
+ *     Return 0, when "var" cannot share SP set.
+ *
+ * The SP pointers are copied from the first other softc on the list whose
+ * configured variant has the same physical src and dst addresses.  Both
+ * addresses of "var" must be set, and encap_lock must be held.
+ *
+ * NOTE:
+ * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
+ * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
+ * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
+ * set_tunnel causes race.
+ * Currently, (fortunately) encap_lock works as this global lock.
+ */
+static int
+if_ipsec_share_sp(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	struct ipsec_softc *sc2;
+	struct ipsec_variant *var2;
+	struct psref psref;
+
+	KASSERT(encap_lock_held());
+	/* Both addresses are dereferenced by sockaddr_cmp() below. */
+	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
+		if (sc2 == sc)
+			continue;
+		var2 = if_ipsec_getref_variant(sc2, &psref);
+		if (if_ipsec_variant_is_unconfigured(var2)) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue;
+		}
+		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
+		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue;
+		}
+
+		/* Match: leave the loop still holding the ref on var2. */
+		break;
+	}
+	mutex_exit(&ipsec_softcs.lock);
+	/* sc2 is NULL here only if the loop ran to the end of the list. */
+	if (sc2 == NULL)
+		return 0; /* not shared */
+
+	IV_SP_IN(var) = IV_SP_IN(var2);
+	IV_SP_IN6(var) = IV_SP_IN6(var2);
+	IV_SP_OUT(var) = IV_SP_OUT(var2);
+	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
+
+	if_ipsec_putref_variant(var2, &psref);
+	return 1; /* shared */
+}
+
+/*
+ * Unshare SP set with other NAT-T ipsec(4) I/F(s).
+ *     Return 1, when "var" shared SP set, and then unshare them.
+ *     Return 0, when "var" did not share SP set.
+ *
+ * NOTE:
+ * See if_ipsec_share_sp()'s note.
+ */
+static int
+if_ipsec_unshare_sp(struct ipsec_variant *var)
+{
+	struct ipsec_softc *self = var->iv_softc;
+	struct ipsec_softc *peer;
+	struct ipsec_variant *pvar;
+	struct psref psref;
+
+	KASSERT(encap_lock_held());
+
+	/* A variant without both endpoints cannot share anything. */
+	if (var->iv_pdst == NULL || var->iv_psrc == NULL)
+		return 0;
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_FOREACH(peer, &ipsec_softcs.list, ipsec_list) {
+		if (peer == self)
+			continue;
+
+		pvar = if_ipsec_getref_variant(peer, &psref);
+		if (pvar->iv_pdst != NULL && pvar->iv_psrc != NULL &&
+		    sockaddr_cmp(pvar->iv_pdst, var->iv_pdst) == 0 &&
+		    sockaddr_cmp(pvar->iv_psrc, var->iv_psrc) == 0) {
+			/* Same address pair: leave with the psref held. */
+			break;
+		}
+		if_ipsec_putref_variant(pvar, &psref);
+	}
+	mutex_exit(&ipsec_softcs.lock);
+
+	if (peer == NULL)
+		return 0; /* not shared */
+
+	/* Only forget the pointers; the SPs stay with the other I/F. */
+	IV_SP_IN(var) = NULL;
+	IV_SP_IN6(var) = NULL;
+	IV_SP_OUT(var) = NULL;
+	IV_SP_OUT6(var) = NULL;
+	if_ipsec_putref_variant(pvar, &psref);
+	return 1; /* shared */
+}
+
+/*
+ * Append "len" bytes of "data" to the mbuf chain "m0" as one new mbuf.
+ * The new mbuf's length is rounded up with PFKEY_ALIGN8() and the
+ * padding bytes are zero.
+ */
+static inline void
+if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
+{
+	struct mbuf *m;
+
+	/* The aligned data must fit into a single plain mbuf. */
+	KASSERT(PFKEY_ALIGN8(len) <= MLEN);
+
+	/*
+	 * mbuf(9) documents only the wait/don't-wait values as the "how"
+	 * argument of MGET(); M_ZERO is not one of them.  So, request a
+	 * waiting allocation and clear the data area explicitly.
+	 */
+	MGET(m, M_WAITOK, MT_DATA);
+	m->m_len = PFKEY_ALIGN8(len);
+	memset(mtod(m, void *), 0, m->m_len);
+	m_copyback(m, 0, len, data);
+	m_cat(m0, m);
+}
+
+/*
+ * Append "len" zero bytes to the mbuf chain "m0".  Does nothing when
+ * "len" is 0.
+ */
+static inline void
+if_ipsec_add_pad(struct mbuf *m0, size_t len)
+{
+	struct mbuf *m;
+
+	if (len == 0)
+		return;
+
+	/* The padding must fit into a single plain mbuf. */
+	KASSERT(len <= MLEN);
+
+	/*
+	 * M_ZERO is not a documented "how" value of MGET() (see mbuf(9)),
+	 * so clear the padding explicitly.
+	 */
+	MGET(m, M_WAITOK, MT_DATA);
+	m->m_len = len;
+	memset(mtod(m, void *), 0, m->m_len);
+	m_cat(m0, m);
+}
+
+/*
+ * Fill in the sadb_address header "saaddr" which describes "addr".
+ * The prefix length is set to the full address width for AF_INET and
+ * AF_INET6; for other families it is left untouched and a debug message
+ * is logged.
+ * Returns the size in bytes of the header plus the 8 byte aligned address.
+ */
+static inline size_t
+if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
+    int proto, uint16_t exttype)
+{
+	size_t size;
+
+	KASSERT(saaddr != NULL);
+	KASSERT(addr != NULL);
+
+	size = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
+
+	saaddr->sadb_address_len = PFKEY_UNIT64(size);
+	saaddr->sadb_address_exttype = exttype;
+	saaddr->sadb_address_proto = proto;
+	saaddr->sadb_address_reserved = 0;
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		/* host address: all bits are significant */
+		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) * 8;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) * 8;
+		break;
+#endif /* INET6 */
+	default:
+		log(LOG_DEBUG,
+		    "%s: Invalid address family: %d.\n",
+		    __func__, addr->sa_family);
+		break;
+	}
+
+	return size;
+}
+
+/* Fill in "sasrc" as an SADB_EXT_ADDRESS_SRC extension for "src". */
+static inline size_t
+if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
+    int proto)
+{
+	const uint16_t exttype = SADB_EXT_ADDRESS_SRC;
+
+	return if_ipsec_set_sadb_addr(sasrc, src, proto, exttype);
+}
+
+/* Fill in "sadst" as an SADB_EXT_ADDRESS_DST extension for "dst". */
+static inline size_t
+if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
+    int proto)
+{
+	const uint16_t exttype = SADB_EXT_ADDRESS_DST;
+
+	return if_ipsec_set_sadb_addr(sadst, dst, proto, exttype);
+}
+
+/*
+ * Fill in the SADB_X_EXT_POLICY extension "xpl" and, only when "policy"
+ * is IPSEC_POLICY_IPSEC, the ipsecrequest "xisr" which follows it.
+ * "xisr" may be NULL for any other policy.
+ * Returns the total size in bytes of the filled structures.
+ */
+static inline size_t
+if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
+    struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
+    uint8_t level)
+{
+	const bool has_request = (policy == IPSEC_POLICY_IPSEC);
+	size_t size = sizeof(*xpl);
+
+	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
+
+	if (has_request) {
+		/* ESP, transport mode, with a newly allocated reqid. */
+		xisr->sadb_x_ipsecrequest_len = PFKEY_ALIGN8(sizeof(*xisr));
+		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
+		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
+		xisr->sadb_x_ipsecrequest_level = level;
+		xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
+
+		size += PFKEY_ALIGN8(sizeof(*xisr));
+	}
+
+	xpl->sadb_x_policy_len = PFKEY_UNIT64(size);
+	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	xpl->sadb_x_policy_type = policy;
+	xpl->sadb_x_policy_dir = dir;
+	xpl->sadb_x_policy_id = id;
+	xpl->sadb_x_policy_reserved = 0;
+	xpl->sadb_x_policy_reserved2 = 0;
+
+	return size;
+}
+
+/*
+ * Fill in the PF_KEY base message header "msg" for a message of type
+ * "msgtype".  "extlen" is the length of all extensions in 64 bit units.
+ */
+static inline void
+if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
+{
+
+	KASSERT(msg != NULL);
+
+	/* what kind of message this is */
+	msg->sadb_msg_version = PF_KEY_V2;
+	msg->sadb_msg_type = msgtype;
+	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+
+	/* header plus extensions, in 64 bit units */
+	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
+
+	msg->sadb_msg_errno = 0;
+	msg->sadb_msg_reserved = 0;
+	msg->sadb_msg_seq = 0; /* XXXX */
+	msg->sadb_msg_pid = 0; /* XXXX */
+}
+
+/* Fill in "msg" as the header of an SADB_X_SPDADD message. */
+static inline void
+if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
+{
+	const uint8_t msgtype = SADB_X_SPDADD;
+
+	if_ipsec_set_sadb_msg(msg, extlen, msgtype);
+}
+
+/* Fill in "msg" as the header of an SADB_X_SPDDELETE2 message. */
+static inline void
+if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
+{
+	const uint8_t msgtype = SADB_X_SPDDELETE2;
+
+	if_ipsec_set_sadb_msg(msg, extlen, msgtype);
+}
+
+/*
+ * Copy "addr" into "addrport" and store htons("port") into the port
+ * field of the copy.
+ * "addrport" must have room for addr->sa_len bytes.
+ * Returns 0 on success, or EINVAL (after logging) when the address family
+ * is neither AF_INET nor AF_INET6; the copy is done in either case.
+ */
+static int
+if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
+    in_port_t port)
+{
+
+	sockaddr_copy(addrport, addr->sa_len, addr);
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		satosin(addrport)->sin_port = htons(port);
+		return 0;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		satosin6(addrport)->sin6_port = htons(port);
+		return 0;
+#endif /* INET6 */
+	default:
+		break;
+	}
+
+	log(LOG_DEBUG,
+	    "%s: Invalid address family: %d.\n",
+	    __func__, addr->sa_family);
+	return EINVAL;
+}
+
+/*
+ * Build an SADB_X_SPDADD message for the given selector and policy and
+ * hand it to key_kpi_spdadd().
+ *
+ * "sport"/"dport" == 0 means that the address is used as it is; otherwise
+ * the port is stored into a copy of the address.
+ *
+ * Returns what key_kpi_spdadd() returns, or NULL when an address with a
+ * non-zero port has an unsupported address family.
+ */
+static struct secpolicy *
+if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
+    struct sockaddr *dst, in_port_t dport,
+    int dir, int proto, int level, u_int policy)
+{
+	struct sadb_msg msg;
+	struct sadb_address xsrc, xdst;
+	struct sadb_x_policy xpl;
+	struct sadb_x_ipsecrequest xisr;
+	/*
+	 * Scratch space for an address with a port.  A plain struct sockaddr
+	 * is too small to hold a struct sockaddr_in6, so use
+	 * sockaddr_storage.
+	 */
+	struct sockaddr_storage ss;
+	struct sockaddr *addrport = (struct sockaddr *)&ss;
+	size_t size;
+	size_t padlen;
+	uint16_t ext_msg_len = 0;
+	struct mbuf *m;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&xsrc, 0, sizeof(xsrc));
+	memset(&xdst, 0, sizeof(xdst));
+	memset(&xpl, 0, sizeof(xpl));
+	memset(&xisr, 0, sizeof(xisr));
+
+	MGETHDR(m, M_WAITOK, MT_DATA);
+
+	/* Fill in each header and sum up the extension length. */
+	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
+	ext_msg_len += PFKEY_UNIT64(size);
+	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
+	ext_msg_len += PFKEY_UNIT64(size);
+	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level);
+	ext_msg_len += PFKEY_UNIT64(size);
+	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
+
+	/* build PF_KEY message */
+
+	m->m_len = sizeof(msg);
+	m_copyback(m, 0, sizeof(msg), &msg);
+
+	/* source address extension */
+	if_ipsec_add_mbuf(m, &xsrc, sizeof(xsrc));
+	if (sport == 0) {
+		if_ipsec_add_mbuf(m, src, src->sa_len);
+	} else {
+		if (if_ipsec_set_addr_port(addrport, src, sport) != 0)
+			goto fail;
+		if_ipsec_add_mbuf(m, addrport, addrport->sa_len);
+	}
+	padlen = PFKEY_UNUNIT64(xsrc.sadb_address_len)
+		- (sizeof(xsrc) + PFKEY_ALIGN8(src->sa_len));
+	if_ipsec_add_pad(m, padlen);
+
+	/* destination address extension */
+	if_ipsec_add_mbuf(m, &xdst, sizeof(xdst));
+	if (dport == 0) {
+		if_ipsec_add_mbuf(m, dst, dst->sa_len);
+	} else {
+		if (if_ipsec_set_addr_port(addrport, dst, dport) != 0)
+			goto fail;
+		if_ipsec_add_mbuf(m, addrport, addrport->sa_len);
+	}
+	padlen = PFKEY_UNUNIT64(xdst.sadb_address_len)
+		- (sizeof(xdst) + PFKEY_ALIGN8(dst->sa_len));
+	if_ipsec_add_pad(m, padlen);
+
+	/* policy extension; the request follows only for IPsec policy */
+	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
+	if (policy == IPSEC_POLICY_IPSEC)
+		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
+
+	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
+	return key_kpi_spdadd(m);
+
+fail:
+	/* The message has not been handed over yet, so release it here. */
+	m_freem(m);
+	return NULL;
+}
+
+/*
+ * Set up the four SPs (IPv4/IPv6, in/out) of "var", either by sharing
+ * the SP set of another I/F or by adding new SPs.
+ * Returns 0 on success.  When adding any SP fails, the SPs added so far
+ * are deleted again and EEXIST is returned.
+ */
+static int
+if_ipsec_add_sp(struct ipsec_variant *var,
+    struct sockaddr *src, in_port_t sport,
+    struct sockaddr *dst, in_port_t dport)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	int level;
+	u_int v6policy;
+
+	/* The old SPs must have been deleted before new ones are added. */
+	KASSERT(IV_SP_IN(var) == NULL);
+	KASSERT(IV_SP_OUT(var) == NULL);
+	KASSERT(IV_SP_IN6(var) == NULL);
+	KASSERT(IV_SP_OUT6(var) == NULL);
+
+	/* Nothing to add when an existing SP set can be shared. */
+	if (if_ipsec_share_sp(var))
+		return 0;
+
+	level = if_ipsec_nat_t(sc) ?
+	    IPSEC_LEVEL_REQUIRE : IPSEC_LEVEL_UNIQUE;
+	v6policy = if_ipsec_fwd_ipv6(sc) ?
+	    IPSEC_POLICY_IPSEC : IPSEC_POLICY_DISCARD;
+
+	/* inner IPv4: always IPsec policy */
+	if ((IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
+	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, level,
+	    IPSEC_POLICY_IPSEC)) == NULL)
+		goto fail;
+	if ((IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
+	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, level,
+	    IPSEC_POLICY_IPSEC)) == NULL)
+		goto fail;
+
+	/* inner IPv6: IPsec or discard, depending on the I/F setting */
+	if ((IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
+	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, level, v6policy)) == NULL)
+		goto fail;
+	if ((IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
+	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, level, v6policy)) == NULL)
+		goto fail;
+
+	return 0;
+
+fail:
+	/* Roll back in the reverse order of the additions. */
+	if (IV_SP_IN6(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_IN6(var));
+		IV_SP_IN6(var) = NULL;
+	}
+	if (IV_SP_OUT(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_OUT(var));
+		IV_SP_OUT(var) = NULL;
+	}
+	if (IV_SP_IN(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_IN(var));
+		IV_SP_IN(var) = NULL;
+	}
+
+	return EEXIST;
+}
+
+/*
+ * Delete the SP "sp" by sending an SADB_X_SPDDELETE2 message which carries
+ * the SP's ID, after dropping the reference taken by key_kpi_spdadd().
+ * Returns 0 when "sp" is NULL, otherwise the result of
+ * key_kpi_spddelete2().
+ */
+static int
+if_ipsec_del_sp0(struct secpolicy *sp)
+{
+	struct sadb_msg msg;
+	struct sadb_x_policy xpl;
+	size_t size;
+	uint16_t ext_msg_len = 0;
+	uint32_t spid;
+	int error;
+	struct mbuf *m;
+
+	if (sp == NULL)
+		return 0;
+
+	/*
+	 * Remember the ID now.  "sp" must not be dereferenced any more
+	 * once our reference is dropped and the SP is deleted.
+	 */
+	spid = sp->id;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&xpl, 0, sizeof(xpl));
+
+	MGETHDR(m, M_WAITOK, MT_DATA);
+
+	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, spid, 0);
+	ext_msg_len += PFKEY_UNIT64(size);
+
+	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
+
+	m->m_len = sizeof(msg);
+	m_copyback(m, 0, sizeof(msg), &msg);
+
+	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
+
+	/*  unreference correspond to key_kpi_spdadd(). */
+	KEY_SP_UNREF(&sp);
+	error = key_kpi_spddelete2(m);
+	if (error != 0) {
+		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
+		    __func__, spid, error);
+	}
+	return error;
+}
+
+/*
+ * Release the SPs of "var".  When the SP set is shared with another I/F,
+ * only the pointers are cleared (by if_ipsec_unshare_sp()); otherwise
+ * each SP is deleted.
+ */
+static void
+if_ipsec_del_sp(struct ipsec_variant *var)
+{
+
+	/* are the SPs shared? */
+	if (if_ipsec_unshare_sp(var))
+		return;
+
+	/* Errors are ignored here; if_ipsec_del_sp0() logs them itself. */
+	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
+	IV_SP_OUT(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_IN(var));
+	IV_SP_IN(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
+	IV_SP_OUT6(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
+	IV_SP_IN6(var) = NULL;
+}
+
+/*
+ * Delete the SPs of "ovar" and, when "nvar" has both tunnel endpoints,
+ * add the SPs for "nvar".  The NAT-T ports of "nvar" are used only when
+ * NAT-T is enabled on the I/F.
+ * Returns 0 or the error from if_ipsec_add_sp().
+ */
+static int
+if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
+    struct ipsec_variant *nvar)
+{
+	struct sockaddr *src;
+	struct sockaddr *dst;
+	in_port_t src_port;
+	in_port_t dst_port;
+
+	KASSERT(mutex_owned(&sc->ipsec_lock));
+
+	if_ipsec_del_sp(ovar);
+
+	src = nvar->iv_psrc;
+	dst = nvar->iv_pdst;
+	if (if_ipsec_nat_t(sc)) {
+		/* NAT-T enabled */
+		src_port = nvar->iv_sport;
+		dst_port = nvar->iv_dport;
+	} else {
+		src_port = 0;
+		dst_port = 0;
+	}
+
+	/* An unconfigured new variant has no SPs. */
+	if (src == NULL || dst == NULL)
+		return 0;
+
+	return if_ipsec_add_sp(nvar, src, src_port, dst, dst_port);
+}
+
+/*
+ * ipsec_variant and its SPs update API.
+ *
+ * Assumption:
+ * reader side dereferences sc->ipsec_var in reader critical section only,
+ * that is, all of reader sides do not read the sc->ipsec_var after
+ * pserialize_perform().
+ */
+static int
+if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
+    struct ipsec_variant *nullvar)
+{
+	struct ifnet *ifp = &sc->ipsec_if;
+	struct ipsec_variant *ovar = sc->ipsec_var;
+	int error;
+
+	KASSERT(mutex_owned(&sc->ipsec_lock));
+
+	/*
+	 * To keep consistency between ipsec(4) I/F settings and SPs,
+	 * we stop packet processing while replacing SPs, that is, we set
+	 * "null" config variant to sc->ipsec_var.
+	 */
+	sc->ipsec_var = nullvar;
+	pserialize_perform(ipsec_psz);
+	/* Retire the psref target of the old variant. */
+	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
+
+	/* Delete the SPs of ovar and add the ones for nvar. */
+	error = if_ipsec_replace_sp(sc, ovar, nvar);
+	if (!error)
+		sc->ipsec_var = nvar;
+	else {
+		sc->ipsec_var = ovar; /* rollback */
+		/* ovar is published again, so it needs a live psref target. */
+		psref_target_init(&ovar->iv_psref, iv_psref_class);
+	}
+
+	/* Same sequence as above, this time to retire nullvar. */
+	pserialize_perform(ipsec_psz);
+	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
+
+	/* IFF_RUNNING follows whether the published variant is configured. */
+	if (if_ipsec_variant_is_configured(sc->ipsec_var))
+		ifp->if_flags |= IFF_RUNNING;
+	else
+		ifp->if_flags &= ~IFF_RUNNING;
+
+	return error;
+}
Index: src/sys/net/if_ipsec.h
diff -u /dev/null src/sys/net/if_ipsec.h:1.1
--- /dev/null	Wed Jan 10 10:56:31 2018
+++ src/sys/net/if_ipsec.h	Wed Jan 10 10:56:30 2018
@@ -0,0 +1,231 @@
+/*	$NetBSD: if_ipsec.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $  */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * if_ipsec.h
+ */
+
+#ifndef _NET_IF_IPSEC_H_
+#define _NET_IF_IPSEC_H_
+
+#include <sys/queue.h>
+#ifdef _KERNEL
+#include <sys/psref.h>
+#endif
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#endif
+
+#include <netinet/in.h>
+#include <netipsec/ipsec.h>
+
+#ifdef _KERNEL
+/*
+ * This macro controls the upper limitation on nesting of ipsec tunnels.
+ * Since, setting a large value to this macro with a careless configuration
+ * may introduce system crash, we don't allow any nestings by default.
+ * If you need to configure nested ipsec tunnels, you can define this macro
+ * in your kernel configuration file.  However, if you do so, please be
+ * careful to configure the tunnels so that it won't make a loop.
+ */
+#ifndef MAX_IPSEC_NEST
+#define MAX_IPSEC_NEST 1
+#endif
+
+#define IFF_NAT_T	IFF_LINK0	/* enable NAT-T */
+#define IFF_ECN		IFF_LINK1	/* enable ECN */
+#define IFF_FWD_IPV6	IFF_LINK2	/* forward IPv6 packet */
+
+extern struct psref_class *iv_psref_class;
+
+/*
+ * One set of tunnel configuration of an ipsec(4) I/F.
+ * A softc points to its current variant through ipsec_softc->ipsec_var.
+ */
+struct ipsec_variant {
+	struct ipsec_softc *iv_softc;	/* softc this variant belongs to */
+
+	struct sockaddr	*iv_psrc;	/* Physical src addr */
+	struct sockaddr	*iv_pdst;	/* Physical dst addr */
+	/* NOTE(review): presumably the ip_encap(9) attach cookies - confirm */
+	const struct encaptab *iv_encap_cookie4;
+	const struct encaptab *iv_encap_cookie6;
+	/* output routine: (variant, address family, packet) */
+	int (*iv_output)(struct ipsec_variant *, int, struct mbuf *);
+	in_port_t iv_sport;		/* src port, used for SPs with NAT-T */
+	in_port_t iv_dport;		/* dst port, used for SPs with NAT-T */
+
+	/*
+	 * IPsec SPs
+	 * Don't change directly, use if_ipsec_replace_sp().
+	 * Use the IV_SP_{IN,OUT}{,6}() macros to access them.
+	 */
+	struct secpolicy *iv_sp[IPSEC_DIR_MAX];
+	struct secpolicy *iv_sp6[IPSEC_DIR_MAX];
+
+	struct psref_target iv_psref;	/* see if_ipsec_getref_variant() */
+};
+
+/* Route cache with its lock; kept per CPU in ipsec_softc->ipsec_ro_percpu. */
+struct ipsec_ro {
+	struct route ir_ro;
+	kmutex_t ir_lock;	/* protects ir_ro */
+};
+
+/* Per interface state of an ipsec(4) I/F. */
+struct ipsec_softc {
+	struct ifnet	ipsec_if;	/* common area - must be at the top */
+	percpu_t *ipsec_ro_percpu;	/* struct ipsec_ro */
+	struct ipsec_variant *ipsec_var; /*
+					  * reader must use
+					  * if_ipsec_getref_variant()
+					  * instead of direct dereference.
+					  */
+	kmutex_t ipsec_lock;		/* writer lock for ipsec_var */
+
+	LIST_ENTRY(ipsec_softc) ipsec_list; /* list of all ipsec(4) softcs */
+};
+
+#define IPSEC_MTU		(1280)	/* Default MTU */
+#define	IPSEC_MTU_MIN		(1280)	/* Minimum MTU */
+#define	IPSEC_MTU_MAX		(8192)	/* Maximum MTU */
+
+/* Accessors for the SPs of an ipsec_variant: IPv4/IPv6 x inbound/outbound. */
+#define IV_SP_IN(x) ((x)->iv_sp[IPSEC_DIR_INBOUND])
+#define IV_SP_IN6(x) ((x)->iv_sp6[IPSEC_DIR_INBOUND])
+#define IV_SP_OUT(x) ((x)->iv_sp[IPSEC_DIR_OUTBOUND])
+#define IV_SP_OUT6(x) ((x)->iv_sp6[IPSEC_DIR_OUTBOUND])
+
+/* True when both physical (outer) addresses of "var" are set. */
+static inline bool
+if_ipsec_variant_is_configured(struct ipsec_variant *var)
+{
+
+	if (var->iv_psrc == NULL)
+		return false;
+	if (var->iv_pdst == NULL)
+		return false;
+	return true;
+}
+
+/* True when at least one physical (outer) address of "var" is not set. */
+static inline bool
+if_ipsec_variant_is_unconfigured(struct ipsec_variant *var)
+{
+
+	if (var->iv_psrc == NULL)
+		return true;
+	if (var->iv_pdst == NULL)
+		return true;
+	return false;
+}
+
+/*
+ * Copy the configuration of "src" to "dst".
+ * The SP pointers and the psref target are not copied.
+ */
+static inline void
+if_ipsec_copy_variant(struct ipsec_variant *dst, struct ipsec_variant *src)
+{
+
+	dst->iv_softc = src->iv_softc;
+
+	/* tunnel endpoints */
+	dst->iv_psrc = src->iv_psrc;
+	dst->iv_sport = src->iv_sport;
+	dst->iv_pdst = src->iv_pdst;
+	dst->iv_dport = src->iv_dport;
+
+	/* encap cookies and output routine */
+	dst->iv_encap_cookie4 = src->iv_encap_cookie4;
+	dst->iv_encap_cookie6 = src->iv_encap_cookie6;
+	dst->iv_output = src->iv_output;
+}
+
+/*
+ * Reset the tunnel configuration of "var" to the unconfigured state.
+ * iv_softc, the SP pointers and the psref target are left as they are.
+ */
+static inline void
+if_ipsec_clear_config(struct ipsec_variant *var)
+{
+
+	/* tunnel endpoints */
+	var->iv_psrc = NULL;
+	var->iv_sport = 0;
+	var->iv_pdst = NULL;
+	var->iv_dport = 0;
+
+	/* encap cookies and output routine */
+	var->iv_encap_cookie4 = NULL;
+	var->iv_encap_cookie6 = NULL;
+	var->iv_output = NULL;
+}
+
+/*
+ * Get ipsec_variant from ipsec_softc.
+ *
+ * Never return NULL by contract.
+ * ipsec_variant itself is protected not to be freed by iv_psref.
+ * Once a reader dereferences sc->ipsec_var by this API, the reader must not
+ * re-dereference from sc->ipsec_var.
+ */
+static inline struct ipsec_variant *
+if_ipsec_getref_variant(struct ipsec_softc *sc, struct psref *psref)
+{
+	struct ipsec_variant *var;
+	int s;
+
+	/*
+	 * Load the pointer and acquire the psref inside one pserialize
+	 * read section.
+	 */
+	s = pserialize_read_enter();
+	var = sc->ipsec_var;
+	KASSERT(var != NULL);
+	/* Barrier between loading the pointer and using what it points to. */
+	membar_datadep_consumer();
+	psref_acquire(psref, &var->iv_psref, iv_psref_class);
+	pserialize_read_exit(s);
+
+	/* To be released with if_ipsec_putref_variant(). */
+	return var;
+}
+
+/* Release the reference taken by if_ipsec_getref_variant(). */
+static inline void
+if_ipsec_putref_variant(struct ipsec_variant *var, struct psref *psref)
+{
+	struct psref_target *target;
+
+	KASSERT(var != NULL);
+
+	target = &var->iv_psref;
+	psref_release(psref, target, iv_psref_class);
+}
+
+/* Return what psref_held() reports for the psref target of "var". */
+static inline bool
+if_ipsec_heldref_variant(struct ipsec_variant *var)
+{
+	struct psref_target *target = &var->iv_psref;
+
+	return psref_held(target, iv_psref_class);
+}
+
+/* Functions exported by the ipsec(4) I/F. */
+void ipsecifattach(int);
+int if_ipsec_encap_func(struct mbuf *, int, int, void *);
+void if_ipsec_input(struct mbuf *, int, struct ifnet *);
+int if_ipsec_output(struct ifnet *, struct mbuf *,
+		    const struct sockaddr *, const struct rtentry *);
+int if_ipsec_ioctl(struct ifnet *, u_long, void *);
+#endif /* _KERNEL */
+
+/*
+ * sharing SP note:
+ * When ipsec(4) I/Fs use NAT-T, they can use the same src and dst address pair
+ * as long as they use different ports. However, the SPD cannot have SPs which
+ * use the same src and dst address pair and the same policy. So, such ipsec(4)
+ * I/Fs share the same SPs.
+ * To avoid a race between ipsec0 set_tunnel/delete_tunnel and ipsec1
+ * set_tunnel/delete_tunnel, a global lock is needed. See also the following
+ * locking notes.
+ *
+ * Locking notes:
+ * + ipsec_softcs.list is protected by ipsec_softcs.lock (an adaptive mutex)
+ *       ipsec_softcs.list is the list of all ipsec_softcs. It is used by
+ *       ioctl context only.
+ * + ipsec_softc->ipsec_var is protected by
+ *   - ipsec_softc->ipsec_lock (an adaptive mutex) for writer
+ *   - ipsec_var->iv_psref for reader
+ *       ipsec_softc->ipsec_var is used for variant values while the ipsec tunnel
+ *       exists.
+ * + struct ipsec_ro->ir_ro is protected by struct ipsec_ro->ir_lock.
+ *       This lock is required to exclude softnet/0 lwp(such as output
+ *       processing softint) and  processing lwp(such as DAD timer processing).
+ * + if_ipsec_share_sp() and if_ipsec_unshare_sp() operations are serialized by
+ *   encap_lock
+ *       This only needs to be a global lock; it need not be encap_lock.
+ *
+ * Locking order:
+ *     - encap_lock => ipsec_softc->ipsec_lock => ipsec_softcs.lock
+ */
+#endif /* _NET_IF_IPSEC_H_ */

Index: src/sys/netipsec/ipsecif.c
diff -u /dev/null src/sys/netipsec/ipsecif.c:1.1
--- /dev/null	Wed Jan 10 10:56:31 2018
+++ src/sys/netipsec/ipsecif.c	Wed Jan 10 10:56:30 2018
@@ -0,0 +1,933 @@
+/*	$NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $  */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $");
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#include "opt_ipsec.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_encap.h>
+#include <netinet/ip_ecn.h>
+#include <netinet/ip_private.h>
+#include <netinet/udp.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip6_private.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
+#include <netinet/ip_ecn.h>
+#endif
+
+#include <netipsec/key.h>
+#include <netipsec/ipsecif.h>
+
+#include <net/if_ipsec.h>
+
+/* IPv4 outer header handling */
+static void ipsecif4_input(struct mbuf *, int, int, void *);
+static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
+static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
+	struct ifnet *);
+
+#ifdef INET6
+/* IPv6 outer header handling */
+static int ipsecif6_input(struct mbuf **, int *, int, void *);
+static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
+static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
+	struct ifnet *);
+#endif
+
+/* TTL of the outer IPv4 header */
+static int ip_ipsec_ttl = IPSEC_TTL;
+/* non-zero: copy the inner TOS to the outer IPv4 header */
+static int ip_ipsec_copy_tos = 0;
+#ifdef INET6
+/* hop limit of the outer IPv6 header */
+static int ip6_ipsec_hlim = IPSEC_HLIM;
+/* zero: pass IPV6_MINMTU to ip6_output(); non-zero: pass no flag */
+static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
+/* zero: the traffic class of the outer IPv6 header is set to 0 */
+static int ip6_ipsec_copy_tos = 0;
+#endif
+
+/* encapsw for IPv4; only the input routine is set. */
+struct encapsw ipsecif4_encapsw = {
+	.encapsw4 = {
+		.pr_input = ipsecif4_input,
+		.pr_ctlinput = NULL,
+	}
+};
+
+#ifdef INET6
+/* NOTE(review): presumably defined with its initializer later in this file */
+static const struct encapsw ipsecif6_encapsw;
+#endif
+
+/*
+ * Prepend the outer IPv4 header, built from the tunnel endpoints of "var",
+ * to "m".
+ *
+ * "proto" is stored as the protocol of the outer header.  "tos" is the TOS
+ * of the inner packet; it is copied only when ip_ipsec_copy_tos is set, and
+ * is passed to ip_ecn_ingress() unless IPSEC_TX_TOS_CLEAR is defined.
+ *
+ * Returns the new head of the chain, or NULL when an endpoint is
+ * unspecified, broadcast or multicast, or when no mbuf is available.
+ * "m" has been freed when NULL is returned.
+ */
+static struct mbuf *
+ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
+    uint8_t proto, uint8_t tos)
+{
+	struct ip *ip;
+	struct sockaddr_in *src, *dst;
+
+	src = satosin(var->iv_psrc);
+	dst = satosin(var->iv_pdst);
+
+	/* Neither endpoint may be unspecified or the broadcast address. */
+	if (in_nullhost(src->sin_addr) || in_nullhost(dst->sin_addr) ||
+	    src->sin_addr.s_addr == INADDR_BROADCAST ||
+	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
+		m_freem(m);
+		return NULL;
+	}
+	m->m_flags &= ~M_BCAST;
+
+	if (IN_MULTICAST(src->sin_addr.s_addr) ||
+	   IN_MULTICAST(dst->sin_addr.s_addr)) {
+		m_freem(m);
+		return NULL;
+	}
+
+	/* Make room for the header and make sure it is writable. */
+	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
+		m = m_pullup(m, sizeof(struct ip));
+	if (m == NULL)
+		return NULL;
+
+	ip = mtod(m, struct ip *);
+	ip->ip_v = IPVERSION;
+	ip->ip_off = htons(0);
+	ip->ip_id = 0;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	if (ip_ipsec_copy_tos)
+		ip->ip_tos = tos;
+	else
+		ip->ip_tos = 0;
+	ip->ip_sum = 0;
+	ip->ip_src = src->sin_addr;
+	ip->ip_dst = dst->sin_addr;
+	ip->ip_p = proto;
+	ip->ip_ttl = ip_ipsec_ttl;
+	ip->ip_len = htons(m->m_pkthdr.len);
+#ifndef IPSEC_TX_TOS_CLEAR
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	if (ifp->if_flags & IFF_ECN)
+		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
+	else
+		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
+#endif
+
+	return m;
+}
+
+/*
+ * Decide whether the IPv4 packet "m" needs fragmentation before it is
+ * UDP encapsulated.
+ *
+ * Returns sav->esp_frag of the SA found for "isr" when that SA uses UDP
+ * encapsulation and the packet's ip_len is larger than it.  Returns 0
+ * otherwise, including when no SA is found.
+ */
+static int
+ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
+{
+	struct secasvar *sav;
+	struct ip hdrcopy;
+	struct ip *ip;
+	int mtu = 0;
+
+	sav = key_lookup_sa_bysaidx(&isr->saidx);
+	if (sav == NULL)
+		return 0;
+
+	if ((sav->natt_type &
+	    (UDP_ENCAP_ESPINUDP | UDP_ENCAP_ESPINUDP_NON_IKE)) != 0) {
+		if (m->m_len >= sizeof(struct ip)) {
+			ip = mtod(m, struct ip *);
+		} else {
+			/* The header is not contiguous; work on a copy. */
+			m_copydata(m, 0, sizeof(hdrcopy), &hdrcopy);
+			ip = &hdrcopy;
+		}
+
+		mtu = sav->esp_frag;
+		if (ntohs(ip->ip_len) <= mtu)
+			mtu = 0;
+	}
+
+	KEY_SA_UNREF(&sav);
+	return mtu;
+}
+
+/*
+ * Get the outer protocol number and the TOS (or traffic class) for the
+ * inner packet "m" whose address family is "family".
+ *
+ * On success, returns the (possibly pulled up) mbuf and sets *proto0 and
+ * *tos0.  On failure, returns NULL with *proto0 and *tos0 set to 0; the
+ * mbuf has been freed in that case, so the caller must not touch it.
+ */
+static struct mbuf *
+ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
+{
+	const struct ip *ip;
+	int proto;
+	int tos;
+
+	KASSERT(proto0 != NULL);
+	KASSERT(tos0 != NULL);
+
+	switch (family) {
+	case AF_INET:
+		proto = IPPROTO_IPV4;
+		if (m->m_len < sizeof(*ip)) {
+			/* m_pullup() frees the chain when it fails. */
+			m = m_pullup(m, sizeof(*ip));
+			if (!m) {
+				*tos0 = 0;
+				*proto0 = 0;
+				return NULL;
+			}
+		}
+		ip = mtod(m, const struct ip *);
+		tos = ip->ip_tos;
+		/* TODO: support ALTQ for inner packet */
+		break;
+#ifdef INET6
+	case AF_INET6: {
+		const struct ip6_hdr *ip6;
+		proto = IPPROTO_IPV6;
+		if (m->m_len < sizeof(*ip6)) {
+			m = m_pullup(m, sizeof(*ip6));
+			if (!m) {
+				*tos0 = 0;
+				*proto0 = 0;
+				return NULL;
+			}
+		}
+		ip6 = mtod(m, const struct ip6_hdr *);
+		/* the traffic class is bits 20-27 of the flow word */
+		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+		/* TODO: support ALTQ for inner packet */
+		break;
+	}
+#endif /* INET6 */
+	default:
+		/*
+		 * The caller loses its only pointer to the mbuf when NULL
+		 * is returned, so free it here like the m_pullup() failure
+		 * paths do.
+		 */
+		m_freem(m);
+		*tos0 = 0;
+		*proto0 = 0;
+		return NULL;
+	}
+
+	*proto0 = proto;
+	*tos0 = tos;
+	return m;
+}
+
+/*
+ * Fragment "m" to "mtu" with ip_fragment() and pass each fragment to
+ * ipsecif4_output().
+ *
+ * Once one fragment fails, the remaining fragments are freed without
+ * being sent.  Returns the error of ip_fragment() or of the first failed
+ * ipsecif4_output(), or 0.
+ */
+static int
+ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
+{
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	struct mbuf *next;
+	struct m_tag *mtag;
+	int error;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+
+	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+	if (mtag)
+		m_tag_delete(m, mtag);
+
+	error = ip_fragment(m, ifp, mtu);
+	if (error)
+		return error;
+
+	for (error = 0; m; m = next) {
+		/*
+		 * The fragments are linked through m_nextpkt.  Detach this
+		 * one from the list; m_next must be kept, it links the
+		 * mbufs of the fragment itself.
+		 */
+		next = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (error) {
+			m_freem(m);
+			continue;
+		}
+
+		error = ipsecif4_output(var, family, m);
+	}
+	if (error == 0)
+		IP_STATINC(IP_STAT_FRAGMENTED);
+
+	return error;
+}
+
+/*
+ * Check whether the received IPv4 packet "m" (with header "ip") belongs
+ * to the tunnel described by "var".
+ *
+ * The packet matches when its addresses are the reverse of the tunnel
+ * endpoints and, unless neither the packet carries a NAT-T ports tag nor
+ * the tunnel has ports configured, its ports are the reverse of the
+ * tunnel ports.
+ *
+ * Returns 0 when not matched.  When matched, returns the sum of the sizes
+ * of the two compared addresses and deletes the NAT-T ports tag.
+ */
+int
+ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
+{
+	struct sockaddr_in *src, *dst;
+	struct m_tag *mtag;
+	u_int16_t src_port = 0;
+	u_int16_t dst_port = 0;
+	bool no_udp_encap;
+
+	KASSERT(var != NULL);
+
+	src = satosin(var->iv_psrc);
+	dst = satosin(var->iv_pdst);
+
+	/* address match */
+	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
+	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
+		return 0;
+
+	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+	if (mtag != NULL) {
+		/* The two ports are stored right behind the tag header. */
+		u_int16_t *ports = (u_int16_t *)(mtag + 1);
+
+		src_port = ports[0];
+		dst_port = ports[1];
+	}
+
+	/* UDP encap? */
+	no_udp_encap = (mtag == NULL &&
+	    var->iv_sport == 0 && var->iv_dport == 0);
+
+	/* port match */
+	if (!no_udp_encap &&
+	    (src_port != var->iv_dport || dst_port != var->iv_sport)) {
+#ifdef DEBUG
+		printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
+		    __func__, ntohs(src_port), ntohs(dst_port),
+		    ntohs(var->iv_sport), ntohs(var->iv_dport));
+#endif
+		return 0;
+	}
+
+	/*
+	 * hide NAT-T information from encapsulated traffics.
+	 * they don't know about IPsec.
+	 */
+	if (mtag != NULL)
+		m_tag_delete(m, mtag);
+	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
+}
+
+/*
+ * Output routine for a tunnel whose outer header is IPv4.
+ *
+ * Prepends the outer IPv4 header to the inner packet "m" (address family
+ * "family") and passes it to IPsec processing with the outbound SP of
+ * "var".  The caller must hold a reference to "var".
+ *
+ * Returns 0 or an errno.  The packet is silently dropped (return 0) when
+ * the SP's policy is not IPSEC_POLICY_IPSEC.
+ */
+static int
+ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
+{
+	struct secpolicy *sp = NULL;
+	u_int8_t tos;
+	int proto;
+	int error;
+	int mtu;
+	u_long sa_mtu = 0;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+	KASSERT(if_ipsec_variant_is_configured(var));
+	KASSERT(var->iv_psrc->sa_family == AF_INET);
+	KASSERT(var->iv_pdst->sa_family == AF_INET);
+
+	sp = IV_SP_OUT(var);
+	KASSERT(sp != NULL);
+	/*
+	 * The SPs in ipsec_variant are prevented from freed by
+	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
+	 */
+
+	KASSERT(sp->policy != IPSEC_POLICY_NONE);
+	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
+	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
+	/* Any remaining non-IPsec policy: count a drop and report success. */
+	if(sp->policy != IPSEC_POLICY_IPSEC) {
+		struct ifnet *ifp = &var->iv_softc->ipsec_if;
+		m_freem(m);
+		IF_DROP(&ifp->if_snd);
+		return 0;
+	}
+
+	/* get flowinfo */
+	m = ipsecif4_flowinfo(m, family, &proto, &tos);
+	if (m == NULL) {
+		error = ENETUNREACH;
+		goto done;
+	}
+
+	/* prepend new IP header */
+	m = ipsecif4_prepend_hdr(var, m, proto, tos);
+	if (m == NULL) {
+		error = ENETUNREACH;
+		goto done;
+	}
+
+	/*
+	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
+	 * See "natt_frag" processing.
+	 * However, ipsec(4) interface's one is not done in the same way,
+	 * so we must do NAT-T fragmentation by own code.
+	 */
+	/* NAT-T ESP fragmentation */
+	mtu = ipsecif4_needfrag(m, sp->req);
+	if (mtu > 0)
+		return ipsecif4_fragout(var, family, m, mtu);
+
+	/* IPsec output */
+	IP_STATINC(IP_STAT_LOCALOUT);
+	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
+	/* ENOENT from ipsec4_process_packet() is not reported as an error. */
+	if (error == ENOENT)
+		error = 0;
+	/*
+	 * fragmentation is already done in ipsecif4_fragout(),
+	 * so ipsec4_process_packet() must not do fragmentation here.
+	 */
+	KASSERT(error != 0 || sa_mtu == 0);
+
+done:
+	return error;
+}
+
+#ifdef INET6
+/*
+ * Output routine for a tunnel whose outer header is IPv6.
+ *
+ * Prepends the outer IPv6 header to the inner packet "m" (address family
+ * "family") and sends it with ip6_output(), using the per-CPU route cache
+ * of the softc.  The caller must hold a reference to "var".
+ *
+ * Returns 0 or an errno.
+ */
+static int
+ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
+{
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_ro *iro;
+	struct rtentry *rt;
+	struct sockaddr_in6 *sin6_src;
+	struct sockaddr_in6 *sin6_dst;
+	struct ip6_hdr *ip6;
+	int proto, error;
+	u_int8_t itos, otos;
+	union {
+		struct sockaddr		dst;
+		struct sockaddr_in6	dst6;
+	} u;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	sin6_src = satosin6(var->iv_psrc);
+	sin6_dst = satosin6(var->iv_pdst);
+
+	KASSERT(sin6_src->sin6_family == AF_INET6);
+	KASSERT(sin6_dst->sin6_family == AF_INET6);
+
+	/* Get the outer next header and the inner TOS/traffic class. */
+	switch (family) {
+#ifdef INET
+	case AF_INET:
+	    {
+		struct ip *ip;
+
+		proto = IPPROTO_IPV4;
+		if (m->m_len < sizeof(*ip)) {
+			m = m_pullup(m, sizeof(*ip));
+			if (!m)
+				return ENOBUFS;
+		}
+		ip = mtod(m, struct ip *);
+		itos = ip->ip_tos;
+		/*
+		 * TODO:
+		 * support ALTQ for inner packet
+		 */
+		break;
+	    }
+#endif /* INET */
+	case AF_INET6:
+	    {
+		struct ip6_hdr *xip6;
+		proto = IPPROTO_IPV6;
+		if (m->m_len < sizeof(*xip6)) {
+			m = m_pullup(m, sizeof(*xip6));
+			if (!m)
+				return ENOBUFS;
+		}
+		xip6 = mtod(m, struct ip6_hdr *);
+		/* the traffic class is bits 20-27 of the flow word */
+		itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
+		/* TODO:
+		 * support ALTQ for inner packet
+		 */
+		break;
+	    }
+	default:
+		m_freem(m);
+		return EAFNOSUPPORT;
+	}
+
+	/* prepend new IP header */
+	M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
+	if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
+		m = m_pullup(m, sizeof(struct ip6_hdr));
+	if (m == NULL)
+		return ENOBUFS;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_flow	= 0;
+	ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
+	ip6->ip6_vfc	|= IPV6_VERSION;
+	ip6->ip6_plen	= htons((u_short)m->m_pkthdr.len);
+	ip6->ip6_nxt	= proto;
+	ip6->ip6_hlim	= ip6_ipsec_hlim;
+	ip6->ip6_src	= sin6_src->sin6_addr;
+	/* bidirectional configured tunnel mode */
+	if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
+		ip6->ip6_dst = sin6_dst->sin6_addr;
+	} else  {
+		m_freem(m);
+		return ENETUNREACH;
+	}
+	/* Decide the traffic class of the outer header (otos). */
+#ifndef IPSEC_TX_TOS_CLEAR
+	if (ifp->if_flags & IFF_ECN)
+		ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
+	else
+		ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
+
+	if (!ip6_ipsec_copy_tos)
+		otos = 0;
+#else
+	if (ip6_ipsec_copy_tos)
+		otos = itos;
+	else
+		otos = 0;
+#endif
+	ip6->ip6_flow &= ~ntohl(0xff00000);
+	ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+
+	sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
+
+	/* The per-CPU route cache is used under its own lock. */
+	iro = percpu_getref(sc->ipsec_ro_percpu);
+	mutex_enter(&iro->ir_lock);
+	if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
+		mutex_exit(&iro->ir_lock);
+		percpu_putref(sc->ipsec_ro_percpu);
+		m_freem(m);
+		return ENETUNREACH;
+	}
+
+	/* Refuse a route which goes out through this I/F itself. */
+	if (rt->rt_ifp == ifp) {
+		rtcache_unref(rt, &iro->ir_ro);
+		rtcache_free(&iro->ir_ro);
+		mutex_exit(&iro->ir_lock);
+		percpu_putref(sc->ipsec_ro_percpu);
+		m_freem(m);
+		return ENETUNREACH;
+	}
+	rtcache_unref(rt, &iro->ir_ro);
+
+	/*
+	 * force fragmentation to minimum MTU, to avoid path MTU discovery.
+	 * it is too painful to ask for resend of inner packet, to achieve
+	 * path MTU discovery for encapsulated packets.
+	 */
+	error = ip6_output(m, 0, &iro->ir_ro,
+	    ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
+	/* Drop the cached route when the output failed. */
+	if (error)
+		rtcache_free(&iro->ir_ro);
+
+	mutex_exit(&iro->ir_lock);
+	percpu_putref(sc->ipsec_ro_percpu);
+
+	return error;
+}
+#endif /* INET6 */
+
+static void
+ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
+{	/* input path for packets whose outer header is IPv4 */
+	struct ifnet *ipsecp;
+	struct ipsec_softc *sc = eparg;
+	struct ipsec_variant *var;
+	const struct ip *ip;
+	int af;
+#ifndef IPSEC_TX_TOS_CLEAR
+	u_int8_t otos;
+#endif
+	struct psref psref_rcvif;
+	struct psref psref_var;
+	struct ifnet *rcvif;
+
+	KASSERT(sc != NULL);
+
+	ipsecp = &sc->ipsec_if;
+	if ((ipsecp->if_flags & IFF_UP) == 0) {	/* interface down: drop */
+		m_freem(m);
+		ip_statinc(IP_STAT_NOIPSEC);
+		return;
+	}
+
+	var = if_ipsec_getref_variant(sc, &psref_var);
+	if (if_ipsec_variant_is_unconfigured(var)) {	/* drop */
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);
+		ip_statinc(IP_STAT_NOIPSEC);
+		return;
+	}
+
+	ip = mtod(m, const struct ip *);	/* outer IPv4 header */
+
+	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
+	if (rcvif == NULL || !ipsecif4_filter4(ip, var, rcvif)) {
+		m_put_rcvif_psref(rcvif, &psref_rcvif);
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);	/* no rcvif or filter rejected the packet */
+		ip_statinc(IP_STAT_NOIPSEC);
+		return;
+	}
+	m_put_rcvif_psref(rcvif, &psref_rcvif);
+	if_ipsec_putref_variant(var, &psref_var);
+#ifndef IPSEC_TX_TOS_CLEAR
+	otos = ip->ip_tos;	/* save outer TOS before m_adj() */
+#endif
+	m_adj(m, off);	/* trim off bytes from the front */
+
+	switch (proto) {	/* protocol of the inner packet */
+	case IPPROTO_IPV4:
+	    {
+		struct ip *xip;
+		af = AF_INET;
+		if (M_UNWRITABLE(m, sizeof(*xip))) {
+			m = m_pullup(m, sizeof(*xip));
+			if (!m)
+				return;	/* m_pullup() failed */
+		}
+		xip = mtod(m, struct ip *);
+#ifndef IPSEC_TX_TOS_CLEAR
+		if (ipsecp->if_flags & IFF_ECN)
+			ip_ecn_egress(ECN_ALLOWED, &otos, &xip->ip_tos);
+		else
+			ip_ecn_egress(ECN_NOCARE, &otos, &xip->ip_tos);
+#endif
+		break;
+	    }
+#ifdef INET6
+	case IPPROTO_IPV6:
+	    {
+		struct ip6_hdr *ip6;
+		u_int8_t itos;
+		af = AF_INET6;
+		if (M_UNWRITABLE(m, sizeof(*ip6))) {
+			m = m_pullup(m, sizeof(*ip6));
+			if (!m)
+				return;	/* m_pullup() failed */
+		}
+		ip6 = mtod(m, struct ip6_hdr *);
+		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+#ifndef IPSEC_TX_TOS_CLEAR
+		if (ipsecp->if_flags & IFF_ECN)
+			ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
+		else
+			ip_ecn_egress(ECN_NOCARE, &otos, &itos);
+#endif
+		ip6->ip6_flow &= ~htonl(0xff << 20);	/* write itos back */
+		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+		break;
+	    }
+#endif /* INET6 */
+	default:
+		ip_statinc(IP_STAT_NOIPSEC);	/* unsupported inner protocol */
+		m_freem(m);
+		return;
+	}
+	if_ipsec_input(m, af, ipsecp);	/* pass the inner packet on */
+
+	return;
+}
+
+/*
+ * validate and filter the packet (ifp is currently unused)
+ */
+static int
+ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
+    struct ifnet *ifp)
+{
+	struct sockaddr_in *src, *dst;
+
+	src = satosin(var->iv_psrc);
+	dst = satosin(var->iv_pdst);
+
+	return in_tunnel_validate(ip, src->sin_addr, dst->sin_addr);
+}
+
+#ifdef INET6
+static int
+ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
+{	/* input path for outer-IPv6 packets; always returns IPPROTO_DONE */
+	struct mbuf *m = *mp;
+	struct ifnet *ipsecp;
+	struct ipsec_softc *sc = eparg;
+	struct ipsec_variant *var;
+	struct ip6_hdr *ip6;
+	int af = 0;
+#ifndef IPSEC_TX_TOS_CLEAR
+	u_int32_t otos;
+#endif
+	struct psref psref_rcvif;
+	struct psref psref_var;
+	struct ifnet *rcvif;
+
+	KASSERT(eparg != NULL);
+
+	ipsecp = &sc->ipsec_if;
+	if ((ipsecp->if_flags & IFF_UP) == 0) {	/* interface down: drop */
+		m_freem(m);
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+
+	var = if_ipsec_getref_variant(sc, &psref_var);
+	if (if_ipsec_variant_is_unconfigured(var)) {	/* drop */
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+
+	ip6 = mtod(m, struct ip6_hdr *);	/* outer IPv6 header */
+
+	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
+	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
+		m_put_rcvif_psref(rcvif, &psref_rcvif);
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);	/* no rcvif or filter rejected the packet */
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+	m_put_rcvif_psref(rcvif, &psref_rcvif);
+	if_ipsec_putref_variant(var, &psref_var);
+
+#ifndef IPSEC_TX_TOS_CLEAR
+	otos = ip6->ip6_flow;	/* save outer flow word before m_adj() */
+#endif
+	m_adj(m, *offp);	/* trim *offp bytes from the front */
+
+	switch (proto) {	/* protocol of the inner packet */
+#ifdef INET
+	case IPPROTO_IPV4:
+	    {
+		af = AF_INET;
+#ifndef IPSEC_TX_TOS_CLEAR
+		struct ip *ip;
+		u_int8_t otos8;
+		otos8 = (ntohl(otos) >> 20) & 0xff;
+
+		if (M_UNWRITABLE(m, sizeof(*ip))) {
+			m = m_pullup(m, sizeof(*ip));
+			if (!m)
+				return IPPROTO_DONE;	/* m_pullup() failed */
+		}
+		ip = mtod(m, struct ip *);
+		if (ipsecp->if_flags & IFF_ECN)
+			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
+		else
+			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
+#endif
+		break;
+	    }
+#endif /* INET */
+	case IPPROTO_IPV6:
+	    {
+		af = AF_INET6;
+#ifndef IPSEC_TX_TOS_CLEAR
+		struct ip6_hdr *xip6;
+
+		if (M_UNWRITABLE(m, sizeof(*xip6))) {
+			m = m_pullup(m, sizeof(*xip6));
+			if (!m)
+				return IPPROTO_DONE;	/* m_pullup() failed */
+		}
+		xip6 = mtod(m, struct ip6_hdr *);
+		if (ipsecp->if_flags & IFF_ECN)
+			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
+		else
+			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
+#endif
+		break;	/* outside the #ifndef so it never falls into default */
+	    }
+	default:
+		IP6_STATINC(IP6_STAT_NOIPSEC);	/* unsupported inner protocol */
+		m_freem(m);
+		return IPPROTO_DONE;
+	}
+
+	if_ipsec_input(m, af, ipsecp);	/* pass the inner packet on */
+	return IPPROTO_DONE;
+}
+
+/*
+ * validate and filter the packet (ifp is currently unused).
+ */
+static int
+ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
+    struct ifnet *ifp)
+{
+	struct sockaddr_in6 *src, *dst;
+
+	src = satosin6(var->iv_psrc);
+	dst = satosin6(var->iv_pdst);
+
+	return in6_tunnel_validate(ip6, &src->sin6_addr, &dst->sin6_addr);
+}
+#endif /* INET6 */
+
+int
+ipsecif4_attach(struct ipsec_variant *var)
+{	/* attach the IPv4 encap entry for var; returns 0 or an errno */
+	struct ipsec_softc *sc = var->iv_softc;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	if (var->iv_encap_cookie4 != NULL)
+		return EALREADY;	/* a cookie is already stored */
+	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1, if_ipsec_encap_func,
+	    &ipsecif4_encapsw, sc);
+	if (var->iv_encap_cookie4 == NULL)
+		return EEXIST;	/* encap_attach_func() returned NULL */
+
+	var->iv_output = ipsecif4_output;
+	return 0;
+}
+
+int
+ipsecif4_detach(struct ipsec_variant *var)
+{	/* undo ipsecif4_attach(); returns encap_detach()'s result */
+	int error;
+
+	if (var->iv_encap_cookie4 == NULL)
+		return 0;	/* nothing attached */
+
+	var->iv_output = NULL;
+	error = encap_detach(var->iv_encap_cookie4);
+	if (error == 0)
+		var->iv_encap_cookie4 = NULL;	/* cleared only on success */
+
+	return error;
+}
+
+#ifdef INET6
+int
+ipsecif6_attach(struct ipsec_variant *var)
+{	/* attach the IPv6 encap entry for var; returns 0 or EEXIST */
+	struct sockaddr_in6 mask6;
+	struct ipsec_softc *sc = var->iv_softc;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+	KASSERT(var->iv_encap_cookie6 == NULL);
+
+	memset(&mask6, 0, sizeof(mask6));
+	mask6.sin6_len = sizeof(struct sockaddr_in6);
+	mask6.sin6_addr.s6_addr32[0] = mask6.sin6_addr.s6_addr32[1] =
+	mask6.sin6_addr.s6_addr32[2] = mask6.sin6_addr.s6_addr32[3] = ~0;
+
+	var->iv_encap_cookie6 = encap_attach(AF_INET6, -1,
+	    var->iv_psrc, (struct sockaddr *)&mask6,	/* all-ones mask */
+	    var->iv_pdst, (struct sockaddr *)&mask6,	/* all-ones mask */
+	    &ipsecif6_encapsw, sc);
+	if (var->iv_encap_cookie6 == NULL)
+		return EEXIST;	/* encap_attach() returned NULL */
+
+	var->iv_output = ipsecif6_output;
+	return 0;
+}
+
+static void
+ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{	/* percpu_foreach() callback: free the route cached in p */
+	struct ipsec_ro *iro = p;
+
+	mutex_enter(&iro->ir_lock);
+	rtcache_free(&iro->ir_ro);
+	mutex_exit(&iro->ir_lock);
+}
+
+int
+ipsecif6_detach(struct ipsec_variant *var)
+{	/* undo ipsecif6_attach(); returns encap_detach()'s result */
+	struct ipsec_softc *sc = var->iv_softc;
+	int error;
+
+	KASSERT(var->iv_encap_cookie6 != NULL);
+
+	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
+
+	var->iv_output = NULL;
+	error = encap_detach(var->iv_encap_cookie6);
+	if (error == 0)
+		var->iv_encap_cookie6 = NULL;	/* cleared only on success */
+	return error;
+}
+
+void *
+ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
+{	/* may flush this CPU's cached route; every path returns NULL */
+	struct ipsec_softc *sc = eparg;
+	struct ip6ctlparam *ip6cp = NULL;
+	struct ip6_hdr *ip6;
+	const struct sockaddr_in6 *dst6;
+	struct ipsec_ro *iro;
+
+	if (sa->sa_family != AF_INET6 ||
+	    sa->sa_len != sizeof(struct sockaddr_in6))
+		return NULL;	/* not an IPv6 sockaddr */
+
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return NULL;	/* cmd out of range */
+	if (cmd == PRC_HOSTDEAD)
+		d = NULL;
+	else if (inet6ctlerrmap[cmd] == 0)
+		return NULL;	/* cmd has no entry in inet6ctlerrmap */
+
+	/* if the parameter is from icmp6, decode it. */
+	if (d != NULL) {
+		ip6cp = (struct ip6ctlparam *)d;
+		ip6 = ip6cp->ip6c_ip6;
+	} else {
+		ip6 = NULL;
+	}
+
+	if (!ip6)
+		return NULL;	/* no IPv6 header to compare against */
+
+	iro = percpu_getref(sc->ipsec_ro_percpu);
+	mutex_enter(&iro->ir_lock);
+	dst6 = satocsin6(rtcache_getdst(&iro->ir_ro));
+	/* XXX scope */
+	if (dst6 == NULL)
+		;	/* no cached destination: nothing to flush */
+	else if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst6->sin6_addr))
+		/* flush route cache */
+		rtcache_free(&iro->ir_ro);
+
+	mutex_exit(&iro->ir_lock);
+	percpu_putref(sc->ipsec_ro_percpu);
+
+	return NULL;
+}
+
+ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)	/* presumably defines ipsecif6_ctlinput_wrapper -- confirm */
+#define	ipsecif6_ctlinput	ipsecif6_ctlinput_wrapper
+
+static const struct encapsw ipsecif6_encapsw = {
+	.encapsw6 = {
+		.pr_input = ipsecif6_input,
+		.pr_ctlinput = ipsecif6_ctlinput,	/* expands to the _wrapper name */
+	}
+};
+#endif /* INET6 */
Index: src/sys/netipsec/ipsecif.h
diff -u /dev/null src/sys/netipsec/ipsecif.h:1.1
--- /dev/null	Wed Jan 10 10:56:31 2018
+++ src/sys/netipsec/ipsecif.h	Wed Jan 10 10:56:30 2018
@@ -0,0 +1,47 @@
+/*	$NetBSD: ipsecif.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $  */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETIPSEC_IPSECIF_H_
+#define _NETIPSEC_IPSECIF_H_
+
+#include <net/if_ipsec.h>
+
+#define IPSEC_TTL	64	/* presumably the default outer IPv4 TTL -- confirm */
+#define IPSEC_HLIM	64	/* presumably the default outer IPv6 hop limit -- confirm */
+
+#ifdef _KERNEL
+int ipsecif4_encap_func(struct mbuf *, struct ip *, struct ipsec_variant *);
+int ipsecif4_attach(struct ipsec_variant *);	/* 0 or errno */
+int ipsecif4_detach(struct ipsec_variant *);	/* 0 or errno */
+
+int ipsecif6_attach(struct ipsec_variant *);	/* 0 or errno */
+int ipsecif6_detach(struct ipsec_variant *);	/* 0 or errno */
+void *ipsecif6_ctlinput(int, const struct sockaddr *, void *, void *);
+#endif /* _KERNEL */
+
+#endif /*_NETIPSEC_IPSECIF_H_*/

Reply via email to