let pool allocators advertise multiple page sizes

2016-11-07 Thread David Gwynne
this turns the pa_pagesz member of a pool allocator into a bitfield.

pool pages can be many different sizes, they aren't restricted to
just the size provided by the hardware. to support this without
introducing a page allocator per page size, a single page allocator
can be used to allocate many different sizes. however, right now
there is no way to pass a custom page allocator to pool_init and
tell it that an allocator can do different page sizes. currently
pool_init only uses the multi page allocators when it's allowed to
choose one itself.

so, as i said above, pool allocators can now indicate what sizes
they can provide.  the low bit in pa_pagesz indicates whether the
allocator can align its allocation to the requested size. this is
necessary if you want to know if you can store the pool page headers
inside the allocation.

the rest of the bits map to supported page sizes. pools only support
page sizes that are powers of two. with that in mind, each power
of two is represented as a single bit which we can or together to
indicate the ranges of pages an allocator can provide.

eg, the multi page pools on sparc64 would have 0x1e001 as
pa_pagesz. the low bit says they can align their pages, and you can
test which sizes they support by anding sizes. eg, ISSET(0x1e001,
8192) is true, as is ISSET(0x1e001, 65536).

in the future i want to use this to provide an allocator for all
the mbufs and clusters so we can configure how much memory we want
packets to consume rather than how many packets we want to provide.
on my box here kern.maxclusters is 16384, which means we can have
16384 clusters allocated from any of the backend pools. 16384 64k
clusters is a gigabyte of ram, which is probably not what we want.
instead we should say we want all packets to be allocated from a
few meg of ram and let any of the clusters come out of that
pool. this diff is a step toward that.

another benefit of that would be to then enable the per cpu caches for
mbufs and clusters, which will be necessary to scale performance
when the stack is unlocked further.

ok?

Index: sys/pool.h
===
RCS file: /cvs/src/sys/sys/pool.h,v
retrieving revision 1.67
diff -u -p -r1.67 pool.h
--- sys/pool.h  7 Nov 2016 23:45:27 -   1.67
+++ sys/pool.h  8 Nov 2016 00:37:42 -
@@ -77,10 +77,36 @@ struct pool_request;
 TAILQ_HEAD(pool_requests, pool_request);
 
 struct pool_allocator {
-   void *(*pa_alloc)(struct pool *, int, int *);
-   void (*pa_free)(struct pool *, void *);
-   int pa_pagesz;
+   void*(*pa_alloc)(struct pool *, int, int *);
+   void (*pa_free)(struct pool *, void *);
+   size_t pa_pagesz;
 };
+
+/*
+ * The pa_pagesz member encodes the sizes of pages that can be
+ * provided by the allocator, and whether the allocations can be
+ * aligned to their size.
+ *
+ * Page sizes can only be powers of two. Each available page size is
+ * represented by its value set as a bit. e.g., to indicate that an
+ * allocator can provide 16k and 32k pages you initialise pa_pagesz
+ * to (32768 | 16384).
+ *
+ * If the allocator can provide aligned pages the low bit in pa_pagesz
+ * is set. The POOL_ALLOC_ALIGNED macro is provided as a convenience.
+ *
+ * If pa_pagesz is unset (i.e. 0), POOL_ALLOC_DEFAULT will be used
+ * instead.
+ */
+
+#define POOL_ALLOC_ALIGNED 1UL
+#define POOL_ALLOC_SIZE(_sz, _a)   ((_sz) | (_a))
+#define POOL_ALLOC_SIZES(_min, _max, _a) \
+   ((_max) | \
+   (((_max) - 1) & ~((_min) - 1)) | (_a))
+
+#define POOL_ALLOC_DEFAULT \
+   POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 
 TAILQ_HEAD(pool_pagelist, pool_page_header);
 
Index: kern/subr_pool.c
===
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.203
diff -u -p -r1.203 subr_pool.c
--- kern/subr_pool.c7 Nov 2016 23:45:27 -   1.203
+++ kern/subr_pool.c8 Nov 2016 00:37:42 -
@@ -170,7 +170,8 @@ voidpool_page_free(struct pool *, void 
  */
 struct pool_allocator pool_allocator_single = {
pool_page_alloc,
-   pool_page_free
+   pool_page_free,
+   POOL_ALLOC_SIZE(PAGE_SIZE, POOL_ALLOC_ALIGNED)
 };
 
 void   *pool_multi_alloc(struct pool *, int, int *);
@@ -178,7 +179,8 @@ voidpool_multi_free(struct pool *, void
 
 struct pool_allocator pool_allocator_multi = {
pool_multi_alloc,
-   pool_multi_free
+   pool_multi_free,
+   POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 void   *pool_multi_alloc_ni(struct pool *, int, int *);
@@ -186,7 +188,8 @@ voidpool_multi_free_ni(struct pool *, v
 
 struct pool_allocator pool_allocator_multi_ni = {
pool_multi_alloc_ni,
-   pool_multi_free_ni
+   pool_multi_free_ni,
+   POOL_ALLOC_SIZES(PAGE_SIZE, (1UL << 31), POOL_ALLOC_ALIGNED)
 };
 
 #ifdef DDB
@@ -264,6 +267,7 @@ 

unifdef INSECURE

2016-11-07 Thread Ilya Kaliman
===
RCS file: /cvs/src/sys/stand/boot/cmd.c,v
retrieving revision 1.63
diff -u -p -r1.63 cmd.c
--- cmd.c   20 Jul 2014 19:33:54 -  1.63
+++ cmd.c   7 Nov 2016 22:47:40 -
@@ -95,9 +95,7 @@ getcmd(void)
 int
 read_conf(void)
 {
-#ifndef INSECURE
struct stat sb;
-#endif
int fd, rc = 0;

 #ifdef CHECK_SKIP_CONF
@@ -116,14 +114,12 @@ read_conf(void)
return -1;
}

-#ifndef INSECURE
(void) fstat(fd, &sb);
if (sb.st_uid || (sb.st_mode & 2)) {
printf("non-secure %s, will not proceed\n", cmd.path);
close(fd);
return -1;
}
-#endif

do {
char *p = cmd_buf;



Re: The end of nd6_output()

2016-11-07 Thread Gregor Best
Hi,

On Mon, Nov 07, 2016 at 09:12:12AM +0100, Martin Pieuchot wrote:
> [...]
> Could you capture the route changes via "# route monitor"?  I'd like
> to know if the 'bad gateway value' message is triggered by userland
> or the kernel.
> [...]

I can't reproduce the problem anymore. If it does turn up again, I'll
collect the output of route monitor. In the mean time, thanks a bunch
for your help :)

-- 
Gregor



switchd(8): add more packet-out validations

2016-11-07 Thread Rafael Zalamena
Now that we have the flow-mod validation with the action/instructions
support we can extend the usage of these functions for the packet-out
validation.

This diff increases the packet-out validation coverage by also doing
instructions and packet truncation checks.

ok?

Index: ofp13.c
===
RCS file: /cvs/src/usr.sbin/switchd/ofp13.c,v
retrieving revision 1.25
diff -u -p -r1.25 ofp13.c
--- ofp13.c 7 Nov 2016 13:27:11 -   1.25
+++ ofp13.c 7 Nov 2016 13:33:34 -
@@ -462,10 +462,9 @@ ofp13_validate_packet_out(struct switchd
 struct ofp_header *oh, struct ibuf *ibuf)
 {
struct ofp_packet_out   *pout;
-   size_t   len;
-   off_toff;
+   size_t   len, plen, diff;
+   off_toff, noff;
struct ofp_action_header*ah;
-   struct ofp_action_output*ao;
 
off = 0;
if ((pout = ibuf_seek(ibuf, off, sizeof(*pout))) == NULL) {
@@ -474,36 +473,43 @@ ofp13_validate_packet_out(struct switchd
return (-1);
}
 
-   log_debug("\tbuffer %d port %s "
-   "actions length %u",
+   off += sizeof(*pout);
+   len = ntohs(pout->pout_actions_len);
+   log_debug("\tbuffer %d in_port %s actions_len %lu",
ntohl(pout->pout_buffer_id),
-   print_map(ntohl(pout->pout_in_port), ofp_port_map),
-   ntohs(pout->pout_actions_len));
-   len = ntohl(pout->pout_actions_len);
+   print_map(ntohl(pout->pout_in_port), ofp_port_map), len);
 
-   off += sizeof(*pout);
-   while ((ah = ibuf_seek(ibuf, off, len)) != NULL &&
-   ntohs(ah->ah_len) >= (uint16_t)sizeof(*ah)) {
-   switch (ntohs(ah->ah_type)) {
-   case OFP_ACTION_OUTPUT:
-   ao = (struct ofp_action_output *)ah;
-   log_debug("\t\taction type %s length %d "
-   "port %s max length %d",
-   print_map(ntohs(ao->ao_type), ofp_action_map),
-   ntohs(ao->ao_len),
-   print_map(ntohs(ao->ao_port), ofp_port_map),
-   ntohs(ao->ao_max_len));
-   break;
-   default:
-   log_debug("\t\taction type %s length %d",
-   print_map(ntohs(ah->ah_type), ofp_action_map),
-   ntohs(ah->ah_len));
-   break;
-   }
-   if (pout->pout_buffer_id == (uint32_t)-1)
-   break;
-   off += ntohs(ah->ah_len);
+parse_next_action:
+   if ((ah = ibuf_seek(ibuf, off, sizeof(*ah))) == NULL)
+   return (-1);
+
+   noff = off;
+   ofp13_validate_action(sc, oh, ibuf, &off, ah);
+
+   diff = off - noff;
+   /* Loop prevention. */
+   if (off < noff || diff == 0)
+   return (-1);
+
+   len -= diff;
+   if (len)
+   goto parse_next_action;
+
+   /* Check for encapsulated packet truncation. */
+   len = ntohs(oh->oh_length) - off;
+   plen = ibuf_length(ibuf) - off;
+
+   if (plen < len) {
+   log_debug("\ttruncated packet %lu < %lu", plen, len);
+
+   /* Buffered packets can be truncated */
+   if (pout->pout_buffer_id != OFP_PKTOUT_NO_BUFFER)
+   len = plen;
+   else
+   return (-1);
}
+   if (ibuf_seek(ibuf, off, len) == NULL)
+   return (-1);
 
return (0);
 }



Re: Kill ifa_ifwithnet()

2016-11-07 Thread Vincent Gross
On Mon, 7 Nov 2016 08:59:53 +0100
Martin Pieuchot  wrote:

> On 04/11/16(Fri) 21:33, Vincent Gross wrote:
> > [...] 
> > Why are you killing Strict Source Route Record ? Just as you did
> > with rtredirect(), you can check whether RTF_GATEWAY is set and
> > send back an ICMP_UNREACH if so. Or did I miss something ?  
> 
> Like that?
> 
> Index: netinet/ip_input.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.282
> diff -u -p -r1.282 ip_input.c
> --- netinet/ip_input.c22 Sep 2016 10:12:25 -  1.282
> +++ netinet/ip_input.c7 Nov 2016 07:59:02 -
> @@ -1117,37 +1117,20 @@ ip_dooptions(struct mbuf *m, struct ifne
>   ipaddr.sin_len = sizeof(ipaddr);
>   memcpy(_addr, cp + off,
>   sizeof(ipaddr.sin_addr));
> - if (opt == IPOPT_SSRR) {
> - if ((ia = ifatoia(ifa_ifwithdstaddr(
> - sintosa(),
> - m->m_pkthdr.ph_rtableid))) ==
> NULL)
> - ia = ifatoia(ifa_ifwithnet(
> - sintosa(),
> -
> m->m_pkthdr.ph_rtableid));
> - if (ia == NULL) {
> - type = ICMP_UNREACH;
> - code = ICMP_UNREACH_SRCFAIL;
> - goto bad;
> - }
> - memcpy(cp + off,
> >ia_addr.sin_addr,
> - sizeof(struct in_addr));
> - cp[IPOPT_OFFSET] += sizeof(struct
> in_addr);
> - } else {
> - /* keep packet in the virtual
> instance */
> - rt = rtalloc(sintosa(),
> RT_RESOLVE,
> - rtableid);
> - if (!rtisvalid(rt)) {
> - type = ICMP_UNREACH;
> - code = ICMP_UNREACH_SRCFAIL;
> - rtfree(rt);
> - goto bad;
> - }
> - ia = ifatoia(rt->rt_ifa);
> - memcpy(cp + off,
> >ia_addr.sin_addr,
> - sizeof(struct in_addr));
> + /* keep packet in the virtual instance */
> + rt = rtalloc(sintosa(), RT_RESOLVE,
> rtableid);
> + if (!rtisvalid(rt) || ((opt == IPOPT_SSRR) &&
> + ISSET(rt->rt_flags, RTF_GATEWAY))) {
> + type = ICMP_UNREACH;
> + code = ICMP_UNREACH_SRCFAIL;
>   rtfree(rt);
> - cp[IPOPT_OFFSET] += sizeof(struct
> in_addr);
> + goto bad;
>   }
> + ia = ifatoia(rt->rt_ifa);
> + memcpy(cp + off, >ia_addr.sin_addr,
> + sizeof(struct in_addr));
> + rtfree(rt);
> + cp[IPOPT_OFFSET] += sizeof(struct in_addr);
>   ip->ip_dst = ipaddr.sin_addr;
>   /*
>* Let ip_intr's mcast routing check handle
> mcast pkts

Ok vgross@



Recursive splsoftnet() in PMTU

2016-11-07 Thread Martin Pieuchot
Timers configured with rt_timer_add(9) are always run under splsoftnet()
so no need to take it recursively.

ok?

Index: netinet/ip_icmp.c
===
RCS file: /cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.152
diff -u -p -r1.152 ip_icmp.c
--- netinet/ip_icmp.c   22 Aug 2016 15:37:23 -  1.152
+++ netinet/ip_icmp.c   7 Nov 2016 09:23:47 -
@@ -1046,7 +1046,8 @@ void
 icmp_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
 {
struct ifnet *ifp;
-   int s;
+
+   splsoftassert(IPL_SOFTNET);
 
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
@@ -1058,7 +1059,6 @@ icmp_mtudisc_timeout(struct rtentry *rt,
 
sin = *satosin(rt_key(rt));
 
-   s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
 
/* Notify TCP layer of increased Path MTU estimate */
@@ -1066,7 +1066,6 @@ icmp_mtudisc_timeout(struct rtentry *rt,
if (ctlfunc)
(*ctlfunc)(PRC_MTUINC, sintosa(&sin),
r->rtt_tableid, NULL);
-   splx(s);
} else {
if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
rt->rt_rmx.rmx_mtu = 0;
@@ -1097,16 +1096,15 @@ void
 icmp_redirect_timeout(struct rtentry *rt, struct rttimer *r)
 {
struct ifnet *ifp;
-   int s;
+
+   splsoftassert(IPL_SOFTNET);
 
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
 
if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-   s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
-   splx(s);
}
 
if_put(ifp);
Index: netinet6/icmp6.c
===
RCS file: /cvs/src/sys/netinet6/icmp6.c,v
retrieving revision 1.190
diff -u -p -r1.190 icmp6.c
--- netinet6/icmp6.c24 Aug 2016 09:38:29 -  1.190
+++ netinet6/icmp6.c7 Nov 2016 09:24:37 -
@@ -1947,16 +1947,15 @@ void
 icmp6_mtudisc_timeout(struct rtentry *rt, struct rttimer *r)
 {
struct ifnet *ifp;
-   int s;
+
+   splsoftassert(IPL_SOFTNET);
 
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
 
if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-   s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
-   splx(s);
} else {
if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
rt->rt_rmx.rmx_mtu = 0;
@@ -1969,16 +1968,15 @@ void
 icmp6_redirect_timeout(struct rtentry *rt, struct rttimer *r)
 {
struct ifnet *ifp;
-   int s;
+
+   splsoftassert(IPL_SOFTNET);
 
ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return;
 
if ((rt->rt_flags & (RTF_DYNAMIC|RTF_HOST)) == (RTF_DYNAMIC|RTF_HOST)) {
-   s = splsoftnet();
rtdeletemsg(rt, ifp, r->rtt_tableid);
-   splx(s);
}
 
if_put(ifp);
Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.333
diff -u -p -r1.333 route.c
--- net/route.c 6 Oct 2016 19:09:08 -   1.333
+++ net/route.c 7 Nov 2016 09:22:11 -
@@ -1498,6 +1498,8 @@ rt_timer_queue_destroy(struct rttimer_qu
 {
struct rttimer  *r;
 
+   splsoftassert(IPL_SOFTNET);
+
   while ((r = TAILQ_FIRST(&rtq->rtq_head)) != NULL) {
   LIST_REMOVE(r, rtt_link);
   TAILQ_REMOVE(&rtq->rtq_head, r, rtt_next);



Avoid spl recursion in doaccept()

2016-11-07 Thread Martin Pieuchot
We're aiming to replace critical sections protected by splsoftnet() by
a non recursive rwlock.  So we'll have to care about recursivity.

Diff below prevents a recursion in the error path.  Currently closef()
will call soclose() which will take splsoftnet() again.  So let's
release the spl level before.

This is part of my bigger socket lock diff.

ok?

Index: kern/uipc_syscalls.c
===
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.138
diff -u -p -r1.138 uipc_syscalls.c
--- kern/uipc_syscalls.c23 Oct 2016 17:06:40 -  1.138
+++ kern/uipc_syscalls.c7 Nov 2016 09:10:32 -
@@ -276,16 +276,11 @@ doaccept(struct proc *p, int sock, struc
   if ((error = getsock(p, sock, &fp)) != 0)
return (error);
 
-   s = splsoftnet();
headfp = fp;
-   head = fp->f_data;
-
-   if (isdnssocket((struct socket *)fp->f_data)) {
-   error = EINVAL;
-   goto bad;
-   }
 redo:
-   if ((head->so_options & SO_ACCEPTCONN) == 0) {
+   s = splsoftnet();
+   head = headfp->f_data;
+   if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto bad;
}
@@ -311,7 +306,7 @@ redo:
head->so_error = 0;
goto bad;
}
-   
+
/* Figure out whether the new socket should be non-blocking. */
nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK)
: (flags & SOCK_NONBLOCK ? FNONBLOCK : 0);
@@ -338,6 +333,7 @@ redo:
 * or another thread or process to accept it.  If so, start over.
 */
if (head->so_qlen == 0) {
+   splx(s);
m_freem(nam);
fdplock(fdp);
fdremove(fdp, tmpfd);
@@ -366,18 +362,23 @@ redo:
 
if (error) {
/* if an error occurred, free the file descriptor */
+   splx(s);
+   m_freem(nam);
fdplock(fdp);
fdremove(fdp, tmpfd);
closef(fp, p);
fdpunlock(fdp);
+   goto out;
} else {
   (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&nflag, p);
FILE_SET_MATURE(fp, p);
*retval = tmpfd;
+   m_freem(nam);
}
-   m_freem(nam);
+
 bad:
splx(s);
+out:
FRELE(headfp, p);
return (error);
 }



splnet() in socket layer

2016-11-07 Thread Martin Pieuchot
splnet() was necessary when link state changes were executed from
hardware interrupt handlers.  In 2013 they got deferred to their own
task, so KERNEL_LOCK() is what really protect these data structures.

This is part of my bigger socket lock diff.

ok?

Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.163
diff -u -p -r1.163 uipc_socket.c
--- kern/uipc_socket.c  6 Oct 2016 19:09:08 -   1.163
+++ kern/uipc_socket.c  7 Nov 2016 08:52:42 -
@@ -1037,12 +1037,10 @@ sorflush(struct socket *so)
 {
   struct sockbuf *sb = &so->so_rcv;
struct protosw *pr = so->so_proto;
-   int s;
struct sockbuf asb;
 
sb->sb_flags |= SB_NOINTR;
(void) sblock(sb, M_WAITOK);
-   s = splnet();
socantrcvmore(so);
sbunlock(sb);
asb = *sb;
@@ -1052,7 +1050,6 @@ sorflush(struct socket *so)
sb->sb_sel.si_note = asb.sb_sel.si_note;
sb->sb_flags = SB_KNOTE;
}
-   splx(s);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
   sbrelease(&asb);
@@ -1876,7 +1873,8 @@ soo_kqfilter(struct file *fp, struct kno
 {
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
-   int s;
+
+   KERNEL_ASSERT_LOCKED();
 
switch (kn->kn_filter) {
case EVFILT_READ:
@@ -1894,10 +1892,9 @@ soo_kqfilter(struct file *fp, struct kno
return (EINVAL);
}
 
-   s = splnet();
   SLIST_INSERT_HEAD(&sb->sb_sel.si_note, kn, kn_selnext);
sb->sb_flags |= SB_KNOTE;
-   splx(s);
+
return (0);
 }
 
@@ -1905,12 +1902,12 @@ void
 filt_sordetach(struct knote *kn)
 {
struct socket *so = kn->kn_fp->f_data;
-   int s = splnet();
+
+   KERNEL_ASSERT_LOCKED();
 
   SLIST_REMOVE(&so->so_rcv.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(>so_rcv.sb_sel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
-   splx(s);
 }
 
 int
@@ -1939,12 +1936,12 @@ void
 filt_sowdetach(struct knote *kn)
 {
struct socket *so = kn->kn_fp->f_data;
-   int s = splnet();
+
+   KERNEL_ASSERT_LOCKED();
 
   SLIST_REMOVE(&so->so_snd.sb_sel.si_note, kn, knote, kn_selnext);
if (SLIST_EMPTY(>so_snd.sb_sel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
-   splx(s);
 }
 
 int



Re: The end of nd6_output()

2016-11-07 Thread Martin Pieuchot
On 05/11/16(Sat) 19:26, Gregor Best wrote:
> [...] 
> If I can do anything else to be of assistance, please let me know.

Could you capture the route changes via "# route monitor"?  I'd like
to know if the 'bad gateway value' message is triggered by userland
or the kernel.



Re: Stop watchdog timer during DDB session

2016-11-07 Thread Martin Pieuchot
On 02/11/16(Wed) 15:18, Christian Ludwig wrote:
> Deliberately breaking into DDB, e.g. via serial console BREAK, can lead
> to an abrupt end of the debugging session once the hardware watchdog
> reboots the machine. This is because all CPUs are IPL_HIGH. None of them
> tickles the watchdog anymore.
> 
> The same is true on panic, when the system enters DDB. If you really
> want the system to reboot on panic, set ddb.panic to 0.
> 
> So stop the watchdog timer when entering DDB. Restart it after the
> debugging session finished.

ok mpi@

> ---
>  sys/ddb/db_interface.h   |  4 
>  sys/ddb/db_trap.c|  2 ++
>  sys/kern/kern_watchdog.c | 25 +
>  3 files changed, 31 insertions(+)
> 
> diff --git a/sys/ddb/db_interface.h b/sys/ddb/db_interface.h
> index 77602d2..5728764 100644
> --- a/sys/ddb/db_interface.h
> +++ b/sys/ddb/db_interface.h
> @@ -45,6 +45,10 @@ void db_show_all_procs(db_expr_t, int, db_expr_t, char *);
>  /* kern/kern_timeout.c */
>  void db_show_callout(db_expr_t, int, db_expr_t, char *);
>  
> +/* kern/kern_watchdog.c */
> +void db_wdog_disable(void);
> +void db_wdog_enable(void);
> +
>  struct mount;
>  
>  /* kern/vfs_subr.c */
> diff --git a/sys/ddb/db_trap.c b/sys/ddb/db_trap.c
> index 85e5c8a..d347ede 100644
> --- a/sys/ddb/db_trap.c
> +++ b/sys/ddb/db_trap.c
> @@ -53,6 +53,7 @@ db_trap(int type, int code)
>   boolean_t   watchpt;
>  
>   db_is_active = 1;
> + db_wdog_disable();
>   bkpt = IS_BREAKPOINT_TRAP(type, code);
>   watchpt = IS_WATCHPOINT_TRAP(type, code);
>  
> @@ -94,5 +95,6 @@ db_trap(int type, int code)
>   }
>  
>   db_restart_at_pc(_regs, watchpt);
> + db_wdog_enable();
>   db_is_active = 0;
>  }
> diff --git a/sys/kern/kern_watchdog.c b/sys/kern/kern_watchdog.c
> index 5c27b17..e848e24 100644
> --- a/sys/kern/kern_watchdog.c
> +++ b/sys/kern/kern_watchdog.c
> @@ -106,3 +106,28 @@ sysctl_wdog(int *name, u_int namelen, void *oldp, size_t 
> *oldlenp, void *newp,
>  
>   return (error);
>  }
> +
> +#ifdef DDB
> +#include 
> +#include 
> +
> +void
> +db_wdog_disable(void)
> +{
> + if (wdog_ctl_cb == NULL || wdog_period == 0)
> + return;
> +
> + timeout_del(&wdog_timeout);
> + (void) (*wdog_ctl_cb)(wdog_ctl_cb_arg, 0);
> +}
> +
> +void
> +db_wdog_enable(void)
> +{
> + if (wdog_ctl_cb == NULL || wdog_period == 0)
> + return;
> +
> + (void) (*wdog_ctl_cb)(wdog_ctl_cb_arg, wdog_period);
> + timeout_add(&wdog_timeout, wdog_period * hz / 2);
> +}
> +#endif
> -- 
> 2.1.4
> 



Re: Kill ifa_ifwithnet()

2016-11-07 Thread Martin Pieuchot
On 04/11/16(Fri) 21:33, Vincent Gross wrote:
> [...] 
> Why are you killing Strict Source Route Record ? Just as you did with
> rtredirect(), you can check whether RTF_GATEWAY is set and send back
> an ICMP_UNREACH if so. Or did I miss something ?

Like that?

Index: netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.282
diff -u -p -r1.282 ip_input.c
--- netinet/ip_input.c  22 Sep 2016 10:12:25 -  1.282
+++ netinet/ip_input.c  7 Nov 2016 07:59:02 -
@@ -1117,37 +1117,20 @@ ip_dooptions(struct mbuf *m, struct ifne
ipaddr.sin_len = sizeof(ipaddr);
memcpy(&ipaddr.sin_addr, cp + off,
sizeof(ipaddr.sin_addr));
-   if (opt == IPOPT_SSRR) {
-   if ((ia = ifatoia(ifa_ifwithdstaddr(
-   sintosa(),
-   m->m_pkthdr.ph_rtableid))) == NULL)
-   ia = ifatoia(ifa_ifwithnet(
-   sintosa(),
-   m->m_pkthdr.ph_rtableid));
-   if (ia == NULL) {
-   type = ICMP_UNREACH;
-   code = ICMP_UNREACH_SRCFAIL;
-   goto bad;
-   }
-   memcpy(cp + off, >ia_addr.sin_addr,
-   sizeof(struct in_addr));
-   cp[IPOPT_OFFSET] += sizeof(struct in_addr);
-   } else {
-   /* keep packet in the virtual instance */
-   rt = rtalloc(sintosa(), RT_RESOLVE,
-   rtableid);
-   if (!rtisvalid(rt)) {
-   type = ICMP_UNREACH;
-   code = ICMP_UNREACH_SRCFAIL;
-   rtfree(rt);
-   goto bad;
-   }
-   ia = ifatoia(rt->rt_ifa);
-   memcpy(cp + off, >ia_addr.sin_addr,
-   sizeof(struct in_addr));
+   /* keep packet in the virtual instance */
+   rt = rtalloc(sintosa(&ipaddr), RT_RESOLVE, rtableid);
+   if (!rtisvalid(rt) || ((opt == IPOPT_SSRR) &&
+   ISSET(rt->rt_flags, RTF_GATEWAY))) {
+   type = ICMP_UNREACH;
+   code = ICMP_UNREACH_SRCFAIL;
rtfree(rt);
-   cp[IPOPT_OFFSET] += sizeof(struct in_addr);
+   goto bad;
}
+   ia = ifatoia(rt->rt_ifa);
+   memcpy(cp + off, &ia->ia_addr.sin_addr,
+   sizeof(struct in_addr));
+   rtfree(rt);
+   cp[IPOPT_OFFSET] += sizeof(struct in_addr);
ip->ip_dst = ipaddr.sin_addr;
/*
 * Let ip_intr's mcast routing check handle mcast pkts