let the pool gc reclaim items from the per cpu caches

2017-02-07 Thread David Gwynne
this diff lets the pool gc reclaim idle memory held by cpu caches.

it does this by recording when the global depot of items was first
added to. the gc checks that timestamp and if the depot hasnt been
emptied again in the last 4 seconds, it moves a list of items from
the depot back to the pools free lists. this in turn makes the items
available for freeing back to the system.

the obvious way to put items back on the free lists is to call
pool_put on them, but doing that double counts puts which can lead
to negative INUSE values in vmstat -m and systat pool output. to
avoid that the guts of pool_put have been pulled out into pool_do_put
(which mirrors pool_do_get in some ways). pool_do_put simply
manipulates the free lists, leaving pool_put responsible for work
it usually does around that like updating counters.

pulling pool_do_put out also means the gc can take the mutex once
to put several items on the lists in one go.

ok?

Index: subr_pool.c
===
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.206
diff -u -p -d -r1.206 subr_pool.c
--- subr_pool.c 8 Feb 2017 05:28:30 -   1.206
+++ subr_pool.c 8 Feb 2017 05:42:47 -
@@ -132,6 +132,8 @@ struct pool_cache {
 void   *pool_cache_get(struct pool *);
 voidpool_cache_put(struct pool *, void *);
 voidpool_cache_destroy(struct pool *);
+struct pool_cache_item *
+pool_cache_list_put(struct pool *, struct pool_cache_item *);
 #endif
 voidpool_cache_info(struct pool *, struct kinfo_pool *);
 
@@ -151,6 +153,7 @@ void pool_p_free(struct pool *, struct 
 
 voidpool_update_curpage(struct pool *);
 void   *pool_do_get(struct pool *, int, int *);
+voidpool_do_put(struct pool *, void *);
 int pool_chk_page(struct pool *, struct pool_page_header *, int);
 int pool_chk(struct pool *);
 voidpool_get_done(void *, void *);
@@ -706,7 +709,6 @@ pool_do_get(struct pool *pp, int flags, 
 void
 pool_put(struct pool *pp, void *v)
 {
-   struct pool_item *pi = v;
struct pool_page_header *ph, *freeph = NULL;
 
 #ifdef DIAGNOSTIC
@@ -723,6 +725,37 @@ pool_put(struct pool *pp, void *v)
 
mtx_enter(>pr_mtx);
 
+   pool_do_put(pp, v);
+
+   pp->pr_nout--;
+   pp->pr_nput++;
+
+   /* is it time to free a page? */
+   if (pp->pr_nidle > pp->pr_maxpages &&
+   (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
+   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
+   freeph = ph;
+   pool_p_remove(pp, freeph);
+   }
+
+   mtx_leave(>pr_mtx);
+
+   if (freeph != NULL)
+   pool_p_free(pp, freeph);
+
+   if (!TAILQ_EMPTY(>pr_requests)) {
+   mtx_enter(>pr_requests_mtx);
+   pool_runqueue(pp, PR_NOWAIT);
+   mtx_leave(>pr_requests_mtx);
+   }
+}
+
+void
+pool_do_put(struct pool *pp, void *v)
+{
+   struct pool_item *pi = v;
+   struct pool_page_header *ph;
+
splassert(pp->pr_ipl);
 
ph = pr_find_pagehead(pp, v);
@@ -766,27 +799,6 @@ pool_put(struct pool *pp, void *v)
TAILQ_INSERT_TAIL(>pr_emptypages, ph, ph_entry);
pool_update_curpage(pp);
}
-
-   pp->pr_nout--;
-   pp->pr_nput++;
-
-   /* is it time to free a page? */
-   if (pp->pr_nidle > pp->pr_maxpages &&
-   (ph = TAILQ_FIRST(>pr_emptypages)) != NULL &&
-   (ticks - ph->ph_tick) > (hz * pool_wait_free)) {
-   freeph = ph;
-   pool_p_remove(pp, freeph);
-   }
-   mtx_leave(>pr_mtx);
-
-   if (freeph != NULL)
-   pool_p_free(pp, freeph);
-
-   if (!TAILQ_EMPTY(>pr_requests)) {
-   mtx_enter(>pr_requests_mtx);
-   pool_runqueue(pp, PR_NOWAIT);
-   mtx_leave(>pr_requests_mtx);
-   }
 }
 
 /*
@@ -1446,9 +1458,32 @@ pool_gc_pages(void *null)
 {
struct pool *pp;
struct pool_page_header *ph, *freeph;
+   int diff = hz * pool_wait_gc;
 
rw_enter_read(_lock);
SIMPLEQ_FOREACH(pp, _head, pr_poollist) {
+#ifdef MULTIPROCESSOR
+   /* if there's percpu caches, try and gc a list of items */
+   if (pp->pr_cache != NULL &&
+   (ticks - pp->pr_cache_tick) > diff &&
+   !TAILQ_EMPTY(>pr_cache_lists) &&
+   mtx_enter_try(>pr_cache_mtx)) {
+   struct pool_cache_item *pl = NULL;
+
+   if ((ticks - pp->pr_cache_tick) > diff &&
+   (pl = TAILQ_FIRST(>pr_cache_lists)) != NULL) {
+   TAILQ_REMOVE(>pr_cache_lists, pl, ci_nextl);
+   pp->pr_cache_tick = ticks;
+   pp->pr_cache_nlist--;
+   }
+
+   mtx_leave(>pr_cache_mtx);
+
+   if (pl != NULL)
+   

remove more obsolete emulation hooks

2017-02-07 Thread Philip Guenther

Emulation had multiple sets of hooks; here's the diff to kill the hooks 
used at exec, fork, and exit time, as they are always NULL.

ok?


Philip


Index: sys/proc.h
===
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.232
diff -u -p -r1.232 proc.h
--- sys/proc.h  31 Jan 2017 07:44:55 -  1.232
+++ sys/proc.h  8 Feb 2017 05:59:48 -
@@ -115,10 +115,6 @@ struct emul {
char*e_esigret; /* sigaction RET position */
int e_flags;/* Flags, see below */
struct uvm_object *e_sigobject; /* shared sigcode object */
-   /* Per-process hooks */
-   void(*e_proc_exec)(struct proc *, struct exec_package *);
-   void(*e_proc_fork)(struct proc *p, struct proc *parent);
-   void(*e_proc_exit)(struct proc *);
 };
 /* Flags for e_flags */
 #defineEMUL_ENABLED0x0001  /* Allow exec to continue */
@@ -324,8 +320,6 @@ struct proc {
struct  tusage p_tu;/* accumulated times. */
struct  timespec p_rtime;   /* Real time. */
 
-   void*p_emuldata;/* Per-process emulation data, or */
-   /* NULL. Malloc type M_EMULDATA */
int  p_siglist; /* Signals arrived but not delivered. */
 
 /* End area that is zeroed on creation. */
Index: kern/kern_exec.c
===
RCS file: /cvs/src/sys/kern/kern_exec.c,v
retrieving revision 1.185
diff -u -p -r1.185 kern_exec.c
--- kern/kern_exec.c21 Jan 2017 05:42:03 -  1.185
+++ kern/kern_exec.c8 Feb 2017 05:59:51 -
@@ -680,20 +680,6 @@ sys_execve(struct proc *p, void *v, regi
 
free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen);
 
-   /*
-* Call emulation specific exec hook. This can setup per-process
-* p->p_emuldata or do any other per-process stuff an emulation needs.
-*
-* If we are executing process of different emulation than the
-* original forked process, call e_proc_exit() of the old emulation
-* first, then e_proc_exec() of new emulation. If the emulation is
-* same, the exec hook code should deallocate any old emulation
-* resources held previously by this process.
-*/
-   if (pr->ps_emul && pr->ps_emul->e_proc_exit &&
-   pr->ps_emul != pack.ep_emul)
-   (*pr->ps_emul->e_proc_exit)(p);
-
p->p_descfd = 255;
if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255)
p->p_descfd = pack.ep_fd;
@@ -702,13 +688,6 @@ sys_execve(struct proc *p, void *v, regi
p->p_p->ps_flags |= PS_WXNEEDED;
else
p->p_p->ps_flags &= ~PS_WXNEEDED;
-
-   /*
-* Call exec hook. Emulation code may NOT store reference to anything
-* from 
-*/
-   if (pack.ep_emul->e_proc_exec)
-   (*pack.ep_emul->e_proc_exec)(p, );
 
/* update ps_emul, the old value is no longer needed */
pr->ps_emul = pack.ep_emul;
Index: kern/kern_exit.c
===
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.158
diff -u -p -r1.158 kern_exit.c
--- kern/kern_exit.c7 Nov 2016 00:26:32 -   1.158
+++ kern/kern_exit.c8 Feb 2017 05:59:51 -
@@ -242,12 +242,6 @@ exit1(struct proc *p, int rv, int flags)
 
p->p_fd = NULL; /* zap the thread's copy */
 
-   /*
-* If emulation has thread exit hook, call it now.
-*/
-   if (pr->ps_emul->e_proc_exit)
-   (*pr->ps_emul->e_proc_exit)(p);
-
 /*
 * Remove proc from pidhash chain and allproc so looking
 * it up won't work.  We will put the proc on the
Index: kern/kern_fork.c
===
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.193
diff -u -p -r1.193 kern_fork.c
--- kern/kern_fork.c24 Jan 2017 00:58:55 -  1.193
+++ kern/kern_fork.c8 Feb 2017 05:59:51 -
@@ -395,12 +395,6 @@ fork1(struct proc *curp, int flags, void
if (flags & FORK_THREAD)
sigstkinit(>p_sigstk);
 
-   /*
-* If emulation has thread fork hook, call it now.
-*/
-   if (pr->ps_emul->e_proc_fork)
-   (*pr->ps_emul->e_proc_fork)(p, curp);
-
p->p_addr = (struct user *)uaddr;
 
/*



ipsec protocol input callbacks

2017-02-07 Thread Alexander Bluhm
Hi,

Remove the ipsec protocol callbacks which all do the same.  Implement
it in ipsec_common_input_cb() instead.  The code that was copied
to ah6_input_cb() is now in ip6_ours() so we can call it directly.

ok?

bluhm

Index: netinet/ipsec_input.c
===
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.143
diff -u -p -r1.143 ipsec_input.c
--- netinet/ipsec_input.c   7 Feb 2017 22:28:37 -   1.143
+++ netinet/ipsec_input.c   7 Feb 2017 23:20:49 -
@@ -80,15 +80,6 @@
 #include "bpfilter.h"
 
 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
-void ah4_input_cb(struct mbuf *, ...);
-void esp4_input_cb(struct mbuf *, ...);
-void ipcomp4_input_cb(struct mbuf *, ...);
-
-#ifdef INET6
-void ah6_input_cb(struct mbuf *, int, int);
-void esp6_input_cb(struct mbuf *, int, int);
-void ipcomp6_input_cb(struct mbuf *, int, int);
-#endif
 
 #ifdef ENCDEBUG
 #define DPRINTF(x) if (encdebug) printf x
@@ -325,7 +316,7 @@ void
 ipsec_common_input_cb(struct mbuf *m, struct tdb *tdbp, int skip, int protoff)
 {
int af, sproto;
-   u_char prot;
+   u_int8_t prot;
 
 #if NBPFILTER > 0
struct ifnet *encif;
@@ -577,49 +568,33 @@ ipsec_common_input_cb(struct mbuf *m, st
}
 #endif
 
+   switch (sproto) {
+   case IPPROTO_ESP:
+   case IPPROTO_AH:
+   case IPPROTO_IPCOMP:
+   break;
+   default:
+   DPRINTF(("ipsec_common_input_cb(): unknown/unsupported"
+   " security protocol %d\n", sproto));
+   m_freem(m);
+   return;
+   }
+
/* Call the appropriate IPsec transform callback. */
switch (af) {
case AF_INET:
-   switch (sproto)
-   {
-   case IPPROTO_ESP:
-   esp4_input_cb(m);
-   return;
-   case IPPROTO_AH:
-   ah4_input_cb(m);
-   return;
-   case IPPROTO_IPCOMP:
-   ipcomp4_input_cb(m);
-   return;
-   default:
-   DPRINTF(("ipsec_common_input_cb(): unknown/unsupported"
-   " security protocol %d\n", sproto));
-   m_freem(m);
-   return;
+   if (niq_enqueue(, m) != 0) {
+   DPRINTF(("ipsec_common_input_cb(): dropped packet "
+   "because of full IP queue\n"));
+   IPSEC_ISTAT(espstat.esps_qfull, ahstat.ahs_qfull,
+   ipcompstat.ipcomps_qfull);
}
-   break;
-
+   return;
 #ifdef INET6
case AF_INET6:
-   switch (sproto) {
-   case IPPROTO_ESP:
-   esp6_input_cb(m, skip, protoff);
-   return;
-   case IPPROTO_AH:
-   ah6_input_cb(m, skip, protoff);
-   return;
-   case IPPROTO_IPCOMP:
-   ipcomp6_input_cb(m, skip, protoff);
-   return;
-   default:
-   DPRINTF(("ipsec_common_input_cb(): unknown/unsupported"
-   " security protocol %d\n", sproto));
-   m_freem(m);
-   return;
-   }
-   break;
+   ip6_ours(m, skip, prot);
+   return;
 #endif /* INET6 */
-
default:
DPRINTF(("ipsec_common_input_cb(): unknown/unsupported "
"protocol family %d\n", af));
@@ -704,24 +679,6 @@ ah4_input(struct mbuf **mp, int *offp, i
return IPPROTO_DONE;
 }
 
-/* IPv4 AH callback. */
-void
-ah4_input_cb(struct mbuf *m, ...)
-{
-   /*
-* Interface pointer is already in first mbuf; chop off the
-* `outer' header and reschedule.
-*/
-
-   if (niq_enqueue(, m) != 0) {
-   ahstat.ahs_qfull++;
-   DPRINTF(("ah4_input_cb(): dropped packet because of full "
-   "IP queue\n"));
-   return;
-   }
-}
-
-
 /* XXX rdomain */
 void
 ah4_ctlinput(int cmd, struct sockaddr *sa, u_int rdomain, void *v)
@@ -742,22 +699,6 @@ esp4_input(struct mbuf **mp, int *offp, 
return IPPROTO_DONE;
 }
 
-/* IPv4 ESP callback. */
-void
-esp4_input_cb(struct mbuf *m, ...)
-{
-   /*
-* Interface pointer is already in first mbuf; chop off the
-* `outer' header and reschedule.
-*/
-   if (niq_enqueue(, m) != 0) {
-   espstat.esps_qfull++;
-   DPRINTF(("esp4_input_cb(): dropped packet because of full "
-   "IP queue\n"));
-   return;
-   }
-}
-
 /* IPv4 IPCOMP wrapper */
 int
 ipcomp4_input(struct mbuf **mp, int *offp, int proto)
@@ -767,21 +708,6 @@ ipcomp4_input(struct mbuf 

Re: PF & CPU hogging

2017-02-07 Thread Ted Unangst
Martin Pieuchot wrote:
> On 07/02/17(Tue) 10:29, Ted Unangst wrote:
> > Martin Pieuchot wrote:
> > > PF has its own home-brewed solution for dealing with CPU hogging.  It
> > > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> > > explanation why it is different than the idiom we use in other places.
> > 
> > oh, just because i didn't bother making them all the same.
> > 
> > > 
> > > So let's use the same idiom, I promise to introduce a macro an unify all
> > > of them once this is in.
> > 
> > so i think sched_pause is supposed to be that macro, but gets into the 
> > preempt
> > vs yield debate.
> 
> I want to introduce a sched_preempt() and convert all the current
> callers of preempt() in a second step.  Does that sound like a plan?

spitballing...

a sched_pause() macro that takes the function to call? sched_pause(yield) or
sched_pause(preempt). is that more clear? i think it's less mysterious what
happens.

The only problem is that preempt requires a NULL argument, but after 30 years
of not fixing that, I'd say we can also just delete the argument.



ipsec void functions

2017-02-07 Thread Alexander Bluhm
Hi,

Error propagation does neither make sense for ip input path nor for
asynchronous callbacks.  Make the IPsec functions void, there is
already a counter in the error path.

ok?

bluhm

Index: netinet/ip_ipsp.h
===
RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.177
diff -u -p -r1.177 ip_ipsp.h
--- netinet/ip_ipsp.h   29 Jan 2017 19:58:47 -  1.177
+++ netinet/ip_ipsp.h   7 Feb 2017 18:59:26 -
@@ -551,7 +551,7 @@ struct ipsec_ids *ipsp_ids_lookup(u_int3
 void   ipsp_ids_free(struct ipsec_ids *);
 
 intipsec_common_input(struct mbuf *, int, int, int, int, int);
-intipsec_common_input_cb(struct mbuf *, struct tdb *, int, int);
+void   ipsec_common_input_cb(struct mbuf *, struct tdb *, int, int);
 intipsec_delete_policy(struct ipsec_policy *);
 ssize_tipsec_hdrsz(struct tdb *);
 void   ipsec_adjust_mtu(struct mbuf *, u_int32_t);
Index: netinet/ipsec_input.c
===
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.142
diff -u -p -r1.142 ipsec_input.c
--- netinet/ipsec_input.c   5 Feb 2017 16:04:14 -   1.142
+++ netinet/ipsec_input.c   7 Feb 2017 18:59:26 -
@@ -80,14 +80,14 @@
 #include "bpfilter.h"
 
 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
-int ah4_input_cb(struct mbuf *, ...);
-int esp4_input_cb(struct mbuf *, ...);
-int ipcomp4_input_cb(struct mbuf *, ...);
+void ah4_input_cb(struct mbuf *, ...);
+void esp4_input_cb(struct mbuf *, ...);
+void ipcomp4_input_cb(struct mbuf *, ...);
 
 #ifdef INET6
-int ah6_input_cb(struct mbuf *, int, int);
-int esp6_input_cb(struct mbuf *, int, int);
-int ipcomp6_input_cb(struct mbuf *, int, int);
+void ah6_input_cb(struct mbuf *, int, int);
+void esp6_input_cb(struct mbuf *, int, int);
+void ipcomp6_input_cb(struct mbuf *, int, int);
 #endif
 
 #ifdef ENCDEBUG
@@ -321,7 +321,7 @@ ipsec_common_input(struct mbuf *m, int s
  * IPsec input callback, called by the transform callback. Takes care of
  * filtering and other sanity checks on the processed packet.
  */
-int
+void
 ipsec_common_input_cb(struct mbuf *m, struct tdb *tdbp, int skip, int protoff)
 {
int af, sproto;
@@ -353,7 +353,7 @@ ipsec_common_input_cb(struct mbuf *m, st
/* The called routine will print a message if necessary */
IPSEC_ISTAT(espstat.esps_badkcr, ahstat.ahs_badkcr,
ipcompstat.ipcomps_badkcr);
-   return EINVAL;
+   return;
}
 
/* Fix IPv4 header */
@@ -364,7 +364,7 @@ ipsec_common_input_cb(struct mbuf *m, st
buf, sizeof(buf)), ntohl(tdbp->tdb_spi)));
IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
ipcompstat.ipcomps_hdrops);
-   return ENOBUFS;
+   return;
}
 
ip = mtod(m, struct ip *);
@@ -380,7 +380,7 @@ ipsec_common_input_cb(struct mbuf *m, st
IPSEC_ISTAT(espstat.esps_hdrops,
ahstat.ahs_hdrops,
ipcompstat.ipcomps_hdrops);
-   return EINVAL;
+   return;
}
/* ipn will now contain the inner IPv4 header */
m_copydata(m, skip, sizeof(struct ip),
@@ -395,7 +395,7 @@ ipsec_common_input_cb(struct mbuf *m, st
IPSEC_ISTAT(espstat.esps_hdrops,
ahstat.ahs_hdrops,
ipcompstat.ipcomps_hdrops);
-   return EINVAL;
+   return;
}
/* ip6n will now contain the inner IPv6 header. */
m_copydata(m, skip, sizeof(struct ip6_hdr),
@@ -417,7 +417,7 @@ ipsec_common_input_cb(struct mbuf *m, st
 
IPSEC_ISTAT(espstat.esps_hdrops, ahstat.ahs_hdrops,
ipcompstat.ipcomps_hdrops);
-   return EACCES;
+   return;
}
 
ip6 = mtod(m, struct ip6_hdr *);
@@ -433,7 +433,7 @@ ipsec_common_input_cb(struct mbuf *m, st
IPSEC_ISTAT(espstat.esps_hdrops,
ahstat.ahs_hdrops,
ipcompstat.ipcomps_hdrops);
-   return EINVAL;
+   return;
}
/* ipn will now contain the inner IPv4 header */
m_copydata(m, skip, sizeof(struct ip), (caddr_t) );
@@ -446,7 +446,7 @@ ipsec_common_input_cb(struct mbuf *m, st

Re: tcpstat percpu counters

2017-02-07 Thread Alexander Bluhm
On Mon, Feb 06, 2017 at 07:44:23PM +0100, Jeremie Courreges-Anglas wrote:
> Revised after the change in the counters api.  Passed make release on
> amd64, runtime tested on GENERIC armv7 and GENERIC.MP amd64.

OK bluhm@

> 
> 
> Index: net/pf.c
> ===
> RCS file: /d/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1014
> diff -u -p -r1.1014 pf.c
> --- net/pf.c  5 Feb 2017 16:04:14 -   1.1014
> +++ net/pf.c  6 Feb 2017 07:26:32 -
> @@ -6026,7 +6026,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
>   }
>  
>   /* need to do it in software */
> - tcpstat.tcps_inswcsum++;
> + tcpstat_inc(tcps_inswcsum);
>  
>   switch (af) {
>   case AF_INET:
> @@ -6047,7 +6047,7 @@ pf_check_tcp_cksum(struct mbuf *m, int o
>   unhandled_af(af);
>   }
>   if (sum) {
> - tcpstat.tcps_rcvbadsum++;
> + tcpstat_inc(tcps_rcvbadsum);
>   m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
>   return (1);
>   }
> Index: netinet/ip_output.c
> ===
> RCS file: /d/cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.335
> diff -u -p -r1.335 ip_output.c
> --- netinet/ip_output.c   1 Feb 2017 20:59:47 -   1.335
> +++ netinet/ip_output.c   5 Feb 2017 16:27:51 -
> @@ -1789,7 +1789,7 @@ in_proto_cksum_out(struct mbuf *m, struc
>   if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
>   if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
>   ip->ip_hl != 5 || ifp->if_bridgeport != NULL) {
> - tcpstat.tcps_outswcsum++;
> + tcpstat_inc(tcps_outswcsum);
>   in_delayed_cksum(m);
>   m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
>   }
> Index: netinet/tcp_input.c
> ===
> RCS file: /d/cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.337
> diff -u -p -r1.337 tcp_input.c
> --- netinet/tcp_input.c   29 Jan 2017 19:58:47 -  1.337
> +++ netinet/tcp_input.c   31 Jan 2017 13:33:00 -
> @@ -234,7 +234,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
>   if (tiqe == NULL || th->th_seq != tp->rcv_nxt) {
>   /* Flush segment queue for this connection */
>   tcp_freeq(tp);
> - tcpstat.tcps_rcvmemdrop++;
> + tcpstat_inc(tcps_rcvmemdrop);
>   m_freem(m);
>   return (0);
>   }
> @@ -261,8 +261,8 @@ tcp_reass(struct tcpcb *tp, struct tcphd
>   i = phdr->th_seq + phdr->th_reseqlen - th->th_seq;
>   if (i > 0) {
>   if (i >= *tlen) {
> - tcpstat.tcps_rcvduppack++;
> - tcpstat.tcps_rcvdupbyte += *tlen;
> + tcpstat_pkt(tcps_rcvduppack, tcps_rcvdupbyte,
> + *tlen);
>   m_freem(m);
>   pool_put(_pool, tiqe);
>   return (0);
> @@ -272,8 +272,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
>   th->th_seq += i;
>   }
>   }
> - tcpstat.tcps_rcvoopack++;
> - tcpstat.tcps_rcvoobyte += *tlen;
> + tcpstat_pkt(tcps_rcvoopack, tcps_rcvoobyte, *tlen);
>  
>   /*
>* While we overlap succeeding segments trim them or,
> @@ -389,7 +388,7 @@ tcp_input(struct mbuf **mp, int *offp, i
>   u_char iptos;
>  #endif
>  
> - tcpstat.tcps_rcvtotal++;
> + tcpstat_inc(tcps_rcvtotal);
>  
>   opti.ts_present = 0;
>   opti.maxseg = 0;
> @@ -448,7 +447,7 @@ tcp_input(struct mbuf **mp, int *offp, i
>  
>   IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, sizeof(*th));
>   if (!th) {
> - tcpstat.tcps_rcvshort++;
> + tcpstat_inc(tcps_rcvshort);
>   return IPPROTO_DONE;
>   }
>  
> @@ -508,10 +507,10 @@ tcp_input(struct mbuf **mp, int *offp, i
>   int sum;
>  
>   if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD) {
> - tcpstat.tcps_rcvbadsum++;
> + tcpstat_inc(tcps_rcvbadsum);
>   goto drop;
>   }
> - tcpstat.tcps_inswcsum++;
> + tcpstat_inc(tcps_inswcsum);
>   switch (af) {
>   case AF_INET:
>   sum = in4_cksum(m, IPPROTO_TCP, iphlen, tlen);
> @@ -524,7 +523,7 @@ tcp_input(struct mbuf **mp, int *offp, i
>  #endif
>   }
>   if (sum != 0) {
> - tcpstat.tcps_rcvbadsum++;
> + tcpstat_inc(tcps_rcvbadsum);
>   goto drop;
>   }
>   }
> @@ -535,14 +534,14 @@ tcp_input(struct mbuf **mp, int 

Re: ipsec output failure counter

2017-02-07 Thread David Hill

OK once the XXX from ip_esp.c is removed too.


On Tue, Feb 07, 2017 at 06:15:03PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> As mentioned before, IPsec packets could be dropped unaccounted if
> output after crypto failed.  Add a counter for that case.
> 
> ok?
> 
> bluhm
> 
> Index: sys/netinet/ip_ah.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.c,v
> retrieving revision 1.126
> diff -u -p -r1.126 ip_ah.c
> --- sys/netinet/ip_ah.c   7 Feb 2017 15:10:48 -   1.126
> +++ sys/netinet/ip_ah.c   7 Feb 2017 16:55:11 -
> @@ -1247,8 +1247,8 @@ ah_output_cb(struct cryptop *crp)
>   /* No longer needed. */
>   crypto_freereq(crp);
>  
> - ipsp_process_done(m, tdb);
> - /* XXX missing error counter if ipsp_process_done() drops packet */
> + if (ipsp_process_done(m, tdb))
> + ahstat.ahs_outfail++;
>   NET_UNLOCK(s);
>  
>   baddone:
> Index: sys/netinet/ip_ah.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.h,v
> retrieving revision 1.33
> diff -u -p -r1.33 ip_ah.h
> --- sys/netinet/ip_ah.h   10 Jan 2010 12:43:07 -  1.33
> +++ sys/netinet/ip_ah.h   7 Feb 2017 16:55:11 -
> @@ -38,8 +38,7 @@
>  #ifndef _NETINET_IP_AH_H_
>  #define _NETINET_IP_AH_H_
>  
> -struct ahstat
> -{
> +struct ahstat {
>  u_int32_tahs_hdrops; /* Packet shorter than header shows */
>  u_int32_tahs_nopf;   /* Protocol family not supported */
>  u_int32_tahs_notdb;
> @@ -58,10 +57,10 @@ struct ahstat
>  u_int32_tahs_toobig; /* Packet got larger than IP_MAXPACKET 
> */
>  u_int32_tahs_pdrops; /* Packet blocked due to policy */
>  u_int32_tahs_crypto; /* Crypto processing failure */
> +u_int32_tahs_outfail;/* Packet output failure */
>  };
>  
> -struct ah
> -{
> +struct ah {
>  u_int8_t   ah_nh;
>  u_int8_t   ah_hl;
>  u_int16_t  ah_rv;
> Index: sys/netinet/ip_esp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 ip_esp.c
> --- sys/netinet/ip_esp.c  7 Feb 2017 15:10:48 -   1.144
> +++ sys/netinet/ip_esp.c  7 Feb 2017 16:55:11 -
> @@ -1088,7 +1088,8 @@ esp_output_cb(struct cryptop *crp)
>   crypto_freereq(crp);
>  
>   /* Call the IPsec input callback. */
> - ipsp_process_done(m, tdb);
> + if (ipsp_process_done(m, tdb))
> + espstat.esps_outfail++;
>   /* XXX missing error counter if ipsp_process_done() drops packet */
>   NET_UNLOCK(s);
>   return;
> Index: sys/netinet/ip_esp.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.h,v
> retrieving revision 1.43
> diff -u -p -r1.43 ip_esp.h
> --- sys/netinet/ip_esp.h  2 Sep 2016 09:39:32 -   1.43
> +++ sys/netinet/ip_esp.h  7 Feb 2017 16:55:11 -
> @@ -38,8 +38,7 @@
>  #ifndef _NETINET_IP_ESP_H_
>  #define _NETINET_IP_ESP_H_
>  
> -struct espstat
> -{
> +struct espstat {
>  u_int32_tesps_hdrops;/* Packet shorter than header shows */
>  u_int32_tesps_nopf;  /* Protocol family not supported */
>  u_int32_tesps_notdb;
> @@ -63,6 +62,7 @@ struct espstat
>  u_int32_tesps_udpencout; /* Output ESP-in-UDP packets */
>  u_int32_tesps_udpinval;  /* Invalid input ESP-in-UDP packets */
>  u_int32_tesps_udpneeded; /* Trying to use a ESP-in-UDP TDB */
> +u_int32_tesps_outfail;   /* Packet output failure */
>  };
>  
>  /*
> Index: sys/netinet/ip_ipcomp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.c,v
> retrieving revision 1.51
> diff -u -p -r1.51 ip_ipcomp.c
> --- sys/netinet/ip_ipcomp.c   7 Feb 2017 15:10:48 -   1.51
> +++ sys/netinet/ip_ipcomp.c   7 Feb 2017 16:55:11 -
> @@ -579,8 +579,8 @@ ipcomp_output_cb(struct cryptop *crp)
>   if (rlen < crp->crp_olen) {
>   /* Compression was useless, we have lost time. */
>   crypto_freereq(crp);
> - ipsp_process_done(m, tdb);
> - /* XXX missing counter if ipsp_process_done() drops packet */
> + if (ipsp_process_done(m, tdb))
> + ipcompstat.ipcomps_outfail++;
>   NET_UNLOCK(s);
>   return;
>   }
> @@ -628,8 +628,8 @@ ipcomp_output_cb(struct cryptop *crp)
>   /* Release the crypto descriptor. */
>   crypto_freereq(crp);
>  
> - ipsp_process_done(m, tdb);
> - /* XXX missing error counter if ipsp_process_done() drops packet */
> + if (ipsp_process_done(m, tdb))
> + ipcompstat.ipcomps_outfail++;
>   

ipsec output failure counter

2017-02-07 Thread Alexander Bluhm
Hi,

As mentioned before, IPsec packets could be dropped unaccounted if
output after crypto failed.  Add a counter for that case.

ok?

bluhm

Index: sys/netinet/ip_ah.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.c,v
retrieving revision 1.126
diff -u -p -r1.126 ip_ah.c
--- sys/netinet/ip_ah.c 7 Feb 2017 15:10:48 -   1.126
+++ sys/netinet/ip_ah.c 7 Feb 2017 16:55:11 -
@@ -1247,8 +1247,8 @@ ah_output_cb(struct cryptop *crp)
/* No longer needed. */
crypto_freereq(crp);
 
-   ipsp_process_done(m, tdb);
-   /* XXX missing error counter if ipsp_process_done() drops packet */
+   if (ipsp_process_done(m, tdb))
+   ahstat.ahs_outfail++;
NET_UNLOCK(s);
 
  baddone:
Index: sys/netinet/ip_ah.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ah.h,v
retrieving revision 1.33
diff -u -p -r1.33 ip_ah.h
--- sys/netinet/ip_ah.h 10 Jan 2010 12:43:07 -  1.33
+++ sys/netinet/ip_ah.h 7 Feb 2017 16:55:11 -
@@ -38,8 +38,7 @@
 #ifndef _NETINET_IP_AH_H_
 #define _NETINET_IP_AH_H_
 
-struct ahstat
-{
+struct ahstat {
 u_int32_t  ahs_hdrops; /* Packet shorter than header shows */
 u_int32_t  ahs_nopf;   /* Protocol family not supported */
 u_int32_t  ahs_notdb;
@@ -58,10 +57,10 @@ struct ahstat
 u_int32_t  ahs_toobig; /* Packet got larger than IP_MAXPACKET */
 u_int32_t  ahs_pdrops; /* Packet blocked due to policy */
 u_int32_t  ahs_crypto; /* Crypto processing failure */
+u_int32_t  ahs_outfail;/* Packet output failure */
 };
 
-struct ah
-{
+struct ah {
 u_int8_t   ah_nh;
 u_int8_t   ah_hl;
 u_int16_t  ah_rv;
Index: sys/netinet/ip_esp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.c,v
retrieving revision 1.144
diff -u -p -r1.144 ip_esp.c
--- sys/netinet/ip_esp.c7 Feb 2017 15:10:48 -   1.144
+++ sys/netinet/ip_esp.c7 Feb 2017 16:55:11 -
@@ -1088,7 +1088,8 @@ esp_output_cb(struct cryptop *crp)
crypto_freereq(crp);
 
/* Call the IPsec input callback. */
-   ipsp_process_done(m, tdb);
+   if (ipsp_process_done(m, tdb))
+   espstat.esps_outfail++;
/* XXX missing error counter if ipsp_process_done() drops packet */
NET_UNLOCK(s);
return;
Index: sys/netinet/ip_esp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_esp.h,v
retrieving revision 1.43
diff -u -p -r1.43 ip_esp.h
--- sys/netinet/ip_esp.h2 Sep 2016 09:39:32 -   1.43
+++ sys/netinet/ip_esp.h7 Feb 2017 16:55:11 -
@@ -38,8 +38,7 @@
 #ifndef _NETINET_IP_ESP_H_
 #define _NETINET_IP_ESP_H_
 
-struct espstat
-{
+struct espstat {
 u_int32_t  esps_hdrops;/* Packet shorter than header shows */
 u_int32_t  esps_nopf;  /* Protocol family not supported */
 u_int32_t  esps_notdb;
@@ -63,6 +62,7 @@ struct espstat
 u_int32_t  esps_udpencout; /* Output ESP-in-UDP packets */
 u_int32_t  esps_udpinval;  /* Invalid input ESP-in-UDP packets */
 u_int32_t  esps_udpneeded; /* Trying to use a ESP-in-UDP TDB */
+u_int32_t  esps_outfail;   /* Packet output failure */
 };
 
 /*
Index: sys/netinet/ip_ipcomp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.c,v
retrieving revision 1.51
diff -u -p -r1.51 ip_ipcomp.c
--- sys/netinet/ip_ipcomp.c 7 Feb 2017 15:10:48 -   1.51
+++ sys/netinet/ip_ipcomp.c 7 Feb 2017 16:55:11 -
@@ -579,8 +579,8 @@ ipcomp_output_cb(struct cryptop *crp)
if (rlen < crp->crp_olen) {
/* Compression was useless, we have lost time. */
crypto_freereq(crp);
-   ipsp_process_done(m, tdb);
-   /* XXX missing counter if ipsp_process_done() drops packet */
+   if (ipsp_process_done(m, tdb))
+   ipcompstat.ipcomps_outfail++;
NET_UNLOCK(s);
return;
}
@@ -628,8 +628,8 @@ ipcomp_output_cb(struct cryptop *crp)
/* Release the crypto descriptor. */
crypto_freereq(crp);
 
-   ipsp_process_done(m, tdb);
-   /* XXX missing error counter if ipsp_process_done() drops packet */
+   if (ipsp_process_done(m, tdb))
+   ipcompstat.ipcomps_outfail++;
NET_UNLOCK(s);
return;
 
Index: sys/netinet/ip_ipcomp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipcomp.h,v
retrieving revision 1.7
diff -u -p -r1.7 ip_ipcomp.h
--- sys/netinet/ip_ipcomp.h 14 Dec 2007 18:33:41 -  1.7
+++ sys/netinet/ip_ipcomp.h 7 Feb 2017 16:55:11 -
@@ -51,6 +51,7 @@ struct 

divert(4) percpu counters

2017-02-07 Thread Jeremie Courreges-Anglas

Could someone who uses divert socket confirm that this diff works fine?

ok?


Index: netinet/ip_divert.c
===
RCS file: /d/cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.43
diff -u -p -r1.43 ip_divert.c
--- netinet/ip_divert.c 29 Jan 2017 19:58:47 -  1.43
+++ netinet/ip_divert.c 6 Feb 2017 23:15:49 -
@@ -42,7 +42,7 @@
 #include 
 
 struct inpcbtable  divbtable;
-struct divstat divstat;
+struct cpumem  *divcounters;
 
 #ifndef DIVERT_SENDSPACE
 #define DIVERT_SENDSPACE   (65536 + 100)
@@ -69,6 +69,7 @@ void
 divert_init(void)
 {
in_pcbinit(, divbhashsize);
+   divcounters = counters_alloc(divs_ncounters);
 }
 
 int
@@ -96,7 +97,7 @@ divert_output(struct inpcb *inp, struct 
goto fail;
if ((m = m_pullup(m, sizeof(struct ip))) == NULL) {
/* m_pullup() has freed the mbuf, so just return. */
-   divstat.divs_errors++;
+   divstat_inc(divs_errors);
return (ENOBUFS);
}
ip = mtod(m, struct ip *);
@@ -157,12 +158,12 @@ divert_output(struct inpcb *inp, struct 
error = EHOSTUNREACH;
}
 
-   divstat.divs_opackets++;
+   divstat_inc(divs_opackets);
return (error);
 
 fail:
m_freem(m);
-   divstat.divs_errors++;
+   divstat_inc(divs_errors);
return (error ? error : EINVAL);
 }
 
@@ -174,11 +175,11 @@ divert_packet(struct mbuf *m, int dir, u
struct sockaddr_in addr;
 
inp = NULL;
-   divstat.divs_ipackets++;
+   divstat_inc(divs_ipackets);
 
if (m->m_len < sizeof(struct ip) &&
(m = m_pullup(m, sizeof(struct ip))) == NULL) {
-   divstat.divs_errors++;
+   divstat_inc(divs_errors);
return (0);
}
 
@@ -220,7 +221,7 @@ divert_packet(struct mbuf *m, int dir, u
if (inp) {
sa = inp->inp_socket;
if (sbappendaddr(>so_rcv, sintosa(), m, NULL) == 0) {
-   divstat.divs_fullsock++;
+   divstat_inc(divs_fullsock);
m_freem(m);
return (0);
} else
@@ -228,7 +229,7 @@ divert_packet(struct mbuf *m, int dir, u
}
 
if (sa == NULL) {
-   divstat.divs_noport++;
+   divstat_inc(divs_noport);
m_freem(m);
}
return (0);
@@ -331,6 +332,25 @@ release:
return (error);
 }
 
+int
+divert_sysctl_divstat(void *oldp, size_t *oldlenp, void *newp)
+{
+   uint64_t counters[divs_ncounters];
+   struct divstat divstat;
+   u_long *words = (u_long *)
+   int i;
+
+   CTASSERT(sizeof(divstat) == (nitems(counters) * sizeof(u_long)));
+
+   counters_read(divcounters, counters, nitems(counters));
+
+   for (i = 0; i < nitems(counters); i++)
+   words[i] = (u_long)counters[i];
+
+   return (sysctl_rdstruct(oldp, oldlenp, newp,
+   , sizeof(divstat)));
+}
+
 /*
  * Sysctl for divert variables.
  */
@@ -350,10 +370,7 @@ divert_sysctl(int *name, u_int namelen, 
return (sysctl_int(oldp, oldlenp, newp, newlen,
_recvspace));
case DIVERTCTL_STATS:
-   if (newp != NULL)
-   return (EPERM);
-   return (sysctl_struct(oldp, oldlenp, newp, newlen,
-   , sizeof(divstat)));
+   return (divert_sysctl_divstat(oldp, oldlenp, newp));
default:
if (name[0] < DIVERTCTL_MAXID)
return sysctl_int_arr(divertctl_vars, name, namelen,
Index: netinet/ip_divert.h
===
RCS file: /d/cvs/src/sys/netinet/ip_divert.h,v
retrieving revision 1.7
diff -u -p -r1.7 ip_divert.h
--- netinet/ip_divert.h 25 Jan 2017 17:34:31 -  1.7
+++ netinet/ip_divert.h 6 Feb 2017 13:57:37 -
@@ -50,8 +50,27 @@ struct divstat {
 }
 
 #ifdef _KERNEL
+
+#include 
+
+enum divstat_counters {
+   divs_ipackets,
+   divs_noport,
+   divs_fullsock,
+   divs_opackets,
+   divs_errors,
+   divs_ncounters,
+};
+
+extern struct cpumem *divcounters;
+
+static inline void
+divstat_inc(enum divstat_counters c)
+{
+   counters_inc(divcounters, c);
+}
+
 extern struct  inpcbtable  divbtable;
-extern struct  divstat divstat;
 
 voiddivert_init(void);
 voiddivert_input(struct mbuf *, int, int);
Index: netinet6/ip6_divert.c
===
RCS file: /d/cvs/src/sys/netinet6/ip6_divert.c,v
retrieving revision 1.44
diff -u -p -r1.44 ip6_divert.c
--- netinet6/ip6_divert.c   29 Jan 2017 19:58:47 -  1.44
+++ netinet6/ip6_divert.c   6 Feb 2017 23:16:07 -
@@ -43,7 +43,7 @@
 #include 
 
 struct inpcbtable  divb6table;
-struct div6statdiv6stat;

libressl-2.5.1 patches

2017-02-07 Thread John Boyd


libressl-2.5.1-medusade.tar.gz
Description: GNU Zip compressed data


Re: PF & CPU hogging

2017-02-07 Thread Martin Pieuchot
On 07/02/17(Tue) 10:29, Ted Unangst wrote:
> Martin Pieuchot wrote:
> > PF has its own home-brewed solution for dealing with CPU hogging.  It
> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> > explanation why it is different than the idiom we use in other places.
> 
> oh, just because i didn't bother making them all the same.
> 
> > 
> > So let's use the same idiom, I promise to introduce a macro an unify all
> > of them once this is in.
> 
> so i think sched_pause is supposed to be that macro, but gets into the preempt
> vs yield debate.

I want to introduce a sched_preempt() and convert all the current
callers of preempt() in a second step.  Does that sound like a plan?



Re: PF & CPU hogging

2017-02-07 Thread Ted Unangst
Martin Pieuchot wrote:
> PF has its own home-brewed solution for dealing with CPU hogging.  It
> has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> explanation why it is different than the idiom we use in other places.

oh, just because i didn't bother making them all the same.

> 
> So let's use the same idiom, I promise to introduce a macro an unify all
> of them once this is in.

so i think sched_pause is supposed to be that macro, but gets into the preempt
vs yield debate.




Re: SOCKET_LOCK makes his come back

2017-02-07 Thread Martin Pieuchot
On 07/02/17(Tue) 13:54, Martin Pieuchot wrote:
> Now that we found and fixed multiple deadlocks involving the NET_LOCK()
> and unix domain sockets, let's do as if it wasn't strictly necessary.
> 
> The NET_LOCK() has been introduced to prevent the softnet thread to run
> while a userland process is pushing packets down into the Network Stack.
> This do not happen with PF_LOCAL sockets, so they simply don't need to
> grab the NET_LOCK().
> 
> So this diff introduces a refreshed SOCKET_LOCK/SOCKET_UNLOCK set of
> macro that are currently doing nothing for PF_LOCAL sockets.  Most of
> the code has been converted, except soclose() which is tricky due to
> sofree(9).
> 
> The goal is then to use these macros to start un-KERNEL_LOCK()ing unix
> socket related syscalls.
> 
> Tests and comments welcome.

Now with the AF check corrected, as found by tb@

Index: kern/sys_socket.c
===
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.28
diff -u -p -r1.28 sys_socket.c
--- kern/sys_socket.c   31 Jan 2017 12:16:20 -  1.28
+++ kern/sys_socket.c   7 Feb 2017 14:26:19 -
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -127,10 +128,10 @@ soo_ioctl(struct file *fp, u_long cmd, c
}
if (IOCGROUP(cmd) == 'r')
return (EOPNOTSUPP);
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, 
(struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p));
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
 
return (error);
 }
@@ -187,10 +188,10 @@ soo_stat(struct file *fp, struct stat *u
ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
ub->st_uid = so->so_euid;
ub->st_gid = so->so_egid;
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
(void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
(struct mbuf *)ub, NULL, NULL, p));
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (0);
 }
 
Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.176
diff -u -p -r1.176 uipc_socket.c
--- kern/uipc_socket.c  1 Feb 2017 20:59:47 -   1.176
+++ kern/uipc_socket.c  7 Feb 2017 14:26:04 -
@@ -135,16 +135,16 @@ socreate(int dom, struct socket **aso, i
so->so_egid = p->p_ucred->cr_gid;
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
(struct mbuf *)(long)proto, NULL, p);
if (error) {
so->so_state |= SS_NOFDREF;
+   SOCKET_UNLOCK(so, s);
sofree(so);
-   NET_UNLOCK(s);
return (error);
}
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
*aso = so;
return (0);
 }
@@ -154,9 +154,9 @@ sobind(struct socket *so, struct mbuf *n
 {
int s, error;
 
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (error);
 }
 
@@ -171,11 +171,11 @@ solisten(struct socket *so, int backlog)
if (isspliced(so) || issplicedback(so))
return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (error);
}
if (TAILQ_FIRST(>so_q) == NULL)
@@ -185,15 +185,13 @@ solisten(struct socket *so, int backlog)
if (backlog < sominconn)
backlog = sominconn;
so->so_qlimit = backlog;
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (0);
 }
 
 void
 sofree(struct socket *so)
 {
-   NET_ASSERT_LOCKED();
-
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
return;
if (so->so_head) {
@@ -294,7 +292,7 @@ soaccept(struct socket *so, struct mbuf 
 {
int error = 0;
 
-   NET_ASSERT_LOCKED();
+   SOCKET_ASSERT_LOCKED(so);
 
if ((so->so_state & SS_NOFDREF) == 0)
panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
@@ -315,7 +313,7 @@ soconnect(struct socket *so, struct mbuf
 
if (so->so_options & SO_ACCEPTCONN)
return (EOPNOTSUPP);
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
/*
 * If protocol is connection-based, can only connect once.
 * Otherwise, if connected, try to disconnect first.
@@ -329,7 +327,7 @@ soconnect(struct socket *so, struct mbuf
else
error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,

SOCKET_LOCK makes his come back

2017-02-07 Thread Martin Pieuchot
Now that we found and fixed multiple deadlocks involving the NET_LOCK()
and unix domain sockets, let's do as if it wasn't strictly necessary.

The NET_LOCK() has been introduced to prevent the softnet thread to run
while a userland process is pushing packets down into the Network Stack.
This do not happen with PF_LOCAL sockets, so they simply don't need to
grab the NET_LOCK().

So this diff introduces a refreshed SOCKET_LOCK/SOCKET_UNLOCK set of
macro that are currently doing nothing for PF_LOCAL sockets.  Most of
the code has been converted, except soclose() which is tricky due to
sofree(9).

The goal is then to use these macros to start un-KERNEL_LOCK()ing unix
socket related syscalls.

Tests and comments welcome.

Index: kern/sys_socket.c
===
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.28
diff -u -p -r1.28 sys_socket.c
--- kern/sys_socket.c   31 Jan 2017 12:16:20 -  1.28
+++ kern/sys_socket.c   7 Feb 2017 12:23:20 -
@@ -127,10 +127,10 @@ soo_ioctl(struct file *fp, u_long cmd, c
}
if (IOCGROUP(cmd) == 'r')
return (EOPNOTSUPP);
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = ((*so->so_proto->pr_usrreq)(so, PRU_CONTROL, 
(struct mbuf *)cmd, (struct mbuf *)data, (struct mbuf *)NULL, p));
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
 
return (error);
 }
@@ -187,10 +187,10 @@ soo_stat(struct file *fp, struct stat *u
ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
ub->st_uid = so->so_euid;
ub->st_gid = so->so_egid;
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
(void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
(struct mbuf *)ub, NULL, NULL, p));
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (0);
 }
 
Index: kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.176
diff -u -p -r1.176 uipc_socket.c
--- kern/uipc_socket.c  1 Feb 2017 20:59:47 -   1.176
+++ kern/uipc_socket.c  7 Feb 2017 12:39:03 -
@@ -135,16 +135,16 @@ socreate(int dom, struct socket **aso, i
so->so_egid = p->p_ucred->cr_gid;
so->so_cpid = p->p_p->ps_pid;
so->so_proto = prp;
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*prp->pr_usrreq)(so, PRU_ATTACH, NULL,
(struct mbuf *)(long)proto, NULL, p);
if (error) {
so->so_state |= SS_NOFDREF;
+   SOCKET_UNLOCK(so, s);
sofree(so);
-   NET_UNLOCK(s);
return (error);
}
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
*aso = so;
return (0);
 }
@@ -154,9 +154,9 @@ sobind(struct socket *so, struct mbuf *n
 {
int s, error;
 
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*so->so_proto->pr_usrreq)(so, PRU_BIND, NULL, nam, NULL, p);
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (error);
 }
 
@@ -171,11 +171,11 @@ solisten(struct socket *so, int backlog)
if (isspliced(so) || issplicedback(so))
return (EOPNOTSUPP);
 #endif /* SOCKET_SPLICE */
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (error);
}
if (TAILQ_FIRST(>so_q) == NULL)
@@ -185,15 +185,13 @@ solisten(struct socket *so, int backlog)
if (backlog < sominconn)
backlog = sominconn;
so->so_qlimit = backlog;
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (0);
 }
 
 void
 sofree(struct socket *so)
 {
-   NET_ASSERT_LOCKED();
-
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
return;
if (so->so_head) {
@@ -294,7 +292,7 @@ soaccept(struct socket *so, struct mbuf 
 {
int error = 0;
 
-   NET_ASSERT_LOCKED();
+   SOCKET_ASSERT_LOCKED(so);
 
if ((so->so_state & SS_NOFDREF) == 0)
panic("soaccept !NOFDREF: so %p, so_type %d", so, so->so_type);
@@ -315,7 +313,7 @@ soconnect(struct socket *so, struct mbuf
 
if (so->so_options & SO_ACCEPTCONN)
return (EOPNOTSUPP);
-   NET_LOCK(s);
+   SOCKET_LOCK(so, s);
/*
 * If protocol is connection-based, can only connect once.
 * Otherwise, if connected, try to disconnect first.
@@ -329,7 +327,7 @@ soconnect(struct socket *so, struct mbuf
else
error = (*so->so_proto->pr_usrreq)(so, PRU_CONNECT,
NULL, nam, NULL, curproc);
-   NET_UNLOCK(s);
+   SOCKET_UNLOCK(so, s);
return (error);
 }
 
@@ -338,10 +336,10 @@ soconnect2(struct socket *so1, struct so
 {
int s, error;
 
-  

11n hostap: enable short slot time

2017-02-07 Thread Stefan Sperling
This diff enables short slot time, accidentally left disabled in 11n mode.
Short vs. long slot time controls the (very small) amount of time devices
spend waiting between frame transmissions.

The only wifi devices which do not support short slot time are 11b ones.

Index: ieee80211_proto.c
===
RCS file: /cvs/src/sys/net80211/ieee80211_proto.c,v
retrieving revision 1.71
diff -u -p -r1.71 ieee80211_proto.c
--- ieee80211_proto.c   2 Feb 2017 16:47:53 -   1.71
+++ ieee80211_proto.c   7 Feb 2017 09:20:27 -
@@ -310,23 +310,23 @@ ieee80211_reset_erp(struct ieee80211com 
 {
ic->ic_flags &= ~IEEE80211_F_USEPROT;
 
-   /*
-* Enable short slot time iff:
-* - we're operating in 802.11a or
-* - we're operating in 802.11g and we're not in IBSS mode and
-*   the device supports short slot time
-*/
ieee80211_set_shortslottime(ic,
-   ic->ic_curmode == IEEE80211_MODE_11A
+   ic->ic_curmode == IEEE80211_MODE_11A ||
+   (ic->ic_curmode == IEEE80211_MODE_11N &&
+   IEEE80211_IS_CHAN_5GHZ(ic->ic_ibss_chan))
 #ifndef IEEE80211_STA_ONLY
||
-   (ic->ic_curmode == IEEE80211_MODE_11G &&
+   ((ic->ic_curmode == IEEE80211_MODE_11G ||
+   (ic->ic_curmode == IEEE80211_MODE_11N &&
+   IEEE80211_IS_CHAN_2GHZ(ic->ic_ibss_chan))) &&
 ic->ic_opmode == IEEE80211_M_HOSTAP &&
 (ic->ic_caps & IEEE80211_C_SHSLOT))
 #endif
);
 
if (ic->ic_curmode == IEEE80211_MODE_11A ||
+   (ic->ic_curmode == IEEE80211_MODE_11N &&
+   IEEE80211_IS_CHAN_5GHZ(ic->ic_ibss_chan)) ||
(ic->ic_caps & IEEE80211_C_SHPREAMBLE))
ic->ic_flags |= IEEE80211_F_SHPREAMBLE;
else



Re: specify curves via ecdhe statement in httpd.conf

2017-02-07 Thread Joel Sing
On Monday 06 February 2017 20:18:48 Andreas Bartelt wrote:
> Yes, right - thanks. I wasn't aware that this is actually a MUST
> requirement from RFC 4492. I'm quite surprised that the "Supported
> Elliptic Curves Extension" is also used in order to specify any allowed
> curves for use in the context of certificates. I think this is quite
> inconsistent but it's what this RFC seems to mandate. It's inconsistent
> because there is no such binding with regard to -ECDHE-RSA- or -DHE-RSA-
> cipher suites.

Both DHE and RSA are either supported or they are not. If they are not then 
the client should not include cipher suites that use these key exchange and/or 
authentication methods. ECDHE and ECDSA are somewhat different, the main reason 
being the use of named curves vs arbitrary curves. If named curves were not in 
use then more data has to be provided in the Server Key Exchange and it is 
more like a DHE key exchange (see RFC 4492 section 5.4 for more details if 
you're interested). The trade off is effectively providing EC parameters to the 
client versus requiring the client and server to support the same named 
curves.
 
> I've successfully tested a P-384-based certificate which allowed me to
> explicitly specify -groups secp384r1 for ECDHE on the client side.

Excellent.

> I think it would be beneficial to allow to explicitly specify multiple
> groups with the ecdhe statement in httpd.conf, and also to respect their
> order.

Absolutely - as noted in an earlier reply, this is what is planned.



Re: Add quirks to support M-Audio FastTrack Pro (uaudio)

2017-02-07 Thread Martin Pieuchot
On 03/02/17(Fri) 12:06, Christopher Zimmermann wrote:
> [...]
> > > > > @@ -444,6 +447,11 @@ uaudio_match(struct device *parent, void
> > > > >   if (uaa->iface == NULL || uaa->device == NULL)
> > > > >   return (UMATCH_NONE);
> > > > >  
> > > > > + if (uaa->vendor == USB_VENDOR_MAUDIO &&
> > > > > + uaa->product == USB_PRODUCT_MAUDIO_FASTTRACKPRO &&
> > > > > + uaa->configno != 2)
> > > > > + return UMATCH_NONE;
> > > > 
> > > > Why to you need this?  
> > > 
> > > This device exposes only a very limited set of features in
> > > configuration 1.  
> > 
> > But configuration 1 is midi not audio right?  So it won't match, so
> > why do you need that?
> 
> In config 1 the device exposes these ifaces:
> 0 audio control
> 1 midi
> 2 audio play
> 3 audio rec
> 
> in config 2 the device exposes these ifaces:
> 0 audio control
> 1 midi
> 2 audio analog out
> 3 audio analog / digital out
> 4 audio analog in (with mic preamps)
> 5 audio digital in
> 
> if any iface in config 1 is claimed the device cannot switch to config 2.

But do you still want umidi(4) to attach or not at all?

> > > > > + if (uaa->vendor == USB_VENDOR_MAUDIO &&
> > > > > + uaa->product == USB_PRODUCT_MAUDIO_FASTTRACKPRO &&
> > > > > + uaa->configno != 2)
> > > > >   return UMATCH_NONE;
> > > > 
> > > > I'd leave the configno check out and add a comment explaining why
> > > > we want to force this driver to attach to uaudio(4).  
> > > 
> > > See my comment above about the same piece of code in uaudio.c.
> > > If I don't add this condition umidi will attach to configuration 1
> > > and configuration 2 wouldn't be tried, uaudio wouldn't attach.  
> > 
> > Please re-read my comment.  I'm talking about the "configno" check.
> > Think about somebody reading your code in 5 years, it will just looks
> > like black magic.
> 
> What about adding this comment:
> 
> /*
>  * This combined audio / midi device exposes its
>  * full set of features only in configuration 2.
>  */

Or something along the way of:

 /*
  * As long as the USB stack does not support switching between multiple
  * configurations, only match the second one, it offers more features.
  */

> > What you're doing is working around our stupid USB detection mechanism
> > to use a specific configuration.  So you don't want your device to
> > attach to umidi(4) no matter with which configuration.
> > 
> > > + if (p->precision > 8)
> > > + /*
> > > +  * Don't search for matching encoding.
> > > +  * Just tell the upper layers which one we support.
> > > +  */
> > > + p->encoding = sc->sc_alts[i].encoding;  
> > 
> > Why do you need that?
> 
> - The audio device is big endian.
> - The uaudio driver was written for little endian devices.
> - The audio driver assumes _host_ endianness.
> - The uaudio driver will reject AUDIO_SETPAR (audio(4)) requests which don't
>   exactly match the device encoding. 
> - I change uaudio to communicate the supported endianness back to audio(4), so
>   that userspace will receive the supported parameters via AUDIO_GETPAR and
>   will use a supported encoding.

Ok I don't understand this magic, maybe ratchov@ can comment on it?  

> > > + else if (p->encoding != sc->sc_alts[i].encoding)
> > > + continue;
> > > +
> > >   alts_eh |= 1 << i;
> > >   DPRINTFN(6,("%s: matched %s alt %d for enc/pre\n",
> > > __func__, mode == AUMODE_RECORD ? "rec" : "play", i));  
> 
> 
> 
> -- 
> http://gmerlin.de
> OpenPGP: http://gmerlin.de/christopher.pub
> 2779 7F73 44FD 0736 B67A  C410 69EC 7922 34B4 2566




Re: PF & CPU hogging

2017-02-07 Thread Martin Pieuchot
On 07/02/17(Tue) 11:15, Mike Belopuhov wrote:
> On 7 February 2017 at 10:12, Martin Pieuchot  wrote:
> > On 06/02/17(Mon) 17:18, Mike Belopuhov wrote:
> >> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
> >> > PF has its own home-brewed solution for dealing with CPU hogging.  It
> >> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> >> > explanation why it is different than the idiom we use in other places.
> >> >
> >> > So let's use the same idiom, I promise to introduce a macro an unify all
> >> > of them once this is in.
> >> >
> >> > ok?
> >> >
> >>
> >> Why not replace YIELD with sched_pause() then?
> >
> > Because we use preempt() for userland processes and I don't want to
> > introduce a difference for this case.
> >
> > Why we do that is a different topic.
> 
> they look almost the same esp. with your upcoming change to preempt,
> but if you think this is better then go ahead, i'd say.

They look the same if you look at the code, not if you look at counters
and try to analyze what has been happening in the life of a process.



Re: PF & CPU hogging

2017-02-07 Thread Mike Belopuhov
On 7 February 2017 at 11:14, Martin Pieuchot  wrote:
> On 07/02/17(Tue) 11:03, Mike Belopuhov wrote:
>> On 7 February 2017 at 10:13, Martin Pieuchot  wrote:
>> > On 06/02/17(Mon) 17:19, Mike Belopuhov wrote:
>> >> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
>> >> > PF has its own home-brewed solution for dealing with CPU hogging.  It
>> >> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
>> >> > explanation why it is different than the idiom we use in other places.
>> >> >
>> >>
>> >> The idea was just to be able to load a huge table semi-efficiently.
>> >
>> > Can you explain what you mean with semi-efficiently?
>> >
>>
>> you'd yield too much as far as i understand.  try loading those
>> monster spamd tables on apu/soekris and see if there's a
>> noticeable slowdown.
>
> I'm not sure to understand what do you mean with "too much" nor in which
> context it applies.  With or without my diff?
>
> What I know is that my diff introduces fewer context switches, see:
> https://marc.info/?l=openbsd-bugs=148639395411957=2
>
> I also know that it kills some magic and makes kernel code consistent.
>
>> >> I don't think SPCF_SHOULDYIELD existed back then.
>> >
>> > cvs blame tells me otherwise.
>>
>> strange, but i can't rule that out.
>
> I still can't understand why a userland thread should yield after 1024
> insertions.  Maybe tedu@ can explain his original intention? IMHO a
> thread should yield if it hogs the CPU and that's exactly what my diff
> does.

i think it was just chosen empirically.  hogging cpu to load table data
into the kernel pf table was an acceptable behavior.  hogging too much
for too long wasn't. 1024 table entries is more than most of tables normal
setups have and the rest use huge ones from spamd.



Re: PF & CPU hogging

2017-02-07 Thread Martin Pieuchot
On 07/02/17(Tue) 11:03, Mike Belopuhov wrote:
> On 7 February 2017 at 10:13, Martin Pieuchot  wrote:
> > On 06/02/17(Mon) 17:19, Mike Belopuhov wrote:
> >> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
> >> > PF has its own home-brewed solution for dealing with CPU hogging.  It
> >> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> >> > explanation why it is different than the idiom we use in other places.
> >> >
> >>
> >> The idea was just to be able to load a huge table semi-efficiently.
> >
> > Can you explain what you mean with semi-efficiently?
> >
> 
> you'd yield too much as far as i understand.  try loading those
> monster spamd tables on apu/soekris and see if there's a
> noticeable slowdown.

I'm not sure to understand what do you mean with "too much" nor in which
context it applies.  With or without my diff?

What I know is that my diff introduces fewer context switches, see:
https://marc.info/?l=openbsd-bugs=148639395411957=2

I also know that it kills some magic and makes kernel code consistent.

> >> I don't think SPCF_SHOULDYIELD existed back then.
> >
> > cvs blame tells me otherwise.
> 
> strange, but i can't rule that out.

I still can't understand why a userland thread should yield after 1024
insertions.  Maybe tedu@ can explain his original intention? IMHO a
thread should yield if it hogs the CPU and that's exactly what my diff
does.



Re: PF & CPU hogging

2017-02-07 Thread Mike Belopuhov
On 7 February 2017 at 10:12, Martin Pieuchot  wrote:
> On 06/02/17(Mon) 17:18, Mike Belopuhov wrote:
>> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
>> > PF has its own home-brewed solution for dealing with CPU hogging.  It
>> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
>> > explanation why it is different than the idiom we use in other places.
>> >
>> > So let's use the same idiom, I promise to introduce a macro an unify all
>> > of them once this is in.
>> >
>> > ok?
>> >
>>
>> Why not replace YIELD with sched_pause() then?
>
> Because we use preempt() for userland processes and I don't want to
> introduce a difference for this case.
>
> Why we do that is a different topic.

they look almost the same esp. with your upcoming change to preempt,
but if you think this is better then go ahead, i'd say.



Re: PF & CPU hogging

2017-02-07 Thread Mike Belopuhov
On 7 February 2017 at 10:13, Martin Pieuchot  wrote:
> On 06/02/17(Mon) 17:19, Mike Belopuhov wrote:
>> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
>> > PF has its own home-brewed solution for dealing with CPU hogging.  It
>> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
>> > explanation why it is different than the idiom we use in other places.
>> >
>>
>> The idea was just to be able to load a huge table semi-efficiently.
>
> Can you explain what you mean with semi-efficiently?
>

you'd yield too much as far as i understand.  try loading those
monster spamd tables on apu/soekris and see if there's a
noticeable slowdown.

>> I don't think SPCF_SHOULDYIELD existed back then.
>
> cvs blame tells me otherwise.

strange, but i can't rule that out.



Re: Xorg vs wifi scanning

2017-02-07 Thread Paul Irofti
On Mon, Feb 06, 2017 at 03:54:56PM +0100, Martin Pieuchot wrote:
> On 06/02/17(Mon) 12:27, Martin Pieuchot wrote:
> > Paul and Antoine reported that, since the NET_LOCK() went in, doing a
> > channel scan with iwm(4) and iwn(4) freezes X.
> > 
> > This is a deadlock due to the fact that wireless drivers sleep during
> > a long time holding the NET_LOCK() while the X server tries to
> > communicate with unix sockets, that still need the NET_LOCK().
> > 
> > The obvious solution to this problem is to not hold the NET_LOCK() for
> > unix socket related operations.  We're working on that.
> > 
> > However since hardware ioctl(2) can sleep there's no problem to release
> > the NET_LOCK() there.  This is true for all ioctls that are not
> > modifying any stack state (address, multicast group, etc).  Hence the
> > diff below that should fix the problem in a generic way.
> 
> tb@ found that another code path needs to be unlocked.  Diff below does
> that.  These ioctl(2)s are only messing at the driver level, so it is
> safe to drop the NET_LOCK() there as well.
> 
> ok?

OK pirofti@. Thanks!

> 
> Index: netinet/in.c
> ===
> RCS file: /cvs/src/sys/netinet/in.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 in.c
> --- netinet/in.c  20 Dec 2016 12:35:38 -  1.133
> +++ netinet/in.c  6 Feb 2017 13:43:06 -
> @@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc
>   default:
>   if (ifp->if_ioctl == NULL)
>   return (EOPNOTSUPP);
> - return ((*ifp->if_ioctl)(ifp, cmd, data));
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
> + error = ((*ifp->if_ioctl)(ifp, cmd, data));
> + rw_enter_write();
> + return (error);
>   }
>   return (0);
>  }
> Index: netinet6/in6.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6.c,v
> retrieving revision 1.197
> diff -u -p -r1.197 in6.c
> --- netinet6/in6.c5 Feb 2017 16:04:14 -   1.197
> +++ netinet6/in6.c6 Feb 2017 13:43:06 -
> @@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   struct  in6_ifaddr *ia6 = NULL;
>   struct  in6_aliasreq *ifra = (struct in6_aliasreq *)data;
>   struct sockaddr_in6 *sa6;
> + int error;
>  
>   if (ifp == NULL)
>   return (EOPNOTSUPP);
> @@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   break;
>  
>   default:
> - if (ifp == NULL || ifp->if_ioctl == 0)
> + if (ifp->if_ioctl == NULL)
>   return (EOPNOTSUPP);
> - return ((*ifp->if_ioctl)(ifp, cmd, data));
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
> + error = ((*ifp->if_ioctl)(ifp, cmd, data));
> + rw_enter_write();
> + return (error);
>   }
>  
>   return (0);
> Index: dev/pci/if_iwm.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
> retrieving revision 1.162
> diff -u -p -r1.162 if_iwm.c
> --- dev/pci/if_iwm.c  4 Feb 2017 19:20:59 -   1.162
> +++ dev/pci/if_iwm.c  6 Feb 2017 14:52:24 -
> @@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
>   struct ifreq *ifr;
>   int s, err = 0;
>  
> - /* XXXSMP breaks atomicity */
> - rw_exit_write();
> -
>   /*
>* Prevent processes from entering this function while another
>* process is tsleep'ing in it.
>*/
>   err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR);
> - if (err) {
> - rw_enter_write();
> + if (err)
>   return err;
> - }
>  
>   s = splnet();
>  
> @@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
>  
>   splx(s);
>   rw_exit(>ioctl_rwl);
> - rw_enter_write();
>  
>   return err;
>  }
> Index: net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.485
> diff -u -p -r1.485 if.c
> --- net/if.c  1 Feb 2017 02:02:01 -   1.485
> +++ net/if.c  6 Feb 2017 14:27:47 -
> @@ -2029,7 +2029,10 @@ ifioctl(struct socket *so, u_long cmd, c
>   case SIOCGIFPARENT:
>   if (ifp->if_ioctl == 0)
>   return (EOPNOTSUPP);
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
>   error = (*ifp->if_ioctl)(ifp, cmd, data);
> + rw_enter_write();
>   break;
>  
>   case SIOCGIFDESCR:



Re: PF & CPU hogging

2017-02-07 Thread Martin Pieuchot
On 06/02/17(Mon) 17:19, Mike Belopuhov wrote:
> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
> > PF has its own home-brewed solution for dealing with CPU hogging.  It
> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> > explanation why it is different than the idiom we use in other places.
> >
> 
> The idea was just to be able to load a huge table semi-efficiently.

Can you explain what you mean with semi-efficiently?

> I don't think SPCF_SHOULDYIELD existed back then.

cvs blame tells me otherwise.



Re: PF & CPU hogging

2017-02-07 Thread Martin Pieuchot
On 06/02/17(Mon) 17:18, Mike Belopuhov wrote:
> On 6 February 2017 at 17:02, Martin Pieuchot  wrote:
> > PF has its own home-brewed solution for dealing with CPU hogging.  It
> > has been introduced in r1.88 of net/pf_table.c and I couldn't find any
> > explanation why it is different than the idiom we use in other places.
> >
> > So let's use the same idiom, I promise to introduce a macro an unify all
> > of them once this is in.
> >
> > ok?
> >
> 
> Why not replace YIELD with sched_pause() then?

Because we use preempt() for userland processes and I don't want to
introduce a difference for this case.

Why we do that is a different topic.