splnet() and SIOCSIFADDR

2014-09-03 Thread Martin Pieuchot
Drivers that need a splnet() protection inside their SIOCSIFADDR
generally raise the spl level themselves, so we should not need
to do that in in{6,}_ifinit().  One exception to this rule is,
as always, carp(4)...

So the diff below moves the spl dance inside carp's SIOCSIFADDR
handler, it's a baby step, so we can take care of carp_set_addr{6,}
later.

ok?

Index: netinet/in.c
===
RCS file: /home/ncvs/src/sys/netinet/in.c,v
retrieving revision 1.103
diff -u -p -r1.103 in.c
--- netinet/in.c3 Sep 2014 08:59:06 -   1.103
+++ netinet/in.c3 Sep 2014 13:08:43 -
@@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i
 {
u_int32_t i = sin-sin_addr.s_addr;
struct sockaddr_in oldaddr;
-   int s, error = 0;
+   int error = 0;
 
splsoftassert(IPL_SOFTNET);
 
@@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i
rt_ifa_delloop(ia-ia_ifa);
ifa_del(ifp, ia-ia_ifa);
}
-   s = splnet();
oldaddr = ia-ia_addr;
ia-ia_addr = *sin;
 
@@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i
if (ifp-if_ioctl 
(error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
ia-ia_addr = oldaddr;
-   splx(s);
goto out;
}
-   splx(s);
 
if (ia-ia_netmask == 0) {
if (IN_CLASSA(i))
Index: netinet/ip_carp.c
===
RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.233
diff -u -p -r1.233 ip_carp.c
--- netinet/ip_carp.c   22 Jul 2014 11:06:10 -  1.233
+++ netinet/ip_carp.c   3 Sep 2014 13:08:43 -
@@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
struct ifaddr *ifa = (struct ifaddr *)addr;
struct ifreq *ifr = (struct ifreq *)addr;
struct ifnet *cdev = NULL;
-   int i, error = 0;
+   int s, i, error = 0;
 
switch (cmd) {
case SIOCSIFADDR:
+   s = splnet();
switch (ifa-ifa_addr-sa_family) {
 #ifdef INET
case AF_INET:
@@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
break;
}
break;
+   splx(s);
 
case SIOCSIFFLAGS:
vhe = LIST_FIRST(sc-carp_vhosts);
Index: netinet6/in6.c
===
RCS file: /home/ncvs/src/sys/netinet6/in6.c,v
retrieving revision 1.140
diff -u -p -r1.140 in6.c
--- netinet6/in6.c  26 Aug 2014 21:44:29 -  1.140
+++ netinet6/in6.c  3 Sep 2014 13:08:43 -
@@ -1078,7 +1078,7 @@ in6_purgeaddr(struct ifaddr *ifa)
 void
 in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
 {
-   int s = splnet();
+   splsoftassert(IPL_SOFTNET);
 
ifa_del(ifp, ia6-ia_ifa);
 
@@ -1107,8 +1107,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s
 * Note that we should decrement the refcnt at least once for all *BSD.
 */
ifafree(ia6-ia_ifa);
-
-   splx(s);
 }
 
 /*
@@ -1355,9 +1353,10 @@ int
 in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost)
 {
int error = 0, plen, ifacount = 0;
-   int s = splnet();
struct ifaddr *ifa;
 
+   splsoftassert(IPL_SOFTNET);
+
/*
 * Give the interface a chance to initialize
 * if this is its first address (or it is a CARP interface)
@@ -1374,10 +1373,8 @@ in6_ifinit(struct ifnet *ifp, struct in6
if ((ifacount = 1 || ifp-if_type == IFT_CARP ||
(ifp-if_flags  IFF_POINTOPOINT))  ifp-if_ioctl 
(error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) {
-   splx(s);
return (error);
}
-   splx(s);
 
ia6-ia_ifa.ifa_metric = ifp-if_metric;
 



Re: splnet() and SIOCSIFADDR

2014-09-03 Thread Martin Pieuchot
On 03/09/14(Wed) 15:25, Martin Pieuchot wrote:
 Drivers that need a splnet() protection inside their SIOCSIFADDR
 generally raise the spl level themselves, so we should not need
 to do that in in{6,}_ifinit().  One exception to this rule is,
 as always, carp(4)...
 
 So the diff below moves the spl dance inside carp's SIOCSIFADDR
 handler, it's a baby step, so we can take care of carp_set_addr{6,}
 later.

mikeb@ pointed out that a spl dance was missing from the SIOCDIFADDR_IN6
ioctl, updated diff below also fixes that.

Index: netinet/in.c
===
RCS file: /home/ncvs/src/sys/netinet/in.c,v
retrieving revision 1.103
diff -u -p -r1.103 in.c
--- netinet/in.c3 Sep 2014 08:59:06 -   1.103
+++ netinet/in.c3 Sep 2014 13:52:41 -
@@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i
 {
u_int32_t i = sin-sin_addr.s_addr;
struct sockaddr_in oldaddr;
-   int s, error = 0;
+   int error = 0;
 
splsoftassert(IPL_SOFTNET);
 
@@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i
rt_ifa_delloop(ia-ia_ifa);
ifa_del(ifp, ia-ia_ifa);
}
-   s = splnet();
oldaddr = ia-ia_addr;
ia-ia_addr = *sin;
 
@@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i
if (ifp-if_ioctl 
(error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
ia-ia_addr = oldaddr;
-   splx(s);
goto out;
}
-   splx(s);
 
if (ia-ia_netmask == 0) {
if (IN_CLASSA(i))
Index: netinet/ip_carp.c
===
RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.233
diff -u -p -r1.233 ip_carp.c
--- netinet/ip_carp.c   22 Jul 2014 11:06:10 -  1.233
+++ netinet/ip_carp.c   3 Sep 2014 13:52:41 -
@@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
struct ifaddr *ifa = (struct ifaddr *)addr;
struct ifreq *ifr = (struct ifreq *)addr;
struct ifnet *cdev = NULL;
-   int i, error = 0;
+   int s, i, error = 0;
 
switch (cmd) {
case SIOCSIFADDR:
+   s = splnet();
switch (ifa-ifa_addr-sa_family) {
 #ifdef INET
case AF_INET:
@@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
break;
}
break;
+   splx(s);
 
case SIOCSIFFLAGS:
vhe = LIST_FIRST(sc-carp_vhosts);
Index: netinet6/in6.c
===
RCS file: /home/ncvs/src/sys/netinet6/in6.c,v
retrieving revision 1.140
diff -u -p -r1.140 in6.c
--- netinet6/in6.c  26 Aug 2014 21:44:29 -  1.140
+++ netinet6/in6.c  3 Sep 2014 13:52:41 -
@@ -172,7 +172,7 @@ in6_control(struct socket *so, u_long cm
struct  in6_ifaddr *ia6 = NULL;
struct  in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
-   int privileged;
+   int s, privileged;
 
privileged = 0;
if ((so-so_state  SS_PRIV) != 0)
@@ -463,7 +463,6 @@ in6_control(struct socket *so, u_long cm
{
int i, error = 0;
struct nd_prefix pr0, *pr;
-   int s;
 
/* reject read-only flags */
if ((ifra-ifra_flags  IN6_IFF_DUPLICATED) != 0 ||
@@ -561,8 +560,10 @@ in6_control(struct socket *so, u_long cm
}
 
case SIOCDIFADDR_IN6:
+   s = splsoftnet();
in6_purgeaddr(ia6-ia_ifa);
dohooks(ifp-if_addrhooks, 0);
+   splx(s);
break;
 
default:
@@ -1078,7 +1079,7 @@ in6_purgeaddr(struct ifaddr *ifa)
 void
 in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
 {
-   int s = splnet();
+   splsoftassert(IPL_SOFTNET);
 
ifa_del(ifp, ia6-ia_ifa);
 
@@ -1107,8 +1108,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s
 * Note that we should decrement the refcnt at least once for all *BSD.
 */
ifafree(ia6-ia_ifa);
-
-   splx(s);
 }
 
 /*
@@ -1355,9 +1354,10 @@ int
 in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost)
 {
int error = 0, plen, ifacount = 0;
-   int s = splnet();
struct ifaddr *ifa;
 
+   splsoftassert(IPL_SOFTNET);
+
/*
 * Give the interface a chance to initialize
 * if this is its first address (or it is a CARP interface)
@@ -1374,10 +1374,8 @@ in6_ifinit(struct ifnet *ifp, struct in6
if ((ifacount = 1 || ifp-if_type == IFT_CARP ||
(ifp-if_flags  IFF_POINTOPOINT))  ifp-if_ioctl 
(error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) {
-   splx(s);
return (error);
}
-   splx(s);
 
ia6-ia_ifa.ifa_metric = 

Re: splnet() and SIOCSIFADDR

2014-09-03 Thread Mike Belopuhov
On 3 September 2014 15:53, Martin Pieuchot mpieuc...@nolizard.org wrote:
 On 03/09/14(Wed) 15:25, Martin Pieuchot wrote:
 Drivers that need a splnet() protection inside their SIOCSIFADDR
 generally raise the spl level themselves, so we should not need
 to do that in in{6,}_ifinit().  One exception to this rule is,
 as always, carp(4)...

 So the diff below moves the spl dance inside carp's SIOCSIFADDR
 handler, it's a baby step, so we can take care of carp_set_addr{6,}
 later.

 mikeb@ pointed out that a spl dance was missing from the SIOCDIFADDR_IN6
 ioctl, updated diff below also fixes that.


i think we're ready to step on a carefully placed landmine. OK mikeb



Re: syslogd libevent

2014-09-03 Thread Alexander Bluhm
On Fri, Aug 29, 2014 at 11:25:52PM +0200, Alexander Bluhm wrote:
 So I will write more tests before committing this.

My regression tests found a bug in syslogd.  When adding the maximum
number of paths with the -a option, the arrays for unix domain
socket paths and the poll file descriptors overflow by one.

- increase pfd array by one
- operate on size of arrays not length
- null check for path not necessary when doing fd -1 check
- rename variables consistently ...unix
- make number of unix fds a local variable

ok?

bluhm

Index: usr.sbin/syslogd/privsep.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/privsep.c,v
retrieving revision 1.43
diff -u -p -r1.43 privsep.c
--- usr.sbin/syslogd/privsep.c  25 Aug 2014 20:19:14 -  1.43
+++ usr.sbin/syslogd/privsep.c  3 Sep 2014 16:08:03 -
@@ -172,7 +172,7 @@ priv_init(char *conf, int numeric, int l
close(socks[1]);
 
/* Close descriptors that only the unpriv child needs */
-   for (i = 0; i  nfunix; i++)
+   for (i = 0; i  MAXUNIX + 1; i++)
if (pfd[PFD_UNIX_0 + i].fd != -1)
close(pfd[PFD_UNIX_0 + i].fd);
if (pfd[PFD_INET].fd != -1)
@@ -369,9 +369,9 @@ priv_init(char *conf, int numeric, int l
close(socks[0]);
 
/* Unlink any domain sockets that have been opened */
-   for (i = 0; i  nfunix; i++)
-   if (funixn[i] != NULL  pfd[PFD_UNIX_0 + i].fd != -1)
-   (void)unlink(funixn[i]);
+   for (i = 0; i  MAXUNIX; i++)
+   if (pfd[PFD_UNIX_0 + i].fd != -1)
+   (void)unlink(path_unix[i]);
if (ctlsock_path != NULL  pfd[PFD_CTLSOCK].fd != -1)
(void)unlink(ctlsock_path);
 
Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.121
diff -u -p -r1.121 syslogd.c
--- usr.sbin/syslogd/syslogd.c  31 Aug 2014 22:11:43 -  1.121
+++ usr.sbin/syslogd/syslogd.c  3 Sep 2014 16:40:54 -
@@ -183,8 +183,7 @@ char*TypeNames[9] = {
 struct filed *Files;
 struct filed consfile;
 
-intnfunix = 1; /* Number of Unix domain sockets requested */
-char   *funixn[MAXFUNIX] = { _PATH_LOG }; /* Paths to Unix domain sockets */
+char   *path_unix[MAXUNIX] = { _PATH_LOG }; /* Paths to Unix domain sockets */
 intDebug;  /* debug flag */
 intStartup = 1;/* startup flag */
 char   LocalHostName[MAXHOSTNAMELEN];  /* our hostname */
@@ -282,7 +281,7 @@ voidlogto_ctlconn(char *);
 int
 main(int argc, char *argv[])
 {
-   int ch, i, linesize, fd;
+   int ch, i, linesize, fd, nunix = 1;
struct sockaddr_un fromunix;
struct sockaddr_storage from;
socklen_t len;
@@ -292,6 +291,9 @@ main(int argc, char *argv[])
struct addrinfo hints, *res, *res0;
FILE *fp;
 
+   for (i = 0; i  N_PFD; i++)
+   pfd[i].fd = -1;
+
while ((ch = getopt(argc, argv, 46dhnuf:m:p:a:s:)) != -1)
switch (ch) {
case '4':   /* disable IPv6 */
@@ -318,18 +320,18 @@ main(int argc, char *argv[])
NoDNS = 1;
break;
case 'p':   /* path */
-   funixn[0] = optarg;
+   path_unix[0] = optarg;
break;
case 'u':   /* allow udp input port */
SecureMode = 0;
break;
case 'a':
-   if (nfunix = MAXFUNIX)
+   if (nunix = MAXUNIX)
fprintf(stderr, syslogd: 
out of descriptors, ignoring %s\n,
optarg);
else
-   funixn[nfunix++] = optarg;
+   path_unix[nunix++] = optarg;
break;
case 's':
ctlsock_path = optarg;
@@ -439,8 +441,8 @@ main(int argc, char *argv[])
 #ifndef SUN_LEN
 #define SUN_LEN(unp) (strlen((unp)-sun_path) + 2)
 #endif
-   for (i = 0; i  nfunix; i++) {
-   if ((fd = unix_socket(funixn[i], SOCK_DGRAM, 0666)) == -1) {
+   for (i = 0; i  nunix; i++) {
+   if ((fd = unix_socket(path_unix[i], SOCK_DGRAM, 0666)) == -1) {
if (i == 0  !Debug)
die(0);
continue;
@@ -450,7 +452,7 @@ main(int argc, char *argv[])
pfd[PFD_UNIX_0 + i].events = POLLIN;
}
 
-   nfunix++;
+   nunix++;
if (socketpair(AF_UNIX, SOCK_DGRAM, PF_UNSPEC, pair) == -1)
die(0);
fd = pair[0];
@@ -575,7 +577,7 @@ main(int 

Re: minphys woes

2014-09-03 Thread Stefan Fritsch
On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote:
  From physio(9):
   minphys
   A device specific routine called to determine the
 maximum transfer size that the device's strategy routine can
 handle.
 
  Since we have seen that the driver must be able to handle 64k
  blocks in any case, the fact that minphys is device specific is
  useless, isn't it?
 
 physio() is used by character device access.  Looks to me like
 sdminphys() will change the chunking behavior of this:
 dd if=/dev/zero of=/dev/rsd0a bs=100M count=1
 
 depending on whether sd0 is a SCSI-I device, no?

Yes, but that does not make it any less useless. File systems will 
call the very same strategy routine and expect it to deal with 64K 
blocks. The statement in the man page gives the misleading impression 
that the strategy routine could be used to avoid that the strategy 
routine has to handle 64K blocks, which is not true.

So, maybe the minphys mechanism is useful for block devices that are 
not used for file systems. Are there any such devices?

Maybe we could add a sentence to the man page like following?

Beware that a block device's strategy routine that is used for file 
systems must always be able to accept transfers sizes up to MAXPHYS.



Re: splnet() and SIOCSIFADDR

2014-09-03 Thread Alexander Bluhm
On Wed, Sep 03, 2014 at 03:53:34PM +0200, Martin Pieuchot wrote:
 @@ -1078,7 +1079,7 @@ in6_purgeaddr(struct ifaddr *ifa)
  void
  in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
  {
 - int s = splnet();
 + splsoftassert(IPL_SOFTNET);
  
   ifa_del(ifp, ia6-ia_ifa);
  

I think there are code paths that can trigger this assertion

netinet6/in6.c: in6_unlink_ifa()
netinet6/in6.c: in6_purgeaddr()
netinet6/nd6_rtr.c: purge_detached()
netinet6/nd6_rtr.c: nd6_prelist_add()
netinet6/in6.c: in6_control()
netinet/tcp_usrreq.c:   tcp_usrreq()
kern/sys_socket.c:  soo_ioctl()

netinet6/in6.c: in6_unlink_ifa()
netinet6/in6.c: in6_purgeaddr()
netinet6/nd6_rtr.c: purge_detached()
netinet6/nd6_rtr.c: nd6_prelist_add()
netinet6/in6_ifattach.c:in6_ifattach_linklocal()
netinet/ip_carp.c   carp_set_enaddr()
netinet/ip_carp.c   carp_ioctl()
...

nd6_prelist_add() does some splsoftnet() already.  I think you
should put one around purge_detached() there.

bluhm



Re: syslogd libevent handler

2014-09-03 Thread Alexander Bluhm
On Sun, Aug 31, 2014 at 10:46:50PM +0200, Alexander Bluhm wrote:
 On Fri, Aug 29, 2014 at 11:25:52PM +0200, Alexander Bluhm wrote:
  I will try to pull parts of the diff into separate changes to
  make review easier.
 
 Move the handlers for the poll events into separate functions.  They
 will become the libevent callbacks later.
 
 ok?

anyone?

 bluhm
 
 Index: usr.sbin/syslogd/syslogd.c
 ===
 RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
 retrieving revision 1.119
 diff -u -p -r1.119 syslogd.c
 --- usr.sbin/syslogd/syslogd.c25 Aug 2014 18:19:18 -  1.119
 +++ usr.sbin/syslogd/syslogd.c31 Aug 2014 20:34:01 -
 @@ -245,6 +245,13 @@ char *reply_text;/* Start of reply tex
  size_t   ctl_reply_size = 0; /* Number of bytes used in reply */
  size_t   ctl_reply_offset = 0;   /* Number of bytes of reply written so 
 far */
  
 +char *linebuf;
 +int   linesize;
 +
 +void  klog_read_handler(int);
 +void  udp_read_handler(int);
 +void  unix_read_handler(int);
 +
  struct pollfd pfd[N_PFD];
  
  volatile sig_atomic_t MarkSet;
 @@ -283,12 +290,8 @@ void logto_ctlconn(char *);
  int
  main(int argc, char *argv[])
  {
 - int ch, i, linesize, fd;
 - struct sockaddr_un fromunix;
 - struct sockaddr_storage from;
 - socklen_t len;
 - char *p, *line;
 - char resolve[MAXHOSTNAMELEN];
 + int ch, i, fd;
 + char *p;
   int lockpipe[2] = { -1, -1}, pair[2], nullfd;
   struct addrinfo hints, *res, *res0;
   FILE *fp;
 @@ -368,7 +371,7 @@ main(int argc, char *argv[])
   if (linesize  MAXLINE)
   linesize = MAXLINE;
   linesize++;
 - if ((line = malloc(linesize)) == NULL) {
 + if ((linebuf = malloc(linesize)) == NULL) {
   logerror(Couldn't allocate line buffer);
   die(0);
   }
 @@ -586,41 +589,13 @@ main(int argc, char *argv[])
   }
  
   if ((pfd[PFD_KLOG].revents  POLLIN) != 0) {
 - i = read(pfd[PFD_KLOG].fd, line, linesize - 1);
 - if (i  0) {
 - line[i] = '\0';
 - printsys(line);
 - } else if (i  0  errno != EINTR) {
 - logerror(klog);
 - pfd[PFD_KLOG].fd = -1;
 - pfd[PFD_KLOG].events = 0;
 - }
 + klog_read_handler(pfd[PFD_KLOG].fd);
   }
   if ((pfd[PFD_INET].revents  POLLIN) != 0) {
 - len = sizeof(from);
 - i = recvfrom(pfd[PFD_INET].fd, line, MAXLINE, 0,
 - (struct sockaddr *)from, len);
 - if (i  0) {
 - line[i] = '\0';
 - cvthname((struct sockaddr *)from, resolve,
 - sizeof(resolve));
 - dprintf(cvthname res: %s\n, resolve);
 - printline(resolve, line);
 - } else if (i  0  errno != EINTR)
 - logerror(recvfrom inet);
 + udp_read_handler(pfd[PFD_INET].fd);
   }
   if ((pfd[PFD_INET6].revents  POLLIN) != 0) {
 - len = sizeof(from);
 - i = recvfrom(pfd[PFD_INET6].fd, line, MAXLINE, 0,
 - (struct sockaddr *)from, len);
 - if (i  0) {
 - line[i] = '\0';
 - cvthname((struct sockaddr *)from, resolve,
 - sizeof(resolve));
 - dprintf(cvthname res: %s\n, resolve);
 - printline(resolve, line);
 - } else if (i  0  errno != EINTR)
 - logerror(recvfrom inet6);
 + udp_read_handler(pfd[PFD_INET6].fd);
   }
   if ((pfd[PFD_CTLSOCK].revents  POLLIN) != 0)
   ctlsock_accept_handler();
 @@ -631,23 +606,65 @@ main(int argc, char *argv[])
  
   for (i = 0; i  nfunix; i++) {
   if ((pfd[PFD_UNIX_0 + i].revents  POLLIN) != 0) {
 - ssize_t rlen;
 -
 - len = sizeof(fromunix);
 - rlen = recvfrom(pfd[PFD_UNIX_0 + i].fd, line,
 - MAXLINE, 0, (struct sockaddr *)fromunix,
 - len);
 - if (rlen  0) {
 - line[rlen] = '\0';
 - printline(LocalHostName, line);
 - } else if (rlen == -1  errno != EINTR)
 - logerror(recvfrom unix);
 +   

Re: minphys woes

2014-09-03 Thread Claudio Jeker
On Wed, Sep 03, 2014 at 07:51:25PM +0200, Stefan Fritsch wrote:
 On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote:
   From physio(9):
minphys
A device specific routine called to determine the
  maximum transfer size that the device's strategy routine can
  handle.
  
   Since we have seen that the driver must be able to handle 64k
   blocks in any case, the fact that minphys is device specific is
   useless, isn't it?
  
  physio() is used by character device access.  Looks to me like
  sdminphys() will change the chunking behavior of this:
  dd if=/dev/zero of=/dev/rsd0a bs=100M count=1
  
  depending on whether sd0 is a SCSI-I device, no?
 
 Yes, but that does not make it any less useless. File systems will 
 call the very same strategy routine and expect it to deal with 64K 
 blocks. The statement in the man page gives the misleading impression 
 that the strategy routine could be used to avoid that the strategy 
 routine has to handle 64K blocks, which is not true.
 
 So, maybe the minphys mechanism is useful for block devices that are 
 not used for file systems. Are there any such devices?
 
 Maybe we could add a sentence to the man page like following?
 
 Beware that a block device's strategy routine that is used for file 
 systems must always be able to accept transfers sizes up to MAXPHYS.
 

Maybe it is time to fix the block side and make sure that it respects
minphys as well by chopping up the IO. This would also allow to buffer
bigger blocks than MAXPHYS in the buffer cache (not sure if this is
something we want though).

-- 
:wq Claudio



Re: splnet() and SIOCSIFADDR

2014-09-03 Thread Claudio Jeker
On Wed, Sep 03, 2014 at 03:25:34PM +0200, Martin Pieuchot wrote:
 Drivers that need a splnet() protection inside their SIOCSIFADDR
 generally raise the spl level themselves, so we should not need
 to do that in in{6,}_ifinit().  One exception to this rule is,
 as always, carp(4)...
 
 So the diff below moves the spl dance inside carp's SIOCSIFADDR
 handler, it's a baby step, so we can take care of carp_set_addr{6,}
 later.

Hmm. My gut feeling is that this is scary. Calling ifp functions at
non-splnet is always a cause for troubles (e.g. the watchdog callbacks
miss often the needed splnet calls).
I think the ioctl code pathes in the various places may suffer from
similar problems with missing splnet() calls. Guess a bit of an audit is
needed in that area.

Something else I wonder is if splsoftnet() is good enough when dealing
with ifp in general (aren't we detaching interfaces at splnet())?
Removable interfaces opened for sure a bucket sized can of worms.

 ok?
 
 Index: netinet/in.c
 ===
 RCS file: /home/ncvs/src/sys/netinet/in.c,v
 retrieving revision 1.103
 diff -u -p -r1.103 in.c
 --- netinet/in.c  3 Sep 2014 08:59:06 -   1.103
 +++ netinet/in.c  3 Sep 2014 13:08:43 -
 @@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i
  {
   u_int32_t i = sin-sin_addr.s_addr;
   struct sockaddr_in oldaddr;
 - int s, error = 0;
 + int error = 0;
  
   splsoftassert(IPL_SOFTNET);
  
 @@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i
   rt_ifa_delloop(ia-ia_ifa);
   ifa_del(ifp, ia-ia_ifa);
   }
 - s = splnet();
   oldaddr = ia-ia_addr;
   ia-ia_addr = *sin;
  
 @@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i
   if (ifp-if_ioctl 
   (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) {
   ia-ia_addr = oldaddr;
 - splx(s);
   goto out;
   }
 - splx(s);
  
   if (ia-ia_netmask == 0) {
   if (IN_CLASSA(i))
 Index: netinet/ip_carp.c
 ===
 RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v
 retrieving revision 1.233
 diff -u -p -r1.233 ip_carp.c
 --- netinet/ip_carp.c 22 Jul 2014 11:06:10 -  1.233
 +++ netinet/ip_carp.c 3 Sep 2014 13:08:43 -
 @@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
   struct ifaddr *ifa = (struct ifaddr *)addr;
   struct ifreq *ifr = (struct ifreq *)addr;
   struct ifnet *cdev = NULL;
 - int i, error = 0;
 + int s, i, error = 0;
  
   switch (cmd) {
   case SIOCSIFADDR:
 + s = splnet();
   switch (ifa-ifa_addr-sa_family) {
  #ifdef INET
   case AF_INET:
 @@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd
   break;
   }
   break;
 + splx(s);
  
   case SIOCSIFFLAGS:
   vhe = LIST_FIRST(sc-carp_vhosts);
 Index: netinet6/in6.c
 ===
 RCS file: /home/ncvs/src/sys/netinet6/in6.c,v
 retrieving revision 1.140
 diff -u -p -r1.140 in6.c
 --- netinet6/in6.c26 Aug 2014 21:44:29 -  1.140
 +++ netinet6/in6.c3 Sep 2014 13:08:43 -
 @@ -1078,7 +1078,7 @@ in6_purgeaddr(struct ifaddr *ifa)
  void
  in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp)
  {
 - int s = splnet();
 + splsoftassert(IPL_SOFTNET);
  
   ifa_del(ifp, ia6-ia_ifa);
  
 @@ -1107,8 +1107,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s
* Note that we should decrement the refcnt at least once for all *BSD.
*/
   ifafree(ia6-ia_ifa);
 -
 - splx(s);
  }
  
  /*
 @@ -1355,9 +1353,10 @@ int
  in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost)
  {
   int error = 0, plen, ifacount = 0;
 - int s = splnet();
   struct ifaddr *ifa;
  
 + splsoftassert(IPL_SOFTNET);
 +
   /*
* Give the interface a chance to initialize
* if this is its first address (or it is a CARP interface)
 @@ -1374,10 +1373,8 @@ in6_ifinit(struct ifnet *ifp, struct in6
   if ((ifacount = 1 || ifp-if_type == IFT_CARP ||
   (ifp-if_flags  IFF_POINTOPOINT))  ifp-if_ioctl 
   (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) {
 - splx(s);
   return (error);
   }
 - splx(s);
  
   ia6-ia_ifa.ifa_metric = ifp-if_metric;
  
 

-- 
:wq Claudio



Re: minphys woes

2014-09-03 Thread Philip Guenther
On Wed, Sep 3, 2014 at 10:51 AM, Stefan Fritsch s...@sfritsch.de wrote:
 On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote:
  From physio(9):
   minphys
   A device specific routine called to determine the
 maximum transfer size that the device's strategy routine can
 handle.
 
  Since we have seen that the driver must be able to handle 64k
  blocks in any case, the fact that minphys is device specific is
  useless, isn't it?

 physio() is used by character device access.  Looks to me like
 sdminphys() will change the chunking behavior of this:
 dd if=/dev/zero of=/dev/rsd0a bs=100M count=1

 depending on whether sd0 is a SCSI-I device, no?

 Yes, but that does not make it any less useless. File systems will
 call the very same strategy routine and expect it to deal with 64K
 blocks. The statement in the man page gives the misleading impression
 that the [minphys] routine could be used to avoid that the strategy
 routine has to handle 64K blocks, which is not true.

We don't seem to have a manpage describing the routines that a block
device must implement and their requirements, nor the same for
character devices.  Those would be nice additions to our section 9
manpages.

This requirement on a block device's strategy routine comes from the
filesystem layer.  These *do not use physio()*, so how does it make
sense to document that requirement in physio(9)?  That manpage doesn't
even contain the word block!


 So, maybe the minphys mechanism is useful for block devices that are
 not used for file systems. Are there any such devices?

The current minphys mechanism *should* be useful for devices which do
not store file systems with a block size greater than the maximum I/O
size of the device.  IMO, if a filesystem is configured with block
size less than the device's minphys, then it should not be doing
breads of more than that block size.  If that's the case, then
manually setting the file system block size to less than the device's
minphys should let you create a filesystem on that device.

(Creating/using a filesystem with block size greater the device
minphys seems like a Bad Idea for performance and possibly correctness
reasons.  It's like the disk I/O version of IP fragmentation...)


To go back to a previous email, you wrote:
 But what is really strange is, why does openbsd then have
 an infrastructure to set different max transfer sizes for physio on a
 per-adapter basis? This makes no sense. Either the drivers have to support
 64k transfers, in which case most of the minphys infrastructure is
 useless, or they don't have to. In the latter case the minphys
 infrastructure would need to be used in all code paths.

The answer to this question can be found by reading cvs logs and
diffs: MAXBSIZE used to be strictly less than MAXPHYS and less than or
equal to the smallest minphys of a device supported on the given
architecture.  MAXBSIZE was MD then, and also apparently reflected
limitations in the pmap; i386 pmap bugs delayed it from going to 64k
MAXBSIZE until after all the others did.

Why bother with permitting character device access in chunks greater
than MAXBSIZE?  Probably because some tools (newfs, dump, fsck) had
significant performance gains by doing I/O in the largest chunk
supported by the device, irrespective of the limitations of the pmap,
buffer cache, or filesystems.  Oh, and tapes really wanted larger I/Os
so that they could stream, IIRC.


IMO, the problem that you're hitting with your vioblk device isn't a
problem with MAXPHYS, physio(), or minphys, but rather with MAXBSIZE.
Back in the 1990's, the fact that you want to be able to configure a
device with minphys 20kB on your arch means that the MD MAXBSIZE
define for your arch should be 20kB.

If MAXBSIZE could be set/limited on a per-device basis (i.e., the code
using it queried the device) then your problem would be resolved, no?


Philip Guenther



Re: Refactoring process-local file descriptor data

2014-09-03 Thread Kent R. Spillner
Need to re-roll for -current?

/usr/src/sys/kern/kern_descrip.c: In function 'dodup3':
/usr/src/sys/kern/kern_descrip.c:312: error: 'struct filedesc' has no member 
named 'fd_ofileflags'
cc  -Werror -Wall -Wstrict-prototypes -Wmissing-prototypes  -Wno-main 
-Wno-uninitialized  -Wstack-larger-than-2047 -mcmodel=kernel -mno-red-zone 
-mno-sse2 -mno-sse -mno-3dnow  -mno-mmx -msoft-float -fno-omit-frame-pointer 
-fno-builtin-printf -fno-builtin-snprintf  -fno-builtin-vsnprintf 
-fno-builtin-log  -fno-builtin-log2 -fno-builtin-malloc -fno-pie -O2 -pipe 
-nostdinc -I/usr/src/sys -I. -I/usr/src/sys/arch -DDDB -DDIAGNOSTIC -DKTRACE 
-DACCOUNTING -DKMEMSTATS -DPTRACE -DPOOL_DEBUG -DCRYPTO -DSYSVMSG -DSYSVSEM 
-DSYSVSHM -DUVM_SWAP_ENCRYPT -DCOMPAT_43 -DLKM -DFFS -DFFS2 -DFFS_SOFTUPDATES 
-DUFS_DIRHASH -DQUOTA -DEXT2FS -DMFS -DNFSCLIENT -DNFSSERVER -DCD9660 -DUDF 
-DMSDOSFS -DFIFO -DTMPFS -DFUSE -DSOCKET_SPLICE -DTCP_SACK -DTCP_ECN 
-DTCP_SIGNATURE -DINET -DINET6 -DIPSEC -DPPP_BSDCOMP -DPPP_DEFLATE -DPIPEX 
-DMROUTING -DMPLS -DBOOT_CONFIG -DUSER_PCICONF -DAPERTURE -DMTRR -DNTFS 
-DHIBERNATE -DPCIVERBOSE -DUSBVERBOSE -DWSDISPLAY_COMPAT_USL 
-DWSDISPLAY_COMPAT_RAWKBD -DWSDISPLAY_DEFA!
 ULTSCREENS=6 -DWSDISPLAY_COMPAT_PCVT -DX86EMU -DONEWIREVERBOSE 
-DMULTIPROCESSOR -DMAXUSERS=80 -D_KERNEL -MD -MP  -c 
/usr/src/sys/kern/kern_rwlock.c
cc  -Werror -Wall -Wstrict-prototypes -Wmissing-prototypes  -Wno-main 
-Wno-uninitialized  -Wstack-larger-than-2047 -mcmodel=kernel -mno-red-zone 
-mno-sse2 -mno-sse -mno-3dnow  -mno-mmx -msoft-float -fno-omit-frame-pointer 
-fno-builtin-printf -fno-builtin-snprintf  -fno-builtin-vsnprintf 
-fno-builtin-log  -fno-builtin-log2 -fno-builtin-malloc -fno-pie -O2 -pipe 
-nostdinc -I/usr/src/sys -I. -I/usr/src/sys/arch -DDDB -DDIAGNOSTIC -DKTRACE 
-DACCOUNTING -DKMEMSTATS -DPTRACE -DPOOL_DEBUG -DCRYPTO -DSYSVMSG -DSYSVSEM 
-DSYSVSHM -DUVM_SWAP_ENCRYPT -DCOMPAT_43 -DLKM -DFFS -DFFS2 -DFFS_SOFTUPDATES 
-DUFS_DIRHASH -DQUOTA -DEXT2FS -DMFS -DNFSCLIENT -DNFSSERVER -DCD9660 -DUDF 
-DMSDOSFS -DFIFO -DTMPFS -DFUSE -DSOCKET_SPLICE -DTCP_SACK -DTCP_ECN 
-DTCP_SIGNATURE -DINET -DINET6 -DIPSEC -DPPP_BSDCOMP -DPPP_DEFLATE -DPIPEX 
-DMROUTING -DMPLS -DBOOT_CONFIG -DUSER_PCICONF -DAPERTURE -DMTRR -DNTFS 
-DHIBERNATE -DPCIVERBOSE -DUSBVERBOSE -DWSDISPLAY_COMPAT_USL 
-DWSDISPLAY_COMPAT_RAWKBD -DWSDISPLAY_DEFA!
 ULTSCREENS=6 -DWSDISPLAY_COMPAT_PCVT -DX86EMU -DONEWIREVERBOSE 
-DMULTIPROCESSOR -DMAXUSERS=80 -D_KERNEL -MD -MP  -c 
/usr/src/sys/kern/kern_physio.c
*** Error 1 in target 'kern_descrip.o'
*** Error 1 in /home/sl4mmy/kernel (Makefile:931 'kern_descrip.o')


On Mon, Aug 11, 2014 at 12:32:24AM -0400, Jean-Philippe Ouellet wrote:
 Ping?
 
 On Sun, Jul 13, 2014 at 03:45:44PM -0400, Jean-Philippe Ouellet wrote:
  Updated for mallocarray() and free(size).
  
  On Thu, Jul 10, 2014 at 04:13:38PM -0400, Jean-Philippe Ouellet wrote:
   This diff adds another struct between filedesc and file to store
   process-local per-descriptor information. Currently, the only thing in
   this struct is the file pointer and some flags, however I have another
   patch on top of this that adds capsicum capabilities to it. (And
   another that uses mallocarray(9) where applicable. One thing at a time.)
   
   The data is currently stored in one big allocation with all the file *s
   in front, and all the flags after. When that allocation grows, there are
   two copies and two zeroings. This is fine when we only have two fields
   (fd_ofiles and fd_ofileflags), but adding more is ugly.
   
   Instead, I propose we have a struct for each file descriptor table entry
   (filedescent) and put the files, flags, and capabilities in that.
   
   The immediate impact is some memory waste due to padding (3 or 7 bytes
   per file descriptor, depending on the arch), however once capabilities
   are added it will be negligible. I discussed it with matthew@ and
   guenther@ and concluded this was the saner way. For what it's worth,
   FreeBSD and NetBSD have done the same thing.
   
   I've been running different versions of this for about a month or so,
   no issues noticed so far.
   
   A similar earlier version was reviewed by matthew@ and guenther@.
 
 
 Index: sys/filedesc.h
 ===
 RCS file: /cvs/src/sys/sys/filedesc.h,v
 retrieving revision 1.28
 diff -u -p -r1.28 filedesc.h
 --- sys/filedesc.h15 May 2014 03:52:25 -  1.28
 +++ sys/filedesc.h13 Jul 2014 19:40:01 -
 @@ -34,9 +34,6 @@
  
  #include sys/rwlock.h
  /*
 - * This structure is used for the management of descriptors.  It may be
 - * shared by multiple processes.
 - *
   * A process is initially started out with NDFILE descriptors stored within
   * this structure, selected to be enough for typical applications based on
   * the historical limit of 20 open files (and the usage of descriptors by
 @@ -56,22 +53,33 @@
  #define NDHISLOTS(x) (NDREDUCE(NDREDUCE(x)))
  #define NDLOSLOTS(x) (NDHISLOTS(x)  

Re: syslogd libevent handler

2014-09-03 Thread Doug Hogan
On Sun, Aug 31, 2014 at 10:46:50PM +0200, Alexander Bluhm wrote:
 Move the handlers for the poll events into separate functions.  They
 will become the libevent callbacks later.
...

 @@ -631,23 +606,65 @@ main(int argc, char *argv[])
  
   for (i = 0; i  nfunix; i++) {
   if ((pfd[PFD_UNIX_0 + i].revents  POLLIN) != 0) {
 - ssize_t rlen;
 -
 - len = sizeof(fromunix);
 - rlen = recvfrom(pfd[PFD_UNIX_0 + i].fd, line,
 - MAXLINE, 0, (struct sockaddr *)fromunix,
 - len);
 - if (rlen  0) {
 - line[rlen] = '\0';
 - printline(LocalHostName, line);
 - } else if (rlen == -1  errno != EINTR)
 - logerror(recvfrom unix);
 + udp_read_handler(pfd[PFD_UNIX_0 + i].fd);
...

Shouldn't this be a call to unix_read_handler() instead of
udp_read_handler()?