splnet() and SIOCSIFADDR
Drivers that need a splnet() protection inside their SIOCSIFADDR generally raise the spl level themselves, so we should not need to do that in in{6,}_ifinit(). One exception to this rule is, as always, carp(4)... So the diff below moves the spl dance inside carp's SIOCSIFADDR handler, it's a baby step, so we can take care of carp_set_addr{6,} later. ok? Index: netinet/in.c === RCS file: /home/ncvs/src/sys/netinet/in.c,v retrieving revision 1.103 diff -u -p -r1.103 in.c --- netinet/in.c3 Sep 2014 08:59:06 - 1.103 +++ netinet/in.c3 Sep 2014 13:08:43 - @@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i { u_int32_t i = sin-sin_addr.s_addr; struct sockaddr_in oldaddr; - int s, error = 0; + int error = 0; splsoftassert(IPL_SOFTNET); @@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i rt_ifa_delloop(ia-ia_ifa); ifa_del(ifp, ia-ia_ifa); } - s = splnet(); oldaddr = ia-ia_addr; ia-ia_addr = *sin; @@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i if (ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) { ia-ia_addr = oldaddr; - splx(s); goto out; } - splx(s); if (ia-ia_netmask == 0) { if (IN_CLASSA(i)) Index: netinet/ip_carp.c === RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v retrieving revision 1.233 diff -u -p -r1.233 ip_carp.c --- netinet/ip_carp.c 22 Jul 2014 11:06:10 - 1.233 +++ netinet/ip_carp.c 3 Sep 2014 13:08:43 - @@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd struct ifaddr *ifa = (struct ifaddr *)addr; struct ifreq *ifr = (struct ifreq *)addr; struct ifnet *cdev = NULL; - int i, error = 0; + int s, i, error = 0; switch (cmd) { case SIOCSIFADDR: + s = splnet(); switch (ifa-ifa_addr-sa_family) { #ifdef INET case AF_INET: @@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd break; } break; + splx(s); case SIOCSIFFLAGS: vhe = LIST_FIRST(sc-carp_vhosts); Index: netinet6/in6.c === RCS file: /home/ncvs/src/sys/netinet6/in6.c,v retrieving revision 1.140 diff -u -p -r1.140 in6.c --- netinet6/in6.c 26 Aug 2014 21:44:29 - 1.140 +++ netinet6/in6.c 3 Sep 2014 13:08:43 - @@ -1078,7 +1078,7 @@ in6_purgeaddr(struct ifaddr *ifa) void in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp) { - int s = splnet(); + splsoftassert(IPL_SOFTNET); ifa_del(ifp, ia6-ia_ifa); @@ -1107,8 +1107,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s * Note that we should decrement the refcnt at least once for all *BSD. */ ifafree(ia6-ia_ifa); - - splx(s); } /* @@ -1355,9 +1353,10 @@ int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost) { int error = 0, plen, ifacount = 0; - int s = splnet(); struct ifaddr *ifa; + splsoftassert(IPL_SOFTNET); + /* * Give the interface a chance to initialize * if this is its first address (or it is a CARP interface) @@ -1374,10 +1373,8 @@ in6_ifinit(struct ifnet *ifp, struct in6 if ((ifacount = 1 || ifp-if_type == IFT_CARP || (ifp-if_flags IFF_POINTOPOINT)) ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) { - splx(s); return (error); } - splx(s); ia6-ia_ifa.ifa_metric = ifp-if_metric;
Re: splnet() and SIOCSIFADDR
On 03/09/14(Wed) 15:25, Martin Pieuchot wrote: Drivers that need a splnet() protection inside their SIOCSIFADDR generally raise the spl level themselves, so we should not need to do that in in{6,}_ifinit(). One exception to this rule is, as always, carp(4)... So the diff below moves the spl dance inside carp's SIOCSIFADDR handler, it's a baby step, so we can take care of carp_set_addr{6,} later. mikeb@ pointed out that a spl dance was missing from the SIOCDIFADDR_IN6 ioctl, updated diff below also fixes that. Index: netinet/in.c === RCS file: /home/ncvs/src/sys/netinet/in.c,v retrieving revision 1.103 diff -u -p -r1.103 in.c --- netinet/in.c3 Sep 2014 08:59:06 - 1.103 +++ netinet/in.c3 Sep 2014 13:52:41 - @@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i { u_int32_t i = sin-sin_addr.s_addr; struct sockaddr_in oldaddr; - int s, error = 0; + int error = 0; splsoftassert(IPL_SOFTNET); @@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i rt_ifa_delloop(ia-ia_ifa); ifa_del(ifp, ia-ia_ifa); } - s = splnet(); oldaddr = ia-ia_addr; ia-ia_addr = *sin; @@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i if (ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) { ia-ia_addr = oldaddr; - splx(s); goto out; } - splx(s); if (ia-ia_netmask == 0) { if (IN_CLASSA(i)) Index: netinet/ip_carp.c === RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v retrieving revision 1.233 diff -u -p -r1.233 ip_carp.c --- netinet/ip_carp.c 22 Jul 2014 11:06:10 - 1.233 +++ netinet/ip_carp.c 3 Sep 2014 13:52:41 - @@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd struct ifaddr *ifa = (struct ifaddr *)addr; struct ifreq *ifr = (struct ifreq *)addr; struct ifnet *cdev = NULL; - int i, error = 0; + int s, i, error = 0; switch (cmd) { case SIOCSIFADDR: + s = splnet(); switch (ifa-ifa_addr-sa_family) { #ifdef INET case AF_INET: @@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd break; } break; + splx(s); case SIOCSIFFLAGS: vhe = LIST_FIRST(sc-carp_vhosts); Index: netinet6/in6.c === RCS file: /home/ncvs/src/sys/netinet6/in6.c,v retrieving revision 1.140 diff -u -p -r1.140 in6.c --- netinet6/in6.c 26 Aug 2014 21:44:29 - 1.140 +++ netinet6/in6.c 3 Sep 2014 13:52:41 - @@ -172,7 +172,7 @@ in6_control(struct socket *so, u_long cm struct in6_ifaddr *ia6 = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; - int privileged; + int s, privileged; privileged = 0; if ((so-so_state SS_PRIV) != 0) @@ -463,7 +463,6 @@ in6_control(struct socket *so, u_long cm { int i, error = 0; struct nd_prefix pr0, *pr; - int s; /* reject read-only flags */ if ((ifra-ifra_flags IN6_IFF_DUPLICATED) != 0 || @@ -561,8 +560,10 @@ in6_control(struct socket *so, u_long cm } case SIOCDIFADDR_IN6: + s = splsoftnet(); in6_purgeaddr(ia6-ia_ifa); dohooks(ifp-if_addrhooks, 0); + splx(s); break; default: @@ -1078,7 +1079,7 @@ in6_purgeaddr(struct ifaddr *ifa) void in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp) { - int s = splnet(); + splsoftassert(IPL_SOFTNET); ifa_del(ifp, ia6-ia_ifa); @@ -1107,8 +1108,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s * Note that we should decrement the refcnt at least once for all *BSD. */ ifafree(ia6-ia_ifa); - - splx(s); } /* @@ -1355,9 +1354,10 @@ int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost) { int error = 0, plen, ifacount = 0; - int s = splnet(); struct ifaddr *ifa; + splsoftassert(IPL_SOFTNET); + /* * Give the interface a chance to initialize * if this is its first address (or it is a CARP interface) @@ -1374,10 +1374,8 @@ in6_ifinit(struct ifnet *ifp, struct in6 if ((ifacount = 1 || ifp-if_type == IFT_CARP || (ifp-if_flags IFF_POINTOPOINT)) ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) { - splx(s); return (error); } - splx(s); ia6-ia_ifa.ifa_metric =
Re: splnet() and SIOCSIFADDR
On 3 September 2014 15:53, Martin Pieuchot mpieuc...@nolizard.org wrote: On 03/09/14(Wed) 15:25, Martin Pieuchot wrote: Drivers that need a splnet() protection inside their SIOCSIFADDR generally raise the spl level themselves, so we should not need to do that in in{6,}_ifinit(). One exception to this rule is, as always, carp(4)... So the diff below moves the spl dance inside carp's SIOCSIFADDR handler, it's a baby step, so we can take care of carp_set_addr{6,} later. mikeb@ pointed out that a spl dance was missing from the SIOCDIFADDR_IN6 ioctl, updated diff below also fixes that. i think we're ready to step on a carefully placed landmine. OK mikeb
Re: syslogd libevent
On Fri, Aug 29, 2014 at 11:25:52PM +0200, Alexander Bluhm wrote: So I will write more tests before committing this. My regression tests found a bug in syslogd. When adding the maximum number of paths with the -a option, the arrays for unix domain socket paths and the poll file descriptors overflow by one. - increase pfd array by one - operate on size of arrays not length - null check for path not necessary when doing fd -1 check - rename variables consistently ...unix - make number of unix fds a local variable ok? bluhm Index: usr.sbin/syslogd/privsep.c === RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/privsep.c,v retrieving revision 1.43 diff -u -p -r1.43 privsep.c --- usr.sbin/syslogd/privsep.c 25 Aug 2014 20:19:14 - 1.43 +++ usr.sbin/syslogd/privsep.c 3 Sep 2014 16:08:03 - @@ -172,7 +172,7 @@ priv_init(char *conf, int numeric, int l close(socks[1]); /* Close descriptors that only the unpriv child needs */ - for (i = 0; i nfunix; i++) + for (i = 0; i MAXUNIX + 1; i++) if (pfd[PFD_UNIX_0 + i].fd != -1) close(pfd[PFD_UNIX_0 + i].fd); if (pfd[PFD_INET].fd != -1) @@ -369,9 +369,9 @@ priv_init(char *conf, int numeric, int l close(socks[0]); /* Unlink any domain sockets that have been opened */ - for (i = 0; i nfunix; i++) - if (funixn[i] != NULL pfd[PFD_UNIX_0 + i].fd != -1) - (void)unlink(funixn[i]); + for (i = 0; i MAXUNIX; i++) + if (pfd[PFD_UNIX_0 + i].fd != -1) + (void)unlink(path_unix[i]); if (ctlsock_path != NULL pfd[PFD_CTLSOCK].fd != -1) (void)unlink(ctlsock_path); Index: usr.sbin/syslogd/syslogd.c === RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v retrieving revision 1.121 diff -u -p -r1.121 syslogd.c --- usr.sbin/syslogd/syslogd.c 31 Aug 2014 22:11:43 - 1.121 +++ usr.sbin/syslogd/syslogd.c 3 Sep 2014 16:40:54 - @@ -183,8 +183,7 @@ char*TypeNames[9] = { struct filed *Files; struct filed consfile; -intnfunix = 1; /* Number of Unix domain sockets requested */ -char *funixn[MAXFUNIX] = { _PATH_LOG }; /* Paths to Unix domain sockets */ +char *path_unix[MAXUNIX] = { _PATH_LOG }; /* Paths to Unix domain sockets */ intDebug; /* debug flag */ intStartup = 1;/* startup flag */ char LocalHostName[MAXHOSTNAMELEN]; /* our hostname */ @@ -282,7 +281,7 @@ voidlogto_ctlconn(char *); int main(int argc, char *argv[]) { - int ch, i, linesize, fd; + int ch, i, linesize, fd, nunix = 1; struct sockaddr_un fromunix; struct sockaddr_storage from; socklen_t len; @@ -292,6 +291,9 @@ main(int argc, char *argv[]) struct addrinfo hints, *res, *res0; FILE *fp; + for (i = 0; i N_PFD; i++) + pfd[i].fd = -1; + while ((ch = getopt(argc, argv, 46dhnuf:m:p:a:s:)) != -1) switch (ch) { case '4': /* disable IPv6 */ @@ -318,18 +320,18 @@ main(int argc, char *argv[]) NoDNS = 1; break; case 'p': /* path */ - funixn[0] = optarg; + path_unix[0] = optarg; break; case 'u': /* allow udp input port */ SecureMode = 0; break; case 'a': - if (nfunix = MAXFUNIX) + if (nunix = MAXUNIX) fprintf(stderr, syslogd: out of descriptors, ignoring %s\n, optarg); else - funixn[nfunix++] = optarg; + path_unix[nunix++] = optarg; break; case 's': ctlsock_path = optarg; @@ -439,8 +441,8 @@ main(int argc, char *argv[]) #ifndef SUN_LEN #define SUN_LEN(unp) (strlen((unp)-sun_path) + 2) #endif - for (i = 0; i nfunix; i++) { - if ((fd = unix_socket(funixn[i], SOCK_DGRAM, 0666)) == -1) { + for (i = 0; i nunix; i++) { + if ((fd = unix_socket(path_unix[i], SOCK_DGRAM, 0666)) == -1) { if (i == 0 !Debug) die(0); continue; @@ -450,7 +452,7 @@ main(int argc, char *argv[]) pfd[PFD_UNIX_0 + i].events = POLLIN; } - nfunix++; + nunix++; if (socketpair(AF_UNIX, SOCK_DGRAM, PF_UNSPEC, pair) == -1) die(0); fd = pair[0]; @@ -575,7 +577,7 @@ main(int
Re: minphys woes
On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote: From physio(9): minphys A device specific routine called to determine the maximum transfer size that the device's strategy routine can handle. Since we have seen that the driver must be able to handle 64k blocks in any case, the fact that minphys is device specific is useless, isn't it? physio() is used by character device access. Looks to me like sdminphys() will change the chunking behavior of this: dd if=/dev/zero of=/dev/rsd0a bs=100M count=1 depending on whether sd0 is a SCSI-I device, no? Yes, but that does not make it any less useless. File systems will call the very same strategy routine and expect it to deal with 64K blocks. The statement in the man page gives the misleading impression that the strategy routine could be used to avoid that the strategy routine has to handle 64K blocks, which is not true. So, maybe the minphys mechanism is useful for block devices that are not used for file systems. Are there any such devices? Maybe we could add a sentence to the man page like following? Beware that a block device's strategy routine that is used for file systems must always be able to accept transfers sizes up to MAXPHYS.
Re: splnet() and SIOCSIFADDR
On Wed, Sep 03, 2014 at 03:53:34PM +0200, Martin Pieuchot wrote: @@ -1078,7 +1079,7 @@ in6_purgeaddr(struct ifaddr *ifa) void in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp) { - int s = splnet(); + splsoftassert(IPL_SOFTNET); ifa_del(ifp, ia6-ia_ifa); I think there are code paths that can trigger this assertion netinet6/in6.c: in6_unlink_ifa() netinet6/in6.c: in6_purgeaddr() netinet6/nd6_rtr.c: purge_detached() netinet6/nd6_rtr.c: nd6_prelist_add() netinet6/in6.c: in6_control() netinet/tcp_usrreq.c: tcp_usrreq() kern/sys_socket.c: soo_ioctl() netinet6/in6.c: in6_unlink_ifa() netinet6/in6.c: in6_purgeaddr() netinet6/nd6_rtr.c: purge_detached() netinet6/nd6_rtr.c: nd6_prelist_add() netinet6/in6_ifattach.c:in6_ifattach_linklocal() netinet/ip_carp.c carp_set_enaddr() netinet/ip_carp.c carp_ioctl() ... nd6_prelist_add() does some splsoftnet() already. I think you should put one around purge_detached() there. bluhm
Re: syslogd libevent handler
On Sun, Aug 31, 2014 at 10:46:50PM +0200, Alexander Bluhm wrote: On Fri, Aug 29, 2014 at 11:25:52PM +0200, Alexander Bluhm wrote: I will try to pull parts of the diff into separate changes to make review easier. Move the handlers for the poll events into separate functions. They will become the libevent callbacks later. ok? anyone? bluhm Index: usr.sbin/syslogd/syslogd.c === RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v retrieving revision 1.119 diff -u -p -r1.119 syslogd.c --- usr.sbin/syslogd/syslogd.c25 Aug 2014 18:19:18 - 1.119 +++ usr.sbin/syslogd/syslogd.c31 Aug 2014 20:34:01 - @@ -245,6 +245,13 @@ char *reply_text;/* Start of reply tex size_t ctl_reply_size = 0; /* Number of bytes used in reply */ size_t ctl_reply_offset = 0; /* Number of bytes of reply written so far */ +char *linebuf; +int linesize; + +void klog_read_handler(int); +void udp_read_handler(int); +void unix_read_handler(int); + struct pollfd pfd[N_PFD]; volatile sig_atomic_t MarkSet; @@ -283,12 +290,8 @@ void logto_ctlconn(char *); int main(int argc, char *argv[]) { - int ch, i, linesize, fd; - struct sockaddr_un fromunix; - struct sockaddr_storage from; - socklen_t len; - char *p, *line; - char resolve[MAXHOSTNAMELEN]; + int ch, i, fd; + char *p; int lockpipe[2] = { -1, -1}, pair[2], nullfd; struct addrinfo hints, *res, *res0; FILE *fp; @@ -368,7 +371,7 @@ main(int argc, char *argv[]) if (linesize MAXLINE) linesize = MAXLINE; linesize++; - if ((line = malloc(linesize)) == NULL) { + if ((linebuf = malloc(linesize)) == NULL) { logerror(Couldn't allocate line buffer); die(0); } @@ -586,41 +589,13 @@ main(int argc, char *argv[]) } if ((pfd[PFD_KLOG].revents POLLIN) != 0) { - i = read(pfd[PFD_KLOG].fd, line, linesize - 1); - if (i 0) { - line[i] = '\0'; - printsys(line); - } else if (i 0 errno != EINTR) { - logerror(klog); - pfd[PFD_KLOG].fd = -1; - pfd[PFD_KLOG].events = 0; - } + klog_read_handler(pfd[PFD_KLOG].fd); } if ((pfd[PFD_INET].revents POLLIN) != 0) { - len = sizeof(from); - i = recvfrom(pfd[PFD_INET].fd, line, MAXLINE, 0, - (struct sockaddr *)from, len); - if (i 0) { - line[i] = '\0'; - cvthname((struct sockaddr *)from, resolve, - sizeof(resolve)); - dprintf(cvthname res: %s\n, resolve); - printline(resolve, line); - } else if (i 0 errno != EINTR) - logerror(recvfrom inet); + udp_read_handler(pfd[PFD_INET].fd); } if ((pfd[PFD_INET6].revents POLLIN) != 0) { - len = sizeof(from); - i = recvfrom(pfd[PFD_INET6].fd, line, MAXLINE, 0, - (struct sockaddr *)from, len); - if (i 0) { - line[i] = '\0'; - cvthname((struct sockaddr *)from, resolve, - sizeof(resolve)); - dprintf(cvthname res: %s\n, resolve); - printline(resolve, line); - } else if (i 0 errno != EINTR) - logerror(recvfrom inet6); + udp_read_handler(pfd[PFD_INET6].fd); } if ((pfd[PFD_CTLSOCK].revents POLLIN) != 0) ctlsock_accept_handler(); @@ -631,23 +606,65 @@ main(int argc, char *argv[]) for (i = 0; i nfunix; i++) { if ((pfd[PFD_UNIX_0 + i].revents POLLIN) != 0) { - ssize_t rlen; - - len = sizeof(fromunix); - rlen = recvfrom(pfd[PFD_UNIX_0 + i].fd, line, - MAXLINE, 0, (struct sockaddr *)fromunix, - len); - if (rlen 0) { - line[rlen] = '\0'; - printline(LocalHostName, line); - } else if (rlen == -1 errno != EINTR) - logerror(recvfrom unix); +
Re: minphys woes
On Wed, Sep 03, 2014 at 07:51:25PM +0200, Stefan Fritsch wrote: On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote: From physio(9): minphys A device specific routine called to determine the maximum transfer size that the device's strategy routine can handle. Since we have seen that the driver must be able to handle 64k blocks in any case, the fact that minphys is device specific is useless, isn't it? physio() is used by character device access. Looks to me like sdminphys() will change the chunking behavior of this: dd if=/dev/zero of=/dev/rsd0a bs=100M count=1 depending on whether sd0 is a SCSI-I device, no? Yes, but that does not make it any less useless. File systems will call the very same strategy routine and expect it to deal with 64K blocks. The statement in the man page gives the misleading impression that the strategy routine could be used to avoid that the strategy routine has to handle 64K blocks, which is not true. So, maybe the minphys mechanism is useful for block devices that are not used for file systems. Are there any such devices? Maybe we could add a sentence to the man page like following? Beware that a block device's strategy routine that is used for file systems must always be able to accept transfers sizes up to MAXPHYS. Maybe it is time to fix the block side and make sure that it respects minphys as well by chopping up the IO. This would also allow to buffer bigger blocks than MAXPHYS in the buffer cache (not sure if this is something we want though). -- :wq Claudio
Re: splnet() and SIOCSIFADDR
On Wed, Sep 03, 2014 at 03:25:34PM +0200, Martin Pieuchot wrote: Drivers that need a splnet() protection inside their SIOCSIFADDR generally raise the spl level themselves, so we should not need to do that in in{6,}_ifinit(). One exception to this rule is, as always, carp(4)... So the diff below moves the spl dance inside carp's SIOCSIFADDR handler, it's a baby step, so we can take care of carp_set_addr{6,} later. Hmm. My gut feeling is that this is scary. Calling ifp functions at non-splnet is always a cause for troubles (e.g. the watchdog callbacks miss often the needed splnet calls). I think the ioctl code pathes in the various places may suffer from similar problems with missing splnet() calls. Guess a bit of an audit is needed in that area. Something else I wonder is if splsoftnet() is good enough when dealing with ifp in general (aren't we detaching interfaces at splnet())? Removable interfaces opened for sure a bucket sized can of worms. ok? Index: netinet/in.c === RCS file: /home/ncvs/src/sys/netinet/in.c,v retrieving revision 1.103 diff -u -p -r1.103 in.c --- netinet/in.c 3 Sep 2014 08:59:06 - 1.103 +++ netinet/in.c 3 Sep 2014 13:08:43 - @@ -612,7 +612,7 @@ in_ifinit(struct ifnet *ifp, struct in_i { u_int32_t i = sin-sin_addr.s_addr; struct sockaddr_in oldaddr; - int s, error = 0; + int error = 0; splsoftassert(IPL_SOFTNET); @@ -627,7 +627,6 @@ in_ifinit(struct ifnet *ifp, struct in_i rt_ifa_delloop(ia-ia_ifa); ifa_del(ifp, ia-ia_ifa); } - s = splnet(); oldaddr = ia-ia_addr; ia-ia_addr = *sin; @@ -639,10 +638,8 @@ in_ifinit(struct ifnet *ifp, struct in_i if (ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia))) { ia-ia_addr = oldaddr; - splx(s); goto out; } - splx(s); if (ia-ia_netmask == 0) { if (IN_CLASSA(i)) Index: netinet/ip_carp.c === RCS file: /home/ncvs/src/sys/netinet/ip_carp.c,v retrieving revision 1.233 diff -u -p -r1.233 ip_carp.c --- netinet/ip_carp.c 22 Jul 2014 11:06:10 - 1.233 +++ netinet/ip_carp.c 3 Sep 2014 13:08:43 - @@ -2060,10 +2060,11 @@ carp_ioctl(struct ifnet *ifp, u_long cmd struct ifaddr *ifa = (struct ifaddr *)addr; struct ifreq *ifr = (struct ifreq *)addr; struct ifnet *cdev = NULL; - int i, error = 0; + int s, i, error = 0; switch (cmd) { case SIOCSIFADDR: + s = splnet(); switch (ifa-ifa_addr-sa_family) { #ifdef INET case AF_INET: @@ -2088,6 +2089,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd break; } break; + splx(s); case SIOCSIFFLAGS: vhe = LIST_FIRST(sc-carp_vhosts); Index: netinet6/in6.c === RCS file: /home/ncvs/src/sys/netinet6/in6.c,v retrieving revision 1.140 diff -u -p -r1.140 in6.c --- netinet6/in6.c26 Aug 2014 21:44:29 - 1.140 +++ netinet6/in6.c3 Sep 2014 13:08:43 - @@ -1078,7 +1078,7 @@ in6_purgeaddr(struct ifaddr *ifa) void in6_unlink_ifa(struct in6_ifaddr *ia6, struct ifnet *ifp) { - int s = splnet(); + splsoftassert(IPL_SOFTNET); ifa_del(ifp, ia6-ia_ifa); @@ -1107,8 +1107,6 @@ in6_unlink_ifa(struct in6_ifaddr *ia6, s * Note that we should decrement the refcnt at least once for all *BSD. */ ifafree(ia6-ia_ifa); - - splx(s); } /* @@ -1355,9 +1353,10 @@ int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia6, int newhost) { int error = 0, plen, ifacount = 0; - int s = splnet(); struct ifaddr *ifa; + splsoftassert(IPL_SOFTNET); + /* * Give the interface a chance to initialize * if this is its first address (or it is a CARP interface) @@ -1374,10 +1373,8 @@ in6_ifinit(struct ifnet *ifp, struct in6 if ((ifacount = 1 || ifp-if_type == IFT_CARP || (ifp-if_flags IFF_POINTOPOINT)) ifp-if_ioctl (error = (*ifp-if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia6))) { - splx(s); return (error); } - splx(s); ia6-ia_ifa.ifa_metric = ifp-if_metric; -- :wq Claudio
Re: minphys woes
On Wed, Sep 3, 2014 at 10:51 AM, Stefan Fritsch s...@sfritsch.de wrote: On Tuesday 02 September 2014 15:22:16, Philip Guenther wrote: From physio(9): minphys A device specific routine called to determine the maximum transfer size that the device's strategy routine can handle. Since we have seen that the driver must be able to handle 64k blocks in any case, the fact that minphys is device specific is useless, isn't it? physio() is used by character device access. Looks to me like sdminphys() will change the chunking behavior of this: dd if=/dev/zero of=/dev/rsd0a bs=100M count=1 depending on whether sd0 is a SCSI-I device, no? Yes, but that does not make it any less useless. File systems will call the very same strategy routine and expect it to deal with 64K blocks. The statement in the man page gives the misleading impression that the [minphys] routine could be used to avoid that the strategy routine has to handle 64K blocks, which is not true. We don't seem to have a manpage describing the routines that a block device must implement and their requirements, nor the same for character devices. Those would be nice additions to our section 9 manpages. This requirement on a block device's strategy routine comes from the filesystem layer. These *do not use physio()*, so how does it make sense to document that requirement in physio(9)? That manpage doesn't even contain the word block! So, maybe the minphys mechanism is useful for block devices that are not used for file systems. Are there any such devices? The current minphys mechanism *should* be useful for devices which do not store file systems with a block size greater than the maximum I/O size of the device. IMO, if a filesystem is configured with block size less than the device's minphys, then it should not be doing breads of more than that block size. If that's the case, then manually setting the file system block size to less than the device's minphys should let you create a filesystem on that device. (Creating/using a filesystem with block size greater the device minphys seems like a Bad Idea for performance and possibly correctness reasons. It's like the disk I/O version of IP fragmentation...) To go back to a previous email, you wrote: But what is really strange is, why does openbsd then have an infrastructure to set different max transfer sizes for physio on a per-adapter basis? This makes no sense. Either the drivers have to support 64k transfers, in which case most of the minphys infrastructure is useless, or they don't have to. In the latter case the minphys infrastructure would need to be used in all code paths. The answer to this question can be found by reading cvs logs and diffs: MAXBSIZE used to be strictly less than MAXPHYS and less than or equal to the smallest minphys of a device supported on the given architecture. MAXBSIZE was MD then, and also apparently reflected limitations in the pmap; i386 pmap bugs delayed it from going to 64k MAXBSIZE until after all the others did. Why bother with permitting character device access in chunks greater than MAXBSIZE? Probably because some tools (newfs, dump, fsck) had significant performance gains by doing I/O in the largest chunk supported by the device, irrespective of the limitations of the pmap, buffer cache, or filesystems. Oh, and tapes really wanted larger I/Os so that they could stream, IIRC. IMO, the problem that you're hitting with your vioblk device isn't a problem with MAXPHYS, physio(), or minphys, but rather with MAXBSIZE. Back in the 1990's, the fact that you want to be able to configure a device with minphys 20kB on your arch means that the MD MAXBSIZE define for your arch should be 20kB. If MAXBSIZE could be set/limited on a per-device basis (i.e., the code using it queried the device) then your problem would be resolved, no? Philip Guenther
Re: Refactoring process-local file descriptor data
Need to re-roll for -current? /usr/src/sys/kern/kern_descrip.c: In function 'dodup3': /usr/src/sys/kern/kern_descrip.c:312: error: 'struct filedesc' has no member named 'fd_ofileflags' cc -Werror -Wall -Wstrict-prototypes -Wmissing-prototypes -Wno-main -Wno-uninitialized -Wstack-larger-than-2047 -mcmodel=kernel -mno-red-zone -mno-sse2 -mno-sse -mno-3dnow -mno-mmx -msoft-float -fno-omit-frame-pointer -fno-builtin-printf -fno-builtin-snprintf -fno-builtin-vsnprintf -fno-builtin-log -fno-builtin-log2 -fno-builtin-malloc -fno-pie -O2 -pipe -nostdinc -I/usr/src/sys -I. -I/usr/src/sys/arch -DDDB -DDIAGNOSTIC -DKTRACE -DACCOUNTING -DKMEMSTATS -DPTRACE -DPOOL_DEBUG -DCRYPTO -DSYSVMSG -DSYSVSEM -DSYSVSHM -DUVM_SWAP_ENCRYPT -DCOMPAT_43 -DLKM -DFFS -DFFS2 -DFFS_SOFTUPDATES -DUFS_DIRHASH -DQUOTA -DEXT2FS -DMFS -DNFSCLIENT -DNFSSERVER -DCD9660 -DUDF -DMSDOSFS -DFIFO -DTMPFS -DFUSE -DSOCKET_SPLICE -DTCP_SACK -DTCP_ECN -DTCP_SIGNATURE -DINET -DINET6 -DIPSEC -DPPP_BSDCOMP -DPPP_DEFLATE -DPIPEX -DMROUTING -DMPLS -DBOOT_CONFIG -DUSER_PCICONF -DAPERTURE -DMTRR -DNTFS -DHIBERNATE -DPCIVERBOSE -DUSBVERBOSE -DWSDISPLAY_COMPAT_USL -DWSDISPLAY_COMPAT_RAWKBD -DWSDISPLAY_DEFA! ULTSCREENS=6 -DWSDISPLAY_COMPAT_PCVT -DX86EMU -DONEWIREVERBOSE -DMULTIPROCESSOR -DMAXUSERS=80 -D_KERNEL -MD -MP -c /usr/src/sys/kern/kern_rwlock.c cc -Werror -Wall -Wstrict-prototypes -Wmissing-prototypes -Wno-main -Wno-uninitialized -Wstack-larger-than-2047 -mcmodel=kernel -mno-red-zone -mno-sse2 -mno-sse -mno-3dnow -mno-mmx -msoft-float -fno-omit-frame-pointer -fno-builtin-printf -fno-builtin-snprintf -fno-builtin-vsnprintf -fno-builtin-log -fno-builtin-log2 -fno-builtin-malloc -fno-pie -O2 -pipe -nostdinc -I/usr/src/sys -I. -I/usr/src/sys/arch -DDDB -DDIAGNOSTIC -DKTRACE -DACCOUNTING -DKMEMSTATS -DPTRACE -DPOOL_DEBUG -DCRYPTO -DSYSVMSG -DSYSVSEM -DSYSVSHM -DUVM_SWAP_ENCRYPT -DCOMPAT_43 -DLKM -DFFS -DFFS2 -DFFS_SOFTUPDATES -DUFS_DIRHASH -DQUOTA -DEXT2FS -DMFS -DNFSCLIENT -DNFSSERVER -DCD9660 -DUDF -DMSDOSFS -DFIFO -DTMPFS -DFUSE -DSOCKET_SPLICE -DTCP_SACK -DTCP_ECN -DTCP_SIGNATURE -DINET -DINET6 -DIPSEC -DPPP_BSDCOMP -DPPP_DEFLATE -DPIPEX -DMROUTING -DMPLS -DBOOT_CONFIG -DUSER_PCICONF -DAPERTURE -DMTRR -DNTFS -DHIBERNATE -DPCIVERBOSE -DUSBVERBOSE -DWSDISPLAY_COMPAT_USL -DWSDISPLAY_COMPAT_RAWKBD -DWSDISPLAY_DEFA! ULTSCREENS=6 -DWSDISPLAY_COMPAT_PCVT -DX86EMU -DONEWIREVERBOSE -DMULTIPROCESSOR -DMAXUSERS=80 -D_KERNEL -MD -MP -c /usr/src/sys/kern/kern_physio.c *** Error 1 in target 'kern_descrip.o' *** Error 1 in /home/sl4mmy/kernel (Makefile:931 'kern_descrip.o') On Mon, Aug 11, 2014 at 12:32:24AM -0400, Jean-Philippe Ouellet wrote: Ping? On Sun, Jul 13, 2014 at 03:45:44PM -0400, Jean-Philippe Ouellet wrote: Updated for mallocarray() and free(size). On Thu, Jul 10, 2014 at 04:13:38PM -0400, Jean-Philippe Ouellet wrote: This diff adds another struct between filedesc and file to store process-local per-descriptor information. Currently, the only thing in this struct is the file pointer and some flags, however I have another patch on top of this that adds capsicum capabilities to it. (And another that uses mallocarray(9) where applicable. One thing at a time.) The data is currently stored in one big allocation with all the file *s in front, and all the flags after. When that allocation grows, there are two copies and two zeroings. This is fine when we only have two fields (fd_ofiles and fd_ofileflags), but adding more is ugly. Instead, I propose we have a struct for each file descriptor table entry (filedescent) and put the files, flags, and capabilities in that. The immediate impact is some memory waste due to padding (3 or 7 bytes per file descriptor, depending on the arch), however once capabilities are added it will be negligible. I discussed it with matthew@ and guenther@ and concluded this was the saner way. For what it's worth, FreeBSD and NetBSD have done the same thing. I've been running different versions of this for about a month or so, no issues noticed so far. A similar earlier version was reviewed by matthew@ and guenther@. Index: sys/filedesc.h === RCS file: /cvs/src/sys/sys/filedesc.h,v retrieving revision 1.28 diff -u -p -r1.28 filedesc.h --- sys/filedesc.h15 May 2014 03:52:25 - 1.28 +++ sys/filedesc.h13 Jul 2014 19:40:01 - @@ -34,9 +34,6 @@ #include sys/rwlock.h /* - * This structure is used for the management of descriptors. It may be - * shared by multiple processes. - * * A process is initially started out with NDFILE descriptors stored within * this structure, selected to be enough for typical applications based on * the historical limit of 20 open files (and the usage of descriptors by @@ -56,22 +53,33 @@ #define NDHISLOTS(x) (NDREDUCE(NDREDUCE(x))) #define NDLOSLOTS(x) (NDHISLOTS(x)
Re: syslogd libevent handler
On Sun, Aug 31, 2014 at 10:46:50PM +0200, Alexander Bluhm wrote: Move the handlers for the poll events into separate functions. They will become the libevent callbacks later. ... @@ -631,23 +606,65 @@ main(int argc, char *argv[]) for (i = 0; i nfunix; i++) { if ((pfd[PFD_UNIX_0 + i].revents POLLIN) != 0) { - ssize_t rlen; - - len = sizeof(fromunix); - rlen = recvfrom(pfd[PFD_UNIX_0 + i].fd, line, - MAXLINE, 0, (struct sockaddr *)fromunix, - len); - if (rlen 0) { - line[rlen] = '\0'; - printline(LocalHostName, line); - } else if (rlen == -1 errno != EINTR) - logerror(recvfrom unix); + udp_read_handler(pfd[PFD_UNIX_0 + i].fd); ... Shouldn't this be a call to unix_read_handler() instead of udp_read_handler()?