Re: Special casing fd's opened before pledge(2)
> There has been developer pressure to permit an increasing number of > ioctl's to pledged programs. The problem is that providing a specific > ioctl under a promise to one program, means it becomes supplied to all > other programs that make that promise. There is no discrete method > to differentiate further, until now. > > This proposal annotates file descriptors allocated before the first > pledge(2) call in a process, with a marker. Such file descriptors can > be dup'd; the mark is retained. They can fdpassed, which also retains > the mark in the receiver (therefore, a non-pledged process can feed a > pledged process). Execve clears this flag. > > That is the proposed semantic. There are still a few glitches. > > Once that mark exists, I feel more comfortable adding additional ioctl's > which demand early-open fd's. Specifically in privsep daemons. > > 5 programs have been discovered in the tree which needed small changes > (csh, ksh, less, tmux, dhclient). > > less and tmux fixes are not commited, be cautious about tmux. New version. Index: sys/filedesc.h === RCS file: /cvs/src/sys/sys/filedesc.h,v retrieving revision 1.30 diff -u -p -u -r1.30 filedesc.h --- sys/filedesc.h 6 May 2015 08:52:17 - 1.30 +++ sys/filedesc.h 23 Jan 2017 16:00:14 - @@ -105,6 +105,7 @@ struct filedesc0 { * Per-process open flags. */ #defineUF_EXCLOSE 0x01/* auto-close on exec */ +#defineUF_PLEDGED 0x02/* opened after pledge(2) */ /* * Flags on the file descriptor table. @@ -121,7 +122,7 @@ struct filedesc0 { * Kernel global variables and routines. */ void filedesc_init(void); -intdupfdopen(struct filedesc *, int, int, int); +intdupfdopen(struct proc *p, struct filedesc *, int, int); intfdalloc(struct proc *p, int want, int *result); void fdexpand(struct proc *); intfalloc(struct proc *p, struct file **resultfp, int *resultfd); Index: sys/pledge.h === RCS file: /cvs/src/sys/sys/pledge.h,v retrieving revision 1.30 diff -u -p -u -r1.30 pledge.h --- sys/pledge.h23 Jan 2017 04:25:05 - 1.30 +++ sys/pledge.h23 Jan 2017 11:40:58 - @@ -126,7 +126,7 @@ int pledge_adjtime(struct proc *p, const intpledge_sendit(struct proc *p, const void *to); intpledge_sockopt(struct proc *p, int set, int level, int optname); intpledge_socket(struct proc *p, int domain, int state); -intpledge_ioctl(struct proc *p, long com, struct file *); +intpledge_ioctl(struct proc *p, long com, struct file *, int, int); intpledge_ioctl_drm(struct proc *p, long com, dev_t device); intpledge_ioctl_vmm(struct proc *p, long com); intpledge_flock(struct proc *p); Index: sys/unpcb.h === RCS file: /cvs/src/sys/sys/unpcb.h,v retrieving revision 1.12 diff -u -p -u -r1.12 unpcb.h --- sys/unpcb.h 28 Aug 2015 04:38:47 - 1.12 +++ sys/unpcb.h 22 Jan 2017 09:19:54 - @@ -86,17 +86,22 @@ struct unpcb { #definesotounpcb(so) ((struct unpcb *)((so)->so_pcb)) #ifdef _KERNEL +struct fdpass { + struct file *fp; + int flags; +}; + intunp_attach(struct socket *); intunp_bind(struct unpcb *, struct mbuf *, struct proc *); intunp_connect(struct socket *, struct mbuf *, struct proc *); intunp_connect2(struct socket *, struct socket *); void unp_detach(struct unpcb *); -void unp_discard(struct file **, int); +void unp_discard(struct fdpass *, int); void unp_disconnect(struct unpcb *); void unp_drop(struct unpcb *, int); void unp_gc(void *); -void unp_mark(struct file **, int); -void unp_scan(struct mbuf *, void (*)(struct file **, int)); +void unp_mark(struct fdpass *, int); +void unp_scan(struct mbuf *, void (*)(struct fdpass *, int)); void unp_shutdown(struct unpcb *); intunp_externalize(struct mbuf *, socklen_t, int); intunp_internalize(struct mbuf *, struct proc *); Index: kern/kern_descrip.c === RCS file: /cvs/src/sys/kern/kern_descrip.c,v retrieving revision 1.136 diff -u -p -u -r1.136 kern_descrip.c --- kern/kern_descrip.c 24 Sep 2016 18:39:17 - 1.136 +++ kern/kern_descrip.c 23 Jan 2017 16:01:59 - @@ -611,7 +611,7 @@ finishdup(struct proc *p, struct file *f FREF(oldfp); fdp->fd_ofiles[new] = fp; - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~(UF_EXCLOSE|UF_PLEDGED); fp->f_count++; FRELE(fp, p); if (dup2 && oldfp == NULL) @@ -632,6 +632,7 @@ fdremove(struct filedesc *fdp, int fd) { fdpassertlocked(fdp); fdp->fd_ofiles[fd] = NULL; + fdp->fd_ofileflags[fd] = 0;
Re: less
Theo de Raadt(dera...@openbsd.org) on 2017.01.22 22:01:22 -0700: > This change causes less(1) to open /dev/tty slightly earlier, > or fallback to stderr. > > As a result the TIOCGWINSZ operation can be done against the most > likely tty descriptor. > > As a side effect, that tty descriptor will be opened before the > first pledge call. > > Anyone see a downside? no, ok > > Index: usr.bin/less/main.c > === > RCS file: /cvs/src/usr.bin/less/main.c,v > retrieving revision 1.35 > diff -u -p -u -r1.35 main.c > --- usr.bin/less/main.c 17 Sep 2016 15:06:41 - 1.35 > +++ usr.bin/less/main.c 21 Jan 2017 06:10:53 - > @@ -90,6 +90,8 @@ main(int argc, char *argv[]) > if (s != NULL && *s != '\0') > secure = 1; > > + open_getchr(); > + > if (secure) { > if (pledge("stdio rpath wpath tty", NULL) == -1) { > perror("pledge"); > @@ -225,7 +227,6 @@ main(int argc, char *argv[]) > if (missing_cap && !know_dumb) > error("WARNING: terminal is not fully functional", NULL); > init_mark(); > - open_getchr(); > > if (secure) > if (pledge("stdio rpath tty", NULL) == -1) { > Index: usr.bin/less/screen.c > === > RCS file: /cvs/src/usr.bin/less/screen.c,v > retrieving revision 1.24 > diff -u -p -u -r1.24 screen.c > --- usr.bin/less/screen.c 8 Jul 2016 15:23:44 - 1.24 > +++ usr.bin/less/screen.c 21 Jan 2017 12:06:22 - > @@ -181,7 +181,7 @@ scrsize(void) > #define DEF_SC_WIDTH80 > #define DEF_SC_HEIGHT 24 > > - if (ioctl(2, TIOCGWINSZ, ) == 0) { > + if (ioctl(tty, TIOCGWINSZ, ) == 0) { > if (w.ws_row > 0) > sys_height = w.ws_row; > if (w.ws_col > 0) >
network booting efi systems with dhcpd(8)
For network boot clients, dhcpd(8) can supply a filename for the initial boot file for the client, which is something like pxeboot (or pxelinux.0). EFI and BIOS clients need different boot files, though, so the server needs to know what mode the client is booting in, in order to supply the right filename. RFC 4578 defines DHCP client option 93 for this purpose. The ISC dhcpd approach to using this looks something like this: option arch code 93 = unsigned integer 16; if option arch = 00:00 { filename "bios/pxelinux.0"; } elsif option arch = 00:07 { filename "efi.x64/syslinux.efi"; } which seems a bit complicated, and also a lot of work to implement. Instead I propose adding 'efi-filename' (and 'efi32-filename', though I'm not sure that's worth doing) next to the existing 'filename' statement and having dhcpd itself interpret the option values to figure out which one should be used. I don't actually need this yet because we don't have many EFI-only client machines, so we can just run everything in legacy mode, but some day this may change. Does this seem reasonable? Index: conflex.c === RCS file: /cvs/src/usr.sbin/dhcpd/conflex.c,v retrieving revision 1.16 diff -u -p -u -p -r1.16 conflex.c --- conflex.c 6 Feb 2016 23:50:10 - 1.16 +++ conflex.c 23 Jan 2017 08:09:15 - @@ -310,6 +310,8 @@ static const struct keywords { { "dynamic-bootp", TOK_DYNAMIC_BOOTP }, { "dynamic-bootp-lease-cutoff", TOK_DYNAMIC_BOOTP_LEASE_CUTOFF }, { "dynamic-bootp-lease-length", TOK_DYNAMIC_BOOTP_LEASE_LENGTH }, + { "efi-filename", TOK_EFI64_FILENAME }, + { "efi32-filename", TOK_EFI32_FILENAME }, { "ends", TOK_ENDS }, { "ethernet", TOK_ETHERNET }, { "filename", TOK_FILENAME }, Index: confpars.c === RCS file: /cvs/src/usr.sbin/dhcpd/confpars.c,v retrieving revision 1.28 diff -u -p -u -p -r1.28 confpars.c --- confpars.c 17 Aug 2016 00:55:33 - 1.28 +++ confpars.c 23 Jan 2017 08:09:15 - @@ -380,6 +380,14 @@ parse_statement(FILE *cfile, struct grou group->filename = parse_string(cfile); break; + case TOK_EFI32_FILENAME: + group->efi32_filename = parse_string(cfile); + break; + + case TOK_EFI64_FILENAME: + group->efi64_filename = parse_string(cfile); + break; + case TOK_SERVER_NAME: group->server_name = parse_string(cfile); break; Index: dhcp.c === RCS file: /cvs/src/usr.sbin/dhcpd/dhcp.c,v retrieving revision 1.52 diff -u -p -u -p -r1.52 dhcp.c --- dhcp.c 24 Oct 2016 21:05:55 - 1.52 +++ dhcp.c 23 Jan 2017 08:09:15 - @@ -699,6 +699,35 @@ nak_lease(struct packet *packet, struct outgoing.packet_length, from, , NULL); } +static const char * +group_filename(struct group *group, struct packet *packet) +{ + u_int16_t arch, *opt; + int i; + + opt = (u_int16_t *)packet->options[DHO_DHCP_CLIENT_ARCHITECTURE].data; + for (i = 0; i < packet->options[DHO_DHCP_CLIENT_ARCHITECTURE].len; + i += 2) { + arch = betoh16(*opt++); + switch (arch) { + case DCA_EFI_IA32: + if (group->efi32_filename != NULL) + return group->efi32_filename; + break; + + case DCA_EFI_BC: + case DCA_EFI_X86_64: + if (group->efi64_filename != NULL) + return group->efi64_filename; + break; + default: + break; + } + } + + return group->filename; +} + void ack_lease(struct packet *packet, struct lease *lease, unsigned int offer, time_t when) @@ -707,6 +736,7 @@ ack_lease(struct packet *packet, struct struct lease_state *state; time_t lease_time, offered_lease_time, max_lease_time, default_lease_time; struct class *vendor_class, *user_class; + const char *filename; int ulafdr, i; /* If we're already acking this lease, don't do it again. */ @@ -808,21 +838,20 @@ ack_lease(struct packet *packet, struct * Choose a filename; first from the host_decl, if any, then from * the user class, then from the vendor class. */ - if (lease->host && lease->host->group->filename) - strlcpy(state->filename, lease->host->group->filename, - sizeof state->filename); - else if (user_class &&
Re: network booting efi systems with dhcpd(8)
> > The ISC dhcpd approach to using this looks something like this: > > > > option arch code 93 = unsigned integer 16; > > > > if option arch = 00:00 { > > filename "bios/pxelinux.0"; > > } elsif option arch = 00:07 { > > filename "efi.x64/syslinux.efi"; > > } > > [...] IIUC OpenBSD dhcpd doesn't support conditionals... Ugly workaround is to use rewrite via tftpd's socket but one has to known IP address in advance. -r socket Issue filename rewrite requests to the specified UNIX domain socket. tftpd will write lines in the format "IP OP filename", terminated by a newline, where IP is the client's IP address, and OP is one of "read" or "write". tftpd expects replies in the format "filename" terminated by a newline. All rewrite requests from the daemon must be answered (even if it is with the original filename) before the TFTP request will continue. By default tftpd does not use filename rewriting. j.
Re: network booting efi systems with dhcpd(8)
On Mon, Jan 23, 2017 at 08:39:17PM +1000, Jonathan Matthew wrote: > For network boot clients, dhcpd(8) can supply a filename for the initial > boot file for the client, which is something like pxeboot (or pxelinux.0). > EFI and BIOS clients need different boot files, though, so the server > needs to know what mode the client is booting in, in order to supply the > right filename. RFC 4578 defines DHCP client option 93 for this purpose. > > The ISC dhcpd approach to using this looks something like this: > > option arch code 93 = unsigned integer 16; > > if option arch = 00:00 { > filename "bios/pxelinux.0"; > } elsif option arch = 00:07 { > filename "efi.x64/syslinux.efi"; > } > > which seems a bit complicated, and also a lot of work to implement. Instead > I propose adding 'efi-filename' (and 'efi32-filename', though I'm not sure > that's worth doing) next to the existing 'filename' statement and having > dhcpd itself interpret the option values to figure out which one should be > used. 'filename' or 'option bootfile-name' are used by autoinstall. I'm not sure diverting dhcpd option for boot image from rest of world is good way. IIUC OpenBSD dhcpd doesn't support conditionals... j.
Re: network booting efi systems with dhcpd(8)
On Mon, Jan 23, 2017 at 08:39:17PM +1000, Jonathan Matthew wrote: > For network boot clients, dhcpd(8) can supply a filename for the initial > boot file for the client, which is something like pxeboot (or pxelinux.0). > EFI and BIOS clients need different boot files, though, so the server > needs to know what mode the client is booting in, in order to supply the > right filename. RFC 4578 defines DHCP client option 93 for this purpose. > > The ISC dhcpd approach to using this looks something like this: > > option arch code 93 = unsigned integer 16; > > if option arch = 00:00 { > filename "bios/pxelinux.0"; > } elsif option arch = 00:07 { > filename "efi.x64/syslinux.efi"; > } > > which seems a bit complicated, and also a lot of work to implement. Instead > I propose adding 'efi-filename' (and 'efi32-filename', though I'm not sure > that's worth doing) next to the existing 'filename' statement and having > dhcpd itself interpret the option values to figure out which one should be > used. > > I don't actually need this yet because we don't have many EFI-only client > machines, so we can just run everything in legacy mode, but some day this > may change. > > Does this seem reasonable? How will values 10 (32 bit arm) and 11 (64 bit arm) be handled in future if this went in? Wouldn't it be better to map the arch numbers to strings? The uefi spec refers to http://www.iana.org/assignments/dhcpv6-parameters/dhcpv6-parameters.xhtml#processor-architecture Which also lists different numbers for PXE vs boot from HTTP. > > Index: conflex.c > === > RCS file: /cvs/src/usr.sbin/dhcpd/conflex.c,v > retrieving revision 1.16 > diff -u -p -u -p -r1.16 conflex.c > --- conflex.c 6 Feb 2016 23:50:10 - 1.16 > +++ conflex.c 23 Jan 2017 08:09:15 - > @@ -310,6 +310,8 @@ static const struct keywords { > { "dynamic-bootp", TOK_DYNAMIC_BOOTP }, > { "dynamic-bootp-lease-cutoff", TOK_DYNAMIC_BOOTP_LEASE_CUTOFF > }, > { "dynamic-bootp-lease-length", TOK_DYNAMIC_BOOTP_LEASE_LENGTH > }, > + { "efi-filename", TOK_EFI64_FILENAME }, > + { "efi32-filename", TOK_EFI32_FILENAME }, > { "ends", TOK_ENDS }, > { "ethernet", TOK_ETHERNET }, > { "filename", TOK_FILENAME }, > Index: confpars.c > === > RCS file: /cvs/src/usr.sbin/dhcpd/confpars.c,v > retrieving revision 1.28 > diff -u -p -u -p -r1.28 confpars.c > --- confpars.c17 Aug 2016 00:55:33 - 1.28 > +++ confpars.c23 Jan 2017 08:09:15 - > @@ -380,6 +380,14 @@ parse_statement(FILE *cfile, struct grou > group->filename = parse_string(cfile); > break; > > + case TOK_EFI32_FILENAME: > + group->efi32_filename = parse_string(cfile); > + break; > + > + case TOK_EFI64_FILENAME: > + group->efi64_filename = parse_string(cfile); > + break; > + > case TOK_SERVER_NAME: > group->server_name = parse_string(cfile); > break; > Index: dhcp.c > === > RCS file: /cvs/src/usr.sbin/dhcpd/dhcp.c,v > retrieving revision 1.52 > diff -u -p -u -p -r1.52 dhcp.c > --- dhcp.c24 Oct 2016 21:05:55 - 1.52 > +++ dhcp.c23 Jan 2017 08:09:15 - > @@ -699,6 +699,35 @@ nak_lease(struct packet *packet, struct > outgoing.packet_length, from, , NULL); > } > > +static const char * > +group_filename(struct group *group, struct packet *packet) > +{ > + u_int16_t arch, *opt; > + int i; > + > + opt = (u_int16_t *)packet->options[DHO_DHCP_CLIENT_ARCHITECTURE].data; > + for (i = 0; i < packet->options[DHO_DHCP_CLIENT_ARCHITECTURE].len; > + i += 2) { > + arch = betoh16(*opt++); > + switch (arch) { > + case DCA_EFI_IA32: > + if (group->efi32_filename != NULL) > + return group->efi32_filename; > + break; > + > + case DCA_EFI_BC: > + case DCA_EFI_X86_64: > + if (group->efi64_filename != NULL) > + return group->efi64_filename; > + break; > + default: > + break; > + } > + } > + > + return group->filename; > +} > + > void > ack_lease(struct packet *packet, struct lease *lease, unsigned int offer, > time_t when) > @@ -707,6 +736,7 @@ ack_lease(struct packet *packet, struct > struct lease_state *state; > time_t lease_time, offered_lease_time, max_lease_time, > default_lease_time; > struct class *vendor_class, *user_class; > +
Re: rtsock refactoring
On Sat, Jan 21, 2017 at 01:28:02AM +0100, Claudio Jeker wrote: > On Fri, Jan 20, 2017 at 02:51:52AM +0100, Claudio Jeker wrote: > > I sent this diff out some time ago and would really like to get this in. > > This is one step on makeing rtsock.c less of a hornets nest. > > This reduces the side effects in route_output and simplifies some other > > bits as well. For example route_input is less variadic and simpler. > > > > Here is just the route_input change. Which should be easier to OK. > Changed a few things based on input from bluhm@ OK bluhm@ > > -- > :wq Claudio > > Index: net/rtsock.c > === > RCS file: /cvs/src/sys/net/rtsock.c,v > retrieving revision 1.214 > diff -u -p -r1.214 rtsock.c > --- net/rtsock.c 20 Jan 2017 08:10:54 - 1.214 > +++ net/rtsock.c 21 Jan 2017 00:10:16 - > @@ -92,7 +92,6 @@ > > struct sockaddr route_dst = { 2, PF_ROUTE, }; > struct sockaddr route_src = { 2, PF_ROUTE, }; > -struct sockproto route_proto = { PF_ROUTE, }; > > struct walkarg { > int w_op, w_arg, w_given, w_needed, w_tmemsize; > @@ -100,7 +99,7 @@ struct walkarg { > }; > > int route_ctloutput(int, struct socket *, int, int, struct mbuf **); > -void route_input(struct mbuf *m0, ...); > +void route_input(struct mbuf *m0, sa_family_t); > int route_arp_conflict(struct rtentry *, struct rt_addrinfo *); > int route_cleargateway(struct rtentry *, void *, unsigned int); > > @@ -332,7 +331,7 @@ rt_senddesync(void *data) > } > > void > -route_input(struct mbuf *m0, ...) > +route_input(struct mbuf *m0, sa_family_t sa_family) > { > struct rawcb *rp; > struct routecb *rop; > @@ -340,15 +339,10 @@ route_input(struct mbuf *m0, ...) > struct mbuf *m = m0; > int s, sockets = 0; > struct socket *last = NULL; > - va_list ap; > - struct sockproto *proto; > struct sockaddr *sosrc, *sodst; > > - va_start(ap, m0); > - proto = va_arg(ap, struct sockproto *); > - sosrc = va_arg(ap, struct sockaddr *); > - sodst = va_arg(ap, struct sockaddr *); > - va_end(ap); > + sosrc = _src; > + sodst = _dst; > > /* ensure that we can access the rtm_type via mtod() */ > if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { > @@ -359,10 +353,16 @@ route_input(struct mbuf *m0, ...) > LIST_FOREACH(rp, , rcb_list) { > if (rp->rcb_socket->so_state & SS_CANTRCVMORE) > continue; > - if (rp->rcb_proto.sp_family != proto->sp_family) > + if (rp->rcb_proto.sp_family != PF_ROUTE) > continue; > - if (rp->rcb_proto.sp_protocol && proto->sp_protocol && > - rp->rcb_proto.sp_protocol != proto->sp_protocol) > + /* > + * If route socket is bound to an address family only send > + * messages that match the address family. Address family > + * agnostic messages are always send. > + */ > + if (rp->rcb_proto.sp_protocol != AF_UNSPEC && > + sa_family != AF_UNSPEC && > + rp->rcb_proto.sp_protocol != sa_family) > continue; > /* >* We assume the lower level routines have > @@ -953,8 +953,6 @@ flush: > rtm->rtm_flags |= RTF_DONE; > } > } > - if (info.rti_info[RTAX_DST]) > - route_proto.sp_protocol = info.rti_info[RTAX_DST]->sa_family; > if (rt) > rtfree(rt); > > @@ -970,9 +968,8 @@ fail: > } > /* There is another listener, so construct message */ > rp = sotorawcb(so); > - } > - if (rp) > rp->rcb_proto.sp_family = 0; /* Avoid us */ > + } > if (rtm) { > if (m_copyback(m, 0, rtm->rtm_msglen, rtm, M_NOWAIT)) { > m_freem(m); > @@ -982,9 +979,10 @@ fail: > free(rtm, M_RTABLE, 0); > } > if (m) > - route_input(m, _proto, _src, _dst); > + route_input(m, info.rti_info[RTAX_DST] ? > + info.rti_info[RTAX_DST]->sa_family : AF_UNSPEC); > if (rp) > - rp->rcb_proto.sp_family = PF_ROUTE; > + rp->rcb_proto.sp_family = PF_ROUTE; /* Readd us */ > > return (error); > } > @@ -1056,7 +1054,6 @@ rt_setmetrics(u_long which, const struct > > out->rmx_expire = expire; > } > - /* RTV_PRIORITY handled before */ > } > > void > @@ -1258,11 +1255,7 @@ rt_missmsg(int type, struct rt_addrinfo > rtm->rtm_tableid = tableid; > rtm->rtm_addrs = rtinfo->rti_addrs; > rtm->rtm_index = ifidx; > - if (sa == NULL) > - route_proto.sp_protocol = 0; > - else > - route_proto.sp_protocol = sa->sa_family; > - route_input(m, _proto, _src,
Re: rtwn: fix iq calibration
On Sun, Jan 22, 2017 at 11:07:00PM +0100, Jeremie Courreges-Anglas wrote: > Stefan Sperlingwrites: > > > FreeBSD committed a couple of interesting things in r307529 > > https://svnweb.freebsd.org/base?view=revision=r307529 > > Unfortunately the commit is a giant patch bomb which makes > > it hard to pick out individual fixes :-( > > > > This patch extracts what I believe "fix IQ calibration bug" refers to. > > The current code is only ever setting bits but not clearing bits which > > should be cleared. > > > > My diff looks very different because FreeBSD made a lot of style > > changes in their driver (e.g. they have an rtwn_bb_setbits() function > > which we don't have). We could think about porting these over but > > for now I'd like to focus on fixing bugs instead of style. > > Appears to work fine with > > urtwn0 at uhub0 port 1 configuration 1 interface 0 "vendor 0x0bda ALFA > AWUS036NHR" rev 2.00/2.00 addr 2 > urtwn0: MAC/BB RTL8188RU, RF 6052 1T1R, address 00:c0:ca:xx:xx:xx Thanks for testing. I should have mentioned that this diff only affects PCI devices since IQ calibration is disabled for USB devices. Running IQ calib has always been causing USB devices to stop receiving packets for some reason.
Re: network booting efi systems with dhcpd(8)
On Mon, Jan 23, 2017 at 09:57:28PM +1100, Jonathan Gray wrote: > On Mon, Jan 23, 2017 at 08:39:17PM +1000, Jonathan Matthew wrote: > > For network boot clients, dhcpd(8) can supply a filename for the initial > > boot file for the client, which is something like pxeboot (or pxelinux.0). > > EFI and BIOS clients need different boot files, though, so the server > > needs to know what mode the client is booting in, in order to supply the > > right filename. RFC 4578 defines DHCP client option 93 for this purpose. > > > > The ISC dhcpd approach to using this looks something like this: > > > > option arch code 93 = unsigned integer 16; > > > > if option arch = 00:00 { > > filename "bios/pxelinux.0"; > > } elsif option arch = 00:07 { > > filename "efi.x64/syslinux.efi"; > > } > > > > which seems a bit complicated, and also a lot of work to implement. Instead > > I propose adding 'efi-filename' (and 'efi32-filename', though I'm not sure > > that's worth doing) next to the existing 'filename' statement and having > > dhcpd itself interpret the option values to figure out which one should be > > used. > > > > I don't actually need this yet because we don't have many EFI-only client > > machines, so we can just run everything in legacy mode, but some day this > > may change. > > > > Does this seem reasonable? > > How will values 10 (32 bit arm) and 11 (64 bit arm) be handled > in future if this went in? Wouldn't it be better to map the arch numbers > to strings? > > The uefi spec refers to > http://www.iana.org/assignments/dhcpv6-parameters/dhcpv6-parameters.xhtml#processor-architecture > Which also lists different numbers for PXE vs boot from HTTP. If we have more than a couple of architectures to deal with, we probably need something smarter. Adding arm-filename and arm64-filename in the same way seems ugly. I don't think our dhcpd has any existing mechanisms that would support this kind of thing, so some actual thought may be required.
Re: rtsock refactoring
On Mon, Jan 23, 2017 at 01:18:05AM +0100, Claudio Jeker wrote: > Last bit for now. This is changing the reporting madness. It moves it in > its own function which is called after the big switch statement. > If you hit a bad error in the switch the code should eiter goto fail or > flush. > The new function rt_report is actually constructing the rt message out of > a rtentry. It does not magically update the message that was passed in > from userland. I think this is much safer and works better. OK bluhm@ > +rt_report(struct rtentry *rt, u_char type, int seq, int tableid) ... > + ifp = if_get(rt->rt_ifidx); > + if (ifp != NULL) { > + info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); > + info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; > + if (ifp->if_flags & IFF_POINTOPOINT) > + info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; > + } > + if_put(ifp); The if_put() could be in the if (ifp != NULL) block. > flush: > + if (rt) > + rtfree(rt); rtfree() checks (rt != NULL) itself.
Re: split pledge "ioctl" into "bpf" and "tape"
Theo de Raadt wrote: > So let's just split these cases out. "ioctl"'s number gets reused for > tape, and a new "bpf" promise is added.. That paves the way for a > more complex diff coming in a few hours. The mention of bpf made me worried that dhclient would be affected, but I checked and it's not. So this shouldn't cause trouble crossing over.
Re: split pledge "ioctl" into "bpf" and "tape"
>Theo de Raadt wrote: >> So let's just split these cases out. "ioctl"'s number gets reused for >> tape, and a new "bpf" promise is added.. That paves the way for a >> more complex diff coming in a few hours. > >The mention of bpf made me worried that dhclient would be affected, but I >checked and it's not. So this shouldn't cause trouble crossing over. the "bpf" pledge allows only one tiny minor operation. that is not new. the programs you are thinking of perform all their bpf setup before pledge (or don't pledge at all). after that they simply read/write.
fd-passing: convert internalized format to a struct
File descriptor passing internalizes process fd's to an array of struct file *. This results in tricky pointer management. How about passing it as an array of structs instead. Only one field inside for now, struct file *. However soon I'll need to pass additional fields along with the file, and I didn't want to use the low bits of the pointer :) Index: kern/uipc_usrreq.c === RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v retrieving revision 1.109 diff -u -p -u -r1.109 uipc_usrreq.c --- kern/uipc_usrreq.c 29 Dec 2016 12:12:43 - 1.109 +++ kern/uipc_usrreq.c 23 Jan 2017 23:02:15 - @@ -63,8 +63,8 @@ LIST_HEAD(unp_head, unpcb) unp_head = LI struct unp_deferral { SLIST_ENTRY(unp_deferral) ud_link; int ud_n; - /* followed by ud_n struct file * pointers */ - struct file *ud_fp[]; + /* followed by ud_n struct fdpass */ + struct fdpass ud_fp[]; }; /* list of sets of files that were sent over sockets that are now closed */ @@ -664,12 +664,12 @@ unp_externalize(struct mbuf *rights, soc struct proc *p = curproc; /* XXX */ struct cmsghdr *cm = mtod(rights, struct cmsghdr *); int i, *fdp = NULL; - struct file **rp; + struct fdpass *rp; struct file *fp; int nfds, error = 0; nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / - sizeof(struct file *); + sizeof(struct fdpass); if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr))) controllen = 0; else @@ -680,9 +680,10 @@ unp_externalize(struct mbuf *rights, soc } /* Make sure the recipient should be able to see the descriptors.. */ - rp = (struct file **)CMSG_DATA(cm); + rp = (struct fdpass *)CMSG_DATA(cm); for (i = 0; i < nfds; i++) { - fp = *rp++; + fp = rp->fp; + rp++; error = pledge_recvfd(p, fp); if (error) break; @@ -709,7 +710,7 @@ restart: fdplock(p->p_fd); if (error != 0) { if (nfds > 0) { - rp = ((struct file **)CMSG_DATA(cm)); + rp = ((struct fdpass *)CMSG_DATA(cm)); unp_discard(rp, nfds); } goto out; @@ -719,7 +720,7 @@ restart: * First loop -- allocate file descriptor table slots for the * new descriptors. */ - rp = ((struct file **)CMSG_DATA(cm)); + rp = ((struct fdpass *)CMSG_DATA(cm)); for (i = 0; i < nfds; i++) { if ((error = fdalloc(p, 0, [i])) != 0) { /* @@ -748,7 +749,8 @@ restart: * fdalloc() works properly.. We finalize it all * in the loop below. */ - p->p_fd->fd_ofiles[fdp[i]] = *rp++; + p->p_fd->fd_ofiles[fdp[i]] = rp->fp; + rp++; if (flags & MSG_CMSG_CLOEXEC) p->p_fd->fd_ofileflags[fdp[i]] |= UF_EXCLOSE; @@ -758,11 +760,12 @@ restart: * Now that adding them has succeeded, update all of the * descriptor passing state. */ - rp = (struct file **)CMSG_DATA(cm); + rp = (struct fdpass *)CMSG_DATA(cm); for (i = 0; i < nfds; i++) { struct unpcb *unp; - fp = *rp++; + fp = rp->fp; + rp++; if ((unp = fptounp(fp)) != NULL) unp->unp_msgcount--; unp_rights--; @@ -787,7 +790,8 @@ unp_internalize(struct mbuf *control, st { struct filedesc *fdp = p->p_fd; struct cmsghdr *cm = mtod(control, struct cmsghdr *); - struct file **rp, *fp; + struct fdpass *rp; + struct file *fp; struct unpcb *unp; int i, error; int nfds, *ip, fd, neededspace; @@ -807,7 +811,7 @@ unp_internalize(struct mbuf *control, st /* Make sure we have room for the struct file pointers */ morespace: - neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - + neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) - control->m_len; if (neededspace > M_TRAILINGSPACE(control)) { char *tmp; @@ -834,11 +838,11 @@ morespace: } /* adjust message & mbuf to note amount of space actually used. */ - cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); - control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); + cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass)); + control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass)); ip = ((int *)CMSG_DATA(cm)) + nfds - 1; - rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; + rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1; for (i = 0; i < nfds; i++) { memcpy(, ip,
Re: pfind(9) -> tfind(9)
On Tue, 24 Jan 2017, Martin Pieuchot wrote: > We live in a thread world! Let's face it. > > ok? Heh, I lost track that I managed to get it down to such a small set. ok on the rename, just some small manpage wankery: > --- /dev/null 1 Jan 1970 00:00:00 - > +++ share/man/man9/tfind.924 Jan 2017 00:07:27 - ... > +.Sh SYNOPSIS > +.In sys/cdefs.h > +.In sys/proc.h This should be .In sys/types.h .In sys/signal.h .In sys/proc.h (never ) > +.Ft "struct proc *" > +.Fn tfind "pid_t tid" Let's add prfind() too: .Ft "struct process *" .Fn prfind "pid_t pid" > +.Ft "struct pgrp *" > +.Fn pgfind "pid_t pgid" > +.Sh DESCRIPTION > +The > +.Fn tfind .Fn tfind , .Fn prfind , > +.Fn pgfind > +functions retrieve thread and progress group structures from thread and functions retrieve thread, process, and process group structures from thread, process, and process group IDs, respectively. > +.Pp > +Both functions return These functions return
Re: NET_LOCK() ordering issue
On Tue, Jan 24, 2017 at 01:48:49AM +0100, Alexander Bluhm wrote: > On Tue, Jan 24, 2017 at 10:01:02AM +1000, Martin Pieuchot wrote: > > Updated diff, thanks for your review. > > > @@ -360,24 +358,20 @@ redo: > > error = soaccept(so, nam); > > if (!error && name != NULL) > > error = copyaddrout(p, nam, name, namelen, anamelen); > > - > > + if (!error) { > > + (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); > > + FILE_SET_MATURE(fp, p); > > + *retval = tmpfd; > > + } > > +out: > > Perhaps a goto out would be nicer than two if (!error). No, there is a error = copyaddrout() assignment. Please disregard my comment. > error = soaccept(so, nam); > if (error) > goto out; > if (name != NULL) > error = copyaddrout(p, nam, name, namelen, anamelen); > (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); > FILE_SET_MATURE(fp, p); > *retval = tmpfd; > out: > > anyway OK bluhm@ Still OK bluhm@
Re: rtsock refactoring
On 23/01/17(Mon) 01:18, Claudio Jeker wrote: > [...] > Last bit for now. This is changing the reporting madness. It moves it in > its own function which is called after the big switch statement. > If you hit a bad error in the switch the code should eiter goto fail or > flush. > The new function rt_report is actually constructing the rt message out of > a rtentry. It does not magically update the message that was passed in > from userland. I think this is much safer and works better. [...] > + /* > + * From here on these vars need to be valid > + * rt, rtm, error, so, m, tableid, sa_family Why not use KASSERT() rather than a comment? ok mpi@ either way .
Re: fd-passing: convert internalized format to a struct
On 23/01/17(Mon) 16:05, Theo de Raadt wrote: > File descriptor passing internalizes process fd's to an array of > struct file *. This results in tricky pointer management. > > How about passing it as an array of structs instead. Only one field > inside for now, struct file *. However soon I'll need to pass additional > fields along with the file, and I didn't want to use the low bits of the > pointer :) Less magic is good. If you're only going to use 'struct fdpass' inside kern/uipc_usrreq.c I'd suggest to declare it there, to avoid namespace pollution. One nit below, with that ok mpi@ > @@ -911,7 +915,7 @@ unp_gc(void *arg __unused) > unp_rights--; > (void) closef(fp, NULL); > } > - free(defer, M_TEMP, sizeof(*defer) + sizeof(fp) * defer->ud_n); > + free(defer, M_TEMP, sizeof(*defer) + sizeof(struct fdpass) * > defer->ud_n); ^ Line too long.
Re: rtsock refactoring
On Mon, Jan 23, 2017 at 04:56:02PM +0100, Alexander Bluhm wrote: > On Mon, Jan 23, 2017 at 01:18:05AM +0100, Claudio Jeker wrote: > > Last bit for now. This is changing the reporting madness. It moves it in > > its own function which is called after the big switch statement. > > If you hit a bad error in the switch the code should eiter goto fail or > > flush. > > The new function rt_report is actually constructing the rt message out of > > a rtentry. It does not magically update the message that was passed in > > from userland. I think this is much safer and works better. > > OK bluhm@ > > > +rt_report(struct rtentry *rt, u_char type, int seq, int tableid) > ... > > + ifp = if_get(rt->rt_ifidx); > > + if (ifp != NULL) { > > + info.rti_info[RTAX_IFP] = sdltosa(ifp->if_sadl); > > + info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr; > > + if (ifp->if_flags & IFF_POINTOPOINT) > > + info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr; > > + } > > + if_put(ifp); > > The if_put() could be in the if (ifp != NULL) block. > While true, I like the symetry of if_get and if_put. Makes sure the if_put is not lost in some other later code changes. > > flush: > > + if (rt) > > + rtfree(rt); > > rtfree() checks (rt != NULL) itself. > Thanks, will change that. -- :wq Claudio
Re: Help for ddb(4)'s "tr /p"
On Tue, 24 Jan 2017, Martin Pieuchot wrote: > Now that pfind(9) takes tid we need a way to show TID in ddb(4) ps > output. Otherwise it's hard to use "ps /p" You mean "tr /p", right? > Here's the difference: > > Before: >PID PPID PGRPUID S FLAGS WAIT COMMAND > *50672 7 50672 0 7 0x3sysctl > 7 1 7 0 30x10008b pause ksh > 90555 1 90555 0 30x100098 poll cron > > After: >PID TID PPID PGRPUID S FLAGS WAIT COMMAND > *52636 496486 51956 52636 0 7 0x3sysctl > 51956 407679 1 51956 0 30x10008b pause ksh > 42726 321266 1 42726 0 30x100098 poll cron > > ok? Hmm, ps/n (the default) is the only of the ps subcommands to not show the TID...so that it can show more columns of COMMAND. You've added 8 columns without shrinking any of the existing ones, which will cause an unreadable mess when it wraps around. Maybe drop PGRP from /n and we add a new one that's more like (userspace) ps -j ? The outer two chunks here are clearly correct, regardless of how we bikeshed ps: > --- share/man/man4/ddb.4 1 Sep 2016 12:24:56 - 1.82 > +++ share/man/man4/ddb.4 23 Jan 2017 23:38:09 - > @@ -542,7 +542,7 @@ The > .Cm /p > modifier interprets the > .Ar frameaddr > -argument as the PID of a process and shows the stack trace of > +argument as the TID of a process and shows the stack trace of > that process. > The > .Cm /p ... > @@ -928,7 +928,7 @@ A synonym for the > .Ic show all callout > command. > .\" > -.It Ic ps Op Cm /anw > +.It Ic ps Op Cm /anow > A synonym for > .Ic show all procs . > .\" > >
refactor PF option parsing loops
Hi, PF implements six distinct TCP option parsing loops. This patch converts these to one inline function in pfvar_priv.h, normalises their semantics, and strips ~100 lines. I've laid out the existing semantics below. The new loop implements the timestamp parser's semantics of "(s-b) (v-3) (t-a) (t-b)". As a result, - MSS and WSCALE parsers are stricter about candidate option len. - MSS normalization is more tolerant of malformed option lists. These changes were immaterial to the live traffic I've examined (thanks to Richard Cziva for help in gathering the data). I'd like test reports before committing. comments, oks? best, Richard. * postcondition on identity (return correct option) "validity" - 'r[]' contains the returned option. r[0] = TYPE /\ MIN_TYPELEN >= 2 /\ MAX_TYPELEN = r[1] = MIN_TYPELEN (v-1) (fixed-len opt) or MAX_TYPELEN >= r[1] >= MIN_TYPELEN (v-2) (weaker) or r[1] >= MIN_TYPELEN (v-3) (weaker) or r[1] >= 2(v-4) (weaker) or true (v-5) (weakest) * postcondition on (eoh - opt) (ensure buffer safety) "safety" - 'eoh' points to one past the end of option header - 'opt' points to returned option (eoh - opt) >= r[1] (s-a) (eoh - opt) >= MIN_TYPELEN (s-b) * parsing (tolerance of malformed headers) "tolerance" r[0] = EOL=> continue with r[1] := 1 (t-b) r[1] < 2 => continue with r[1] := 2 (t-a) r[1] < 2 => break (t-d) (eoh - opt) < r[1]=> break (t-c) * Analysis of existing parsers safety validity tolerance [TS:fixed:10] pf_norm.c:pf_normalize_tcp_init(): (s-b) (v-3) (t-a) (t-b) [TS:fixed:10] pf_norm.c:pf_normalize_tcp_stateful(): (s-b) (v-3) (t-a) (t-b) [SACK:var:2 + (8*n)] pf.c:pf_modulate_sack(): (s-a) (s-b) (v-3) (t-a) (t-b) [MSS:fixed:4] pf_norm.c:pf_normalize_mss(): (s-a) (s-b) (v-4) (t-c) (t-d) [MSS:fixed:4] pf.c:pf_get_mss(): (s-b) (v-5) (t-a) (t-b) [WSCL:fixed:3] pf.c:pf_get_wscale(): (s-b) (v-5) (t-a) (t-b) --- Index: net/pf_norm.c === --- net.orig/pf_norm.c +++ net/pf_norm.c @@ -901,8 +901,9 @@ pf_normalize_tcp_init(struct pf_pdesc *p { struct tcphdr *th = >hdr.tcp; u_int32_ttsval, tsecr; - u_int8_t hdr[60]; - u_int8_t*opt; + int olen; + u_int8_t opts[MAX_TCPOPTLEN], *opt = opts; + KASSERT(src->scrub == NULL); @@ -935,44 +936,28 @@ pf_normalize_tcp_init(struct pf_pdesc *p if ((th->th_flags & TH_SYN) == 0) return (0); - if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && - pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, - pd->af)) { - /* Diddle with TCP options */ - int hlen; + if (src->scrub == NULL) + return (0); - opt = hdr + sizeof(struct tcphdr); - hlen = (th->th_off << 2) - sizeof(struct tcphdr); - while (hlen >= TCPOLEN_TIMESTAMP) { - switch (*opt) { - case TCPOPT_EOL:/* FALLTHROUGH */ - case TCPOPT_NOP: - opt++; - hlen--; - break; - case TCPOPT_TIMESTAMP: - if (opt[1] >= TCPOLEN_TIMESTAMP) { - src->scrub->pfss_flags |= - PFSS_TIMESTAMP; - src->scrub->pfss_ts_mod = arc4random(); - - /* note PFSS_PAWS not set yet */ - memcpy(, [2], - sizeof(u_int32_t)); - memcpy(, [6], - sizeof(u_int32_t)); - src->scrub->pfss_tsval0 = ntohl(tsval); - src->scrub->pfss_tsval = ntohl(tsval); - src->scrub->pfss_tsecr = ntohl(tsecr); - getmicrouptime(>scrub->pfss_last); - } - /* FALLTHROUGH */ - default: - hlen -= MAX(opt[1], 2); - opt += MAX(opt[1], 2); -
Re: refactor PF option parsing loops
PS Find this patch broken down for easier review here: http://203.79.107.124/opts/ On Tue, 24 Jan 2017, Richard Procter wrote: > Hi, > > PF implements six distinct TCP option parsing loops. This patch converts > these to one inline function in pfvar_priv.h, normalises their semantics, > and strips ~100 lines.
Re: NET_LOCK() ordering issue
On Sat, Jan 21, 2017 at 10:46:42AM +1000, Martin Pieuchot wrote: > Here's another way to fix the problem, call falloc() before grabbing the > NET_LOCK(). > > Comments? There a two bugs in the "goto redo" block that is not part of the diff. You could do a m_freem(nam), then goto redo, get an error and goto out. This will result in a double m_freem(nam). Also as you have moved the falloc() before the redo label, you must not fdremove() and closef() before goto redo. You you use only if (error) and if (!error) and not mix it with (error != 0) and (error == 0)? That makes checking the error paths easier. bluhm > > Index: kern/uipc_syscalls.c > === > RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v > retrieving revision 1.144 > diff -u -p -r1.144 uipc_syscalls.c > --- kern/uipc_syscalls.c 29 Dec 2016 12:12:43 - 1.144 > +++ kern/uipc_syscalls.c 21 Jan 2017 00:38:18 - > @@ -265,7 +265,7 @@ doaccept(struct proc *p, int sock, struc > { > struct filedesc *fdp = p->p_fd; > struct file *fp, *headfp; > - struct mbuf *nam; > + struct mbuf *nam = NULL; > socklen_t namelen; > int error, s, tmpfd; > struct socket *head, *so; > @@ -277,19 +277,35 @@ doaccept(struct proc *p, int sock, struc > return (error); > > headfp = fp; > + head = headfp->f_data; > + > + fdplock(fdp); > + error = falloc(p, , ); > + if (error == 0 && (flags & SOCK_CLOEXEC)) > + fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; > + fdpunlock(fdp); > + if (error != 0) { > + /* > + * Probably ran out of file descriptors. Wakeup > + * so some other process might have a chance at it. > + */ > + wakeup_one(>so_timeo); > + FRELE(headfp, p); > + return (error); > + } > + > redo: > NET_LOCK(s); > - head = headfp->f_data; > if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { > error = EINVAL; > - goto bad; > + goto out; > } > if ((head->so_state & SS_NBIO) && head->so_qlen == 0) { > if (head->so_state & SS_CANTRCVMORE) > error = ECONNABORTED; > else > error = EWOULDBLOCK; > - goto bad; > + goto out; > } > while (head->so_qlen == 0 && head->so_error == 0) { > if (head->so_state & SS_CANTRCVMORE) { > @@ -298,34 +314,19 @@ redo: > } > error = tsleep(>so_timeo, PSOCK | PCATCH, > "netcon", 0); > - if (error) { > - goto bad; > - } > + if (error) > + goto out; > } > if (head->so_error) { > error = head->so_error; > head->so_error = 0; > - goto bad; > + goto out; > } > > /* Figure out whether the new socket should be non-blocking. */ > nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK) > : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); > > - fdplock(fdp); > - error = falloc(p, , ); > - if (error == 0 && (flags & SOCK_CLOEXEC)) > - fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; > - fdpunlock(fdp); > - if (error != 0) { > - /* > - * Probably ran out of file descriptors. Wakeup > - * so some other process might have a chance at it. > - */ > - wakeup_one(>so_timeo); > - goto bad; > - } > - > nam = m_get(M_WAIT, MT_SONAME); > > /* > @@ -360,24 +361,20 @@ redo: > error = soaccept(so, nam); > if (!error && name != NULL) > error = copyaddrout(p, nam, name, namelen, anamelen); > - > - if (error) { > - /* if an error occurred, free the file descriptor */ > - NET_UNLOCK(s); > - m_freem(nam); > + if (!error) { > + (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); > + FILE_SET_MATURE(fp, p); > + *retval = tmpfd; > + } > +out: > + NET_UNLOCK(s); > + m_freem(nam); > + if (error != 0) { > fdplock(fdp); > fdremove(fdp, tmpfd); > closef(fp, p); > fdpunlock(fdp); > - goto out; > } > - (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); > - FILE_SET_MATURE(fp, p); > - *retval = tmpfd; > - m_freem(nam); > -bad: > - NET_UNLOCK(s); > -out: > FRELE(headfp, p); > return (error); > }
Re: NET_LOCK() ordering issue
On 24/01/17(Tue) 00:51, Alexander Bluhm wrote: > On Sat, Jan 21, 2017 at 10:46:42AM +1000, Martin Pieuchot wrote: > > Here's another way to fix the problem, call falloc() before grabbing the > > NET_LOCK(). > > > > Comments? > > There a two bugs in the "goto redo" block that is not part of the > diff. You could do a m_freem(nam), then goto redo, get an error > and goto out. This will result in a double m_freem(nam). Ok, I include this fix. > Also as you have moved the falloc() before the redo label, you must > not fdremove() and closef() before goto redo. Nice catch. > You you use only if (error) and if (!error) and not mix it with > (error != 0) and (error == 0)? That makes checking the error paths > easier. Fine. Updated diff, thanks for your review. Index: kern/uipc_syscalls.c === RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.144 diff -u -p -r1.144 uipc_syscalls.c --- kern/uipc_syscalls.c29 Dec 2016 12:12:43 - 1.144 +++ kern/uipc_syscalls.c23 Jan 2017 23:59:34 - @@ -265,7 +265,7 @@ doaccept(struct proc *p, int sock, struc { struct filedesc *fdp = p->p_fd; struct file *fp, *headfp; - struct mbuf *nam; + struct mbuf *nam = NULL; socklen_t namelen; int error, s, tmpfd; struct socket *head, *so; @@ -277,19 +277,35 @@ doaccept(struct proc *p, int sock, struc return (error); headfp = fp; + head = headfp->f_data; + + fdplock(fdp); + error = falloc(p, , ); + if (!error && (flags & SOCK_CLOEXEC)) + fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; + fdpunlock(fdp); + if (error) { + /* +* Probably ran out of file descriptors. Wakeup +* so some other process might have a chance at it. +*/ + wakeup_one(>so_timeo); + FRELE(headfp, p); + return (error); + } + redo: NET_LOCK(s); - head = headfp->f_data; if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; - goto bad; + goto out; } if ((head->so_state & SS_NBIO) && head->so_qlen == 0) { if (head->so_state & SS_CANTRCVMORE) error = ECONNABORTED; else error = EWOULDBLOCK; - goto bad; + goto out; } while (head->so_qlen == 0 && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -298,34 +314,19 @@ redo: } error = tsleep(>so_timeo, PSOCK | PCATCH, "netcon", 0); - if (error) { - goto bad; - } + if (error) + goto out; } if (head->so_error) { error = head->so_error; head->so_error = 0; - goto bad; + goto out; } /* Figure out whether the new socket should be non-blocking. */ nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK) : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); - fdplock(fdp); - error = falloc(p, , ); - if (error == 0 && (flags & SOCK_CLOEXEC)) - fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; - fdpunlock(fdp); - if (error != 0) { - /* -* Probably ran out of file descriptors. Wakeup -* so some other process might have a chance at it. -*/ - wakeup_one(>so_timeo); - goto bad; - } - nam = m_get(M_WAIT, MT_SONAME); /* @@ -336,10 +337,7 @@ redo: if (head->so_qlen == 0) { NET_UNLOCK(s); m_freem(nam); - fdplock(fdp); - fdremove(fdp, tmpfd); - closef(fp, p); - fdpunlock(fdp); + nam = NULL; goto redo; } @@ -360,24 +358,20 @@ redo: error = soaccept(so, nam); if (!error && name != NULL) error = copyaddrout(p, nam, name, namelen, anamelen); - + if (!error) { + (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); + FILE_SET_MATURE(fp, p); + *retval = tmpfd; + } +out: + NET_UNLOCK(s); + m_freem(nam); if (error) { - /* if an error occurred, free the file descriptor */ - NET_UNLOCK(s); - m_freem(nam); fdplock(fdp); fdremove(fdp, tmpfd); closef(fp, p); fdpunlock(fdp); - goto out; } - (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); - FILE_SET_MATURE(fp, p); - *retval = tmpfd; -
Re: NET_LOCK() ordering issue
On Tue, Jan 24, 2017 at 10:01:02AM +1000, Martin Pieuchot wrote: > Updated diff, thanks for your review. > @@ -360,24 +358,20 @@ redo: > error = soaccept(so, nam); > if (!error && name != NULL) > error = copyaddrout(p, nam, name, namelen, anamelen); > - > + if (!error) { > + (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); > + FILE_SET_MATURE(fp, p); > + *retval = tmpfd; > + } > +out: Perhaps a goto out would be nicer than two if (!error). error = soaccept(so, nam); if (error) goto out; if (name != NULL) error = copyaddrout(p, nam, name, namelen, anamelen); (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); FILE_SET_MATURE(fp, p); *retval = tmpfd; out: anyway OK bluhm@
Re: global mbuf memory limit
On Wed, Dec 14, 2016 at 03:52:32PM +1000, David Gwynne wrote: > > Wouldn't it make sense to use atomic operations to keep track of the > > amount of memory that was allocated? > > the mtx may be slow, but it is a slow path id like to keep simple. Is allocate and free of mbuf clusters in the slow path? I would expect that it has to be done for every packet. An atomic operation would not solve the slowdown in the fast path, perhaps a multi-cpu counter? > > Long run I suppose we want to drop nmbclust and let users tune the > > total amount of memory available for clusters and set the initial > > amount to a percentage of physical memory? > > i do want to move to specifying memory in terms of bytes instead of 2k > clusters. That might be better. But leave it as it is for now. Percentage of physical memory is bad as we have architectures that cannot do DMA everywhere. > how much should be available by default is a complicated issue. Yes. Maybe we have to reconsier the default. We had it per cluster size before, now the limit is for all clusters. > >> void > >> mbcpuinit() > >> { > >> + int i; > >> + > >>mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF); > >> + > >> + pool_cache_init(); > >> + pool_cache_init(); > >> + > >> + for (i = 0; i < nitems(mclsizes); i++) > >> + pool_cache_init([i]); > >> } Is the pool cache related to the global mbuf limit? Apart from the problem that I don't know wether the mutex kills performance, the diff looks good. bluhm
Re: rtsock refactoring
On Tue, Jan 24, 2017 at 08:54:23AM +1000, Martin Pieuchot wrote: > On 23/01/17(Mon) 01:18, Claudio Jeker wrote: > > [...] > > Last bit for now. This is changing the reporting madness. It moves it in > > its own function which is called after the big switch statement. > > If you hit a bad error in the switch the code should eiter goto fail or > > flush. > > The new function rt_report is actually constructing the rt message out of > > a rtentry. It does not magically update the message that was passed in > > from userland. I think this is much safer and works better. > > [...] > > > + /* > > +* From here on these vars need to be valid > > +* rt, rtm, error, so, m, tableid, sa_family > > Why not use KASSERT() rather than a comment? I could not come up easily with proper KASSERT() statements. Will ponder a bit more about this. > > ok mpi@ either way . > -- :wq Claudio
[WWW] Reverse chronological order for faq/current.html
Hi all, As faq/current.html[0] grows, each major change is being added at the very bottom, chronologically. There already are several other pages where this kind of ordering makes sense, i.e. innovations.html[1]. Given the "current" (unintentional pun) nature of changes on the aforementioned page, it seem like reverse chronological order would suit it better, as is the case with, i.e. events.html[2]. The proposed change has a clear benefit to those following -current - i.e. not having to scroll all the way to the bottom of the page (whichever way one does it) to get the information about the most recent changes - since they would always be available at the very top. Obviously, doing it now, before the -release has been cut, doesn't make any sense whatsoever, but the change could be implemented when faq/current.html gets a reset... perhaps even with a style guide as it is done in events.html? What do people think about introducing such change? Regards, Raf [0] https://www.openbsd.org/faq/current.html [1] https://www.openbsd.org/innovations.html [2] https://www.openbsd.org/events.html
Help for ddb(4)'s "ps /p"
Now that pfind(9) takes tid we need a way to show TID in ddb(4) ps output. Otherwise it's hard to use "ps /p" Diff below does that and fix some documentation about "show all procs". pfind(9) manual still needs a fix, and we can all decide afterward if we should rename "/p" into "/t". Here's the difference: Before: PID PPID PGRPUID S FLAGS WAIT COMMAND *50672 7 50672 0 7 0x3sysctl 7 1 7 0 30x10008b pause ksh 90555 1 90555 0 30x100098 poll cron After: PID TID PPID PGRPUID S FLAGS WAIT COMMAND *52636 496486 51956 52636 0 7 0x3sysctl 51956 407679 1 51956 0 30x10008b pause ksh 42726 321266 1 42726 0 30x100098 poll cron ok? Index: sys/kern/kern_proc.c === RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_proc.c --- sys/kern/kern_proc.c21 Jan 2017 05:42:03 - 1.72 +++ sys/kern/kern_proc.c23 Jan 2017 23:47:49 - @@ -471,8 +471,8 @@ db_show_all_procs(db_expr_t addr, int ha "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP"); break; case 'n': - db_printf(" PID %5s %5s %5s S %10s %-12s %-16s\n", - "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND"); + db_printf(" PID %6s %5s %5s %5s S %10s %-12s %-16s\n", + "TID", "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND"); break; case 'w': db_printf(" TID %-16s %-8s %18s %s\n", @@ -497,8 +497,14 @@ db_show_all_procs(db_expr_t addr, int ha ci_schedstate.spc_idleproc == p) continue; } - db_printf("%c%5d ", p == curproc ? '*' : ' ', - *mode == 'n' ? pr->ps_pid : p->p_tid); + + if (*mode == 'n') { + db_printf("%c%5d ", (p == curproc ? + '*' : ' '), pr->ps_pid); + } else { + db_printf("%c%6d ", (p == curproc ? + '*' : ' '), p->p_tid); + } switch (*mode) { @@ -508,9 +514,9 @@ db_show_all_procs(db_expr_t addr, int ha break; case 'n': - db_printf("%5d %5d %5d %d %#10x " - "%-12.12s %-16s\n", - ppr ? ppr->ps_pid : -1, + db_printf("%6d %5d %5d %5d %d " + "%#10x %-12.12s %-16s\n", + p->p_tid, ppr ? ppr->ps_pid : -1, pr->ps_pgrp ? pr->ps_pgrp->pg_id : -1, pr->ps_ucred->cr_ruid, p->p_stat, p->p_flag | pr->ps_flags, Index: share/man/man4/ddb.4 === RCS file: /cvs/src/share/man/man4/ddb.4,v retrieving revision 1.82 diff -u -p -r1.82 ddb.4 --- share/man/man4/ddb.41 Sep 2016 12:24:56 - 1.82 +++ share/man/man4/ddb.423 Jan 2017 23:38:09 - @@ -542,7 +542,7 @@ The .Cm /p modifier interprets the .Ar frameaddr -argument as the PID of a process and shows the stack trace of +argument as the TID of a process and shows the stack trace of that process. The .Cm /p @@ -832,7 +832,7 @@ Display information on all processes. (Default) Show process information in a .Xr ps 1 Ns \&-like format. -Information printed includes thread ID, parent +Information printed includes process ID, thread ID, parent process ID, process group, UID, process status, process flags, process command name, and process wait channel message. .It Cm /a @@ -928,7 +928,7 @@ A synonym for the .Ic show all callout command. .\" -.It Ic ps Op Cm /anw +.It Ic ps Op Cm /anow A synonym for .Ic show all procs . .\"
pfind(9) -> tfind(9)
We live in a thread world! Let's face it. ok? Index: sys/arch/amd64/amd64/db_trace.c === RCS file: /cvs/src/sys/arch/amd64/amd64/db_trace.c,v retrieving revision 1.23 diff -u -p -r1.23 db_trace.c --- sys/arch/amd64/amd64/db_trace.c 16 Sep 2016 19:13:16 - 1.23 +++ sys/arch/amd64/amd64/db_trace.c 24 Jan 2017 00:10:47 - @@ -181,7 +181,7 @@ db_stack_trace_print(db_expr_t addr, boo callpc = (db_addr_t)ddb_regs.tf_rip; } else { if (trace_proc) { - struct proc *p = pfind((pid_t)addr); + struct proc *p = tfind((pid_t)addr); if (p == NULL) { (*pr) ("not found\n"); return; Index: sys/arch/arm/arm/db_trace.c === RCS file: /cvs/src/sys/arch/arm/arm/db_trace.c,v retrieving revision 1.9 diff -u -p -r1.9 db_trace.c --- sys/arch/arm/arm/db_trace.c 20 Sep 2016 08:35:25 - 1.9 +++ sys/arch/arm/arm/db_trace.c 24 Jan 2017 00:10:47 - @@ -104,7 +104,7 @@ db_stack_trace_print(db_expr_t addr, int struct proc *p; struct user *u; (*pr) ("trace: pid %d ", (int)addr); - p = pfind(addr); + p = tfind(addr); if (p == NULL) { (*pr)("not found\n"); return; Index: sys/arch/i386/i386/db_trace.c === RCS file: /cvs/src/sys/arch/i386/i386/db_trace.c,v retrieving revision 1.22 diff -u -p -r1.22 db_trace.c --- sys/arch/i386/i386/db_trace.c 18 Sep 2016 13:38:01 - 1.22 +++ sys/arch/i386/i386/db_trace.c 24 Jan 2017 00:10:47 - @@ -195,7 +195,7 @@ db_stack_trace_print(db_expr_t addr, boo } else if (trace_thread) { (*pr) ("%s: can't trace thread\n", __func__); } else if (trace_proc) { - struct proc *p = pfind((pid_t)addr); + struct proc *p = tfind((pid_t)addr); if (p == NULL) { (*pr) ("not found\n"); return; Index: sys/arch/powerpc/ddb/db_trace.c === RCS file: /cvs/src/sys/arch/powerpc/ddb/db_trace.c,v retrieving revision 1.10 diff -u -p -r1.10 db_trace.c --- sys/arch/powerpc/ddb/db_trace.c 10 Sep 2016 06:36:26 - 1.10 +++ sys/arch/powerpc/ddb/db_trace.c 24 Jan 2017 00:10:47 - @@ -134,7 +134,7 @@ db_stack_trace_print(db_expr_t addr, int lr = ddb_regs.srr0; } else { if (trace_proc) { - struct proc *p = pfind((pid_t)addr); + struct proc *p = tfind((pid_t)addr); if (p == NULL) { (*pr) ("not found\n"); return; Index: sys/arch/sparc64/sparc64/db_trace.c === RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_trace.c,v retrieving revision 1.12 diff -u -p -r1.12 db_trace.c --- sys/arch/sparc64/sparc64/db_trace.c 11 Sep 2016 03:14:04 - 1.12 +++ sys/arch/sparc64/sparc64/db_trace.c 24 Jan 2017 00:10:47 - @@ -78,7 +78,7 @@ db_stack_trace_print(db_expr_t addr, int struct proc *p; struct user *u; (*pr)("trace: pid %d ", (int)addr); - p = pfind(addr); + p = tfind(addr); if (p == NULL) { (*pr)("not found\n"); return; Index: sys/kern/kern_fork.c === RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.192 diff -u -p -r1.192 kern_fork.c --- sys/kern/kern_fork.c7 Nov 2016 00:26:32 - 1.192 +++ sys/kern/kern_fork.c24 Jan 2017 00:04:58 - @@ -559,7 +559,7 @@ alloctid(void) do { /* (0 .. TID_MASK+1] */ tid = 1 + (arc4random() & TID_MASK); - } while (pfind(tid) != NULL); + } while (tfind(tid) != NULL); return (tid); } Index: sys/kern/kern_proc.c === RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_proc.c --- sys/kern/kern_proc.c21 Jan 2017 05:42:03 - 1.72 +++ sys/kern/kern_proc.c24 Jan 2017 00:05:06 - @@ -169,7 +169,7 @@ inferior(struct process *pr, struct proc * Locate a proc (thread) by number */ struct proc * -pfind(pid_t tid) +tfind(pid_t tid) { struct proc *p; Index: sys/kern/kern_sig.c
let bfd survive link-down
When an interface loses link, we delete all directly connected and cloned routes. However, this means we also lose any BFD configuration on those routes. Surviving link-down is pretty much mandatory for BFD to work. So instead, I bypass the route deletion, and clean up the link-local gateway information. $ arp -n | grep 203.0.113.9 203.0.113.9 (incomplete) em1 expired $ netstat -rnf inet | grep 203.0.113.9 203.0.113.9link#2 HLcF 2 53 - 3 em1 OK? Index: sys/net/route.c === RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.347 diff -u -p -u -p -r1.347 route.c --- sys/net/route.c 20 Jan 2017 08:10:54 - 1.347 +++ sys/net/route.c 24 Jan 2017 01:32:23 - @@ -1760,13 +1760,28 @@ rt_if_linkstate_change(struct rtentry *r * new routes from a better source. */ if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) && - !ISSET(rt->rt_flags, RTF_CACHED)) { + !ISSET(rt->rt_flags, RTF_CACHED) +#ifdef BFD + && !ISSET(rt->rt_flags, RTF_BFD) +#endif + ) { int error; if ((error = rtdeletemsg(rt, ifp, id))) return (error); return (EAGAIN); } +#ifdef BFD + if (ISSET(rt->rt_flags, RTF_BFD) && + ISSET(rt->rt_flags, RTF_CLONED) && + (rt->rt_gateway->sa_family == AF_LINK)) { + struct sockaddr_dl *sdl; + sdl = (struct sockaddr_dl *)rt->rt_gateway; + memset(sdl->sdl_data, 0, sdl->sdl_alen); + sdl->sdl_alen = 0; + rt->rt_expire = time_uptime; + } +#endif /* take route down */ rt->rt_flags &= ~RTF_UP; rtable_mpath_reprio(id, rt_key(rt), -- Show me a man who is a good loser and I'll show you a man who is playing golf with his boss.
Re: [WWW] Reverse chronological order for faq/current.html
> As faq/current.html[0] grows, each major change is being added at > the very bottom, chronologically. There already are several other > pages where this kind of ordering makes sense, i.e. innovations.html[1]. > > Given the "current" (unintentional pun) nature of changes on the > aforementioned page, it seem like reverse chronological order would > suit it better, as is the case with, i.e. events.html[2]. This page includes remedial actions a current-follower needs, which are generally cut A reader decides "Where was I last time", then would intuitively move forward. The proposal doesn't make sense to me.
Re: Help for ddb(4)'s "tr /p"
On 24/01/17(Tue) 10:08, Philip Guenther wrote: > On Tue, 24 Jan 2017, Martin Pieuchot wrote: > > Now that pfind(9) takes tid we need a way to show TID in ddb(4) ps > > output. Otherwise it's hard to use "ps /p" > > You mean "tr /p", right? I do :) > Hmm, ps/n (the default) is the only of the ps subcommands to not show the > TID...so that it can show more columns of COMMAND. You've added 8 columns > without shrinking any of the existing ones, which will cause an unreadable > mess when it wraps around. Maybe drop PGRP from /n and we add a new one > that's more like (userspace) ps -j ? Here's the updated output. PID TID PPIDUID S FLAGS WAIT COMMAND *97567 517976 11478 0 7 0x3sysctl 11478 302289 1 0 30x10008b pause ksh 68659 470461 1 0 30x100098 poll cron I like the idea of "ps /j", I could also merge the information in the current "ps /w" since EMUL is always 'native' now. What do you think? ok for the diff below? Index: sys/kern/kern_proc.c === RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.73 diff -u -p -r1.73 kern_proc.c --- sys/kern/kern_proc.c24 Jan 2017 00:58:55 - 1.73 +++ sys/kern/kern_proc.c24 Jan 2017 01:36:05 - @@ -471,8 +471,8 @@ db_show_all_procs(db_expr_t addr, int ha "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP"); break; case 'n': - db_printf(" PID %5s %5s %5s S %10s %-12s %-16s\n", - "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND"); + db_printf(" PID %6s %5s %5s S %10s %-12s %-16s\n", + "TID", "PPID", "UID", "FLAGS", "WAIT", "COMMAND"); break; case 'w': db_printf(" TID %-16s %-8s %18s %s\n", @@ -497,8 +497,14 @@ db_show_all_procs(db_expr_t addr, int ha ci_schedstate.spc_idleproc == p) continue; } - db_printf("%c%5d ", p == curproc ? '*' : ' ', - *mode == 'n' ? pr->ps_pid : p->p_tid); + + if (*mode == 'n') { + db_printf("%c%5d ", (p == curproc ? + '*' : ' '), pr->ps_pid); + } else { + db_printf("%c%6d ", (p == curproc ? + '*' : ' '), p->p_tid); + } switch (*mode) { @@ -508,10 +514,9 @@ db_show_all_procs(db_expr_t addr, int ha break; case 'n': - db_printf("%5d %5d %5d %d %#10x " + db_printf("%6d %5d %5d %d %#10x " "%-12.12s %-16s\n", - ppr ? ppr->ps_pid : -1, - pr->ps_pgrp ? pr->ps_pgrp->pg_id : -1, + p->p_tid, ppr ? ppr->ps_pid : -1, pr->ps_ucred->cr_ruid, p->p_stat, p->p_flag | pr->ps_flags, (p->p_wchan && p->p_wmesg) ? Index: share/man/man4/ddb.4 === RCS file: /cvs/src/share/man/man4/ddb.4,v retrieving revision 1.83 diff -u -p -r1.83 ddb.4 --- share/man/man4/ddb.424 Jan 2017 01:01:33 - 1.83 +++ share/man/man4/ddb.424 Jan 2017 01:45:48 - @@ -833,8 +833,8 @@ Display information on all processes. .Xr ps 1 Ns \&-like format. Information printed includes process ID, thread ID, parent -process ID, process group, UID, process status, process flags, process -command name, and process wait channel message. +process ID, UID, process status, process flags, process +wait channel message and process command name. .It Cm /a Shows the kernel virtual addresses of each process' proc structure, u-area, and vmspace structure.
Re: Help for ddb(4)'s "tr /p"
On Tue, 24 Jan 2017, Martin Pieuchot wrote: > On 24/01/17(Tue) 10:08, Philip Guenther wrote: ... > > Hmm, ps/n (the default) is the only of the ps subcommands to not show > > the TID...so that it can show more columns of COMMAND. You've added 8 > > columns without shrinking any of the existing ones, which will cause > > an unreadable mess when it wraps around. Maybe drop PGRP from /n and > > we add a new one that's more like (userspace) ps -j ? > > Here's the updated output. > >PID TID PPIDUID S FLAGS WAIT COMMAND > *97567 517976 11478 0 7 0x3sysctl > 11478 302289 1 0 30x10008b pause ksh > 68659 470461 1 0 30x100098 poll cron > > > I like the idea of "ps /j", I could also merge the information in the > current "ps /w" since EMUL is always 'native' now. What do you think? Just s/EMUL/PGID/ and wait for someone to need something more? > ok for the diff below? Almost > Index: sys/kern/kern_proc.c > === > RCS file: /cvs/src/sys/kern/kern_proc.c,v > retrieving revision 1.73 > diff -u -p -r1.73 kern_proc.c > --- sys/kern/kern_proc.c 24 Jan 2017 00:58:55 - 1.73 > +++ sys/kern/kern_proc.c 24 Jan 2017 01:36:05 - > @@ -471,8 +471,8 @@ db_show_all_procs(db_expr_t addr, int ha > "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP"); > break; > case 'n': > - db_printf(" PID %5s %5s %5s S %10s %-12s %-16s\n", > - "PPID", "PGRP", "UID", "FLAGS", "WAIT", "COMMAND"); > + db_printf(" PID %6s %5s %5s S %10s %-12s %-16s\n", > + "TID", "PPID", "UID", "FLAGS", "WAIT", "COMMAND"); TID is %6s instead of %5s, so should change %-16s to %-15s for COMMAND, both here and... > case 'n': > - db_printf("%5d %5d %5d %d %#10x " > + db_printf("%6d %5d %5d %d %#10x " > "%-12.12s %-16s\n", here.
document that RES_USE_EDNS0 and RES_USE_DNSSEC currently do nothing
To reduce the risk that others spend hours doing the same excercise that myself (https://marc.info/?l=openbsd-misc=148513378718363), here's a patch that mentiones that RES_USE_EDNS0 and RES_USE_DNSSEC options currently do nothing. Index: lib/libc/net/resolver.3 === RCS file: /cvs/src/lib/libc/net/resolver.3,v retrieving revision 1.33 diff -u -p -r1.33 resolver.3 --- lib/libc/net/resolver.3 16 Dec 2015 18:12:42 - 1.33 +++ lib/libc/net/resolver.3 23 Jan 2017 22:53:58 - @@ -187,11 +187,11 @@ This informs DNS servers of a client's r allowing them to take advantage of a non-default receive buffer size, and thus to send larger replies. DNS query packets with the EDNS0 extension are not compatible with -non-EDNS0 DNS servers. +non-EDNS0 DNS servers. At the moment this option does nothing. .It Dv RES_USE_DNSSEC Request that the resolver uses Domain Name System Security Extensions (DNSSEC), -as defined in RFCs 4033, 4034, and 4035. +as defined in RFCs 4033, 4034, and 4035. At the moment this option does nothing. .El .Pp The
httpd TLS ticket support
Since I just added ticket support to libtls here is a diff to enable it in httpd. Cheers -- :wq Claudio Index: config.c === RCS file: /cvs/src/usr.sbin/httpd/config.c,v retrieving revision 1.50 diff -u -p -r1.50 config.c --- config.c6 Nov 2016 10:49:38 - 1.50 +++ config.c22 Jan 2017 02:02:03 - @@ -146,6 +146,7 @@ config_getcfg(struct httpd *env, struct memcpy(, imsg->data, sizeof(cf)); env->sc_opts = cf.cf_opts; env->sc_flags = cf.cf_flags; + memcpy(env->sc_tls_sid, cf.cf_tls_sid, sizeof(env->sc_tls_sid)); what = ps->ps_what[privsep_process]; Index: httpd.c === RCS file: /cvs/src/usr.sbin/httpd/httpd.c,v retrieving revision 1.63 diff -u -p -r1.63 httpd.c --- httpd.c 9 Jan 2017 14:49:22 - 1.63 +++ httpd.c 23 Jan 2017 04:18:44 - @@ -57,6 +57,7 @@ intparent_dispatch_server(int, struct struct imsg *); int parent_dispatch_logger(int, struct privsep_proc *, struct imsg *); +voidparent_tls_ticket_rekey(int, short, void *); struct httpd *httpd_env; @@ -253,6 +254,11 @@ main(int argc, char *argv[]) exit(0); } + /* rekey the TLS tickets before pushing the config */ + parent_tls_ticket_rekey(0, 0, env); + /* initialize the TLS session id to a random key for all procs */ + arc4random_buf(env->sc_tls_sid, sizeof(env->sc_tls_sid)); + if (parent_configure(env) == -1) fatalx("configuration failed"); @@ -307,6 +313,7 @@ parent_configure(struct httpd *env) continue; cf.cf_opts = env->sc_opts; cf.cf_flags = env->sc_flags; + memcpy(cf.cf_tls_sid, env->sc_tls_sid, sizeof(cf.cf_tls_sid)); proc_compose(env->sc_ps, id, IMSG_CFG_DONE, , sizeof(cf)); } @@ -450,6 +457,26 @@ parent_dispatch_logger(int fd, struct pr } return (0); +} + +void +parent_tls_ticket_rekey(int fd, short events, void *arg) +{ + static struct event rekeyev; + struct httpd*env = arg; + struct timeval tv; + struct httpd_tls_ticket key; + + key.tt_keyrev = arc4random(); + arc4random_buf(key.tt_key, sizeof(key.tt_key)); + + proc_compose_imsg(env->sc_ps, PROC_SERVER, -1, IMSG_TLSTICKET_REKEY, + -1, -1, , sizeof(key)); + + evtimer_set(, parent_tls_ticket_rekey, env); + timerclear(); + tv.tv_sec = SERVER_TLS_LIFE_TIME / 4; + evtimer_add(, ); } /* Index: httpd.conf.5 === RCS file: /cvs/src/usr.sbin/httpd/httpd.conf.5,v retrieving revision 1.76 diff -u -p -r1.76 httpd.conf.5 --- httpd.conf.514 Nov 2016 10:28:31 - 1.76 +++ httpd.conf.524 Jan 2017 01:52:34 - @@ -556,6 +556,8 @@ will be used (secure protocols; TLSv1.2- Refer to the .Xr tls_config_parse_protocols 3 function for other valid protocol string values. +.It Oo Ic no Oc Ic tickets +Enable or disable TLS session tickets. .El .El .Sh TYPES Index: httpd.h === RCS file: /cvs/src/usr.sbin/httpd/httpd.h,v retrieving revision 1.125 diff -u -p -r1.125 httpd.h --- httpd.h 9 Jan 2017 14:49:22 - 1.125 +++ httpd.h 23 Jan 2017 04:17:52 - @@ -73,6 +73,8 @@ #define SERVER_MAX_PREFETCH256 #define SERVER_MIN_PREFETCHED 32 #define SERVER_HSTS_DEFAULT_AGE31536000 +//#define SERVER_TLS_LIFE_TIME (4 * 3600) +#define SERVER_TLS_LIFE_TIME (4 * 60) #define MEDIATYPE_NAMEMAX 128 /* file name extension */ #define MEDIATYPE_TYPEMAX 64 /* length of type/subtype */ @@ -105,6 +107,7 @@ enum httpchunk { struct ctl_flags { uint8_t cf_opts; uint32_t cf_flags; + uint8_t cf_tls_sid[TLS_MAX_SESSION_ID_LENGTH]; }; enum key_type { @@ -215,7 +218,8 @@ enum imsg_type { IMSG_CFG_DONE, IMSG_LOG_ACCESS, IMSG_LOG_ERROR, - IMSG_LOG_OPEN + IMSG_LOG_OPEN, + IMSG_TLSTICKET_REKEY }; enum privsep_procid { @@ -452,6 +456,7 @@ struct server_config { uint8_t *tls_ocsp_staple; size_t tls_ocsp_staple_len; char*tls_ocsp_staple_file; + int tls_ticket_enabled; uint32_t flags; int strip; @@ -504,6 +509,11 @@ struct server { }; TAILQ_HEAD(serverlist, server); +struct httpd_tls_ticket { + uint32_ttt_keyrev; + unsigned char tt_key[TLS_TICKET_KEY_SIZE]; +}; + struct httpd { uint8_t sc_opts; uint32_t sc_flags; @@ -514,6
Re: freestanding take 2, part 1
On Tue, Jan 24, 2017 at 3:33 PM, Mark Ketteniswrote: > Turns out our compilers support different sets of builtins. So the > diff below does the following: > > * Enables bzero, memcmp, memcpy and memset for all compilers. > * Enables bcmp, bcopy and memmove for gcc4. > > This diff should be a no-op for kernels compiled without > -ffreestanding. I've checked a couple of architectures and so far > have not found any evidence that this isn't the case. > > As soon as we start adding -ffreestanding to the kernel Makefiles, > things will start to change. That'll be part2. > > ok? Built and rebooted to kernels with this and -ffreestanding (instead of the list of -fno-builtin-foo options) on both loongson and macppc and they're happy. Idea makes sense. ok guenther@
Re: global mbuf memory limit
On Tue, Jan 24, 2017 at 02:55:50AM +0100, Alexander Bluhm wrote: > On Wed, Dec 14, 2016 at 03:52:32PM +1000, David Gwynne wrote: > > > Wouldn't it make sense to use atomic operations to keep track of the > > > amount of memory that was allocated? > > > > the mtx may be slow, but it is a slow path id like to keep simple. > > Is allocate and free of mbuf clusters in the slow path? I would > expect that it has to be done for every packet. An atomic operation > would not solve the slowdown in the fast path, perhaps a multi-cpu > counter? no, this is only done at the level of pool page allocation. the pool pages are what get cut up into mbufs and mbuf clusters. the only difference between mbufs and clusters at the pool level is their size, and therefore which pool they come from. on my firewall the interesting numbers are: $ vmstat -m | grep -e ^mbuf -e ^mcl -e ^Name NameSize Requests FailInUse Pgreq Pgrel Npage Hiwat Minpg Maxpg Idle mbufpl 256 1502227362815 357415 2547 22176623 22176415 208 953 181 mcl2k 2048 1446266146051 0709 104851993 104851840 153 1712 08 46 mcl4k 4096 7438867115 00 519649 519648 135 0 81 mcl8k 8192 3388545041 00 583882 583881 161 0 81 mcl9k 9216 15293011574 0 18 507792 507790 231 0 80 mbufs get used 1502227362815 / 22176623 or 67739 times per page allocation. 2k clusters get used 1446266146051 / 104851993 or 13793 per page allocation. > > > Long run I suppose we want to drop nmbclust and let users tune the > > > total amount of memory available for clusters and set the initial > > > amount to a percentage of physical memory? > > > > i do want to move to specifying memory in terms of bytes instead of 2k > > clusters. > > That might be better. But leave it as it is for now. Percentage > of physical memory is bad as we have architectures that cannot do > DMA everywhere. > > > how much should be available by default is a complicated issue. > > Yes. Maybe we have to reconsier the default. We had it per cluster > size before, now the limit is for all clusters. percentage of memory isnt a good idea. the memory available to the stack should be sized by how much tcp you want to do, where how much is determined by the bandwidth and latency you need to support. that is regardless of whether you have 4G or 512G of RAM. > > >> void > > >> mbcpuinit() > > >> { > > >> +int i; > > >> + > > >> mbstat = counters_alloc_ncpus(mbstat, MBSTAT_COUNT, M_DEVBUF); > > >> + > > >> +pool_cache_init(); > > >> +pool_cache_init(); > > >> + > > >> +for (i = 0; i < nitems(mclsizes); i++) > > >> +pool_cache_init([i]); > > >> } > > Is the pool cache related to the global mbuf limit? no, but it is possible because of it. the idea of the per cpu caches is to let cpus operate completely independently most of the time. having an item limit on a pool means the cpus need to coordinate on every pool put and get so they can check and update that limit. > Apart from the problem that I don't know wether the mutex kills > performance, the diff looks good. the tests ive done and simon mages has done show a slight benefit. id expect to see that grow as we use pools more concurrently.
Re: clean up and modernize test calls in bsd.obj.mk
On Tue, Jan 24, 2017 at 01:01:57PM +1000, Theo Buehler wrote: > We're currently using several idioms for conditionally executing code in > bsd.obj.mk. I'd like to unify them for the sake of readability and > consistency. This was done joint with rpe. OK rpe@ > Index: share/mk/bsd.obj.mk > === > RCS file: /cvs/src/share/mk/bsd.obj.mk,v > retrieving revision 1.18 > diff -u -p -r1.18 bsd.obj.mk > --- share/mk/bsd.obj.mk 24 Jan 2017 02:56:50 - 1.18 > +++ share/mk/bsd.obj.mk 24 Jan 2017 02:58:50 - > @@ -33,20 +33,19 @@ obj! _SUBDIRUSE > SETOWNER=:; \ > fi; \ > [[ -z $$MKDIRS ]] && MKDIRS="mkdir -p"; \ > - if test $$here != $$subdir ; then \ > + if [[ $$here != $$subdir ]]; then \ > dest=${BSDOBJDIR}/$$subdir ; \ > echo "$$here/${__objdir} -> $$dest"; \ > - if test ! -L ${__objdir} -o \ > - X`readlink ${__objdir}` != X$$dest; \ > + if [[ ! -L ${__objdir} || `readlink ${__objdir}` != $$dest ]]; \ > then \ > - if test -e ${__objdir}; then rm -rf ${__objdir}; fi; \ > + [[ -e ${__objdir} ]] && rm -rf ${__objdir}; \ > ln -sf $$dest ${__objdir}; \ > $$SETOWNER ${__objdir}; \ > fi; \ > - if test -d ${BSDOBJDIR}; then \ > - test -d $$dest || $$MKDIRS $$dest; \ > + if [[ -d ${BSDOBJDIR} ]]; then \ > + [[ -d $$dest ]] || $$MKDIRS $$dest; \ > else \ > - if test -e ${BSDOBJDIR}; then \ > + if [[ -e ${BSDOBJDIR} ]]; then \ > echo "${BSDOBJDIR} is not a directory"; \ > else \ > echo "${BSDOBJDIR} does not exist"; \ > @@ -54,7 +53,7 @@ obj! _SUBDIRUSE > fi; \ > else \ > dest=$$here/${__objdir} ; \ > - if test ! -d ${__objdir} ; then \ > + if [[ ! -d ${__objdir} ]]; then \ > echo "making $$dest" ; \ > $$MKDIRS $$dest; \ > $$SETOWNER $$dest; \
Re: let bfd survive link-down
On 2017 Jan 24 (Tue) at 02:38:54 +0100 (+0100), Peter Hessler wrote: :When an interface loses link, we delete all directly connected and :cloned routes. However, this means we also lose any BFD configuration :on those routes. : :Surviving link-down is pretty much mandatory for BFD to work. : :So instead, I bypass the route deletion, and clean up the link-local :gateway information. : :$ arp -n | grep 203.0.113.9 :203.0.113.9 (incomplete) em1 expired : :$ netstat -rnf inet | grep 203.0.113.9 :203.0.113.9link#2 HLcF 2 53 - 3 em1 : :OK? : Even nicer, now I only expire the route, instead of poking at some of the internals. Index: sys/net/route.c === RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.347 diff -u -p -u -p -r1.347 route.c --- sys/net/route.c 20 Jan 2017 08:10:54 - 1.347 +++ sys/net/route.c 24 Jan 2017 03:55:01 - @@ -1760,13 +1760,27 @@ rt_if_linkstate_change(struct rtentry *r * new routes from a better source. */ if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) && - !ISSET(rt->rt_flags, RTF_CACHED)) { + !ISSET(rt->rt_flags, RTF_CACHED) +#ifdef BFD + && !ISSET(rt->rt_flags, RTF_BFD) +#endif + ) { int error; if ((error = rtdeletemsg(rt, ifp, id))) return (error); return (EAGAIN); } +#ifdef BFD + /* +* in the BFD case, expire the route +*/ + if (ISSET(rt->rt_flags, RTF_BFD) && + ISSET(rt->rt_flags, RTF_CLONED) && + (rt->rt_gateway->sa_family == AF_LINK)) { + rt->rt_expire = time_uptime; + } +#endif /* take route down */ rt->rt_flags &= ~RTF_UP; rtable_mpath_reprio(id, rt_key(rt), -- Only presidents, editors, and people with tapeworms have the right to use the editorial "we."
Re: let bfd survive link-down
On 24/01/17(Tue) 04:55, Peter Hessler wrote: > On 2017 Jan 24 (Tue) at 02:38:54 +0100 (+0100), Peter Hessler wrote: > :When an interface loses link, we delete all directly connected and > :cloned routes. However, this means we also lose any BFD configuration > :on those routes. > : > :Surviving link-down is pretty much mandatory for BFD to work. > : > :So instead, I bypass the route deletion, and clean up the link-local > :gateway information. > : > :$ arp -n | grep 203.0.113.9 > :203.0.113.9 (incomplete) em1 expired > : > :$ netstat -rnf inet | grep 203.0.113.9 > :203.0.113.9link#2 HLcF 2 53 - 3 em1 > : > :OK? > : > > Even nicer, now I only expire the route, instead of poking at some of > the internals. There's no need to expire it, bringing the route down is enough. Expiration means you assume the route represents a L2 entry. Plus rt_expire should be only modified by ARP/NDP code. Just change: !ISSET(rt->rt_flags, RTF_CACHED) into !ISSET(rt->rt_flags, RTF_CACHED|RTF_BFD) But this really show that BFD data structure don't need to be linked to rtentries. > Index: sys/net/route.c > === > RCS file: /cvs/src/sys/net/route.c,v > retrieving revision 1.347 > diff -u -p -u -p -r1.347 route.c > --- sys/net/route.c 20 Jan 2017 08:10:54 - 1.347 > +++ sys/net/route.c 24 Jan 2017 03:55:01 - > @@ -1760,13 +1760,27 @@ rt_if_linkstate_change(struct rtentry *r >* new routes from a better source. >*/ > if (ISSET(rt->rt_flags, RTF_CLONED|RTF_DYNAMIC) && > - !ISSET(rt->rt_flags, RTF_CACHED)) { > + !ISSET(rt->rt_flags, RTF_CACHED) > +#ifdef BFD > + && !ISSET(rt->rt_flags, RTF_BFD) > +#endif > + ) { > int error; > > if ((error = rtdeletemsg(rt, ifp, id))) > return (error); > return (EAGAIN); > } > +#ifdef BFD > + /* > + * in the BFD case, expire the route > + */ > + if (ISSET(rt->rt_flags, RTF_BFD) && > + ISSET(rt->rt_flags, RTF_CLONED) && > + (rt->rt_gateway->sa_family == AF_LINK)) { > + rt->rt_expire = time_uptime; > + } > +#endif > /* take route down */ > rt->rt_flags &= ~RTF_UP; > rtable_mpath_reprio(id, rt_key(rt), > > > -- > Only presidents, editors, and people with tapeworms have the right to > use the editorial "we." >
ld.so: don't use _dl_exit() for fatal errors
So right now, ld.so simply exits iun various error cases, like unknown relocation. This isn't great, as it's a normal exit when a linking failure really should be an abnormal exit as from a fatal signal. If "grep" has a linking failure I want it to *die* instead of meekly returning non-zero. Diff below adds three functions to make fatal error handling simpler: - _dl_diedie() This makes the process kill itself with SIGKILL via thrkill(2). It might be nicer to do SIGABRT, but that can be caught so it would need to be prepared to use sigaction and suddenly it seems like overkill. - _dl_die(fmt, ...) This is a printf-like function that prints a prefix of "ld.so: progname: " then the provided format, and finally it adds a newline and calls _dl_diedie(). Makes reporting fatal errors trivial. - _dl_oom() Like _dl_die("out of memory"), for use in the common case of a failed malloc/etc The rest of the diff then converts the existing "_dl_printf()+_dl_exit()" and "_dl_printf() + forced segfault" cases into calls to _dl_die() or _dl_oom() as appropriate. Only places which already used _dl_exit() or tried to trigger a segfault have been changed to use these. Tested on amd64, armv7, loongson, macppc, and sparc64 by building a version of libc with the 'vis' line removed from Symbols.list, then running 'vis' against that .so and verifying the output is something like: vis:vis: undefined symbol 'vis' ld.so: vis: lazy binding failed! Killed Test results for other archs? oks? Philip Index: dl_printf.c === RCS file: /cvs/src/libexec/ld.so/dl_printf.c,v retrieving revision 1.18 diff -u -p -r1.18 dl_printf.c --- dl_printf.c 23 Jan 2017 13:00:09 - 1.18 +++ dl_printf.c 24 Jan 2017 05:16:04 - @@ -56,6 +56,7 @@ #include #include +#include "resolve.h" /* for __progname */ #include "syscall.h" #include "util.h" @@ -132,8 +133,10 @@ kdoprnt(int fd, const char *fmt, va_list for (;;) { while ((ch = *fmt++) != '%') { - if (ch == '\0') + if (ch == '\0') { + _dl_flushbuf(); return; + } putcharfd(ch, fd); } lflag = 0; @@ -221,7 +224,6 @@ reswitch: putcharfd(ch, fd); } } - _dl_flushbuf(); } static void @@ -237,4 +239,31 @@ kprintn(int fd, unsigned long ul, int ba do { putcharfd(*--p, fd); } while (p > buf); +} + +static char ldso[] = "ld.so: "; + +__dead void +_dl_die(const char *fmt, ...) +{ + va_list ap; + + _dl_printf("%s%s: ", ldso, __progname); + va_start(ap, fmt); + kdoprnt(2, fmt, ap); + _dl_write(2, "\n", 1); + va_end(ap); + + _dl_diedie(); +} + +__dead void +_dl_oom(void) +{ + static const char oom[] = ": out of memory\n"; + + _dl_write(2, ldso, sizeof(ldso) - 1); + _dl_write(2, __progname, _dl_strlen(__progname)); + _dl_write(2, oom, sizeof(oom) - 1); + _dl_diedie(); } Index: library.c === RCS file: /cvs/src/libexec/ld.so/library.c,v retrieving revision 1.79 diff -u -p -r1.79 library.c --- library.c 12 Aug 2016 20:39:01 - 1.79 +++ library.c 24 Jan 2017 05:16:04 - @@ -236,7 +236,7 @@ _dl_tryload_shlib(const char *libname, i res = NULL; /* silence gcc */ next_load = _dl_calloc(1, sizeof(struct load_list)); if (next_load == NULL) - _dl_exit(7); + _dl_oom(); next_load->next = load_list; load_list = next_load; next_load->start = start; Index: library_mquery.c === RCS file: /cvs/src/libexec/ld.so/library_mquery.c,v retrieving revision 1.56 diff -u -p -r1.56 library_mquery.c --- library_mquery.c12 Aug 2016 20:39:01 - 1.56 +++ library_mquery.c24 Jan 2017 05:16:04 - @@ -176,7 +176,7 @@ _dl_tryload_shlib(const char *libname, i if (size != 0) { ld = _dl_malloc(sizeof(struct load_list)); if (ld == NULL) - _dl_exit(7); + _dl_oom(); ld->start = NULL; ld->size = size; ld->moff = TRUNC_PG(phdp->p_vaddr); @@ -191,7 +191,7 @@ _dl_tryload_shlib(const char *libname, i /* This phdr has a zfod section */ ld = _dl_calloc(1,
pool_debug
I'd like to force a yield() for every pool_get(9) using PR_WAITOK, just like we do with malloc(9), in order to ensure that the NET_LOCK() is not held across context switches. ok? Index: kern/subr_pool.c === RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.204 diff -u -p -r1.204 subr_pool.c --- kern/subr_pool.c21 Nov 2016 01:44:06 - 1.204 +++ kern/subr_pool.c24 Jan 2017 06:29:09 - @@ -513,7 +513,7 @@ pool_get(struct pool *pp, int flags) } mtx_leave(>pr_mtx); - if (slowdown && ISSET(flags, PR_WAITOK)) + if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK)) yield(); if (v == NULL) {
Scheduler ping-pong with preempt()
Userland threads are preempt()'d when hogging a CPU or when processing an AST. Currently when such a thread is preempted the scheduler looks for an idle CPU and puts it on its run queue. That means the number of involuntary context switch often result in a migration. This is not a problem per se and one could argue that if another CPU is idle it makes sense to move. However with the KERNEL_LOCK() moving to another CPU won't necessarily allows the preempt()'d thread to run. It's even worse, it increases contention. If you add to this behavior the fact that sched_choosecpu() prefers idle CPUs in a linear order, meaning CPU0 > CPU1 > .. > CPUN, you'll understand that the set of idle CPUs will change every time preempt() is called. I believe this behavior affects kernel threads by side effect, since the set of idle CPU changes every time a thread is preempted. With this diff the 'softnet' thread didn't move on a 2 CPUs machine during simple benchmarks. Without, it plays ping-pong between CPU. The goal of this diff is to reduce the number of migrations. You can compare the value of 'sched_nomigrations' and 'sched_nmigrations' with and without it. As usual, I'd like to know what's the impact of this diff on your favorite benchmark. Please test and report back. Index: kern/kern_sched.c === RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.44 diff -u -p -r1.44 kern_sched.c --- kern/kern_sched.c 21 Jan 2017 05:42:03 - 1.44 +++ kern/kern_sched.c 24 Jan 2017 03:08:23 - @@ -51,6 +51,8 @@ uint64_t sched_noidle;/* Times we didn uint64_t sched_stolen; /* Times we stole proc from other cpus */ uint64_t sched_choose; /* Times we chose a cpu */ uint64_t sched_wasidle;/* Times we came out of idle */ +uint64_t sched_nvcsw; /* voluntary context switches */ +uint64_t sched_nivcsw; /* involuntary context switches */ #ifdef MULTIPROCESSOR struct taskq *sbartq; Index: kern/kern_synch.c === RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.136 diff -u -p -r1.136 kern_synch.c --- kern/kern_synch.c 21 Jan 2017 05:42:03 - 1.136 +++ kern/kern_synch.c 24 Jan 2017 03:08:23 - @@ -296,6 +296,7 @@ sleep_finish(struct sleep_state *sls, in if (sls->sls_do_sleep && do_sleep) { p->p_stat = SSLEEP; p->p_ru.ru_nvcsw++; + sched_nvcsw++; SCHED_ASSERT_LOCKED(); mi_switch(); } else if (!do_sleep) { @@ -481,6 +482,7 @@ sys_sched_yield(struct proc *p, void *v, p->p_stat = SRUN; setrunqueue(p); p->p_ru.ru_nvcsw++; + sched_nvcsw++; mi_switch(); SCHED_UNLOCK(s); Index: kern/sched_bsd.c === RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.43 diff -u -p -r1.43 sched_bsd.c --- kern/sched_bsd.c9 Mar 2016 13:38:50 - 1.43 +++ kern/sched_bsd.c24 Jan 2017 03:18:24 - @@ -302,6 +302,7 @@ yield(void) p->p_stat = SRUN; setrunqueue(p); p->p_ru.ru_nvcsw++; + sched_nvcsw++; mi_switch(); SCHED_UNLOCK(s); } @@ -327,9 +328,12 @@ preempt(struct proc *newp) SCHED_LOCK(s); p->p_priority = p->p_usrpri; p->p_stat = SRUN; +#if 0 p->p_cpu = sched_choosecpu(p); +#endif setrunqueue(p); p->p_ru.ru_nivcsw++; + sched_nivcsw++; mi_switch(); SCHED_UNLOCK(s); } Index: sys/sched.h === RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.41 diff -u -p -r1.41 sched.h --- sys/sched.h 17 Mar 2016 13:18:47 - 1.41 +++ sys/sched.h 24 Jan 2017 02:10:41 - @@ -134,6 +134,9 @@ struct schedstate_percpu { extern int schedhz;/* ideally: 16 */ extern int rrticks_init; /* ticks per roundrobin() */ +extern uint64_t sched_nvcsw; /* voluntary context switches */ +extern uint64_t sched_nivcsw; /* involuntary context switches */ + struct proc; void schedclock(struct proc *); struct cpu_info;
ypldap(8) tls
This adds TLS to ypldap(8), both 'ldaps' (TLS on port 636) and 'tls' (on port 389, after a 'starttls' extended operation) variants. I've tried this against one of our AD servers and against ldapd. Index: Makefile === RCS file: /cvs/src/usr.sbin/ypldap/Makefile,v retrieving revision 1.8 diff -u -p -u -p -r1.8 Makefile --- Makefile9 Sep 2015 15:33:18 - 1.8 +++ Makefile24 Jan 2017 03:27:47 - @@ -9,7 +9,7 @@ SRCS= parse.y ypldap.c log.c \ MAN= ypldap.8 ypldap.conf.5 DPADD= ${LIBEVENT} ${LIBUTIL} ${LIBRPCSVC} -LDADD= -levent -lutil -lrpcsvc +LDADD= -ltls -levent -lutil -lrpcsvc CFLAGS+= -I${.CURDIR} CFLAGS+= -Wall CFLAGS+= -Wstrict-prototypes -Wmissing-prototypes Index: aldap.c === RCS file: /cvs/src/usr.sbin/ypldap/aldap.c,v retrieving revision 1.34 diff -u -p -u -p -r1.34 aldap.c --- aldap.c 22 Oct 2016 03:37:13 - 1.34 +++ aldap.c 24 Jan 2017 03:27:47 - @@ -25,6 +25,8 @@ #include #include +#include + #include "aldap.h" #if 0 @@ -42,6 +44,9 @@ static int isu8cont(unsigned char); char *parseval(char *, size_t); intaldap_create_page_control(struct ber_element *, int, struct aldap_page_control *); +intaldap_send(struct aldap *, + struct ber_element *); +unsigned long aldap_application(struct ber_element *); #ifdef DEBUG voidldap_debug_elements(struct ber_element *); @@ -55,13 +60,21 @@ void ldap_debug_elements(struct ber_e #define LDAP_DEBUG(x, y) do { } while (0) #endif +unsigned long +aldap_application(struct ber_element *elm) +{ + return BER_TYPE_OCTETSTRING; +} + int aldap_close(struct aldap *al) { - if (close(al->ber.fd) == -1) - return (-1); + if (al->fd != -1) + if (close(al->ber.fd) == -1) + return (-1); ber_free(>ber); + evbuffer_free(al->buf); free(al); return (0); @@ -74,16 +87,109 @@ aldap_init(int fd) if ((a = calloc(1, sizeof(*a))) == NULL) return NULL; - a->ber.fd = fd; + a->buf = evbuffer_new(); + a->fd = fd; + a->ber.fd = -1; + ber_set_application(>ber, aldap_application); return a; } int +aldap_tls(struct aldap *ldap, struct tls_config *cfg, const char *name) +{ + ldap->tls = tls_client(); + if (ldap->tls == NULL) { + ldap->err = ALDAP_ERR_OPERATION_FAILED; + return (-1); + } + + if (tls_configure(ldap->tls, cfg)) { + ldap->err = ALDAP_ERR_TLS_ERROR; + return (-1); + } + + if (tls_connect_socket(ldap->tls, ldap->fd, name)) { + ldap->err = ALDAP_ERR_TLS_ERROR; + return (-1); + } + + if (tls_handshake(ldap->tls)) { + ldap->err = ALDAP_ERR_TLS_ERROR; + return (-1); + } + + ldap->fd = -1; + return (0); +} + +int +aldap_send(struct aldap *ldap, struct ber_element *root) +{ + int error, wrote; + void *data; + size_t len, done; + + len = ber_calc_len(root); + error = ber_write_elements(>ber, root); + ber_free_elements(root); + if (error == -1) + return -1; + + ber_get_writebuf(>ber, ); + done = 0; + while (len > 0) { + if (ldap->tls != NULL) { + wrote = tls_write(ldap->tls, ((char *)data) + done, + len); + if (wrote == TLS_WANT_POLLIN || + wrote == TLS_WANT_POLLOUT) + continue; + } else + wrote = write(ldap->fd, ((char *)data) + done, len); + + if (wrote == -1) + return -1; + + len -= wrote; + done += wrote; + } + + return 0; +} + +int +aldap_req_starttls(struct aldap *ldap) +{ + struct ber_element *root = NULL, *ber; + + if ((root = ber_add_sequence(NULL)) == NULL) + goto fail; + + ber = ber_printf_elements(root, "d{tst", ++ldap->msgid, BER_CLASS_APP, + (unsigned long) LDAP_REQ_EXTENDED, LDAP_STARTTLS_OID, + BER_CLASS_CONTEXT, (unsigned long) 0); + if (ber == NULL) { + ldap->err = ALDAP_ERR_OPERATION_FAILED; + goto fail; + } + + if (aldap_send(ldap, root) == -1) + goto fail; + + return (ldap->msgid); +fail: + if (root != NULL) + ber_free_elements(root); + + ldap->err = ALDAP_ERR_OPERATION_FAILED; +
freestanding take 2, part 1
Turns out our compilers support different sets of builtins. So the diff below does the following: * Enables bzero, memcmp, memcpy and memset for all compilers. * Enables bcmp, bcopy and memmove for gcc4. This diff should be a no-op for kernels compiled without -ffreestanding. I've checked a couple of architectures and so far have not found any evidence that this isn't the case. As soon as we start adding -ffreestanding to the kernel Makefiles, things will start to change. That'll be part2. ok? Index: sys/systm.h === RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.121 diff -u -p -r1.121 systm.h --- sys/systm.h 29 Dec 2016 12:12:44 - 1.121 +++ sys/systm.h 24 Jan 2017 05:28:04 - @@ -322,6 +322,16 @@ extern int (*mountroot)(void); #include +#define bzero(b, n)__builtin_bzero((b), (n)) +#define memcmp(b1, b2, n) __builtin_memcmp((b1), (b2), (n)) +#define memcpy(d, s, n)__builtin_memcpy((d), (s), (n)) +#define memset(b, c, n)__builtin_memset((b), (c), (n)) +#if !defined(__clang__) && (defined(__GNUC__) && __GNUC__ >= 4) +#define bcmp(b1, b2, n)__builtin_bcmp((b1), (b2), (n)) +#define bcopy(s, d, n) __builtin_bcopy((s), (d), (n)) +#define memmove(d, s, n) __builtin_memmove((d), (s), (n)) +#endif + #if defined(DDB) || defined(KGDB) /* debugger entry points */ void Debugger(void); /* in DDB only */ Index: lib/libkern/bcmp.c === RCS file: /cvs/src/sys/lib/libkern/bcmp.c,v retrieving revision 1.10 diff -u -p -r1.10 bcmp.c --- lib/libkern/bcmp.c 10 Jun 2014 04:16:57 - 1.10 +++ lib/libkern/bcmp.c 24 Jan 2017 05:28:04 - @@ -31,6 +31,8 @@ #include +#undef bcmp + /* * bcmp -- vax cmpc3 instruction */ Index: lib/libkern/bcopy.c === RCS file: /cvs/src/sys/lib/libkern/bcopy.c,v retrieving revision 1.1 diff -u -p -r1.1 bcopy.c --- lib/libkern/bcopy.c 13 Jun 2013 04:33:26 - 1.1 +++ lib/libkern/bcopy.c 24 Jan 2017 05:28:04 - @@ -35,6 +35,8 @@ #include #include +#undef bcopy + /* * sizeof(word) MUST BE A POWER OF TWO * SO THAT wmask BELOW IS ALL ONES Index: lib/libkern/bzero.c === RCS file: /cvs/src/sys/lib/libkern/bzero.c,v retrieving revision 1.9 diff -u -p -r1.9 bzero.c --- lib/libkern/bzero.c 10 Jun 2014 04:16:57 - 1.9 +++ lib/libkern/bzero.c 24 Jan 2017 05:28:04 - @@ -31,6 +31,8 @@ #include +#undef bzero + /* * bzero -- vax movc5 instruction */ Index: lib/libkern/memcmp.c === RCS file: /cvs/src/sys/lib/libkern/memcmp.c,v retrieving revision 1.6 diff -u -p -r1.6 memcmp.c --- lib/libkern/memcmp.c10 Jun 2014 04:16:57 - 1.6 +++ lib/libkern/memcmp.c24 Jan 2017 05:28:04 - @@ -34,6 +34,8 @@ #include +#undef memcmp + /* * Compare memory regions. */ Index: lib/libkern/memcpy.c === RCS file: /cvs/src/sys/lib/libkern/memcpy.c,v retrieving revision 1.3 diff -u -p -r1.3 memcpy.c --- lib/libkern/memcpy.c12 Jun 2013 16:44:22 - 1.3 +++ lib/libkern/memcpy.c24 Jan 2017 05:28:04 - @@ -32,6 +32,8 @@ #include #include +#undef memcpy + /* * This is designed to be small, not fast. */ Index: lib/libkern/memmove.c === RCS file: /cvs/src/sys/lib/libkern/memmove.c,v retrieving revision 1.1 diff -u -p -r1.1 memmove.c --- lib/libkern/memmove.c 11 Jun 2013 18:04:41 - 1.1 +++ lib/libkern/memmove.c 24 Jan 2017 05:28:04 - @@ -32,6 +32,8 @@ #include #include +#undef memmove + /* * This is designed to be small, not fast. */ Index: lib/libkern/memset.c === RCS file: /cvs/src/sys/lib/libkern/memset.c,v retrieving revision 1.7 diff -u -p -r1.7 memset.c --- lib/libkern/memset.c10 Jun 2014 04:16:57 - 1.7 +++ lib/libkern/memset.c24 Jan 2017 05:28:04 - @@ -39,6 +39,9 @@ #include #include +#undef bzero +#undef memset + #definewsize sizeof(u_int) #definewmask (wsize - 1)
Re: httpd TLS ticket support
> Am 24.01.2017 um 02:54 schrieb Claudio Jeker: > > Since I just added ticket support to libtls here is a diff to enable it > in httpd. > Thanks, comments below. Reyk > Cheers > -- > :wq Claudio > > Index: config.c > === > RCS file: /cvs/src/usr.sbin/httpd/config.c,v > retrieving revision 1.50 > diff -u -p -r1.50 config.c > --- config.c6 Nov 2016 10:49:38 -1.50 > +++ config.c22 Jan 2017 02:02:03 - > @@ -146,6 +146,7 @@ config_getcfg(struct httpd *env, struct >memcpy(, imsg->data, sizeof(cf)); >env->sc_opts = cf.cf_opts; >env->sc_flags = cf.cf_flags; > +memcpy(env->sc_tls_sid, cf.cf_tls_sid, sizeof(env->sc_tls_sid)); > >what = ps->ps_what[privsep_process]; > > Index: httpd.c > === > RCS file: /cvs/src/usr.sbin/httpd/httpd.c,v > retrieving revision 1.63 > diff -u -p -r1.63 httpd.c > --- httpd.c9 Jan 2017 14:49:22 -1.63 > +++ httpd.c23 Jan 2017 04:18:44 - > @@ -57,6 +57,7 @@ int parent_dispatch_server(int, struct >struct imsg *); > int parent_dispatch_logger(int, struct privsep_proc *, >struct imsg *); > +void parent_tls_ticket_rekey(int, short, void *); > > struct httpd*httpd_env; > > @@ -253,6 +254,11 @@ main(int argc, char *argv[]) >exit(0); >} > > +/* rekey the TLS tickets before pushing the config */ > +parent_tls_ticket_rekey(0, 0, env); Why not doing this in parent_configure() - this way it would rekey on config reload and look like the better place. > +/* initialize the TLS session id to a random key for all procs */ > +arc4random_buf(env->sc_tls_sid, sizeof(env->sc_tls_sid)); > + >if (parent_configure(env) == -1) >fatalx("configuration failed"); > > @@ -307,6 +313,7 @@ parent_configure(struct httpd *env) >continue; >cf.cf_opts = env->sc_opts; >cf.cf_flags = env->sc_flags; > +memcpy(cf.cf_tls_sid, env->sc_tls_sid, sizeof(cf.cf_tls_sid)); > >proc_compose(env->sc_ps, id, IMSG_CFG_DONE, , sizeof(cf)); >} > @@ -450,6 +457,26 @@ parent_dispatch_logger(int fd, struct pr >} > >return (0); > +} > + > +void > +parent_tls_ticket_rekey(int fd, short events, void *arg) > +{ > +static struct event rekeyev; > +struct httpd*env = arg; > +struct timeval tv; > +struct httpd_tls_ticket key; > + > +key.tt_keyrev = arc4random(); > +arc4random_buf(key.tt_key, sizeof(key.tt_key)); > + > +proc_compose_imsg(env->sc_ps, PROC_SERVER, -1, IMSG_TLSTICKET_REKEY, > +-1, -1, , sizeof(key)); > + > +evtimer_set(, parent_tls_ticket_rekey, env); > +timerclear(); > +tv.tv_sec = SERVER_TLS_LIFE_TIME / 4; > +evtimer_add(, ); > } > > /* > Index: httpd.conf.5 > === > RCS file: /cvs/src/usr.sbin/httpd/httpd.conf.5,v > retrieving revision 1.76 > diff -u -p -r1.76 httpd.conf.5 > --- httpd.conf.514 Nov 2016 10:28:31 -1.76 > +++ httpd.conf.524 Jan 2017 01:52:34 - > @@ -556,6 +556,8 @@ will be used (secure protocols; TLSv1.2- > Refer to the > .Xr tls_config_parse_protocols 3 > function for other valid protocol string values. > +.It Oo Ic no Oc Ic tickets > +Enable or disable TLS session tickets. What is the default? Should be mentioned here. > .El > .El > .Sh TYPES > Index: httpd.h > === > RCS file: /cvs/src/usr.sbin/httpd/httpd.h,v > retrieving revision 1.125 > diff -u -p -r1.125 httpd.h > --- httpd.h9 Jan 2017 14:49:22 -1.125 > +++ httpd.h23 Jan 2017 04:17:52 - > @@ -73,6 +73,8 @@ > #define SERVER_MAX_PREFETCH256 > #define SERVER_MIN_PREFETCHED32 > #define SERVER_HSTS_DEFAULT_AGE31536000 > +//#define SERVER_TLS_LIFE_TIME(4 * 3600) This // comment seems to be a leftover. > +#define SERVER_TLS_LIFE_TIME(4 * 60) > > #define MEDIATYPE_NAMEMAX128/* file name extension */ > #define MEDIATYPE_TYPEMAX64/* length of type/subtype */ > @@ -105,6 +107,7 @@ enum httpchunk { > struct ctl_flags { >uint8_t cf_opts; >uint32_t cf_flags; > +uint8_t cf_tls_sid[TLS_MAX_SESSION_ID_LENGTH]; > }; > > enum key_type { > @@ -215,7 +218,8 @@ enum imsg_type { >IMSG_CFG_DONE, >IMSG_LOG_ACCESS, >IMSG_LOG_ERROR, > -IMSG_LOG_OPEN > +IMSG_LOG_OPEN, > +IMSG_TLSTICKET_REKEY > }; > > enum privsep_procid { > @@ -452,6 +456,7 @@ struct server_config { >uint8_t*tls_ocsp_staple; >size_t tls_ocsp_staple_len; >char*tls_ocsp_staple_file; > +int tls_ticket_enabled; > >uint32_t flags; >int strip; > @@ -504,6 +509,11 @@ struct server { > }; > TAILQ_HEAD(serverlist, server); > >
Re: NET_LOCK() ordering issue
On Mon, Jan 23, 2017 at 4:01 PM, Martin Pieuchotwrote: > On 24/01/17(Tue) 00:51, Alexander Bluhm wrote: >> On Sat, Jan 21, 2017 at 10:46:42AM +1000, Martin Pieuchot wrote: ... > Updated diff, thanks for your review. Very close... > --- kern/uipc_syscalls.c29 Dec 2016 12:12:43 - 1.144 > +++ kern/uipc_syscalls.c23 Jan 2017 23:59:34 - ... > @@ -277,19 +277,35 @@ doaccept(struct proc *p, int sock, struc > return (error); > > headfp = fp; > + head = headfp->f_data; > + > + fdplock(fdp); > + error = falloc(p, , ); > + if (!error && (flags & SOCK_CLOEXEC)) > + fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; > + fdpunlock(fdp); > + if (error) { > + /* > +* Probably ran out of file descriptors. Wakeup > +* so some other process might have a chance at it. > +*/ > + wakeup_one(>so_timeo); This comment and wakeup_one() call should be left out. They were needed in the original location because the thread had consumed the original wakeup from the new connection being completed at that point and when it gave up (because falloc() failed) it needed to recreate that for the next process. Now, doing the falloc() before we don't need or want that extra wakeup. Otherwise, looks good. Philip Guenther
Re: ypldap(8) tls
DPADD= ${LIBEVENT} ${LIBUTIL} ${LIBRPCSVC} -LDADD= -levent -lutil -lrpcsvc +LDADD= -ltls -levent -lutil -lrpcsvc You will need to use: DPADD=${LIBTLS} ${LIBSSL} ${LIBCRYPTO} ${LIBEVENT} ${LIBUTIL} ${LIBRPCSVC} LDADD=-ltls -lssl -lcrypto -levent -lutil -lrpcsvc
Re: Help for ddb(4)'s "tr /p"
On 24/01/17(Tue) 11:35, Philip Guenther wrote: > On Tue, 24 Jan 2017, Martin Pieuchot wrote: > > On 24/01/17(Tue) 10:08, Philip Guenther wrote: > ... > > > Hmm, ps/n (the default) is the only of the ps subcommands to not show > > > the TID...so that it can show more columns of COMMAND. You've added 8 > > > columns without shrinking any of the existing ones, which will cause > > > an unreadable mess when it wraps around. Maybe drop PGRP from /n and > > > we add a new one that's more like (userspace) ps -j ? > > > > Here's the updated output. > > > >PID TID PPIDUID S FLAGS WAIT COMMAND > > *97567 517976 11478 0 7 0x3sysctl > > 11478 302289 1 0 30x10008b pause ksh > > 68659 470461 1 0 30x100098 poll cron > > > > > > I like the idea of "ps /j", I could also merge the information in the > > current "ps /w" since EMUL is always 'native' now. What do you think? > > Just s/EMUL/PGID/ and wait for someone to need something more? Diff below does that and fix alignment issues with /a /o and /w. The problem was that TID needs 6 columns not 5. With this ps /a no longer wraps on i386. ddb{0}> ps /a TID COMMAND STRUCT PROC * UAREA * VMSPACE/VM_MAP * 70709 sysctl 0xd94e9418 0xf551c000 0xd94e7144 154620 ksh0xd94e9c28 0xf5544000 0xd94e7324 278101 cron 0xd94e9820 0xf557c000 0xd94e7004 ok? Index: sys/kern/kern_proc.c === RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.74 diff -u -p -r1.74 kern_proc.c --- sys/kern/kern_proc.c24 Jan 2017 04:50:48 - 1.74 +++ sys/kern/kern_proc.c24 Jan 2017 05:03:38 - @@ -467,7 +467,7 @@ db_show_all_procs(db_expr_t addr, int ha switch (*mode) { case 'a': - db_printf(" TID %-10s %18s %18s %18s\n", + db_printf("TID %-9s %18s %18s %18s\n", "COMMAND", "STRUCT PROC *", "UAREA *", "VMSPACE/VM_MAP"); break; case 'n': @@ -475,12 +475,12 @@ db_show_all_procs(db_expr_t addr, int ha "TID", "PPID", "UID", "FLAGS", "WAIT", "COMMAND"); break; case 'w': - db_printf(" TID %-16s %-8s %18s %s\n", - "COMMAND", "EMUL", "WAIT-CHANNEL", "WAIT-MSG"); + db_printf("TID %-15s %-5s %18s %s\n", + "COMMAND", "PGRP", "WAIT-CHANNEL", "WAIT-MSG"); break; case 'o': skipzomb = 1; - db_printf(" TID %5s %5s %10s %10s %3s %-31s\n", + db_printf("TID %5s %5s %10s %10s %3s %-30s\n", "PID", "UID", "PRFLAGS", "PFLAGS", "CPU", "COMMAND"); break; } @@ -509,7 +509,7 @@ db_show_all_procs(db_expr_t addr, int ha switch (*mode) { case 'a': - db_printf("%-10.10s %18p %18p %18p\n", + db_printf("%-9.9s %18p %18p %18p\n", pr->ps_comm, p, p->p_addr, p->p_vmspace); break; @@ -524,9 +524,11 @@ db_show_all_procs(db_expr_t addr, int ha break; case 'w': - db_printf("%-16s %-8s %18p %s\n", pr->ps_comm, - pr->ps_emul->e_name, p->p_wchan, - (p->p_wchan && p->p_wmesg) ? + db_printf("%-15s %-5d %18p %s\n", + pr->ps_comm, (pr->ps_pgrp ? + pr->ps_pgrp->pg_id : -1), + p->p_wchan, + (p->p_wchan && p->p_wmesg) ? p->p_wmesg : ""); break; Index: share/man/man4/ddb.4 === RCS file: /cvs/src/share/man/man4/ddb.4,v retrieving revision 1.84 diff -u -p -r1.84 ddb.4 --- share/man/man4/ddb.424 Jan 2017 04:50:48 - 1.84 +++ share/man/man4/ddb.424 Jan 2017 05:03:38 - @@ -849,7 +849,7 @@ Shows non-idle threads that were on CPU Information printed includes thread ID, process ID, UID, process flags, thread flags, current CPU, and command name. .It Cm /w -Shows each thread's ID, command, system call emulation, +Shows each thread's ID, command, process group, wait channel address, and wait channel message. .El .\"
Re: pool_debug
On Tue, Jan 24, 2017 at 04:46:01PM +1000, Martin Pieuchot wrote: > I'd like to force a yield() for every pool_get(9) using PR_WAITOK, just > like we do with malloc(9), in order to ensure that the NET_LOCK() is not > held across context switches. > > ok? ok > > Index: kern/subr_pool.c > === > RCS file: /cvs/src/sys/kern/subr_pool.c,v > retrieving revision 1.204 > diff -u -p -r1.204 subr_pool.c > --- kern/subr_pool.c 21 Nov 2016 01:44:06 - 1.204 > +++ kern/subr_pool.c 24 Jan 2017 06:29:09 - > @@ -513,7 +513,7 @@ pool_get(struct pool *pp, int flags) > } > mtx_leave(>pr_mtx); > > - if (slowdown && ISSET(flags, PR_WAITOK)) > + if ((slowdown || pool_debug == 2) && ISSET(flags, PR_WAITOK)) > yield(); > > if (v == NULL) { >
Re: NET_LOCK() ordering issue
On 23/01/17(Mon) 18:45, Philip Guenther wrote: > On Mon, Jan 23, 2017 at 4:01 PM, Martin Pieuchotwrote: > [...] > > @@ -277,19 +277,35 @@ doaccept(struct proc *p, int sock, struc > > return (error); > > > > headfp = fp; > > + head = headfp->f_data; > > + > > + fdplock(fdp); > > + error = falloc(p, , ); > > + if (!error && (flags & SOCK_CLOEXEC)) > > + fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; > > + fdpunlock(fdp); > > + if (error) { > > + /* > > +* Probably ran out of file descriptors. Wakeup > > +* so some other process might have a chance at it. > > +*/ > > + wakeup_one(>so_timeo); > > This comment and wakeup_one() call should be left out. They were > needed in the original location because the thread had consumed the > original wakeup from the new connection being completed at that point > and when it gave up (because falloc() failed) it needed to recreate > that for the next process. Now, doing the falloc() before we don't > need or want that extra wakeup. > > Otherwise, looks good. Thanks, I'm going to commit the version below then. Index: kern/uipc_syscalls.c === RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.144 diff -u -p -r1.144 uipc_syscalls.c --- kern/uipc_syscalls.c29 Dec 2016 12:12:43 - 1.144 +++ kern/uipc_syscalls.c24 Jan 2017 03:39:56 - @@ -265,7 +265,7 @@ doaccept(struct proc *p, int sock, struc { struct filedesc *fdp = p->p_fd; struct file *fp, *headfp; - struct mbuf *nam; + struct mbuf *nam = NULL; socklen_t namelen; int error, s, tmpfd; struct socket *head, *so; @@ -277,19 +277,30 @@ doaccept(struct proc *p, int sock, struc return (error); headfp = fp; + + fdplock(fdp); + error = falloc(p, , ); + if (!error && (flags & SOCK_CLOEXEC)) + fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; + fdpunlock(fdp); + if (error) { + FRELE(headfp, p); + return (error); + } + redo: NET_LOCK(s); head = headfp->f_data; if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; - goto bad; + goto out; } if ((head->so_state & SS_NBIO) && head->so_qlen == 0) { if (head->so_state & SS_CANTRCVMORE) error = ECONNABORTED; else error = EWOULDBLOCK; - goto bad; + goto out; } while (head->so_qlen == 0 && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -298,34 +309,19 @@ redo: } error = tsleep(>so_timeo, PSOCK | PCATCH, "netcon", 0); - if (error) { - goto bad; - } + if (error) + goto out; } if (head->so_error) { error = head->so_error; head->so_error = 0; - goto bad; + goto out; } /* Figure out whether the new socket should be non-blocking. */ nflag = flags & SOCK_NONBLOCK_INHERIT ? (headfp->f_flag & FNONBLOCK) : (flags & SOCK_NONBLOCK ? FNONBLOCK : 0); - fdplock(fdp); - error = falloc(p, , ); - if (error == 0 && (flags & SOCK_CLOEXEC)) - fdp->fd_ofileflags[tmpfd] |= UF_EXCLOSE; - fdpunlock(fdp); - if (error != 0) { - /* -* Probably ran out of file descriptors. Wakeup -* so some other process might have a chance at it. -*/ - wakeup_one(>so_timeo); - goto bad; - } - nam = m_get(M_WAIT, MT_SONAME); /* @@ -336,10 +332,7 @@ redo: if (head->so_qlen == 0) { NET_UNLOCK(s); m_freem(nam); - fdplock(fdp); - fdremove(fdp, tmpfd); - closef(fp, p); - fdpunlock(fdp); + nam = NULL; goto redo; } @@ -360,24 +353,20 @@ redo: error = soaccept(so, nam); if (!error && name != NULL) error = copyaddrout(p, nam, name, namelen, anamelen); - + if (!error) { + (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t), p); + FILE_SET_MATURE(fp, p); + *retval = tmpfd; + } +out: + NET_UNLOCK(s); + m_freem(nam); if (error) { - /* if an error occurred, free the file descriptor */ - NET_UNLOCK(s); - m_freem(nam); fdplock(fdp); fdremove(fdp, tmpfd);
Re: Help for ddb(4)'s "tr /p"
On Tue, 24 Jan 2017, Martin Pieuchot wrote: > Diff below does that and fix alignment issues with /a /o and /w. The > problem was that TID needs 6 columns not 5. > > With this ps /a no longer wraps on i386. > > ddb{0}> ps /a > TID COMMAND STRUCT PROC * UAREA * VMSPACE/VM_MAP > * 70709 sysctl 0xd94e9418 0xf551c000 0xd94e7144 > 154620 ksh0xd94e9c28 0xf5544000 0xd94e7324 > 278101 cron 0xd94e9820 0xf557c000 0xd94e7004 > > ok? Looks good. ok guenther@
clean up and modernize test calls in bsd.obj.mk
We're currently using several idioms for conditionally executing code in bsd.obj.mk. I'd like to unify them for the sake of readability and consistency. This was done joint with rpe. Index: share/mk/bsd.obj.mk === RCS file: /cvs/src/share/mk/bsd.obj.mk,v retrieving revision 1.18 diff -u -p -r1.18 bsd.obj.mk --- share/mk/bsd.obj.mk 24 Jan 2017 02:56:50 - 1.18 +++ share/mk/bsd.obj.mk 24 Jan 2017 02:58:50 - @@ -33,20 +33,19 @@ obj! _SUBDIRUSE SETOWNER=:; \ fi; \ [[ -z $$MKDIRS ]] && MKDIRS="mkdir -p"; \ - if test $$here != $$subdir ; then \ + if [[ $$here != $$subdir ]]; then \ dest=${BSDOBJDIR}/$$subdir ; \ echo "$$here/${__objdir} -> $$dest"; \ - if test ! -L ${__objdir} -o \ - X`readlink ${__objdir}` != X$$dest; \ + if [[ ! -L ${__objdir} || `readlink ${__objdir}` != $$dest ]]; \ then \ - if test -e ${__objdir}; then rm -rf ${__objdir}; fi; \ + [[ -e ${__objdir} ]] && rm -rf ${__objdir}; \ ln -sf $$dest ${__objdir}; \ $$SETOWNER ${__objdir}; \ fi; \ - if test -d ${BSDOBJDIR}; then \ - test -d $$dest || $$MKDIRS $$dest; \ + if [[ -d ${BSDOBJDIR} ]]; then \ + [[ -d $$dest ]] || $$MKDIRS $$dest; \ else \ - if test -e ${BSDOBJDIR}; then \ + if [[ -e ${BSDOBJDIR} ]]; then \ echo "${BSDOBJDIR} is not a directory"; \ else \ echo "${BSDOBJDIR} does not exist"; \ @@ -54,7 +53,7 @@ obj! _SUBDIRUSE fi; \ else \ dest=$$here/${__objdir} ; \ - if test ! -d ${__objdir} ; then \ + if [[ ! -d ${__objdir} ]]; then \ echo "making $$dest" ; \ $$MKDIRS $$dest; \ $$SETOWNER $$dest; \
Re: clean up and modernize test calls in bsd.obj.mk
fOn Tue, 24 Jan 2017, Theo Buehler wrote: > We're currently using several idioms for conditionally executing code in > bsd.obj.mk. I'd like to unify them for the sake of readability and > consistency. This was done joint with rpe. Looks good. The only caveat that comes to mind is that [[...]]'s = and != operators treat the right-hand side as a *pattern*, but that looks okay for the two uses of != being converted here. Philip