Re: pfctl: make functions return void, merge two ifs
On Tue, Jun 13, 2017 at 12:43:51AM +0200, Adam Wolk wrote: > On Mon, Jun 12, 2017 at 11:43:44PM +0200, Alexandr Nedvedicky wrote: > > Hello Adam, > > > > > > > > > It was a rainy evening here, so here's the updated pfctl diff. > > > > I'm sorry to hear about the rainy weather [1]. > > anyway, you might want to run regression test for pfctl. > > > > cd $SRC/src/regress/sbin/pfctl > > cat Makefile > > # follow instructions > > > > just for sure. > > Ran the tests both on the unmodified and changed pfctl using a stock > unmodified > GENERIC kernel. > > One test case fails pfcmd1. > Yet another rainy day, so revisiting the diff. 1. Re-basing the diff on the recently committed change from the OP 2. Removed the redundant error reporting from: - pfctl_clear_tables - pfctl_show_tables 3. pfctl_show_ifaces back to using radix_perror for output consistency 4. regress test altered, we are passing in the -n which just results in the rules being parsed. The definition for the pfctlcmd testing group in the Makefile is: # pfcmd: test pfctl command line parsing after the change the test failed not because of command line parsing but from the attempt of trying to clear tables from a non existing anchor. with the -n flag we still test if the anchor command can be combined with -Fa but don't actually attempt to run the clearing code. This regress modification passes both with the pfctl before this change as well as the newly modified one. Feedback, OK's? ? openbsd-daily-pfctl-1.diff ? openbsd-daily-pfctl-2.diff ? openbsd-daily-pfctl-3.diff ? parse.c ? pfctl ? rain.diff Index: pfctl.h === RCS file: /cvs/src/sbin/pfctl/pfctl.h,v retrieving revision 1.53 diff -u -p -r1.53 pfctl.h --- pfctl.h 19 Jan 2015 23:52:02 - 1.53 +++ pfctl.h 16 Jun 2017 21:05:38 - @@ -75,12 +75,12 @@ int pfi_get_ifaces(const char *, struct int pfi_clr_istats(const char *, int *, int); voidpfctl_print_title(char *); -int pfctl_clear_tables(const char *, int); -int pfctl_show_tables(const char *, int); +voidpfctl_clear_tables(const char *, int); +voidpfctl_show_tables(const char *, int); int pfctl_command_tables(int, char *[], char *, const char *, char *, const char *, int); voidwarn_namespace_collision(const char *); -int pfctl_show_ifaces(const char *, int); +voidpfctl_show_ifaces(const char *, int); FILE *pfctl_fopen(const char *, const char *); /* Index: pfctl_table.c === RCS file: /cvs/src/sbin/pfctl/pfctl_table.c,v retrieving revision 1.75 diff -u -p -r1.75 pfctl_table.c --- pfctl_table.c 13 Apr 2017 07:30:21 - 1.75 +++ pfctl_table.c 16 Jun 2017 21:05:38 - @@ -102,16 +102,18 @@ static const char *istats_text[2][2][2] table.pfrt_flags &= ~PFR_TFLAG_PERSIST; \ } while(0) -int +void pfctl_clear_tables(const char *anchor, int opts) { - return pfctl_table(0, NULL, NULL, "-F", NULL, anchor, opts); + if (pfctl_table(0, NULL, NULL, "-F", NULL, anchor, opts) == -1) + exit(1); } -int +void pfctl_show_tables(const char *anchor, int opts) { - return pfctl_table(0, NULL, NULL, "-s", NULL, anchor, opts); + if (pfctl_table(0, NULL, NULL, "-s", NULL, anchor, opts) == -1) + exit(1); } int @@ -594,7 +596,7 @@ xprintf(int opts, const char *fmt, ...) /* interface stuff */ -int +void pfctl_show_ifaces(const char *filter, int opts) { struct pfr_bufferb; @@ -608,7 +610,7 @@ pfctl_show_ifaces(const char *filter, in b.pfrb_size = b.pfrb_msize; if (pfi_get_ifaces(filter, b.pfrb_caddr, _size)) { radix_perror(); - return (1); + exit(1); } if (b.pfrb_size <= b.pfrb_msize) break; @@ -618,7 +620,6 @@ pfctl_show_ifaces(const char *filter, in pfctl_print_title("INTERFACES:"); PFRB_FOREACH(p, ) print_iface(p, opts); - return (0); } void ? pfctl-regress-2.diff ? pfctl-regress.diff Index: pfcmd1.opts === RCS file: /cvs/src/regress/sbin/pfctl/pfcmd1.opts,v retrieving revision 1.1 diff -u -p -r1.1 pfcmd1.opts --- pfcmd1.opts 3 Jul 2010 02:32:45 - 1.1 +++ pfcmd1.opts 16 Jun 2017 21:34:29 - @@ -1 +1 @@ --a regress/does_not_exist -Fa +-n -a regress/does_not_exist -Fa
Re: m4(1): Don't need to link with -ly
On Fri, 16 Jun 2017 16:22:15 -0400, Brian Callahan wrote: > m4 links with -ly, presumably for the yyerror() function. > But there already is a yyerror() in usr.bin/m4/expr.c, which is > written specifically for m4. So we don't need -ly. OK, but if you want to remove -ll as well you could do this. Either way OK millert@ - todd Index: usr.bin/m4/Makefile === RCS file: /cvs/src/usr.bin/m4/Makefile,v retrieving revision 1.13 diff -u -p -u -r1.13 Makefile --- usr.bin/m4/Makefile 12 May 2014 19:11:19 - 1.13 +++ usr.bin/m4/Makefile 16 Jun 2017 20:58:42 - @@ -8,8 +8,8 @@ CFLAGS+=-DEXTENDED -I. CDIAGFLAGS=-W -Wall -Wstrict-prototypes -pedantic \ -Wno-unused -Wno-char-subscripts -Wno-sign-compare -LDADD= -ly -ll -lm -lutil -DPADD= ${LIBY} ${LIBL} ${LIBM} ${LIBUTIL} +LDADD= -lm -lutil +DPADD= ${LIBM} ${LIBUTIL} SRCS= eval.c expr.c look.c main.c misc.c gnum4.c trace.c tokenizer.l parser.y MAN= m4.1 Index: usr.bin/m4/tokenizer.l === RCS file: /cvs/src/usr.bin/m4/tokenizer.l,v retrieving revision 1.9 diff -u -p -u -r1.9 tokenizer.l --- usr.bin/m4/tokenizer.l 15 Jun 2017 13:48:42 - 1.9 +++ usr.bin/m4/tokenizer.l 16 Jun 2017 21:00:34 - @@ -37,6 +37,8 @@ oct 0[0-7]* dec[1-9][0-9]* radix 0[rR][0-9]+:[0-9a-zA-Z]+ +%option noyywrap + %% {ws} {/* just skip it */} {hex}|{oct}|{dec} { yylval = number(); return(NUMBER); }
Re: wsfont: remove iso7/pcvt encoding macros and entries
On Thu, Jun 15, 2017 at 01:52:07PM +0200, Frederic Cambus wrote: > On Tue, Jun 13, 2017 at 11:29:59AM +, Miod Vallat wrote: > > > > > Hi tech@, > > > > > > We do not support iso7 nor pcvt encoding, so remove macro definitions > > > and commented entries. > > > > If you do that, then you can probably optimize away > > vga_valid_primary_font() in sys/dev/ic/vga.c as well. > > Makes sense to me as well, thanks for pointing that out. Diff below. Please discard the last diff, what did I have in mind? The fonts we are looping through in vga_selectfont() are the builtin font which is using the IBM encoding, and some potentially loaded fonts which are either IBM or ISO encoded. Therefore the condition checked by vga_valid_font() is always true, and we can remove it. Comments? OK? Index: sys/dev/ic/vga.c === RCS file: /cvs/src/sys/dev/ic/vga.c,v retrieving revision 1.68 diff -u -p -r1.68 vga.c --- sys/dev/ic/vga.c9 Sep 2015 18:23:39 - 1.68 +++ sys/dev/ic/vga.c16 Jun 2017 20:50:42 - @@ -344,10 +344,6 @@ bad: * We want at least ASCII 32..127 be present in the * first font slot. */ -#define vga_valid_primary_font(f) \ - (f->encoding == WSDISPLAY_FONTENC_IBM || \ - f->encoding == WSDISPLAY_FONTENC_ISO) - int vga_selectfont(struct vga_config *vc, struct vgascreen *scr, const char *name1, const char *name2) /* NULL: take first found */ @@ -362,9 +358,7 @@ vga_selectfont(struct vga_config *vc, st struct vgafont *f = vc->vc_fonts[i]; if (!f || f->height != type->fontheight) continue; - if (!f1 && - vga_valid_primary_font(f) && - (!name1 || !*name1 || + if (!f1 && (!name1 || !*name1 || !strncmp(name1, f->name, WSFONT_NAME_SIZE))) { f1 = f; continue;
m4(1): Don't need to link with -ly
Hi tech -- m4 links with -ly, presumably for the yyerror() function. But there already is a yyerror() in usr.bin/m4/expr.c, which is written specifically for m4. So we don't need -ly. OK? ~Brian Index: Makefile === RCS file: /cvs/src/usr.bin/m4/Makefile,v retrieving revision 1.13 diff -u -p -u -p -r1.13 Makefile --- Makefile12 May 2014 19:11:19 - 1.13 +++ Makefile16 Jun 2017 20:11:21 - @@ -8,8 +8,8 @@ CFLAGS+=-DEXTENDED -I. CDIAGFLAGS=-W -Wall -Wstrict-prototypes -pedantic \ -Wno-unused -Wno-char-subscripts -Wno-sign-compare -LDADD= -ly -ll -lm -lutil -DPADD= ${LIBY} ${LIBL} ${LIBM} ${LIBUTIL} +LDADD= -ll -lm -lutil +DPADD= ${LIBL} ${LIBM} ${LIBUTIL} SRCS= eval.c expr.c look.c main.c misc.c gnum4.c trace.c tokenizer.l parser.y MAN= m4.1
Re: viocon.4: mention all ttyVI devices
On Sun, Jun 11, 2017 at 09:43:17PM +0200, Michal Mazurek wrote: > There are 5 devices in /dev/. Is there any reason why just two are > listed in the viocon.4 manpage? > > OK to mention them all? > evening. i haven;t seen anyone pick up on this. if no one has (reasonably) objected, please go ahead. jmc > Index: share/man/man4/viocon.4 > === > RCS file: /cvs/src/share/man/man4/viocon.4,v > retrieving revision 1.2 > diff -u -p -r1.2 viocon.4 > --- share/man/man4/viocon.4 21 Dec 2015 23:21:19 - 1.2 > +++ share/man/man4/viocon.4 11 Jun 2017 19:40:29 - > @@ -38,6 +38,9 @@ The multiport feature is not yet support > .Bl -tag -width Pa -compact > .It Pa /dev/ttyVI00 > .It Pa /dev/ttyVI10 > +.It Pa /dev/ttyVI20 > +.It Pa /dev/ttyVI30 > +.It Pa /dev/ttyVI40 > .El > .Sh SEE ALSO > .Xr intro 4 , > > > -- > Michal Mazurek >
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 11:09:01PM +1000, Jonathan Gray wrote: > On Fri, Jun 16, 2017 at 02:23:36PM +0200, Mike Belopuhov wrote: > > On Fri, Jun 16, 2017 at 16:31 +1000, Jonathan Matthew wrote: > > > Index: arch/i386/include/cpufunc.h > > > === > > > RCS file: /cvs/src/sys/arch/i386/include/cpufunc.h,v > > > retrieving revision 1.25 > > > diff -u -p -u -p -r1.25 cpufunc.h > > > --- arch/i386/include/cpufunc.h 27 May 2017 12:21:50 - 1.25 > > > +++ arch/i386/include/cpufunc.h 16 Jun 2017 06:07:16 - > > > @@ -217,6 +217,15 @@ mfence(void) > > > __asm volatile("mfence" : : : "memory"); > > > } > > > > > > +static __inline u_int64_t > > > +rdtsc(void) > > > +{ > > > + uint32_t hi, lo; > > > + > > > + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); > > > + return (((uint64_t)hi << 32) | (uint64_t) lo); > > > +} > > > + > > > static __inline void > > > wrmsr(u_int msr, u_int64_t newval) > > > { > > > > I think it's OK to get this chunk in. amd64 has got this already. > > > > Perhaps make it __asm volatile ("rdtsc" : "=A" (v)); like the pctr.h version? > > That's also what the gcc example uses for rdtsc in > https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints > > I guess the chance of something pretending to be a 486 without TSC attaching > pvbus is slim. > I was going to mention the same thing but I think the risk is low, so ok mlarkin@ on this if it isn't in already. -ml
Re: smtpd session hang
On Fri, Jun 16, 2017 at 07:12:43PM +0300, Henri Kemppainen wrote: > > > Nice catch, the diff reads fine to me, I'll commit later today when I > > > have another ok from eric@ > > > Yes, this looks correct. But, I would rather move the resume test before > > the EOM test, to avoid touching the session after the transfer has been > > finalized by smtp_data_io_done(). > > It oughtn't make a difference as long as the duplex control is correct, > but I'm fine with that change. > > > > side note, smtpd should not have been able to leak more than 5 fd, it > > > should not have been able to exhaust, is this what you observed ? > > For the record, we discussed this with Gilles on irc and while we saw > more than a dozen leaked fds, it's okay as smtpd will allow as many > incoming sessions as the dtable can accommodate (with an fd reserve). > The lower limits are on outgoing connections. > > New diff with reordered code. I'll see if I can get Adam to run one more > round of testing.. > Tested the diff on -current OpenSMTPD running on a 6.1 server. Sent 11 emails with ~2MB of base64 encoded garbage (like with our previous tests) all emails were delivered with no FD leaks. I am also willing to test an errata diff on 6.1 if this qualifies (reliability/performance). > > Index: usr.sbin/smtpd/smtp_session.c > === > RCS file: /cvs/src/usr.sbin/smtpd/smtp_session.c,v > retrieving revision 1.303 > diff -u -p -r1.303 smtp_session.c > --- usr.sbin/smtpd/smtp_session.c 17 May 2017 14:00:06 - 1.303 > +++ usr.sbin/smtpd/smtp_session.c 16 Jun 2017 14:56:11 - > @@ -1474,12 +1474,12 @@ smtp_data_io(struct io *io, int evt, voi > break; > > case IO_LOWAT: > - if (s->tx->dataeom && io_queued(s->tx->oev) == 0) { > - smtp_data_io_done(s); > - } else if (io_paused(s->io, IO_IN)) { > + if (io_paused(s->io, IO_IN)) { > log_debug("debug: smtp: %p: filter congestion over: > resuming session", s); > io_resume(s->io, IO_IN); > } > + if (s->tx->dataeom && io_queued(s->tx->oev) == 0) > + smtp_data_io_done(s); > break; > > default: >
Re: smtpd session hang
> > Nice catch, the diff reads fine to me, I'll commit later today when I > > have another ok from eric@ > Yes, this looks correct. But, I would rather move the resume test before > the EOM test, to avoid touching the session after the transfer has been > finalized by smtp_data_io_done(). It oughtn't make a difference as long as the duplex control is correct, but I'm fine with that change. > > side note, smtpd should not have been able to leak more than 5 fd, it > > should not have been able to exhaust, is this what you observed ? For the record, we discussed this with Gilles on irc and while we saw more than a dozen leaked fds, it's okay as smtpd will allow as many incoming sessions as the dtable can accommodate (with an fd reserve). The lower limits are on outgoing connections. New diff with reordered code. I'll see if I can get Adam to run one more round of testing.. Index: usr.sbin/smtpd/smtp_session.c === RCS file: /cvs/src/usr.sbin/smtpd/smtp_session.c,v retrieving revision 1.303 diff -u -p -r1.303 smtp_session.c --- usr.sbin/smtpd/smtp_session.c 17 May 2017 14:00:06 - 1.303 +++ usr.sbin/smtpd/smtp_session.c 16 Jun 2017 14:56:11 - @@ -1474,12 +1474,12 @@ smtp_data_io(struct io *io, int evt, voi break; case IO_LOWAT: - if (s->tx->dataeom && io_queued(s->tx->oev) == 0) { - smtp_data_io_done(s); - } else if (io_paused(s->io, IO_IN)) { + if (io_paused(s->io, IO_IN)) { log_debug("debug: smtp: %p: filter congestion over: resuming session", s); io_resume(s->io, IO_IN); } + if (s->tx->dataeom && io_queued(s->tx->oev) == 0) + smtp_data_io_done(s); break; default:
Re: Add pledge(2) for rebound(8)'s parent proc
Ugh! :\ You're right, and that sysctl is not allowed by pledge(2). Please disregard this diff. On 18:00 Fri 16 Jun , Sebastien Marie wrote: > On Fri, Jun 16, 2017 at 03:53:09PM +0100, Ricardo Mestre wrote: > > Hi tech@ > > > > rebound(8)'s parent proc doesn't seem to need much permissions to do what it > > needs, here is the pledge for the parent for the following promises: > > > > rpath: reload the configuration at reexec time (see below) > > proc/exec: needed to reexec itself and kill child if needed > > > > rebound will not be able to restore dnsjacking on exit. > > there is an atexit() call with resetport function. > > At exit, the function should be able to set { CTL_KERN, KERN_DNSJACKPORT }, > and it will not be able to do that if pledged. > > -- > Sebastien Marie
Re: Add pledge(2) for rebound(8)'s parent proc
On Fri, Jun 16, 2017 at 03:53:09PM +0100, Ricardo Mestre wrote: > Hi tech@ > > rebound(8)'s parent proc doesn't seem to need much permissions to do what it > needs, here is the pledge for the parent for the following promises: > > rpath: reload the configuration at reexec time (see below) > proc/exec: needed to reexec itself and kill child if needed > rebound will not be able to restore dnsjacking on exit. there is an atexit() call with resetport function. At exit, the function should be able to set { CTL_KERN, KERN_DNSJACKPORT }, and it will not be able to do that if pledged. -- Sebastien Marie
Add pledge(2) for rebound(8)'s parent proc
Hi tech@ rebound(8)'s parent proc doesn't seem to need much permissions to do what it needs, here is the pledge for the parent for the following promises: rpath: reload the configuration at reexec time (see below) proc/exec: needed to reexec itself and kill child if needed Comments? OK? Index: rebound.c === RCS file: /cvs/src/usr.sbin/rebound/rebound.c,v retrieving revision 1.84 diff -u -p -u -r1.84 rebound.c --- rebound.c 31 May 2017 04:52:11 - 1.84 +++ rebound.c 16 Jun 2017 14:07:40 - @@ -996,5 +996,8 @@ main(int argc, char **argv) logerr("daemon: %s", strerror(errno)); daemonized = 1; + if (pledge("stdio rpath proc exec", NULL) == -1) + logerr("pledge failed"); + return monitorloop(ud, ld, ud6, ld6, confname); }
net80211: initialize link state during attach
Set the link state of wifi interfaces to DOWN at attach time instead of leaving it as UNKNOWN which userland cannot really make use of (e.g. dhclient interprets UNKNOWN as UP). Link state is also reset by the ieee80211_newstate() function. However, that function is usually called by the driver-specific newstate function stored in ic->ic_newstate. During the very first time ic_newstate() runs, without the diff below the link state is UNKNOWN. And if a driver decides not to call ieee80211_newstate() for some reason (such as iwm(4) does during INIT->SCAN) the link state will still be UNKNOWN. In my opinion the link state of wifi interfaces should at any time be either DOWN or UP, and never UNKNOWN. ok? Index: sys/net80211/ieee80211.c === RCS file: /cvs/src/sys/net80211/ieee80211.c,v retrieving revision 1.61 diff -u -p -r1.61 ieee80211.c --- sys/net80211/ieee80211.c31 May 2017 09:17:39 - 1.61 +++ sys/net80211/ieee80211.c7 Jun 2017 15:08:55 - @@ -156,6 +156,8 @@ ieee80211_ifattach(struct ifnet *ifp) if_addgroup(ifp, "wlan"); ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY; + + ieee80211_set_link_state(ic, LINK_STATE_DOWN); } void
Re: smtpd session hang
On Fri, Jun 16, 2017 at 09:11:29AM +0200, Gilles Chehade wrote: > On Fri, Jun 16, 2017 at 01:05:25AM +0300, Henri Kemppainen wrote: > > I had a little debugging session with awolk@ over at #openbsd-daily. His > > smtpd would over time end up with hung sessions that never timeout. > > > > The problem is related to the data_io path's congestion control which > > may pause the session. In this case the io system will not wait for > > read events and as such will not have a chance to timeout until it is > > resumed. > > > > If the pause happens when a full message is just about to pass through > > the data_io path, the session is never resumed, even though there is > > obviously no more congestion and the session should be reading more > > input from the client again. > > > > A debug trace excerpt shows the course of events: > > > > mtp: 0xe54baa1e000: IO_DATAIN > ib=16839 ob=0> > > debug: smtp: 0xe54baa1e000: filter congestion: pausing session > > smtp: 0xe54baa1e000 (data): IO_LOWAT > ib=0 ob=0> > > debug: smtp: 0xe54baa1e000: data io done (259146 bytes) > > debug: 0xe54baa1e000: end of message, msgflags=0x > > smtp: 0xe54baa1e000: >>> 250 2.0.0: 4f36f9e3 Message accepted for delivery > > smtp: 0xe54baa1e000: STATE_BODY -> STATE_HELO > > smtp: 0xe54baa1e000: IO_LOWAT > ib=0 ob=0> > > > > From this point on, session 0xe54baa1e000 and its io 0xe551d0d5000 > > (which has the pause_in flag) are never seen again in the trace, and > > fstat shows a corresponding connection to smtpd that never goes away. > > > > The proposed fix is to always resume the session if the data_io path > > hits the low water mark. > > > > Mr. Wolk tested this diff against smtpd on 6.1 as well as a against > > -current version of smtpd (compiled on the same system running 6.1). > > > > Nice catch, the diff reads fine to me, I'll commit later today when I > have another ok from eric@ Yes, this looks correct. But, I would rather move the resume test before the EOM test, to avoid touching the session after the transfer has been finalized by smtp_data_io_done(). Eric. > side note, smtpd should not have been able to leak more than 5 fd, it > should not have been able to exhaust, is this what you observed ? > > > Index: usr.sbin/smtpd/smtp_session.c > > === > > RCS file: /cvs/src/usr.sbin/smtpd/smtp_session.c,v > > retrieving revision 1.303 > > diff -u -p -r1.303 smtp_session.c > > --- usr.sbin/smtpd/smtp_session.c 17 May 2017 14:00:06 - 1.303 > > +++ usr.sbin/smtpd/smtp_session.c 15 Jun 2017 20:28:12 - > > @@ -1474,9 +1474,10 @@ smtp_data_io(struct io *io, int evt, voi > > break; > > > > case IO_LOWAT: > > - if (s->tx->dataeom && io_queued(s->tx->oev) == 0) { > > + if (s->tx->dataeom && io_queued(s->tx->oev) == 0) > > smtp_data_io_done(s); > > - } else if (io_paused(s->io, IO_IN)) { > > + > > + if (io_paused(s->io, IO_IN)) { > > log_debug("debug: smtp: %p: filter congestion over: > > resuming session", s); > > io_resume(s->io, IO_IN); > > } > > > > > > -- > Gilles Chehade > > https://www.poolp.org @poolpOrg >
remove redundant flag from iwm(4)
The HW_INITED flag has a grammatically dubious name and is unnecessary because it duplicates the purpose of the IFF_RUNNING flag. ok? Index: sys/dev/pci/if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.197 diff -u -p -r1.197 if_iwm.c --- sys/dev/pci/if_iwm.c16 Jun 2017 08:45:34 - 1.197 +++ sys/dev/pci/if_iwm.c16 Jun 2017 11:47:09 - @@ -6106,9 +6106,6 @@ iwm_init(struct ifnet *ifp) struct ieee80211com *ic = >sc_ic; int err, generation; - if (sc->sc_flags & IWM_FLAG_HW_INITED) { - return 0; - } sc->sc_generation++; err = iwm_init_hw(sc); @@ -6135,8 +6132,6 @@ iwm_init(struct ifnet *ifp) return err; } while (ic->ic_state != IEEE80211_S_SCAN); - sc->sc_flags |= IWM_FLAG_HW_INITED; - return 0; } @@ -6214,7 +6209,6 @@ iwm_stop(struct ifnet *ifp, int disable) struct ieee80211com *ic = >sc_ic; struct iwm_node *in = (void *)ic->ic_bss; - sc->sc_flags &= ~IWM_FLAG_HW_INITED; sc->sc_generation++; ic->ic_scan_lock = IEEE80211_SCAN_UNLOCKED; ifp->if_flags &= ~IFF_RUNNING; @@ -7450,7 +7444,7 @@ iwm_init_task(void *arg1) } s = splnet(); - if (sc->sc_flags & IWM_FLAG_HW_INITED) + if (sc->sc_flags & IFF_RUNNING) iwm_stop(ifp, 0); if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == IFF_UP) iwm_init(ifp); Index: sys/dev/pci/if_iwmvar.h === RCS file: /cvs/src/sys/dev/pci/if_iwmvar.h,v retrieving revision 1.28 diff -u -p -r1.28 if_iwmvar.h --- sys/dev/pci/if_iwmvar.h 14 Jun 2017 16:56:04 - 1.28 +++ sys/dev/pci/if_iwmvar.h 14 Jun 2017 19:17:23 - @@ -280,9 +280,8 @@ struct iwm_rx_ring { }; #define IWM_FLAG_USE_ICT 0x01 -#define IWM_FLAG_HW_INITED 0x02 -#define IWM_FLAG_RFKILL0x04 -#define IWM_FLAG_SCANNING 0x08 +#define IWM_FLAG_RFKILL0x02 +#define IWM_FLAG_SCANNING 0x04 struct iwm_ucode_status { uint32_t uc_error_event_table;
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 10:25 +0200, Mike Belopuhov wrote: > Last time I've tried uebayashi's pvclock on Xen, it didn't > work for me. I didn't have time to investigate why but > probably because we need per-cpu readings. Which you do > for KVM. I'll test this on Xen as soon as I get to the > office. > [...] > > But this brings another point: where and how to perform > the pvclock initialization and attachment. In your diff > pvclock_xen_init comes a bit too early: none of the Xen > things are initialized at that point, shared info page > isn't allocated. > As I've told jmatthew@ privately this doesn't work on Xen, but I've changed a few things and made it work: https://github.com/mbelop/src/commits/pvclock However, I'm observing a huge drift: 6 minutes in about an hour so it's not usable as it is. I think this is the same thing as I saw before. I'll ponder the per-CPU vcpu_info path and report back when I've got something. But in the meantime I hope that jmatthew@ will check in rdtsc and pvbus_init_cpu bits (we're lobbying for a s/_vcpu/_cpu/ change. :-)
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 23:09 +1000, Jonathan Gray wrote: > On Fri, Jun 16, 2017 at 02:23:36PM +0200, Mike Belopuhov wrote: > > On Fri, Jun 16, 2017 at 16:31 +1000, Jonathan Matthew wrote: > > > Index: arch/i386/include/cpufunc.h > > > === > > > RCS file: /cvs/src/sys/arch/i386/include/cpufunc.h,v > > > retrieving revision 1.25 > > > diff -u -p -u -p -r1.25 cpufunc.h > > > --- arch/i386/include/cpufunc.h 27 May 2017 12:21:50 - 1.25 > > > +++ arch/i386/include/cpufunc.h 16 Jun 2017 06:07:16 - > > > @@ -217,6 +217,15 @@ mfence(void) > > > __asm volatile("mfence" : : : "memory"); > > > } > > > > > > +static __inline u_int64_t > > > +rdtsc(void) > > > +{ > > > + uint32_t hi, lo; > > > + > > > + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); > > > + return (((uint64_t)hi << 32) | (uint64_t) lo); > > > +} > > > + > > > static __inline void > > > wrmsr(u_int msr, u_int64_t newval) > > > { > > > > I think it's OK to get this chunk in. amd64 has got this already. > > > > Perhaps make it __asm volatile ("rdtsc" : "=A" (v)); like the pctr.h version? > > That's also what the gcc example uses for rdtsc in > https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints > Sure. > I guess the chance of something pretending to be a 486 without TSC attaching > pvbus is slim. This file is full of instructions not supported on older CPUs so no harm done as far as I can tell.
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 02:23:36PM +0200, Mike Belopuhov wrote: > On Fri, Jun 16, 2017 at 16:31 +1000, Jonathan Matthew wrote: > > Index: arch/i386/include/cpufunc.h > > === > > RCS file: /cvs/src/sys/arch/i386/include/cpufunc.h,v > > retrieving revision 1.25 > > diff -u -p -u -p -r1.25 cpufunc.h > > --- arch/i386/include/cpufunc.h 27 May 2017 12:21:50 - 1.25 > > +++ arch/i386/include/cpufunc.h 16 Jun 2017 06:07:16 - > > @@ -217,6 +217,15 @@ mfence(void) > > __asm volatile("mfence" : : : "memory"); > > } > > > > +static __inline u_int64_t > > +rdtsc(void) > > +{ > > + uint32_t hi, lo; > > + > > + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); > > + return (((uint64_t)hi << 32) | (uint64_t) lo); > > +} > > + > > static __inline void > > wrmsr(u_int msr, u_int64_t newval) > > { > > I think it's OK to get this chunk in. amd64 has got this already. > Perhaps make it __asm volatile ("rdtsc" : "=A" (v)); like the pctr.h version? That's also what the gcc example uses for rdtsc in https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints I guess the chance of something pretending to be a 486 without TSC attaching pvbus is slim.
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 16:31 +1000, Jonathan Matthew wrote: > Index: arch/i386/include/cpufunc.h > === > RCS file: /cvs/src/sys/arch/i386/include/cpufunc.h,v > retrieving revision 1.25 > diff -u -p -u -p -r1.25 cpufunc.h > --- arch/i386/include/cpufunc.h 27 May 2017 12:21:50 - 1.25 > +++ arch/i386/include/cpufunc.h 16 Jun 2017 06:07:16 - > @@ -217,6 +217,15 @@ mfence(void) > __asm volatile("mfence" : : : "memory"); > } > > +static __inline u_int64_t > +rdtsc(void) > +{ > + uint32_t hi, lo; > + > + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); > + return (((uint64_t)hi << 32) | (uint64_t) lo); > +} > + > static __inline void > wrmsr(u_int msr, u_int64_t newval) > { I think it's OK to get this chunk in. amd64 has got this already.
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 10:25 +0200, Mike Belopuhov wrote: > I don't know if it's a good idea to depend on Xen's > definition of vcpu_time_info. I think I have factored > it out into the pvclock_time_info and put it into the > pvclockvar.h or something like that. And then made Xen > use those definitions instead of its own. Dunno what's > the best course of action here. > This is what I would like to use. I've stripped the API part, but we can add it as well. I don't believe this file requires a specific license since there's a handful of pvclock header files out there implementing a common interface so a person committing such a file can add his own copyright. Opinions? #ifndef _PV_PVCLOCK_H_ #define _PV_PVCLOCK_H_ struct pvclock_vcpu_time_info { volatile uint32_t version; volatile uint32_t pad1; volatile uint64_t tsc_timestamp; volatile uint64_t system_time; volatile uint32_t tsc_to_system_mul; volatile int8_t tsc_shift; volatile uint8_tflags; volatile uint8_tpad2[2]; } __packed; struct pvclock_wall_clock { volatile uint32_t version; volatile uint32_t sec; volatile uint32_t nsec; } __packed; #endif /* _PV_PVCLOCK_H_ */
Re: faster timecounters for kvm/xen
On Fri, Jun 16, 2017 at 16:31 +1000, Jonathan Matthew wrote: > Recently I updated the kernel lock profiling stuff I've been working on, since > it had been rotting a bit since witness was introduced. Running my diff on a > KVM VM, I found there was a pretty huge performance impact (10 minutes to > build a kernel instead of 4), which turned out to be because reading the > emulated HPET in KVM is slow, and lock profiling involves a lot of extra > clock reads. The diff below adds a new TSC-based timecounter implementation > for KVM and Xen to remedy this. > > KVM and Xen provide frequently-updated views of system time from the host to > each vcpu in a way that lets the VM get accurate high resolution time without > much work. Linux calls this mechanism 'pvclock' so I'm doing the same. > > The pvclock structure gives you a system time (in nanoseconds), the TSC > reading from when the time was updated, and scaling factors for converting TSC > values to nanoseconds. Usually you subtract the TSC reading in the pvclock > structure from a current reading, convert that to nanoseconds, and add it to > the system time. I decided to go the other way in order to keep all the > available resolution. > > Using pvclock as the timecounter reduces the overhead of lock profiling to > almost nothing. Even without the extra clock reads for lock profiling, > it cuts a few seconds off kernel compile time on a 2 vcpu vm. I've run it > for ~12 hours without ntpd and the clock keeps time accurately. > > One wrinkle here is that the KVM pvclock mechanism requires setup on each > vcpu, > so I added a new pvbus function that gets called from cpu_hatch, allowing any > hypervisor-specific setup to happen there. > > I still need to try this on xen, but comments at this stage are welcome. > Cool! You've beaten both of us to it :) Last time I've tried uebayashi's pvclock on Xen, it didn't work for me. I didn't have time to investigate why but probably because we need per-cpu readings. Which you do for KVM. I'll test this on Xen as soon as I get to the office. Now regarding the diff. pvbus_init_vcpu. Ah yes, please. It was a chicken and the egg problem for me: I didn't have Xen, but wanted a callback from cpu_hatch to setup shared info pages and events (interrupt delivery) for all CPUs. So please factor it out and let's get that committed. I don't know if it's a good idea to depend on Xen's definition of vcpu_time_info. I think I have factored it out into the pvclock_time_info and put it into the pvclockvar.h or something like that. And then made Xen use those definitions instead of its own. Dunno what's the best course of action here. But this brings another point: where and how to perform the pvclock initialization and attachment. In your diff pvclock_xen_init comes a bit too early: none of the Xen things are initialized at that point, shared info page isn't allocated. I told Stefan in Munich that perhaps having a kvm.c shim that would prepare and attach pvclock (and maybe provide some flags and other bells and whistles). I think we need to call pvclock attachment from Xen code where it's appropriate, not from pvbus code. Or do a config_attach on it. Why didn't you want to put it in its own device driver? It's nice that this version avoids using assembly. Any idea what was the reason for Linux/FreeBSD code to use it? Were they afraid to lose precision maybe? In any case, good job, lets try to get this in.
Re: smtpd session hang
On Fri, Jun 16, 2017 at 01:05:25AM +0300, Henri Kemppainen wrote: > I had a little debugging session with awolk@ over at #openbsd-daily. His > smtpd would over time end up with hung sessions that never timeout. > > The problem is related to the data_io path's congestion control which > may pause the session. In this case the io system will not wait for > read events and as such will not have a chance to timeout until it is > resumed. > > If the pause happens when a full message is just about to pass through > the data_io path, the session is never resumed, even though there is > obviously no more congestion and the session should be reading more > input from the client again. > > A debug trace excerpt shows the course of events: > > mtp: 0xe54baa1e000: IO_DATAIN ob=0> > debug: smtp: 0xe54baa1e000: filter congestion: pausing session > smtp: 0xe54baa1e000 (data): IO_LOWAT ob=0> > debug: smtp: 0xe54baa1e000: data io done (259146 bytes) > debug: 0xe54baa1e000: end of message, msgflags=0x > smtp: 0xe54baa1e000: >>> 250 2.0.0: 4f36f9e3 Message accepted for delivery > smtp: 0xe54baa1e000: STATE_BODY -> STATE_HELO > smtp: 0xe54baa1e000: IO_LOWAT ib=0 ob=0> > > From this point on, session 0xe54baa1e000 and its io 0xe551d0d5000 > (which has the pause_in flag) are never seen again in the trace, and > fstat shows a corresponding connection to smtpd that never goes away. > > The proposed fix is to always resume the session if the data_io path > hits the low water mark. > > Mr. Wolk tested this diff against smtpd on 6.1 as well as a against > -current version of smtpd (compiled on the same system running 6.1). > Nice catch, the diff reads fine to me, I'll commit later today when I have another ok from eric@ side note, smtpd should not have been able to leak more than 5 fd, it should not have been able to exhaust, is this what you observed ? > Index: usr.sbin/smtpd/smtp_session.c > === > RCS file: /cvs/src/usr.sbin/smtpd/smtp_session.c,v > retrieving revision 1.303 > diff -u -p -r1.303 smtp_session.c > --- usr.sbin/smtpd/smtp_session.c 17 May 2017 14:00:06 - 1.303 > +++ usr.sbin/smtpd/smtp_session.c 15 Jun 2017 20:28:12 - > @@ -1474,9 +1474,10 @@ smtp_data_io(struct io *io, int evt, voi > break; > > case IO_LOWAT: > - if (s->tx->dataeom && io_queued(s->tx->oev) == 0) { > + if (s->tx->dataeom && io_queued(s->tx->oev) == 0) > smtp_data_io_done(s); > - } else if (io_paused(s->io, IO_IN)) { > + > + if (io_paused(s->io, IO_IN)) { > log_debug("debug: smtp: %p: filter congestion over: > resuming session", s); > io_resume(s->io, IO_IN); > } > > -- Gilles Chehade https://www.poolp.org @poolpOrg
faster timecounters for kvm/xen
Recently I updated the kernel lock profiling stuff I've been working on, since it had been rotting a bit since witness was introduced. Running my diff on a KVM VM, I found there was a pretty huge performance impact (10 minutes to build a kernel instead of 4), which turned out to be because reading the emulated HPET in KVM is slow, and lock profiling involves a lot of extra clock reads. The diff below adds a new TSC-based timecounter implementation for KVM and Xen to remedy this. KVM and Xen provide frequently-updated views of system time from the host to each vcpu in a way that lets the VM get accurate high resolution time without much work. Linux calls this mechanism 'pvclock' so I'm doing the same. The pvclock structure gives you a system time (in nanoseconds), the TSC reading from when the time was updated, and scaling factors for converting TSC values to nanoseconds. Usually you subtract the TSC reading in the pvclock structure from a current reading, convert that to nanoseconds, and add it to the system time. I decided to go the other way in order to keep all the available resolution. Using pvclock as the timecounter reduces the overhead of lock profiling to almost nothing. Even without the extra clock reads for lock profiling, it cuts a few seconds off kernel compile time on a 2 vcpu vm. I've run it for ~12 hours without ntpd and the clock keeps time accurately. One wrinkle here is that the KVM pvclock mechanism requires setup on each vcpu, so I added a new pvbus function that gets called from cpu_hatch, allowing any hypervisor-specific setup to happen there. I still need to try this on xen, but comments at this stage are welcome. Index: arch/i386/i386/cpu.c === RCS file: /cvs/src/sys/arch/i386/i386/cpu.c,v retrieving revision 1.84 diff -u -p -u -p -r1.84 cpu.c --- arch/i386/i386/cpu.c30 May 2017 15:11:32 - 1.84 +++ arch/i386/i386/cpu.c16 Jun 2017 06:07:16 - @@ -67,6 +67,7 @@ #include "lapic.h" #include "ioapic.h" #include "vmm.h" +#include "pvbus.h" #include #include @@ -104,6 +105,10 @@ #include #endif +#if NPVBUS > 0 +#include +#endif + #include #include #include @@ -626,6 +631,9 @@ cpu_hatch(void *v) ci->ci_curpmap = pmap_kernel(); cpu_init(ci); +#if NPVBUS > 0 + pvbus_init_vcpu(); +#endif /* Re-initialise memory range handling on AP */ if (mem_range_softc.mr_op != NULL) Index: arch/i386/include/cpufunc.h === RCS file: /cvs/src/sys/arch/i386/include/cpufunc.h,v retrieving revision 1.25 diff -u -p -u -p -r1.25 cpufunc.h --- arch/i386/include/cpufunc.h 27 May 2017 12:21:50 - 1.25 +++ arch/i386/include/cpufunc.h 16 Jun 2017 06:07:16 - @@ -217,6 +217,15 @@ mfence(void) __asm volatile("mfence" : : : "memory"); } +static __inline u_int64_t +rdtsc(void) +{ + uint32_t hi, lo; + + __asm volatile("rdtsc" : "=d" (hi), "=a" (lo)); + return (((uint64_t)hi << 32) | (uint64_t) lo); +} + static __inline void wrmsr(u_int msr, u_int64_t newval) { Index: arch/amd64/amd64/cpu.c === RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.105 diff -u -p -u -p -r1.105 cpu.c --- arch/amd64/amd64/cpu.c 30 May 2017 15:11:32 - 1.105 +++ arch/amd64/amd64/cpu.c 16 Jun 2017 06:07:16 - @@ -67,6 +67,7 @@ #include "lapic.h" #include "ioapic.h" #include "vmm.h" +#include "pvbus.h" #include #include @@ -103,6 +104,10 @@ #include #endif +#if NPVBUS > 0 +#include +#endif + #include #include #include @@ -728,6 +733,9 @@ cpu_hatch(void *v) lldt(0); cpu_init(ci); +#if NPVBUS > 0 + pvbus_init_vcpu(); +#endif /* Re-initialise memory range handling on AP */ if (mem_range_softc.mr_op != NULL) Index: dev/pv/files.pv === RCS file: /cvs/src/sys/dev/pv/files.pv,v retrieving revision 1.13 diff -u -p -u -p -r1.13 files.pv --- dev/pv/files.pv 14 Jun 2017 10:25:40 - 1.13 +++ dev/pv/files.pv 16 Jun 2017 06:07:16 - @@ -75,3 +75,6 @@ file dev/pv/vioscsi.cvioscsi device vmmci attach vmmci at virtio file dev/pv/vmmci.c vmmci + +# paravirtualized clock, used by kvm and xen +filedev/pv/pvclock.c Index: dev/pv/pvbus.c === RCS file: /cvs/src/sys/dev/pv/pvbus.c,v retrieving revision 1.16 diff -u -p -u -p -r1.16 pvbus.c --- dev/pv/pvbus.c 10 Jan 2017 17:16:39 - 1.16 +++ dev/pv/pvbus.c 16 Jun 2017 06:07:16 - @@ -57,6 +57,7 @@ intpvbus_print(void *, const char *); int pvbus_search(struct device *, void *, void *); voidpvbus_kvm(struct pvbus_hv *); +voidpvbus_kvm_init_vcpu(struct pvbus_hv *);