Re: installer: support encryption with key disks
On Mon, Oct 16, 2023 at 07:46:10PM +, Klemens Nanni wrote: > On Mon, Sep 04, 2023 at 09:57:40PM +, Klemens Nanni wrote: > > Extend the yes/no question to no/passphrase/keydisk and have users pick an > > existing, preformated RAID partition; no support (yet) for creating one. > > > > Thanks to how ask_which() works, users can always say 'done' to land back > > at question to either skip crypto or use a passphrase instead. > > > > All code remains contained behind interactive non-default installations. > > Code is straight forward, I've not been able to break it; rest unchanged. > > > > Example install with root disk sd0 and ready-to-use key disk sd1: > > > > Available disks are: sd0 sd1. > > Which disk is the root disk? ('?' for details) [sd0] > > Encrypt the root disk with a (p)assphrase or (k)eydisk? [no] k > > Available disks are: sd1. > > Which disk contains the key disk? (or 'done') [sd1] > > Available sd1 partitions are: a. > > Which sd1 partition is the key disk? (or 'done') [a] > > > > Configuring the crypto chunk sd0... > > > > No valid MBR or GPT. > > Use (W)hole disk MBR, whole disk (G)PT or (E)dit? [whole] > > Setting OpenBSD MBR partition to whole sd0...done. > > sd2 at scsibus2 targ 1 lun 0: > > sd2: 1023MB, 512 bytes/sector, 2096560 sectors > > > > Configuring the root disk sd2... > > > > No valid MBR or GPT. > > Use (W)hole disk MBR, whole disk (G)PT or (E)dit? [whole] > > > > > > Feedback? OK? This seems to work well for me and the implementation looks reasonable. I think we want to update INSTALL with some extra docs as well, likely pointing folks to how to create a keydisk. OK afresh1@ > > Ping. > > Index: install.sub > === > RCS file: /cvs/src/distrib/miniroot/install.sub,v > retrieving revision 1.1255 > diff -u -p -r1.1255 install.sub > --- install.sub 21 Aug 2023 14:33:55 - 1.1255 > +++ install.sub 16 Oct 2023 19:36:55 - > @@ -3074,8 +3074,32 @@ do_autoinstall() { > exec reboot > } > > +# Chose an existing partition as key disk and set global $KEYDISK on success, > +# otherwise return non-zero. > +pick_keydisk() { > + KEYDISK= > + local _disk _label > + > + ask_which disk 'contains the key disk' '$(rmel $ROOTDISK $(get_dkdevs))' > + [[ $resp == done ]] && return 1 > + _disk=$resp > + > + make_dev $_disk > + if disklabel $_disk 2>/dev/null | ! grep -qw RAID; then > + echo "$_disk must contain a RAID partition." > + return 1 > + fi > + > + ask_which "$_disk partition" 'is the key disk' \ > + "\$(disklabel $_disk 2>/dev/null | > + sed -En 's/^ ([a-p]):.*RAID.*$/\1/p')" > + [[ $resp == done ]] && return 1 > + _label=$resp > + KEYDISK=$_disk$_label > +} > + > encrypt_root() { > - local _chunk=$ROOTDISK > + local _args _chunk=$ROOTDISK > > [[ $MDBOOTSR == y ]] || return > > @@ -3088,13 +3112,30 @@ encrypt_root() { > # e.g. auto-assembled at boot or done in (S)hell. > [[ -z $(get_softraid_volumes) ]] || return > > - ask_yn 'Encrypt the root disk with a passphrase?' || return > + while :; do > + ask 'Encrypt the root disk with a (p)assphrase or (k)eydisk?' no > + case $resp in > + # Retry on failure to allow passphrase or skip. > + [kK]*) > + pick_keydisk || continue > + _args=-k$KEYDISK > + break > + ;; > + # Do nothing, bioctl(8) will handle the passphrase. > + [pP]*) break > + ;; > + [nN]*) return > + ;; > + *) echo "'$resp' is not a valid choice." > + ;; > + esac > + done > > echo "\nConfiguring the crypto chunk $_chunk...\n" > md_prep_fdisk $_chunk > echo 'RAID *' | disklabel -w -A -T- $_chunk > > - bioctl -Cforce -cC -l${_chunk}a softraid0 >/dev/null > + bioctl -Cforce -cC -l${_chunk}a $_args softraid0 >/dev/null > > # No volumes existed before asking, but we just created one. > ROOTDISK=$(get_softraid_volumes) > -- andrew ($do || !$do) && undef($try) ; # Master of Perl, Yoda is. H?
Re: dwge(4): don't panic on truncated input packet
> Date: Wed, 18 Oct 2023 16:40:20 + > From: Miod Vallat > > I had the misfortune of hitting a KASSERT in dwge: > > panic: kernel diagnostic assertion "len > 0" failed: file > "/usr/src/sys/dev/fdt > /if_dwge.c", line 1102 > Stopped at panic+0x106:addia0,zero,256TIDPIDUID > PR > FLAGS PFLAGS CPU COMMAND > *405136 98879 1500 0x3 00 git > 301471 67731 0 0x14000 0x2001 softnet0 > panic() at panic+0x106 > panic() at panic > dwge_rx_proc() at dwge_rx_proc+0x1d4 > dwge_intr() at dwge_intr+0x4e > plic_irq_dispatch() at plic_irq_dispatch+0xec > plic_irq_handler() at plic_irq_handler+0x56 > riscv_cpu_intr() at riscv_cpu_intr+0x22 > cpu_exception_handler_supervisor() at cpu_exception_handler_supervisor+0x7a > pmap_copy_page() at pmap_copy_page+0x94 > uvm_pagerealloc_multi() at uvm_pagerealloc_multi+0x24e > buf_realloc_pages() at buf_realloc_pages+0xa0 > buf_flip_high() at buf_flip_high+0x64 > bufcache_recover_dmapages() at bufcache_recover_dmapages+0x13a > buf_get() at buf_get+0xec > end trace frame: 0xffc04d9ac870, count: 0 > > The following diff should count the error and skirt the panic. You probably hit a receive error and hit this code path because we don't check the error bit in the receive descriptor. I fixed this in dwqe(4) recently, and would prefer a similar fix here. The (untested) diff for that is attached here. We may still want to check the length on top of that (and make a similar change for dwqe(4)). I think I saw packets with the error bit set that had a length > 0 on dwqe(4) and this makes me suspect that it happens on dwge(4) as well. Index: dev/fdt/if_dwge.c === RCS file: /cvs/src/sys/dev/fdt/if_dwge.c,v retrieving revision 1.19 diff -u -p -r1.19 if_dwge.c --- dev/fdt/if_dwge.c 15 Aug 2023 08:27:30 - 1.19 +++ dev/fdt/if_dwge.c 18 Oct 2023 22:29:16 - @@ -214,6 +214,7 @@ struct dwge_desc { #define RDES0_OE (1 << 11) #define RDES0_SAF (1 << 13) #define RDES0_DE (1 << 14) +#define RDES0_ES (1 << 15) #define RDES0_FL_MASK 0x3fff #define RDES0_FL_SHIFT 16 #define RDES0_AFM (1 << 30) @@ -1097,15 +1098,20 @@ dwge_rx_proc(struct dwge_softc *sc) len, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, rxb->tb_map); - /* Strip off CRC. */ - len -= ETHER_CRC_LEN; - KASSERT(len > 0); - m = rxb->tb_m; rxb->tb_m = NULL; - m->m_pkthdr.len = m->m_len = len; + if (rxd->sd_status & RDES0_ES) { + ifp->if_ierrors++; + m_freem(m); + } else { + /* Strip off CRC. */ + len -= ETHER_CRC_LEN; + KASSERT(len > 0); - ml_enqueue(, m); + m->m_pkthdr.len = m->m_len = len; + + ml_enqueue(, m); + } put++; if (sc->sc_rx_cons == (DWGE_NRXDESC - 1)) > > Index: if_dwge.c > === > RCS file: /OpenBSD/src/sys/dev/fdt/if_dwge.c,v > retrieving revision 1.19 > diff -u -p -r1.19 if_dwge.c > --- if_dwge.c 15 Aug 2023 08:27:30 - 1.19 > +++ if_dwge.c 18 Oct 2023 16:37:59 - > @@ -1099,13 +1101,15 @@ dwge_rx_proc(struct dwge_softc *sc) > > /* Strip off CRC. */ > len -= ETHER_CRC_LEN; > - KASSERT(len > 0); > - > - m = rxb->tb_m; > - rxb->tb_m = NULL; > - m->m_pkthdr.len = m->m_len = len; > + if (len <= 0) { > + ifp->if_ierrors++; > + } else { > + m = rxb->tb_m; > + rxb->tb_m = NULL; > + m->m_pkthdr.len = m->m_len = len; > > - ml_enqueue(, m); > + ml_enqueue(, m); > + } > > put++; > if (sc->sc_rx_cons == (DWGE_NRXDESC - 1)) > >
Re: smtpd: implement nullmx RFC 7505
On 2023/10/17 22:27, Philipp wrote: > [2023-10-17 17:32] Omar Polo > > > > There is one part of the RFC7505 that I'd like to quote and discuss > > with you however. The last paragraph of the section 3 says: > > > > : A domain that advertises a null MX MUST NOT advertise any other MX > > : RR. > > > > Your implementation handles the case where there are more than one MX > > by setting the `nullmx' field in the shared struct when we encounter > > one, and then don't process the other MXs when that field is set. > > This is fine. > > > > However, I think we can simplify here by not honouring the Null MX > > when there are other MX records. We're not violating the RFC. See > > diff below to see what I mean. The only difference is in dns.c, the > > rest is the same with yours. > > My reasaning for that was: When you set Null MX you explicitly opt out > from reciving mail even when you violate the spec. But if you want > it diffrent I can change it. I think in that situation, the recipient's DNS admin has *not* set a valid nullmx, and delivery to other MXes should still be attempted. > But I don't think your proposed patch is a good solution, because the > result depend on the order of the RR in the repsonse. The problem is > when the first entry in the response is a Null MX your patch truncate > the other results and a bounce is generated. But when the Null MX is > later in the response the other entries are used to deliver the mail. > > > So far I've only compile-tested the code. I don't have a spare domain > > to test and don't know of anyone using a Null MX. > > To test Null MX you can use example.com. To test the localhost patch a > friend of me has set up mxtest.yannikenss.de. > > > > Because some domains set the MX record to "localhost." to get a similar > > > efect the secound patch ignores "localhost." MX entries and handles a MX > > > containing only "localhost." records like a Null MX. > > > > This could be simplified too. On top of diff below, it would need > > just something among the lines of: > > When localhost and Null MX should be handled the same this could be > simplified in one check, yes. But that "localhost." and Null MX are > handled different was on purpose. Because I would say setting a Null MX > is a explicit opt out from reciving mail and setting MX to localhost > is implicit. As said before, I have no problem which changing this. > > > I'm still not convinced about this part however. It feels wrong to me > > to hardcode such a check, it seems too much paranoic. The follow-up > > would be to check for localhost in dns_dispatch_host too? ;) > > The idea[1] about this part was because setting "localhost." as MX is used > in a similiar way as Null MX[0]. So handle this in a simmilar way as > Null MX make sense. This way the sender will get a better bounce message. > > Philipp > > [0] https://www.netmeister.org/blog/mx-diversity.html > > [1] I still belive OpenSMTPD should only connect to public routable > addresses for relay. So don't connect to something like 127.0.0.1/8, > ::1/128, RFC1918 addresses, ULA, ... > But this is independent of this patch. Support for nullmx makes sense to me. Perhaps also the localhost handling. But some intranet setups may require connections to RFC1918/ULA addresses - I don't think an MTA should prevent these. BTW, you can already block those at the DNS resolver level in a configurable way - see unbound's "private-address" feature.
Termp/Cap
The max approach is bogus. It happens because the terminal uses tc, and then tc again. The proper approach, especially in perl, is to actually detect a loop, which should be fairly easy by saving the tc that have already been done in a hash.
better fix for Term::Cap
Instead of an archaic limit, just keep track of tc's. This is actually slightly faster, because the termcap links is a tree, not a list. Please test. Index: Cap.pm === RCS file: /cvs/src/gnu/usr.bin/perl/cpan/Term-Cap/Cap.pm,v retrieving revision 1.3 diff -u -p -r1.3 Cap.pm --- Cap.pm 18 Oct 2023 01:49:26 - 1.3 +++ Cap.pm 18 Oct 2023 08:24:54 - @@ -280,7 +280,7 @@ sub Tgetent $first = 0;# first entry (keeps term name) -$max = 64; # max :tc=...:'s +my $seen = {}; # keep track of :tc=...:s if ($entry) { @@ -291,6 +291,7 @@ sub Tgetent if ( $entry =~ s/:tc=([^:]+):/:/ ) { $tmp_term = $1; + $seen->{$tmp_term} = 1; # protect any pattern metacharacters in $tmp_term $termpat = $tmp_term; @@ -332,10 +333,7 @@ sub Tgetent } else { - # do the same file again -# prevent endless recursion -$max-- || croak "failed termcap loop at $tmp_term"; $state = 1;# ok, maybe do a new file next time } @@ -345,11 +343,20 @@ sub Tgetent close TERMCAP; # If :tc=...: found then search this file again -$entry =~ s/:tc=([^:]+):/:/ && ( $tmp_term = $1, $state = 2 ); +while ($entry =~ s/:tc=([^:]+):/:/) { + $tmp_term = $1; + if ($seen->{$tmp_term}) { + # XXX first version of this croaked, but we can actually + # get several intermediate entries with the same tc ! + next; + } + $seen->{$tmp_term} = 1; + $state = 2; -# protect any pattern metacharacters in $tmp_term -$termpat = $tmp_term; -$termpat =~ s/(\W)/\\$1/g; + # protect any pattern metacharacters in $tmp_term + $termpat = $tmp_term; + $termpat =~ s/(\W)/\\$1/g; + } } croak "Can't find $term" if $entry eq '';
snmpd_metrics: add HOST-RESOURCES-MIB:hrSWRunPerfTable
This diff adds the two entries from the hrSWRunPerfTable: hrSWRunPerfCPU, and hrSWRunPerfMem. This allows snmptop from the net-snmp package to work. Math stolen^Wcopied from top/machine.c. OK? martijn@ Index: mib.c === RCS file: /cvs/src/libexec/snmpd/snmpd_metrics/mib.c,v retrieving revision 1.4 diff -u -p -r1.4 mib.c --- mib.c 4 Jul 2023 11:34:19 - 1.4 +++ mib.c 18 Oct 2023 09:52:07 - @@ -69,6 +69,10 @@ struct event connev; const char *agentxsocket = NULL; int agentxfd = -1; +int pageshift; +#define pagetok(size) ((size) << pageshift) + +voidpageshift_init(void); voidsnmp_connect(struct agentx *, void *, int); voidsnmp_tryconnect(int, short, void *); voidsnmp_read(int, short, void *); @@ -89,6 +93,7 @@ struct agentx_object *hrProcessorLoad; struct agentx_index *hrSWRunIdx; struct agentx_object *hrSWRunIndex, *hrSWRunName, *hrSWRunID, *hrSWRunPath; struct agentx_object *hrSWRunParameters, *hrSWRunType, *hrSWRunStatus; +struct agentx_object *hrSWRunPerfCPU, *hrSWRunPerfMem; voidmib_hrsystemuptime(struct agentx_varbind *); voidmib_hrsystemdate(struct agentx_varbind *); @@ -634,6 +639,7 @@ mib_hrswrun(struct agentx_varbind *vb) struct agentx_object*obj; enum agentx_request_type req; int32_t idx; + int32_t time; struct kinfo_proc *kinfo; char*s; @@ -711,6 +717,13 @@ mib_hrswrun(struct agentx_varbind *vb) agentx_varbind_integer(vb, 4); break; } + } else if (obj == hrSWRunPerfCPU) { + time = kinfo->p_rtime_sec * 100; + time += (kinfo->p_rtime_usec + 5000) / 1; + agentx_varbind_integer(vb, time); + } else if (obj == hrSWRunPerfMem) { + agentx_varbind_integer(vb, pagetok(kinfo->p_vm_tsize + + kinfo->p_vm_dsize + kinfo->p_vm_ssize)); } else fatal("%s: Unexpected object", __func__); } @@ -3278,6 +3291,7 @@ main(int argc, char *argv[]) kr_init(); pf_init(); timer_init(); + pageshift_init(); if (agentxsocket != NULL) { if (strlcpy(agentxsocketdir, agentxsocket, @@ -3375,6 +3389,10 @@ main(int argc, char *argv[]) (hrSWRunType = agentx_object(host, AGENTX_OID(HRSWRUNTYPE), , 1, 0, mib_hrswrun)) == NULL || (hrSWRunStatus = agentx_object(host, AGENTX_OID(HRSWRUNSTATUS), + , 1, 0, mib_hrswrun)) == NULL || + (hrSWRunPerfCPU = agentx_object(host, AGENTX_OID(HRSWRUNPERFCPU), + , 1, 0, mib_hrswrun)) == NULL || + (hrSWRunPerfMem = agentx_object(host, AGENTX_OID(HRSWRUNPERFMEM), , 1, 0, mib_hrswrun)) == NULL) fatal("agentx_object"); @@ -4318,6 +4336,22 @@ main(int argc, char *argv[]) log_setverbose(verbose); event_dispatch(); +} + +#define LOG1024 10 +void +pageshift_init(void) +{ + long pagesize; + + if ((pagesize = sysconf(_SC_PAGESIZE)) == -1) + fatal("sysconf(_SC_PAGESIZE)"); + while (pagesize > 1) { + pageshift++; + pagesize >>= 1; + } + /* we only need the amount of log(2)1024 for our conversion */ + pageshift -= LOG1024; } void
IPv4 on ix(4) slow/nothing - 7.4
Hi All, Just upgraded a couple of machines to 7.4. smooth as always!! I am however seeing issues with IPv4, slowness or no throughput at all. The machines I have upgraded are using an Intel X540-T network card and is connected on 10G. ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues, address b8:ca:3a:62:ee:40 ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues, address b8:ca:3a:62:ee:42 root@n2:~ # sysctl kern.version kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 2023 dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP There are a bunch of VMs running on top of it, as soon as I want to fetch something with ftp, for example, I don't get anything over IPv4, with IPv6 everything is normal. mischa@www2:~ $ ftp -4 https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso Trying 46.23.88.18... Requesting https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso 0% | | 512 KB - stalled -^C A trace on mirror / n2: n2:~ # tcpdump -i vport880 host 46.23.88.32 tcpdump: listening on vport880, link-type EN10MB 15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S 2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF) 15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl 15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11 15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S 633749938:633749938(0) ack 2182224747 win 16384 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 2899683458> (DF) 15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256 (DF) 15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack 1 win 256 (DF) 15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack 312 win 271 (DF) 15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack 312 win 271 (DF) 15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32) ack 312 win 271 (DF) 15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) ack 312 win 271 (DF) 15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: . 1614:3062(1448) ack 312 win 271 2899683510> (DF) 15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741) ack 312 win 271 (DF) 15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win 255 (DF) 15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542) ack 312 win 271 (DF) 15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58) ack 312 win 271 (DF) 15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win 256 > (DF) 15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win 256 > (DF) 15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) ack 312 win 271 (DF) 15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win 233 > (DF) 15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: . 1614:3062(1448) ack 312 win 271 2899684519> (DF) 15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P 3062:4403(1341) ack 312 win 271 2899684519> (DF) 15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: . 1614:3062(1448) ack 312 win 271 2899684519> (DF) 15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win 233 > (DF) 15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P 3062:4403(1341) ack 312 win 271 2899685520> (DF) 15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win 235 (DF) 15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack 4403 win 256 (DF) 15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win 271 (DF) 15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209) ack 4403 win 256 (DF) 15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656) ack 527 win 271 (DF) 15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: . 5059:6507(1448) ack 527 win 271 2899685730> (DF) 15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: . 6507:7955(1448) ack 527 win 271 2899685730> (DF) 15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win 256 (DF) 15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: . 7955:9403(1448) ack 527 win 271 2899685924> (DF) Running a trace on www2 I am seeing: www2:~ # tcpdump -i vio0 host 46.23.88.18 tcpdump: listening on vio0, link-type EN10MB 15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S 2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF) 15:16:08.731114 arp who-has www2.high5.nl tell n2.high5.nl 15:16:08.731229 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11 15:16:08.731631 n2.high5.nl.https > www2.high5.nl.1828: S 633749938:633749938(0) ack 2182224747 win 16384 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 2899683458> (DF) 15:16:08.731732 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1
[wireless] iwx: small code nitpicking and removal of iwx_wait_tx_queues_empty
Inlined patch does the following: - removes functions in initialization per style(9) (splnet) - removes double definition of IWX_NVM_GET_INFO - removes unused 'ring' variable from iwx_sta_tx_agg_start - removes unused 'iwx_tid_to_ac' array - removes iwx_wait_tx_queues_empty function Regarding the last thing - in our testing of a derivative product we've found that sometimes the queue has some queued packets, the function has to wait until all frames will be de-queued, but there are two problems: 1. There is no corresponding wakeup() for a ring, so this tsleep() should always fail with EWOULDBLOCK, if we have queued frams. This leads to the situation when needed softc flags aren't cleared, and on the next join we get a panic with "MAC already added". 2. Linux driver does the flushes a slightly differently: https://github.com/torvalds/linux/blob/master/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c#L5592 [...] if (drop) { if (iwl_mvm_flush_sta(mvm, mvmsta, false)) IWL_ERR(mvm, "flush request fail\n"); } else { if (iwl_mvm_has_new_tx_api(mvm)) iwl_mvm_wait_sta_queues_empty(mvm, mvmsta); else /* only used for !iwl_mvm_has_new_tx_api() below */ msk |= mvmsta->tfd_queue_msk; } [...] 'drop' var is from Linux 80211 API - "If the parameter drop is set to true, pending frames may be dropped." per https://www.kernel.org/doc/html/v6.5/driver-api/80211/mac80211.html As I can see OpenBSD driver tries to do both things, I propose to drop this iwx_wait_tx_queues_empty function and follow 'drop = true' case from the linux driver. Removal of this function has fixed panics for us. Debugging and code have been sponsored by Serenity Cybersecurity, LLC. diff /usr/src commit - a8cd26ec98e16ab36053bf0a84ae2e4d88c286ea path + /usr/src blob - 612b68e3c6982a8ed4c0bcaca01d8c4e0a81d115 file + sys/dev/pci/if_iwx.c --- sys/dev/pci/if_iwx.c +++ sys/dev/pci/if_iwx.c @@ -402,7 +402,6 @@ voidiwx_tx_update_byte_tbl(struct iwx_softc *, struct uint16_t, uint16_t); intiwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *); intiwx_flush_sta_tids(struct iwx_softc *, int, uint16_t); -intiwx_wait_tx_queues_empty(struct iwx_softc *); intiwx_drain_sta(struct iwx_softc *sc, struct iwx_node *, int); intiwx_flush_sta(struct iwx_softc *, struct iwx_node *); intiwx_beacon_filter_send_cmd(struct iwx_softc *, @@ -2792,18 +2791,6 @@ iwx_nic_init(struct iwx_softc *sc) return 0; } -/* Map a TID to an ieee80211_edca_ac category. */ -const uint8_t iwx_tid_to_ac[IWX_MAX_TID_COUNT] = { - EDCA_AC_BE, - EDCA_AC_BK, - EDCA_AC_BK, - EDCA_AC_BE, - EDCA_AC_VI, - EDCA_AC_VI, - EDCA_AC_VO, - EDCA_AC_VO, -}; - /* Map ieee80211_edca_ac categories to firmware Tx FIFO. */ const uint8_t iwx_ac_to_tx_fifo[] = { IWX_GEN2_EDCA_TX_FIFO_BE, @@ -3548,8 +3535,10 @@ iwx_mac_ctxt_task(void *arg) struct iwx_softc *sc = arg; struct ieee80211com *ic = >sc_ic; struct iwx_node *in = (void *)ic->ic_bss; - int err, s = splnet(); + int err, s; + s = splnet(); + if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) || ic->ic_state != IEEE80211_S_RUN) { refcnt_rele_wake(>task_refs); @@ -3575,8 +3564,10 @@ iwx_phy_ctxt_task(void *arg) struct iwx_node *in = (void *)ic->ic_bss; struct ieee80211_node *ni = >in_ni; uint8_t chains, sco, vht_chan_width; - int err, s = splnet(); + int err, s; + s = splnet(); + if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) || ic->ic_state != IEEE80211_S_RUN || in->in_phyctxt == NULL) { @@ -3668,7 +3659,6 @@ iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee struct ieee80211com *ic = >sc_ic; struct ieee80211_tx_ba *ba; int err, qid; - struct iwx_tx_ring *ring; /* Ensure we can map this TID to an aggregation queue. */ if (tid >= IWX_MAX_TID_COUNT) @@ -3711,7 +3701,6 @@ iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff; - ring = >txq[qid]; ba->ba_timeout_val = 0; ieee80211_addba_resp_accept(ic, ni, tid); sc->aggqid[tid] = qid; @@ -3723,9 +3712,11 @@ iwx_ba_task(void *arg) struct iwx_softc *sc = arg; struct ieee80211com *ic = >sc_ic; struct ieee80211_node *ni = ic->ic_bss; - int s = splnet(); + int s; int tid; + s = splnet(); + for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) { if (sc->sc_flags & IWX_FLAG_SHUTDOWN) break; @@ -6429,31 +6420,7 @@ out: return err; } -#define IWX_FLUSH_WAIT_MS 2000 - int
bgpd session.c convert to new ibuf API
This is a bit overdue. Convert session.c to also use the new ibuf API. This simplifies some code since there is no need for local variables. Also kill the struct msg_header and especially msg_open. The are of very little use. Regress passes so I think this should be fine :) -- :wq Claudio Index: session.c === RCS file: /cvs/src/usr.sbin/bgpd/session.c,v retrieving revision 1.450 diff -u -p -r1.450 session.c --- session.c 17 Oct 2023 17:58:15 - 1.450 +++ session.c 18 Oct 2023 15:09:29 - @@ -1324,24 +1324,26 @@ session_capa_add(struct ibuf *opb, uint8 { int errs = 0; - errs += ibuf_add(opb, _code, sizeof(capa_code)); - errs += ibuf_add(opb, _len, sizeof(capa_len)); + errs += ibuf_add_n8(opb, capa_code); + errs += ibuf_add_n8(opb, capa_len); return (errs); } int session_capa_add_mp(struct ibuf *buf, uint8_t aid) { - uint8_t safi, pad = 0; uint16_t afi; + uint8_t safi; int errs = 0; - if (aid2afi(aid, , ) == -1) - fatalx("session_capa_add_mp: bad afi/safi pair"); - afi = htons(afi); - errs += ibuf_add(buf, , sizeof(afi)); - errs += ibuf_add(buf, , sizeof(pad)); - errs += ibuf_add(buf, , sizeof(safi)); + if (aid2afi(aid, , ) == -1) { + log_warn("session_capa_add_afi: bad AID"); + return (-1); + } + + errs += ibuf_add_n16(buf, afi); + errs += ibuf_add_zero(buf, 1); + errs += ibuf_add_n8(buf, safi); return (errs); } @@ -1356,13 +1358,12 @@ session_capa_add_afi(struct peer *p, str if (aid2afi(aid, , )) { log_warn("session_capa_add_afi: bad AID"); - return (1); + return (-1); } - afi = htons(afi); - errs += ibuf_add(b, , sizeof(afi)); - errs += ibuf_add(b, , sizeof(safi)); - errs += ibuf_add(b, , sizeof(flags)); + errs += ibuf_add_n16(b, afi); + errs += ibuf_add_n8(b, safi); + errs += ibuf_add_n8(b, flags); return (errs); } @@ -1370,21 +1371,19 @@ session_capa_add_afi(struct peer *p, str struct bgp_msg * session_newmsg(enum msg_type msgtype, uint16_t len) { + u_char marker[MSGSIZE_HEADER_MARKER]; struct bgp_msg *msg; - struct msg_headerhdr; struct ibuf *buf; int errs = 0; - memset(, 0xff, sizeof(hdr.marker)); - hdr.len = htons(len); - hdr.type = msgtype; + memset(marker, 0xff, sizeof(marker)); if ((buf = ibuf_open(len)) == NULL) return (NULL); - errs += ibuf_add(buf, , sizeof(hdr.marker)); - errs += ibuf_add(buf, , sizeof(hdr.len)); - errs += ibuf_add(buf, , sizeof(hdr.type)); + errs += ibuf_add(buf, marker, sizeof(marker)); + errs += ibuf_add_n16(buf, len); + errs += ibuf_add_n8(buf, msgtype); if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { ibuf_free(buf); @@ -1472,8 +1471,7 @@ session_open(struct peer *p) { struct bgp_msg *buf; struct ibuf *opb; - struct msg_open msg; - uint16_t len, optparamlen = 0; + uint16_t len, optparamlen = 0, holdtime; uint8_t i, op_type; int errs = 0, extlen = 0; int mpcapa = 0; @@ -1501,10 +1499,8 @@ session_open(struct peer *p) p->conf.role != ROLE_NONE && (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || mpcapa == 0)) { - uint8_t val; - val = role2capa(p->conf.role); errs += session_capa_add(opb, CAPA_ROLE, 1); - errs += ibuf_add(opb, , 1); + errs += ibuf_add_n8(opb, role2capa(p->conf.role)); } /* graceful restart and End-of-RIB marker, RFC 4724 */ @@ -1520,19 +1516,14 @@ session_open(struct peer *p) /* Only set the R-flag if no graceful restart is ongoing */ if (!rst) hdr |= CAPA_GR_R_FLAG; - hdr = htons(hdr); - errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); - errs += ibuf_add(opb, , sizeof(hdr)); + errs += ibuf_add_n16(opb, hdr); } /* 4-bytes AS numbers, RFC6793 */ if (p->capa.ann.as4byte) { /* 4 bytes data */ - uint32_tnas; - - nas = htonl(p->conf.local_as); - errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas)); - errs += ibuf_add(opb, , sizeof(nas)); + errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t)); + errs += ibuf_add_n32(opb,
Re: smtpd: implement nullmx RFC 7505
[2023-10-18 11:42] Omar Polo > On 2023/10/18 08:40:14 +0100, Stuart Henderson wrote: > > On 2023/10/17 22:27, Philipp wrote: > > > [2023-10-17 17:32] Omar Polo > > > > [...] > > > > But I don't think your proposed patch is a good solution, because the > > > result depend on the order of the RR in the repsonse. The problem is > > > when the first entry in the response is a Null MX your patch truncate > > > the other results and a bounce is generated. But when the Null MX is > > > later in the response the other entries are used to deliver the mail. > > that's true; I fell for a shorter diff. I've attached below a version > that doesn't depend on the order. Your patch looks mostly good. I only have a small nitpick style comment: > + > + if (rr.rr.mx.preference == 0 && !strcmp(buf, "")) { strcmp doesn't return a bool like value. Something like: if (rr.rr.mx.preference == 0 && strcmp(buf, "") == 0) is imho better to read.
Re: bgpd session.c convert to new ibuf API
On Wed, Oct 18, 2023 at 05:19:29PM +0200, Claudio Jeker wrote: > This is a bit overdue. Convert session.c to also use the new ibuf API. > This simplifies some code since there is no need for local variables. > Also kill the struct msg_header and especially msg_open. The are of very > little use. > > Regress passes so I think this should be fine :) Can't spot anything wrong with the conversion, two log nits below. > -- > :wq Claudio > > Index: session.c > === > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v > retrieving revision 1.450 > diff -u -p -r1.450 session.c > --- session.c 17 Oct 2023 17:58:15 - 1.450 > +++ session.c 18 Oct 2023 15:09:29 - > @@ -1324,24 +1324,26 @@ session_capa_add(struct ibuf *opb, uint8 > { > int errs = 0; > > - errs += ibuf_add(opb, _code, sizeof(capa_code)); > - errs += ibuf_add(opb, _len, sizeof(capa_len)); > + errs += ibuf_add_n8(opb, capa_code); > + errs += ibuf_add_n8(opb, capa_len); > return (errs); > } > > int > session_capa_add_mp(struct ibuf *buf, uint8_t aid) > { > - uint8_t safi, pad = 0; > uint16_t afi; > + uint8_t safi; > int errs = 0; > > - if (aid2afi(aid, , ) == -1) > - fatalx("session_capa_add_mp: bad afi/safi pair"); > - afi = htons(afi); > - errs += ibuf_add(buf, , sizeof(afi)); > - errs += ibuf_add(buf, , sizeof(pad)); > - errs += ibuf_add(buf, , sizeof(safi)); > + if (aid2afi(aid, , ) == -1) { > + log_warn("session_capa_add_afi: bad AID"); wrong function name. Should this be log_warn("%s: bad AID", __func__); > + return (-1); > + } > + > + errs += ibuf_add_n16(buf, afi); > + errs += ibuf_add_zero(buf, 1); > + errs += ibuf_add_n8(buf, safi); > > return (errs); > } > @@ -1356,13 +1358,12 @@ session_capa_add_afi(struct peer *p, str > > if (aid2afi(aid, , )) { > log_warn("session_capa_add_afi: bad AID"); use __func__? > - return (1); > + return (-1); fun > } > > - afi = htons(afi); > - errs += ibuf_add(b, , sizeof(afi)); > - errs += ibuf_add(b, , sizeof(safi)); > - errs += ibuf_add(b, , sizeof(flags)); > + errs += ibuf_add_n16(b, afi); > + errs += ibuf_add_n8(b, safi); > + errs += ibuf_add_n8(b, flags); > > return (errs); > } > @@ -1370,21 +1371,19 @@ session_capa_add_afi(struct peer *p, str > struct bgp_msg * > session_newmsg(enum msg_type msgtype, uint16_t len) > { > + u_char marker[MSGSIZE_HEADER_MARKER]; > struct bgp_msg *msg; > - struct msg_headerhdr; > struct ibuf *buf; > int errs = 0; > > - memset(, 0xff, sizeof(hdr.marker)); > - hdr.len = htons(len); > - hdr.type = msgtype; > + memset(marker, 0xff, sizeof(marker)); > > if ((buf = ibuf_open(len)) == NULL) > return (NULL); > > - errs += ibuf_add(buf, , sizeof(hdr.marker)); > - errs += ibuf_add(buf, , sizeof(hdr.len)); > - errs += ibuf_add(buf, , sizeof(hdr.type)); > + errs += ibuf_add(buf, marker, sizeof(marker)); > + errs += ibuf_add_n16(buf, len); > + errs += ibuf_add_n8(buf, msgtype); > > if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) { > ibuf_free(buf); > @@ -1472,8 +1471,7 @@ session_open(struct peer *p) > { > struct bgp_msg *buf; > struct ibuf *opb; > - struct msg_open msg; > - uint16_t len, optparamlen = 0; > + uint16_t len, optparamlen = 0, holdtime; > uint8_t i, op_type; > int errs = 0, extlen = 0; > int mpcapa = 0; > @@ -1501,10 +1499,8 @@ session_open(struct peer *p) > p->conf.role != ROLE_NONE && > (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] || > mpcapa == 0)) { > - uint8_t val; > - val = role2capa(p->conf.role); > errs += session_capa_add(opb, CAPA_ROLE, 1); > - errs += ibuf_add(opb, , 1); > + errs += ibuf_add_n8(opb, role2capa(p->conf.role)); > } > > /* graceful restart and End-of-RIB marker, RFC 4724 */ > @@ -1520,19 +1516,14 @@ session_open(struct peer *p) > /* Only set the R-flag if no graceful restart is ongoing */ > if (!rst) > hdr |= CAPA_GR_R_FLAG; > - hdr = htons(hdr); > - > errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr)); > - errs += ibuf_add(opb, , sizeof(hdr)); > + errs += ibuf_add_n16(opb, hdr); > } > > /* 4-bytes AS numbers, RFC6793 */ > if (p->capa.ann.as4byte) { /* 4 bytes data */ >
Re: IPv4 on ix(4) slow/nothing - 7.4
> On Oct 18, 2023, at 15:44, Hrvoje Popovski wrote: > > On 18.10.2023. 15:35, Mischa wrote: >> Hi All, >> >> Just upgraded a couple of machines to 7.4. smooth as always!! >> >> I am however seeing issues with IPv4, slowness or no throughput at all. >> The machines I have upgraded are using an Intel X540-T network card and >> is connected on 10G. >> >> ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues, >> address b8:ca:3a:62:ee:40 >> ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues, >> address b8:ca:3a:62:ee:42 >> >> root@n2:~ # sysctl kern.version >> kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 2023 >> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP >> >> There are a bunch of VMs running on top of it, as soon as I want to >> fetch something with ftp, for example, I don't get anything over IPv4, >> with IPv6 everything is normal. >> >> mischa@www2:~ $ ftp -4 >> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso >> Trying 46.23.88.18... >> Requesting >> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso >> 0% | | 512 KB - stalled >> -^C >> >> A trace on mirror / n2: >> >> n2:~ # tcpdump -i vport880 host 46.23.88.32 >> tcpdump: listening on vport880, link-type EN10MB >> 15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S >> 2182224746:2182224746(0) win 16384 > 6,nop,nop,timestamp 2899683458 0> (DF) >> 15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl >> 15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11 >> 15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S >> 633749938:633749938(0) ack 2182224747 win 16384 > 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 >> 2899683458> (DF) >> 15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256 >> (DF) >> 15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack >> 1 win 256 (DF) >> 15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack >> 312 win 271 (DF) >> 15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack >> 312 win 271 (DF) >> 15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32) >> ack 312 win 271 (DF) >> 15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) >> ack 312 win 271 (DF) >> 15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: . >> 1614:3062(1448) ack 312 win 271 > 2899683510> (DF) >> 15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741) >> ack 312 win 271 (DF) >> 15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win >> 255 (DF) >> 15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542) >> ack 312 win 271 (DF) >> 15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58) >> ack 312 win 271 (DF) >> 15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win >> 256 >> (DF) >> 15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win >> 256 >> (DF) >> 15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) >> ack 312 win 271 (DF) >> 15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win >> 233 >> (DF) >> 15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: . >> 1614:3062(1448) ack 312 win 271 > 2899684519> (DF) >> 15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P >> 3062:4403(1341) ack 312 win 271 > 2899684519> (DF) >> 15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: . >> 1614:3062(1448) ack 312 win 271 > 2899684519> (DF) >> 15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win >> 233 >> (DF) >> 15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P >> 3062:4403(1341) ack 312 win 271 > 2899685520> (DF) >> 15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win >> 235 (DF) >> 15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack >> 4403 win 256 (DF) >> 15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win >> 271 (DF) >> 15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209) >> ack 4403 win 256 (DF) >> 15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656) >> ack 527 win 271 (DF) >> 15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: . >> 5059:6507(1448) ack 527 win 271 > 2899685730> (DF) >> 15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: . >> 6507:7955(1448) ack 527 win 271 > 2899685730> (DF) >> 15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win >> 256 (DF) >> 15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: . >> 7955:9403(1448) ack 527 win 271 > 2899685924> (DF) >> >> >> Running a trace on www2 I am seeing: >> >> www2:~ # tcpdump -i vio0 host 46.23.88.18 >> tcpdump: listening on vio0, link-type EN10MB >> 15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S >> 2182224746:2182224746(0) win 16384 > 6,nop,nop,timestamp
TSO for ixl(4)
Hi, This diff implements TCP Segmentation Offloading for ixl(4). I tested it successfully on amd64 and sparc64 with Intel X710. It should increase the TCP bulk performance to 10 Gbit/s. On sparc64 I got an increase from 600 MBit/s to 2.000 Gbit/s. Further testing is welcome. bye, Jan Index: dev/pci/if_ixl.c === RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v retrieving revision 1.89 diff -u -p -r1.89 if_ixl.c --- dev/pci/if_ixl.c29 Sep 2023 19:44:47 - 1.89 +++ dev/pci/if_ixl.c18 Oct 2023 15:15:30 - @@ -71,6 +71,7 @@ #include #include #include +#include #include #if NBPFILTER > 0 @@ -85,6 +86,8 @@ #include #include #include +#include +#include #include #include @@ -827,6 +830,10 @@ struct ixl_tx_desc { #define IXL_TX_DESC_BSIZE_MASK \ (IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT) +#define IXL_TX_CTX_DESC_CMD_TSO0x10 +#define IXL_TX_CTX_DESC_TLEN_SHIFT 30 +#define IXL_TX_CTX_DESC_MSS_SHIFT 50 + #define IXL_TX_DESC_L2TAG1_SHIFT 48 } __packed __aligned(16); @@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 { uint64_tqword3; } __packed __aligned(16); -#define IXL_TX_PKT_DESCS 8 +#define IXL_TX_PKT_DESCS 32 #define IXL_TX_QUEUE_ALIGN 128 #define IXL_RX_QUEUE_ALIGN 128 #define IXL_HARDMTU9712 /* 9726 - ETHER_HDR_LEN */ +#define IXL_TSO_SIZE ((255 * 1024) - 1) +#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) + +/* + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. + * This interface could not handle packets greater than IXL_TSO_SIZE. + */ +CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE); #define IXL_PCIREG PCI_MAPREG_START @@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct ifp->if_capabilities |= IFCAP_CSUM_IPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; ifmedia_init(>sc_media, 0, ixl_media_change, ixl_media_status); @@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi txm = [i]; if (bus_dmamap_create(sc->sc_dmat, - IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0, + MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, >txm_map) != 0) goto uncreate; @@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm } static uint64_t -ixl_tx_setup_offload(struct mbuf *m0) +ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr, +unsigned int prod) { struct ether_extracted ext; uint64_t hlen; @@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0) } if (!ISSET(m0->m_pkthdr.csum_flags, - M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) + M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO)) return (offload); ether_extract_headers(m0, ); @@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0) offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; } + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) { + if (ext.tcp) { + struct ixl_tx_desc *ring, *txd; + uint64_t cmd = 0; + + hlen += ext.tcp->th_off << 2; + ring = IXL_DMA_KVA(>txr_mem); + txd = [prod]; + + cmd |= IXL_TX_DESC_DTYPE_CONTEXT; + cmd |= IXL_TX_CTX_DESC_CMD_TSO; + cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN + - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT; + cmd |= (uint64_t)(m0->m_pkthdr.ph_mss) + << IXL_TX_CTX_DESC_MSS_SHIFT; + + htolem64(>addr, 0); + htolem64(>cmd, cmd); + } else + tcpstat_inc(tcps_outbadtso); + } + return (offload); } @@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq) mask = sc->sc_tx_ring_ndescs - 1; for (;;) { - if (free <= IXL_TX_PKT_DESCS) { + /* We need one extra descriptor for TSO packets. */ + if (free <= (IXL_TX_PKT_DESCS + 1)) { ifq_set_oactive(ifq); break; } @@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq) if (m == NULL) break; - offload = ixl_tx_setup_offload(m); + offload = ixl_tx_setup_offload(m, txr, prod); txm = >txr_maps[prod]; map =
Re: smtpd: implement nullmx RFC 7505
On 2023/10/18 08:40:14 +0100, Stuart Henderson wrote: > On 2023/10/17 22:27, Philipp wrote: > > [2023-10-17 17:32] Omar Polo > > > There is one part of the RFC7505 that I'd like to quote and discuss > > > with you however. The last paragraph of the section 3 says: > > > > > > : A domain that advertises a null MX MUST NOT advertise any other MX > > > : RR. > > > > > > Your implementation handles the case where there are more than one MX > > > by setting the `nullmx' field in the shared struct when we encounter > > > one, and then don't process the other MXs when that field is set. > > > This is fine. > > > > > > However, I think we can simplify here by not honouring the Null MX > > > when there are other MX records. We're not violating the RFC. See > > > diff below to see what I mean. The only difference is in dns.c, the > > > rest is the same with yours. > > > > My reasaning for that was: When you set Null MX you explicitly opt out > > from reciving mail even when you violate the spec. But if you want > > it diffrent I can change it. > > I think in that situation, the recipient's DNS admin has *not* set a > valid nullmx, and delivery to other MXes should still be attempted. That's what I though too. > > But I don't think your proposed patch is a good solution, because the > > result depend on the order of the RR in the repsonse. The problem is > > when the first entry in the response is a Null MX your patch truncate > > the other results and a bounce is generated. But when the Null MX is > > later in the response the other entries are used to deliver the mail. that's true; I fell for a shorter diff. I've attached below a version that doesn't depend on the order. > > > So far I've only compile-tested the code. I don't have a spare domain > > > to test and don't know of anyone using a Null MX. > > > > To test Null MX you can use example.com. To test the localhost patch a > > friend of me has set up mxtest.yannikenss.de. thanks! > [...] > > [1] I still belive OpenSMTPD should only connect to public routable > > addresses for relay. So don't connect to something like 127.0.0.1/8, > > ::1/128, RFC1918 addresses, ULA, ... > > But this is independent of this patch. > > Support for nullmx makes sense to me. Perhaps also the localhost > handling. But some intranet setups may require connections to > RFC1918/ULA addresses - I don't think an MTA should prevent these. Completely agree. diff 2d025d839f99dc09ee525c11a4ed09a0f3bbe7d0 02bb94351d3865e61483023cab9fa02bcac2970d commit - 2d025d839f99dc09ee525c11a4ed09a0f3bbe7d0 commit + 02bb94351d3865e61483023cab9fa02bcac2970d blob - 4cf5d23d1d14b5400c6f4429dae0a4f6490073d4 blob + 552a5cf9115401b4fac3d131d6e5c022ebb8b7fd --- usr.sbin/smtpd/dns.c +++ usr.sbin/smtpd/dns.c @@ -232,10 +232,10 @@ dns_dispatch_mx(struct asr_result *ar, void *arg) struct dns_rrrr; char buf[512]; size_t found; + int nullmx = 0; if (ar->ar_h_errno && ar->ar_h_errno != NO_DATA && ar->ar_h_errno != NOTIMP) { - m_create(s->p, IMSG_MTA_DNS_HOST_END, 0, 0, -1); m_add_id(s->p, s->reqid); if (ar->ar_rcode == NXDOMAIN) @@ -259,13 +259,29 @@ dns_dispatch_mx(struct asr_result *ar, void *arg) unpack_rr(, ); if (rr.rr_type != T_MX) continue; + print_dname(rr.rr.mx.exchange, buf, sizeof(buf)); buf[strlen(buf) - 1] = '\0'; + + if (rr.rr.mx.preference == 0 && !strcmp(buf, "")) { + nullmx = 1; + continue; + } + dns_lookup_host(s, buf, rr.rr.mx.preference); found++; } free(ar->ar_data); + if (nullmx && found == 0) { + m_create(s->p, IMSG_MTA_DNS_HOST_END, 0, 0, -1); + m_add_id(s->p, s->reqid); + m_add_int(s->p, DNS_NULLMX); + m_close(s->p); + free(s); + return; + } + /* fallback to host if no MX is found. */ if (found == 0) dns_lookup_host(s, s->name, 0); blob - c0d0fc02931b90409441035d2744923af9e42df1 blob + 25e158b68a88c8485428a2476c1c557f8c60404d --- usr.sbin/smtpd/mta.c +++ usr.sbin/smtpd/mta.c @@ -1088,6 +1088,10 @@ mta_on_mx(void *tag, void *arg, void *data) else relay->failstr = "No MX found for domain"; break; + case DNS_NULLMX: + relay->fail = IMSG_MTA_DELIVERY_PERMFAIL; + relay->failstr = "Domain does not accept mail"; + break; default: fatalx("bad DNS lookup error code"); break; blob - 6781286928da45e6159bce81ff2437011ebdef72 blob + d5cbe88feeaf9e91eca119b31dda957aca68c031 --- usr.sbin/smtpd/smtpd.h +++ usr.sbin/smtpd/smtpd.h @@
Re: IPv4 on ix(4) slow/nothing - 7.4
On 18.10.2023. 15:35, Mischa wrote: > Hi All, > > Just upgraded a couple of machines to 7.4. smooth as always!! > > I am however seeing issues with IPv4, slowness or no throughput at all. > The machines I have upgraded are using an Intel X540-T network card and > is connected on 10G. > > ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues, > address b8:ca:3a:62:ee:40 > ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues, > address b8:ca:3a:62:ee:42 > > root@n2:~ # sysctl kern.version > kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 2023 > dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP > > There are a bunch of VMs running on top of it, as soon as I want to > fetch something with ftp, for example, I don't get anything over IPv4, > with IPv6 everything is normal. > > mischa@www2:~ $ ftp -4 > https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso > Trying 46.23.88.18... > Requesting > https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso > 0% | | 512 KB - stalled > -^C > > A trace on mirror / n2: > > n2:~ # tcpdump -i vport880 host 46.23.88.32 > tcpdump: listening on vport880, link-type EN10MB > 15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S > 2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF) > 15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl > 15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11 > 15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S > 633749938:633749938(0) ack 2182224747 win 16384 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 > 2899683458> (DF) > 15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256 > (DF) > 15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack > 1 win 256 (DF) > 15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack > 312 win 271 (DF) > 15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack > 312 win 271 (DF) > 15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32) > ack 312 win 271 (DF) > 15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) > ack 312 win 271 (DF) > 15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: . > 1614:3062(1448) ack 312 win 271 2899683510> (DF) > 15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741) > ack 312 win 271 (DF) > 15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win > 255 (DF) > 15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542) > ack 312 win 271 (DF) > 15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58) > ack 312 win 271 (DF) > 15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win > 256 > (DF) > 15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win > 256 > (DF) > 15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) > ack 312 win 271 (DF) > 15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win > 233 > (DF) > 15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: . > 1614:3062(1448) ack 312 win 271 2899684519> (DF) > 15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P > 3062:4403(1341) ack 312 win 271 2899684519> (DF) > 15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: . > 1614:3062(1448) ack 312 win 271 2899684519> (DF) > 15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win > 233 > (DF) > 15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P > 3062:4403(1341) ack 312 win 271 2899685520> (DF) > 15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win > 235 (DF) > 15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack > 4403 win 256 (DF) > 15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win > 271 (DF) > 15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209) > ack 4403 win 256 (DF) > 15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656) > ack 527 win 271 (DF) > 15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: . > 5059:6507(1448) ack 527 win 271 2899685730> (DF) > 15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: . > 6507:7955(1448) ack 527 win 271 2899685730> (DF) > 15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win > 256 (DF) > 15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: . > 7955:9403(1448) ack 527 win 271 2899685924> (DF) > > > Running a trace on www2 I am seeing: > > www2:~ # tcpdump -i vio0 host 46.23.88.18 > tcpdump: listening on vio0, link-type EN10MB > 15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S > 2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF) > 15:16:08.731114 arp who-has www2.high5.nl tell n2.high5.nl > 15:16:08.731229 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11 > 15:16:08.731631 n2.high5.nl.https >
snmpd_metrics: differentiate between hrSWRunName and hrSWRunPath
Right now we return the same value for both hrSWRunName and hrSWRunPath. hrSWRunPath should return the full path of the binary, and hrSWRunName a description of the running software. Afaik there's no proper way to retrieve the full path of the running binary, However, in a lot of cases argv[0] can contain the full or relative path. But if argv[0] gets overwritten (like most of our daemons' children) it gives a more descriptive name, which is more in line with hrSWRunName. netsnmp's snmpd uses argv[0] for hrSWRunPath and kinfo_proc's p_comm for hrSWRunName, and snmptop defaults to hrSWRunName. top(1) also defaults to p_comm, but contrary to top(1), snmptop allows us to switch between hrSWRunName, and hrSWRunPath and toggling of hrSWRunParameters independently, where top(1) toggles between p_comm and argv[]. So there's an argument to be made either way, but for this diff I stuck with netsnmp's choices. While here, change the buffer length from 128 to 129. HOST-RESOURCES-MIB allows up to 128 characters in the response, so make room for the terminating NUL. Thoughts? OK? martijn@ Index: mib.c === RCS file: /cvs/src/libexec/snmpd/snmpd_metrics/mib.c,v retrieving revision 1.4 diff -u -p -r1.4 mib.c --- mib.c 4 Jul 2023 11:34:19 - 1.4 +++ mib.c 18 Oct 2023 11:47:00 - @@ -103,7 +103,9 @@ int kinfo_proc_comp(const void *, const int kinfo_proc(u_int32_t, struct kinfo_proc **); voidkinfo_timer_cb(int, short, void *); voidkinfo_proc_free(void); -int kinfo_args(struct kinfo_proc *, char **); +int kinfo_args(struct kinfo_proc *, char ***); +int kinfo_path(struct kinfo_proc *, char **); +int kinfo_parameters(struct kinfo_proc *, char **); /* IF-MIB */ struct agentx_index *ifIdx; @@ -669,13 +671,21 @@ mib_hrswrun(struct agentx_varbind *vb) if (obj == hrSWRunIndex) agentx_varbind_integer(vb, kinfo->p_pid); - else if (obj == hrSWRunName || obj == hrSWRunPath) + else if (obj == hrSWRunName) agentx_varbind_string(vb, kinfo->p_comm); - else if (obj == hrSWRunID) + else if (obj == hrSWRunPath) { + if (kinfo_path(kinfo, ) == -1) { + log_warn("kinfo_path"); + agentx_varbind_error(vb); + return; + } + + agentx_varbind_string(vb, s); + } else if (obj == hrSWRunID) agentx_varbind_oid(vb, AGENTX_OID(0, 0)); else if (obj == hrSWRunParameters) { - if (kinfo_args(kinfo, ) == -1) { - log_warn("kinfo_args"); + if (kinfo_parameters(kinfo, ) == -1) { + log_warn("kinfo_parameters"); agentx_varbind_error(vb); return; } @@ -811,25 +821,22 @@ kinfo_proc_free(void) } int -kinfo_args(struct kinfo_proc *kinfo, char **s) +kinfo_args(struct kinfo_proc *kinfo, char ***s) { - static char str[128]; static char *buf = NULL; static size_tbuflen = 128; int mib[] = { CTL_KERN, KERN_PROC_ARGS, kinfo->p_pid, KERN_PROC_ARGV }; - char*nbuf, **argv; + char*nbuf; + *s = NULL; if (buf == NULL) { buf = malloc(buflen); if (buf == NULL) return (-1); } - str[0] = '\0'; - *s = str; - while (sysctl(mib, nitems(mib), buf, , NULL, 0) == -1) { if (errno != ENOMEM) { /* some errors are expected, dont get too upset */ @@ -844,11 +851,41 @@ kinfo_args(struct kinfo_proc *kinfo, cha buflen += 128; } - argv = (char **)buf; - if (argv[0] == NULL) - return (0); + *s = (char **)buf; + return (0); +} + +int +kinfo_path(struct kinfo_proc *kinfo, char **s) +{ + static char str[129]; + char**argv; + + if (kinfo_args(kinfo, ) == -1) + return (-1); + str[0] = '\0'; + *s = str; + if (argv != NULL && argv[0] != NULL) + strlcpy(str, argv[0], sizeof(str)); + return (0); +} + +int +kinfo_parameters(struct kinfo_proc *kinfo, char **s) +{ + static char str[129]; + char**argv; + + if (kinfo_args(kinfo, ) == -1) + return (-1); + + str[0] = '\0'; + *s = str; + if (argv == NULL || argv[0] == NULL) + return (0); argv++; + while (*argv != NULL) { strlcat(str, *argv, sizeof(str)); argv++;
Re: log.c use buffered IO
On Tue, Oct 17, 2023 at 10:06:54AM +0200, Sebastian Benoit wrote: > Theo Buehler(t...@theobuehler.org) on 2023.10.17 09:13:15 +0200: > > On Mon, Oct 16, 2023 at 12:19:17PM +0200, Claudio Jeker wrote: > > > I dislike how log.c does all these asprintf() calls with dubious > > > workaround calls in case asprintf() fails. > > > > You're not alone. > > > > > IMO it is easier to use the stdio provided buffers and fflush() to get > > > "atomic" writes on stderr. At least from my understanding this is the > > > reason to go all this lenght to do a single fprintf call. > > > > This makes sense, but I don't know the history here. > > as far as i can remember, it was done so it would still be able to work > somewhat when out of memeory. > After some input off-list here another idea. Require that logit() is called with a \n and by that simplify a lot of code around it. vsyslog() handles both so having the \n should not cause any breakage. Now logit() is mostly used internally but in bgpd it is also used in logmsg.c and parse.y. Fixing those is simple. Also this uses a stack buffer for all the log_* cases now. This should make the code more thread safe. Also this removes vlog() from the API. I had a quick look at all the other log.c users and apart from ldapd this can be added to all of them without much issues. Nothing else uses vlog() and the logit() is also very minimal (mostly the same parse.y change as below). -- :wq Claudio Index: log.c === RCS file: /cvs/src/usr.sbin/bgpd/log.c,v retrieving revision 1.64 diff -u -p -r1.64 log.c --- log.c 21 Mar 2017 12:06:55 - 1.64 +++ log.c 18 Oct 2023 07:10:32 - @@ -26,6 +26,8 @@ #include "log.h" +#define MAX_LOGLEN 4096 + static int debug; static int verbose; static const char *log_procname; @@ -68,30 +70,15 @@ void logit(int pri, const char *fmt, ...) { va_list ap; + int saved_errno = errno; va_start(ap, fmt); - vlog(pri, fmt, ap); - va_end(ap); -} - -void -vlog(int pri, const char *fmt, va_list ap) -{ - char*nfmt; - int saved_errno = errno; - if (debug) { - /* best effort in out of mem situations */ - if (asprintf(, "%s\n", fmt) == -1) { - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - } else { - vfprintf(stderr, nfmt, ap); - free(nfmt); - } + vfprintf(stderr, fmt, ap); fflush(stderr); } else vsyslog(pri, fmt, ap); + va_end(ap); errno = saved_errno; } @@ -99,26 +86,18 @@ vlog(int pri, const char *fmt, va_list a void log_warn(const char *emsg, ...) { - char*nfmt; - va_list ap; - int saved_errno = errno; + charfmtbuf[MAX_LOGLEN]; + va_list ap; + int saved_errno = errno; /* best effort to even work in out of memory situations */ if (emsg == NULL) - logit(LOG_ERR, "%s", strerror(saved_errno)); + logit(LOG_ERR, "%s\n", strerror(saved_errno)); else { va_start(ap, emsg); - - if (asprintf(, "%s: %s", emsg, - strerror(saved_errno)) == -1) { - /* we tried it... */ - vlog(LOG_ERR, emsg, ap); - logit(LOG_ERR, "%s", strerror(saved_errno)); - } else { - vlog(LOG_ERR, nfmt, ap); - free(nfmt); - } + (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap); va_end(ap); + logit(LOG_ERR, "%s: %s\n", fmtbuf, strerror(saved_errno)); } errno = saved_errno; @@ -127,53 +106,65 @@ log_warn(const char *emsg, ...) void log_warnx(const char *emsg, ...) { - va_list ap; + charfmtbuf[MAX_LOGLEN]; + va_list ap; + int saved_errno = errno; va_start(ap, emsg); - vlog(LOG_ERR, emsg, ap); + (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap); va_end(ap); + logit(LOG_ERR, "%s\n", fmtbuf); + + errno = saved_errno; } void log_info(const char *emsg, ...) { - va_list ap; + charfmtbuf[MAX_LOGLEN]; + va_list ap; + int saved_errno = errno; va_start(ap, emsg); - vlog(LOG_INFO, emsg, ap); + (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap); va_end(ap); + logit(LOG_INFO, "%s\n", fmtbuf); + + errno = saved_errno; } void log_debug(const char *emsg, ...) { - va_list ap; + charfmtbuf[MAX_LOGLEN]; + va_list ap; if (verbose) { va_start(ap, emsg); - vlog(LOG_DEBUG, emsg, ap); +
dwge(4): don't panic on truncated input packet
I had the misfortune of hitting a KASSERT in dwge: panic: kernel diagnostic assertion "len > 0" failed: file "/usr/src/sys/dev/fdt /if_dwge.c", line 1102 Stopped at panic+0x106:addia0,zero,256TIDPIDUID PR FLAGS PFLAGS CPU COMMAND *405136 98879 1500 0x3 00 git 301471 67731 0 0x14000 0x2001 softnet0 panic() at panic+0x106 panic() at panic dwge_rx_proc() at dwge_rx_proc+0x1d4 dwge_intr() at dwge_intr+0x4e plic_irq_dispatch() at plic_irq_dispatch+0xec plic_irq_handler() at plic_irq_handler+0x56 riscv_cpu_intr() at riscv_cpu_intr+0x22 cpu_exception_handler_supervisor() at cpu_exception_handler_supervisor+0x7a pmap_copy_page() at pmap_copy_page+0x94 uvm_pagerealloc_multi() at uvm_pagerealloc_multi+0x24e buf_realloc_pages() at buf_realloc_pages+0xa0 buf_flip_high() at buf_flip_high+0x64 bufcache_recover_dmapages() at bufcache_recover_dmapages+0x13a buf_get() at buf_get+0xec end trace frame: 0xffc04d9ac870, count: 0 The following diff should count the error and skirt the panic. Index: if_dwge.c === RCS file: /OpenBSD/src/sys/dev/fdt/if_dwge.c,v retrieving revision 1.19 diff -u -p -r1.19 if_dwge.c --- if_dwge.c 15 Aug 2023 08:27:30 - 1.19 +++ if_dwge.c 18 Oct 2023 16:37:59 - @@ -1099,13 +1101,15 @@ dwge_rx_proc(struct dwge_softc *sc) /* Strip off CRC. */ len -= ETHER_CRC_LEN; - KASSERT(len > 0); - - m = rxb->tb_m; - rxb->tb_m = NULL; - m->m_pkthdr.len = m->m_len = len; + if (len <= 0) { + ifp->if_ierrors++; + } else { + m = rxb->tb_m; + rxb->tb_m = NULL; + m->m_pkthdr.len = m->m_len = len; - ml_enqueue(, m); + ml_enqueue(, m); + } put++; if (sc->sc_rx_cons == (DWGE_NRXDESC - 1))
Re: better fix for Term::Cap
On Wed, Oct 18, 2023 at 10:25:59AM +0200, Marc Espie wrote: > Instead of an archaic limit, just keep track of tc's. > This is actually slightly faster, because the termcap links is a tree, > not a list. I like it, but I don't want to carry this patch against upstream. I will happily pull in this change from there as soon as they merge it though. Changing the 32 to 64 is an easy patch to keep up on and to solve merge conflicts with when upgrading. This larger change is much less so. That said, there is so much in this file that could be improved, why stop here? > > Please test. > > Index: Cap.pm > === > RCS file: /cvs/src/gnu/usr.bin/perl/cpan/Term-Cap/Cap.pm,v > retrieving revision 1.3 > diff -u -p -r1.3 Cap.pm > --- Cap.pm18 Oct 2023 01:49:26 - 1.3 > +++ Cap.pm18 Oct 2023 08:24:54 - > @@ -280,7 +280,7 @@ sub Tgetent > > $first = 0;# first entry (keeps term name) > > -$max = 64; # max :tc=...:'s > +my $seen = {}; # keep track of :tc=...:s > > if ($entry) > { > @@ -291,6 +291,7 @@ sub Tgetent > if ( $entry =~ s/:tc=([^:]+):/:/ ) > { > $tmp_term = $1; > + $seen->{$tmp_term} = 1; > > # protect any pattern metacharacters in $tmp_term > $termpat = $tmp_term; > @@ -332,10 +333,7 @@ sub Tgetent > } > else > { > - > # do the same file again > -# prevent endless recursion > -$max-- || croak "failed termcap loop at $tmp_term"; > $state = 1;# ok, maybe do a new file next time > } > > @@ -345,11 +343,20 @@ sub Tgetent > close TERMCAP; > > # If :tc=...: found then search this file again > -$entry =~ s/:tc=([^:]+):/:/ && ( $tmp_term = $1, $state = 2 ); > +while ($entry =~ s/:tc=([^:]+):/:/) { > + $tmp_term = $1; > + if ($seen->{$tmp_term}) { > + # XXX first version of this croaked, but we can actually > + # get several intermediate entries with the same tc ! > + next; > + } > + $seen->{$tmp_term} = 1; > + $state = 2; > > -# protect any pattern metacharacters in $tmp_term > -$termpat = $tmp_term; > -$termpat =~ s/(\W)/\\$1/g; > + # protect any pattern metacharacters in $tmp_term > + $termpat = $tmp_term; > + $termpat =~ s/(\W)/\\$1/g; > + } > } > > croak "Can't find $term" if $entry eq ''; > -- andrew "Programming today is a race between software engineers striving to build bigger and better idiot-proof programs, and the Universe trying to produce bigger and better idiots. So far, the Universe is winning." -- Rich Cook
Re: TSO for ixl(4)
On Wed, Oct 18, 2023 at 05:29:41PM +0200, Jan Klemkow wrote: > This diff implements TCP Segmentation Offloading for ixl(4). I tested > it successfully on amd64 and sparc64 with Intel X710. It should > increase the TCP bulk performance to 10 Gbit/s. On sparc64 I got an > increase from 600 MBit/s to 2.000 Gbit/s. > > Further testing is welcome. tested on amd64 OK bluhm@ > Index: dev/pci/if_ixl.c > === > RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v > retrieving revision 1.89 > diff -u -p -r1.89 if_ixl.c > --- dev/pci/if_ixl.c 29 Sep 2023 19:44:47 - 1.89 > +++ dev/pci/if_ixl.c 18 Oct 2023 15:15:30 - > @@ -71,6 +71,7 @@ > #include > #include > #include > +#include > #include > > #if NBPFILTER > 0 > @@ -85,6 +86,8 @@ > #include > #include > #include > +#include > +#include > #include > #include > > @@ -827,6 +830,10 @@ struct ixl_tx_desc { > #define IXL_TX_DESC_BSIZE_MASK \ > (IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT) > > +#define IXL_TX_CTX_DESC_CMD_TSO 0x10 > +#define IXL_TX_CTX_DESC_TLEN_SHIFT 30 > +#define IXL_TX_CTX_DESC_MSS_SHIFT50 > + > #define IXL_TX_DESC_L2TAG1_SHIFT 48 > } __packed __aligned(16); > > @@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 { > uint64_tqword3; > } __packed __aligned(16); > > -#define IXL_TX_PKT_DESCS 8 > +#define IXL_TX_PKT_DESCS 32 > #define IXL_TX_QUEUE_ALIGN 128 > #define IXL_RX_QUEUE_ALIGN 128 > > #define IXL_HARDMTU 9712 /* 9726 - ETHER_HDR_LEN */ > +#define IXL_TSO_SIZE ((255 * 1024) - 1) > +#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1) > + > +/* > + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES. > + * This interface could not handle packets greater than IXL_TSO_SIZE. > + */ > +CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE); > > #define IXL_PCIREG PCI_MAPREG_START > > @@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct > ifp->if_capabilities |= IFCAP_CSUM_IPv4 | > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 | > IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; > + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; > > ifmedia_init(>sc_media, 0, ixl_media_change, ixl_media_status); > > @@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi > txm = [i]; > > if (bus_dmamap_create(sc->sc_dmat, > - IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0, > + MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0, > BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > >txm_map) != 0) > goto uncreate; > @@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm > } > > static uint64_t > -ixl_tx_setup_offload(struct mbuf *m0) > +ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr, > +unsigned int prod) > { > struct ether_extracted ext; > uint64_t hlen; > @@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0) > } > > if (!ISSET(m0->m_pkthdr.csum_flags, > - M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) > + M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO)) > return (offload); > > ether_extract_headers(m0, ); > @@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0) > offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT; > } > > + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) { > + if (ext.tcp) { > + struct ixl_tx_desc *ring, *txd; > + uint64_t cmd = 0; > + > + hlen += ext.tcp->th_off << 2; > + ring = IXL_DMA_KVA(>txr_mem); > + txd = [prod]; > + > + cmd |= IXL_TX_DESC_DTYPE_CONTEXT; > + cmd |= IXL_TX_CTX_DESC_CMD_TSO; > + cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN > + - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT; > + cmd |= (uint64_t)(m0->m_pkthdr.ph_mss) > + << IXL_TX_CTX_DESC_MSS_SHIFT; > + > + htolem64(>addr, 0); > + htolem64(>cmd, cmd); > + } else > + tcpstat_inc(tcps_outbadtso); > + } > + > return (offload); > } > > @@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq) > mask = sc->sc_tx_ring_ndescs - 1; > > for (;;) { > - if (free <= IXL_TX_PKT_DESCS) { > + /* We need one extra descriptor for TSO packets. */ > + if (free <= (IXL_TX_PKT_DESCS + 1)) { > ifq_set_oactive(ifq); > break; > } > @@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq) >
bt(5), btrace(8): execute END probe and print maps after exit() statement
Hi, A bt(5) exit() statement causes the btrace(8) interpreter to exit(3) immediately. A BPFtrace exit() statement is more nuanced: the END probe is executed and the contents of all maps are printed before the interpreter exits. This patch adds a halting check after the execution of each bt(5) statement. If a statement causes the program to halt, the halt bubbles up to the top-level rule evaluation loop and terminates execution. rules_teardown() then runs, just as if the program had received SIGTERM. Two edge-like cases: 1. You can exit() from BEGIN. rules_setup() returns non-zero if this happens so the main loop knows to halt immediately. 2. You can exit() from END. This is just an early-return: the END probe doesn't run again. Thoughts? $ btrace -e ' BEGIN { @[probe] = "reached"; exit(); @[probe] = "not reached"; } END { @[probe] = "reached"; exit(); @[probe] = "not reached"; }' Index: btrace.c === RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v retrieving revision 1.79 diff -u -p -r1.79 btrace.c --- btrace.c12 Oct 2023 15:16:44 - 1.79 +++ btrace.c18 Oct 2023 17:54:16 - @@ -71,10 +71,10 @@ struct dtioc_probe_info *dtpi_get_by_val * Main loop and rule evaluation. */ voidrules_do(int); -voidrules_setup(int); -voidrules_apply(int, struct dt_evt *); +int rules_setup(int); +int rules_apply(int, struct dt_evt *); voidrules_teardown(int); -voidrule_eval(struct bt_rule *, struct dt_evt *); +int rule_eval(struct bt_rule *, struct dt_evt *); voidrule_printmaps(struct bt_rule *); /* @@ -84,7 +84,7 @@ uint64_t builtin_nsecs(struct dt_evt * const char *builtin_kstack(struct dt_evt *); const char *builtin_arg(struct dt_evt *, enum bt_argtype); struct bt_arg *fn_str(struct bt_arg *, struct dt_evt *, char *); -voidstmt_eval(struct bt_stmt *, struct dt_evt *); +int stmt_eval(struct bt_stmt *, struct dt_evt *); voidstmt_bucketize(struct bt_stmt *, struct dt_evt *); voidstmt_clear(struct bt_stmt *); voidstmt_delete(struct bt_stmt *, struct dt_evt *); @@ -405,6 +405,7 @@ void rules_do(int fd) { struct sigaction sa; + int halt = 0; memset(, 0, sizeof(sa)); sigemptyset(_mask); @@ -415,9 +416,9 @@ rules_do(int fd) if (sigaction(SIGTERM, , NULL)) err(1, "sigaction"); - rules_setup(fd); + halt = rules_setup(fd); - while (!quit_pending && g_nprobes > 0) { + while (!quit_pending && !halt && g_nprobes > 0) { static struct dt_evt devtbuf[64]; ssize_t rlen; size_t i; @@ -434,8 +435,11 @@ rules_do(int fd) if ((rlen % sizeof(struct dt_evt)) != 0) err(1, "incorrect read"); - for (i = 0; i < rlen / sizeof(struct dt_evt); i++) - rules_apply(fd, [i]); + for (i = 0; i < rlen / sizeof(struct dt_evt); i++) { + halt = rules_apply(fd, [i]); + if (halt) + break; + } } rules_teardown(fd); @@ -484,7 +488,7 @@ rules_action_scan(struct bt_stmt *bs) return evtflags; } -void +int rules_setup(int fd) { struct dtioc_probe_info *dtpi; @@ -493,7 +497,7 @@ rules_setup(int fd) struct bt_probe *bp; struct bt_stmt *bs; struct bt_arg *ba; - int dokstack = 0, on = 1; + int dokstack = 0, halt = 0, on = 1; uint64_t evtflags; TAILQ_FOREACH(r, _rules, br_next) { @@ -553,7 +557,7 @@ rules_setup(int fd) clock_gettime(CLOCK_REALTIME, _devt.dtev_tsp); if (rbegin) - rule_eval(rbegin, _devt); + halt = rule_eval(rbegin, _devt); /* Enable all probes */ TAILQ_FOREACH(r, _rules, br_next) { @@ -571,9 +575,14 @@ rules_setup(int fd) if (ioctl(fd, DTIOCRECORD, )) err(1, "DTIOCRECORD"); } + + return halt; } -void +/* + * Returns non-zero if the program should halt. + */ +int rules_apply(int fd, struct dt_evt *dtev) { struct bt_rule *r; @@ -586,9 +595,11 @@ rules_apply(int fd, struct dt_evt *dtev) continue; dtai_cache(fd, _dtpis[dtev->dtev_pbn - 1]); - rule_eval(r, dtev); + if (rule_eval(r, dtev)) + return 1; } } + return 0; } void @@ -637,7 +648,10 @@ rules_teardown(int fd)