Re: installer: support encryption with key disks

2023-10-18 Thread Andrew Hewus Fresh
On Mon, Oct 16, 2023 at 07:46:10PM +, Klemens Nanni wrote:
> On Mon, Sep 04, 2023 at 09:57:40PM +, Klemens Nanni wrote:
> > Extend the yes/no question to no/passphrase/keydisk and have users pick an
> > existing, preformated RAID partition;  no support (yet) for creating one.
> > 
> > Thanks to how ask_which() works, users can always say 'done' to land back
> > at question to either skip crypto or use a passphrase instead.
> > 
> > All code remains contained behind interactive non-default installations.
> > Code is straight forward, I've not been able to break it;  rest unchanged.
> > 
> > Example install with root disk sd0 and ready-to-use key disk sd1:
> > 
> > Available disks are: sd0 sd1.
> > Which disk is the root disk? ('?' for details) [sd0] 
> > Encrypt the root disk with a (p)assphrase or (k)eydisk? [no] k
> > Available disks are: sd1.
> > Which disk contains the key disk? (or 'done') [sd1] 
> > Available sd1 partitions are: a.
> > Which sd1 partition is the key disk? (or 'done') [a] 
> > 
> > Configuring the crypto chunk sd0...
> > 
> > No valid MBR or GPT.
> > Use (W)hole disk MBR, whole disk (G)PT or (E)dit? [whole] 
> > Setting OpenBSD MBR partition to whole sd0...done.
> > sd2 at scsibus2 targ 1 lun 0: 
> > sd2: 1023MB, 512 bytes/sector, 2096560 sectors
> > 
> > Configuring the root disk sd2...
> > 
> > No valid MBR or GPT.
> > Use (W)hole disk MBR, whole disk (G)PT or (E)dit? [whole] 
> > 
> > 
> > Feedback? OK?

This seems to work well for me and the implementation looks reasonable.

I think we want to update INSTALL with some extra docs as well, likely
pointing folks to how to create a keydisk.

OK afresh1@



> 
> Ping.
> 
> Index: install.sub
> ===
> RCS file: /cvs/src/distrib/miniroot/install.sub,v
> retrieving revision 1.1255
> diff -u -p -r1.1255 install.sub
> --- install.sub   21 Aug 2023 14:33:55 -  1.1255
> +++ install.sub   16 Oct 2023 19:36:55 -
> @@ -3074,8 +3074,32 @@ do_autoinstall() {
>   exec reboot
>  }
>  
> +# Chose an existing partition as key disk and set global $KEYDISK on success,
> +# otherwise return non-zero.
> +pick_keydisk() {
> + KEYDISK=
> + local _disk _label
> +
> + ask_which disk 'contains the key disk' '$(rmel $ROOTDISK $(get_dkdevs))'
> + [[ $resp == done ]] && return 1
> + _disk=$resp
> +
> + make_dev $_disk
> + if disklabel $_disk 2>/dev/null | ! grep -qw RAID; then
> + echo "$_disk must contain a RAID partition."
> + return 1
> + fi
> +
> + ask_which "$_disk partition" 'is the key disk' \
> + "\$(disklabel $_disk 2>/dev/null |
> + sed -En 's/^  ([a-p]):.*RAID.*$/\1/p')"
> + [[ $resp == done ]] && return 1
> + _label=$resp
> + KEYDISK=$_disk$_label
> +}
> +
>  encrypt_root() {
> - local _chunk=$ROOTDISK
> + local _args _chunk=$ROOTDISK
>  
>   [[ $MDBOOTSR == y ]] || return
>  
> @@ -3088,13 +3112,30 @@ encrypt_root() {
>   # e.g. auto-assembled at boot or done in (S)hell.
>   [[ -z $(get_softraid_volumes) ]] || return
>  
> - ask_yn 'Encrypt the root disk with a passphrase?' || return
> + while :; do
> + ask 'Encrypt the root disk with a (p)assphrase or (k)eydisk?' no
> + case $resp in
> + # Retry on failure to allow passphrase or skip.
> + [kK]*)
> + pick_keydisk || continue
> + _args=-k$KEYDISK
> + break
> + ;;
> + # Do nothing, bioctl(8) will handle the passphrase.
> + [pP]*)  break
> + ;;
> + [nN]*)  return
> + ;;
> + *)  echo "'$resp' is not a valid choice."
> + ;;
> + esac
> + done
>  
>   echo "\nConfiguring the crypto chunk $_chunk...\n"
>   md_prep_fdisk $_chunk
>   echo 'RAID *' | disklabel -w -A -T- $_chunk
>  
> - bioctl -Cforce -cC -l${_chunk}a softraid0 >/dev/null
> + bioctl -Cforce -cC -l${_chunk}a $_args softraid0 >/dev/null
>  
>   # No volumes existed before asking, but we just created one.
>   ROOTDISK=$(get_softraid_volumes)
> 

-- 
andrew

($do || !$do) && undef($try) ;  # Master of Perl, Yoda is.  H?



Re: dwge(4): don't panic on truncated input packet

2023-10-18 Thread Mark Kettenis
> Date: Wed, 18 Oct 2023 16:40:20 +
> From: Miod Vallat 
> 
> I had the misfortune of hitting a KASSERT in dwge:
> 
> panic: kernel diagnostic assertion "len > 0" failed: file 
> "/usr/src/sys/dev/fdt
> /if_dwge.c", line 1102
> Stopped at  panic+0x106:addia0,zero,256TIDPIDUID 
> PR
> FLAGS PFLAGS  CPU  COMMAND
> *405136  98879   1500 0x3  00  git
>  301471  67731  0 0x14000  0x2001  softnet0
> panic() at panic+0x106
> panic() at panic
> dwge_rx_proc() at dwge_rx_proc+0x1d4
> dwge_intr() at dwge_intr+0x4e
> plic_irq_dispatch() at plic_irq_dispatch+0xec
> plic_irq_handler() at plic_irq_handler+0x56
> riscv_cpu_intr() at riscv_cpu_intr+0x22
> cpu_exception_handler_supervisor() at cpu_exception_handler_supervisor+0x7a
> pmap_copy_page() at pmap_copy_page+0x94
> uvm_pagerealloc_multi() at uvm_pagerealloc_multi+0x24e
> buf_realloc_pages() at buf_realloc_pages+0xa0
> buf_flip_high() at buf_flip_high+0x64
> bufcache_recover_dmapages() at bufcache_recover_dmapages+0x13a
> buf_get() at buf_get+0xec
> end trace frame: 0xffc04d9ac870, count: 0
> 
> The following diff should count the error and skirt the panic.

You probably hit a receive error and hit this code path because we
don't check the error bit in the receive descriptor.  I fixed this in
dwqe(4) recently, and would prefer a similar fix here.  The (untested)
diff for that is attached here.

We may still want to check the length on top of that (and make a
similar change for dwqe(4)).  I think I saw packets with the error bit
set that had a length > 0 on dwqe(4) and this makes me suspect that it
happens on dwge(4) as well.


Index: dev/fdt/if_dwge.c
===
RCS file: /cvs/src/sys/dev/fdt/if_dwge.c,v
retrieving revision 1.19
diff -u -p -r1.19 if_dwge.c
--- dev/fdt/if_dwge.c   15 Aug 2023 08:27:30 -  1.19
+++ dev/fdt/if_dwge.c   18 Oct 2023 22:29:16 -
@@ -214,6 +214,7 @@ struct dwge_desc {
 #define RDES0_OE   (1 << 11)
 #define RDES0_SAF  (1 << 13)
 #define RDES0_DE   (1 << 14)
+#define RDES0_ES   (1 << 15)
 #define RDES0_FL_MASK  0x3fff
 #define RDES0_FL_SHIFT 16
 #define RDES0_AFM  (1 << 30)
@@ -1097,15 +1098,20 @@ dwge_rx_proc(struct dwge_softc *sc)
len, BUS_DMASYNC_POSTREAD);
bus_dmamap_unload(sc->sc_dmat, rxb->tb_map);
 
-   /* Strip off CRC. */
-   len -= ETHER_CRC_LEN;
-   KASSERT(len > 0);
-
m = rxb->tb_m;
rxb->tb_m = NULL;
-   m->m_pkthdr.len = m->m_len = len;
+   if (rxd->sd_status & RDES0_ES) {
+   ifp->if_ierrors++;
+   m_freem(m);
+   } else {
+   /* Strip off CRC. */
+   len -= ETHER_CRC_LEN;
+   KASSERT(len > 0);
 
-   ml_enqueue(, m);
+   m->m_pkthdr.len = m->m_len = len;
+
+   ml_enqueue(, m);
+   }
 
put++;
if (sc->sc_rx_cons == (DWGE_NRXDESC - 1))


> 
> Index: if_dwge.c
> ===
> RCS file: /OpenBSD/src/sys/dev/fdt/if_dwge.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 if_dwge.c
> --- if_dwge.c 15 Aug 2023 08:27:30 -  1.19
> +++ if_dwge.c 18 Oct 2023 16:37:59 -
> @@ -1099,13 +1101,15 @@ dwge_rx_proc(struct dwge_softc *sc)
>  
>   /* Strip off CRC. */
>   len -= ETHER_CRC_LEN;
> - KASSERT(len > 0);
> -
> - m = rxb->tb_m;
> - rxb->tb_m = NULL;
> - m->m_pkthdr.len = m->m_len = len;
> + if (len <= 0) {
> + ifp->if_ierrors++;
> + } else {
> + m = rxb->tb_m;
> + rxb->tb_m = NULL;
> + m->m_pkthdr.len = m->m_len = len;
>  
> - ml_enqueue(, m);
> + ml_enqueue(, m);
> + }
>  
>   put++;
>   if (sc->sc_rx_cons == (DWGE_NRXDESC - 1))
> 
> 



Re: smtpd: implement nullmx RFC 7505

2023-10-18 Thread Stuart Henderson
On 2023/10/17 22:27, Philipp wrote:
> [2023-10-17 17:32] Omar Polo 
> >
> > There is one part of the RFC7505 that I'd like to quote and discuss
> > with you however.  The last paragraph of the section 3 says:
> >
> > : A domain that advertises a null MX MUST NOT advertise any other MX
> > : RR.
> >
> > Your implementation handles the case where there are more than one MX
> > by setting the `nullmx' field in the shared struct when we encounter
> > one, and then don't process the other MXs when that field is set.
> > This is fine.
> >
> > However, I think we can simplify here by not honouring the Null MX
> > when there are other MX records.  We're not violating the RFC.  See
> > diff below to see what I mean.  The only difference is in dns.c, the
> > rest is the same with yours.
> 
> My reasaning for that was: When you set Null MX you explicitly opt out
> from reciving mail even when you violate the spec. But if you want
> it diffrent I can change it.

I think in that situation, the recipient's DNS admin has *not* set a
valid nullmx, and delivery to other MXes should still be attempted.

> But I don't think your proposed patch is a good solution, because the
> result depend on the order of the RR in the repsonse. The problem is
> when the first entry in the response is a Null MX your patch truncate
> the other results and a bounce is generated. But when the Null MX is
> later in the response the other entries are used to deliver the mail.
> 
> > So far I've only compile-tested the code.  I don't have a spare domain
> > to test and don't know of anyone using a Null MX.
> 
> To test Null MX you can use example.com. To test the localhost patch a
> friend of me has set up mxtest.yannikenss.de.
> 
> > > Because some domains set the MX record to "localhost." to get a similar
> > > efect the secound patch ignores "localhost." MX entries and handles a MX
> > > containing only "localhost." records like a Null MX.
> >
> > This could be simplified too.  On top of diff below, it would need
> > just something among the lines of:
> 
> When localhost and Null MX should be handled the same this could be
> simplified in one check, yes. But that "localhost." and Null MX are
> handled different was on purpose. Because I would say setting a Null MX
> is a explicit opt out from reciving mail and setting MX to localhost
> is implicit. As said before, I have no problem which changing this.
> 
> > I'm still not convinced about this part however.  It feels wrong to me
> > to hardcode such a check, it seems too much paranoic.  The follow-up
> > would be to check for localhost in dns_dispatch_host too?  ;)
> 
> The idea[1] about this part was because setting "localhost." as MX is used
> in a similiar way as Null MX[0]. So handle this in a simmilar way as
> Null MX make sense. This way the sender will get a better bounce message.
> 
> Philipp
> 
> [0] https://www.netmeister.org/blog/mx-diversity.html
> 
> [1] I still belive OpenSMTPD should only connect to public routable
> addresses for relay. So don't connect to something like 127.0.0.1/8,
> ::1/128, RFC1918 addresses, ULA, ...
> But this is independent of this patch.

Support for nullmx makes sense to me. Perhaps also the localhost
handling. But some intranet setups may require connections to
RFC1918/ULA addresses - I don't think an MTA should prevent these.

BTW, you can already block those at the DNS resolver level in a
configurable way - see unbound's "private-address" feature.



Termp/Cap

2023-10-18 Thread Marc Espie
The max approach is bogus. It happens because the terminal uses tc, and
then tc again.

The proper approach, especially in perl, is to actually detect a loop,
which should be fairly easy by saving the tc that have already been done
in a hash.



better fix for Term::Cap

2023-10-18 Thread Marc Espie
Instead of an archaic limit, just keep track of tc's.
This is actually slightly faster, because the termcap links is a tree,
not a list.

Please test.

Index: Cap.pm
===
RCS file: /cvs/src/gnu/usr.bin/perl/cpan/Term-Cap/Cap.pm,v
retrieving revision 1.3
diff -u -p -r1.3 Cap.pm
--- Cap.pm  18 Oct 2023 01:49:26 -  1.3
+++ Cap.pm  18 Oct 2023 08:24:54 -
@@ -280,7 +280,7 @@ sub Tgetent
 
 $first = 0;# first entry (keeps term name)
 
-$max = 64; # max :tc=...:'s
+my $seen = {}; # keep track of :tc=...:s
 
 if ($entry)
 {
@@ -291,6 +291,7 @@ sub Tgetent
 if ( $entry =~ s/:tc=([^:]+):/:/ )
 {
 $tmp_term = $1;
+   $seen->{$tmp_term} = 1;
 
 # protect any pattern metacharacters in $tmp_term
 $termpat = $tmp_term;
@@ -332,10 +333,7 @@ sub Tgetent
 }
 else
 {
-
 # do the same file again
-# prevent endless recursion
-$max-- || croak "failed termcap loop at $tmp_term";
 $state = 1;# ok, maybe do a new file next time
 }
 
@@ -345,11 +343,20 @@ sub Tgetent
 close TERMCAP;
 
 # If :tc=...: found then search this file again
-$entry =~ s/:tc=([^:]+):/:/ && ( $tmp_term = $1, $state = 2 );
+while ($entry =~ s/:tc=([^:]+):/:/) {
+   $tmp_term = $1;
+   if ($seen->{$tmp_term}) {
+   # XXX first version of this croaked, but we can actually
+   # get several intermediate entries with the same tc !
+   next;
+   }
+   $seen->{$tmp_term} = 1;
+   $state = 2;
 
-# protect any pattern metacharacters in $tmp_term
-$termpat = $tmp_term;
-$termpat =~ s/(\W)/\\$1/g;
+   # protect any pattern metacharacters in $tmp_term
+   $termpat = $tmp_term;
+   $termpat =~ s/(\W)/\\$1/g;
+   }
 }
 
 croak "Can't find $term" if $entry eq '';



snmpd_metrics: add HOST-RESOURCES-MIB:hrSWRunPerfTable

2023-10-18 Thread Martijn van Duren
This diff adds the two entries from the hrSWRunPerfTable:
hrSWRunPerfCPU, and hrSWRunPerfMem. This allows snmptop from the
net-snmp package to work. Math stolen^Wcopied from top/machine.c.

OK?

martijn@

Index: mib.c
===
RCS file: /cvs/src/libexec/snmpd/snmpd_metrics/mib.c,v
retrieving revision 1.4
diff -u -p -r1.4 mib.c
--- mib.c   4 Jul 2023 11:34:19 -   1.4
+++ mib.c   18 Oct 2023 09:52:07 -
@@ -69,6 +69,10 @@ struct event  connev;
 const char *agentxsocket = NULL;
 int agentxfd = -1;
 
+int pageshift;
+#define pagetok(size) ((size) << pageshift)
+
+voidpageshift_init(void);
 voidsnmp_connect(struct agentx *, void *, int);
 voidsnmp_tryconnect(int, short, void *);
 voidsnmp_read(int, short, void *);
@@ -89,6 +93,7 @@ struct agentx_object *hrProcessorLoad;
 struct agentx_index *hrSWRunIdx;
 struct agentx_object *hrSWRunIndex, *hrSWRunName, *hrSWRunID, *hrSWRunPath;
 struct agentx_object *hrSWRunParameters, *hrSWRunType, *hrSWRunStatus;
+struct agentx_object *hrSWRunPerfCPU, *hrSWRunPerfMem;
 
 voidmib_hrsystemuptime(struct agentx_varbind *);
 voidmib_hrsystemdate(struct agentx_varbind *);
@@ -634,6 +639,7 @@ mib_hrswrun(struct agentx_varbind *vb)
struct agentx_object*obj;
enum agentx_request_type req;
int32_t  idx;
+   int32_t  time;
struct kinfo_proc   *kinfo;
char*s;
 
@@ -711,6 +717,13 @@ mib_hrswrun(struct agentx_varbind *vb)
agentx_varbind_integer(vb, 4);
break;
}
+   } else if (obj == hrSWRunPerfCPU) {
+   time = kinfo->p_rtime_sec * 100;
+   time += (kinfo->p_rtime_usec + 5000) / 1;
+   agentx_varbind_integer(vb, time);
+   } else if (obj == hrSWRunPerfMem) {
+   agentx_varbind_integer(vb, pagetok(kinfo->p_vm_tsize +
+   kinfo->p_vm_dsize + kinfo->p_vm_ssize));
} else
fatal("%s: Unexpected object", __func__);
 }
@@ -3278,6 +3291,7 @@ main(int argc, char *argv[])
kr_init();
pf_init();
timer_init();
+   pageshift_init();
 
if (agentxsocket != NULL) {
if (strlcpy(agentxsocketdir, agentxsocket,
@@ -3375,6 +3389,10 @@ main(int argc, char *argv[])
(hrSWRunType = agentx_object(host, AGENTX_OID(HRSWRUNTYPE),
, 1, 0, mib_hrswrun)) == NULL ||
(hrSWRunStatus = agentx_object(host, AGENTX_OID(HRSWRUNSTATUS),
+   , 1, 0, mib_hrswrun)) == NULL ||
+   (hrSWRunPerfCPU = agentx_object(host, AGENTX_OID(HRSWRUNPERFCPU),
+   , 1, 0, mib_hrswrun)) == NULL ||
+   (hrSWRunPerfMem = agentx_object(host, AGENTX_OID(HRSWRUNPERFMEM),
, 1, 0, mib_hrswrun)) == NULL)
fatal("agentx_object");
 
@@ -4318,6 +4336,22 @@ main(int argc, char *argv[])
log_setverbose(verbose);
 
event_dispatch();
+}
+
+#define LOG1024 10
+void
+pageshift_init(void)
+{
+   long pagesize;
+
+   if ((pagesize = sysconf(_SC_PAGESIZE)) == -1)
+   fatal("sysconf(_SC_PAGESIZE)");
+   while (pagesize > 1) {
+   pageshift++;
+   pagesize >>= 1;
+   }
+   /* we only need the amount of log(2)1024 for our conversion */
+   pageshift -= LOG1024;
 }
 
 void



IPv4 on ix(4) slow/nothing - 7.4

2023-10-18 Thread Mischa

Hi All,

Just upgraded a couple of machines to 7.4. smooth as always!!

I am however seeing issues with IPv4, slowness or no throughput at all.
The machines I have upgraded are using an Intel X540-T network card and 
is connected on 10G.


ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues, 
address b8:ca:3a:62:ee:40
ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues, 
address b8:ca:3a:62:ee:42


root@n2:~ # sysctl kern.version
kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 
2023

dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP

There are a bunch of VMs running on top of it, as soon as I want to 
fetch something with ftp, for example, I don't get anything over IPv4, 
with IPv6 everything is normal.


mischa@www2:~ $ ftp -4 
https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso

Trying 46.23.88.18...
Requesting 
https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso
  0% | |   512 KB  - stalled 
-^C


A trace on mirror / n2:

n2:~ # tcpdump -i vport880 host 46.23.88.32
tcpdump: listening on vport880, link-type EN10MB
15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S 
2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF)

15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl
15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11
15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S 
633749938:633749938(0) ack 2182224747 win 16384 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 
2899683458> (DF)
15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256 
 (DF)
15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack 
1 win 256  (DF)
15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack 
312 win 271  (DF)
15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack 
312 win 271  (DF)
15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32) 
ack 312 win 271  (DF)
15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) 
ack 312 win 271  (DF)
15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: . 
1614:3062(1448) ack 312 win 271 2899683510> (DF)
15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741) 
ack 312 win 271  (DF)
15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win 
255  (DF)
15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542) 
ack 312 win 271  (DF)
15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58) 
ack 312 win 271  (DF)
15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win 
256 > (DF)
15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win 
256 > (DF)
15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448) 
ack 312 win 271  (DF)
15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win 
233 > (DF)
15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: . 
1614:3062(1448) ack 312 win 271 2899684519> (DF)
15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P 
3062:4403(1341) ack 312 win 271 2899684519> (DF)
15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: . 
1614:3062(1448) ack 312 win 271 2899684519> (DF)
15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win 
233 > (DF)
15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P 
3062:4403(1341) ack 312 win 271 2899685520> (DF)
15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win 
235  (DF)
15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack 
4403 win 256  (DF)
15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win 
271  (DF)
15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209) 
ack 4403 win 256  (DF)
15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656) 
ack 527 win 271  (DF)
15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: . 
5059:6507(1448) ack 527 win 271 2899685730> (DF)
15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: . 
6507:7955(1448) ack 527 win 271 2899685730> (DF)
15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win 
256  (DF)
15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: . 
7955:9403(1448) ack 527 win 271 2899685924> (DF)



Running a trace on www2 I am seeing:

www2:~ # tcpdump -i vio0 host 46.23.88.18
tcpdump: listening on vio0, link-type EN10MB
15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S 
2182224746:2182224746(0) win 16384 6,nop,nop,timestamp 2899683458 0> (DF)

15:16:08.731114 arp who-has www2.high5.nl tell n2.high5.nl
15:16:08.731229 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11
15:16:08.731631 n2.high5.nl.https > www2.high5.nl.1828: S 
633749938:633749938(0) ack 2182224747 win 16384 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106 
2899683458> (DF)
15:16:08.731732 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 

[wireless] iwx: small code nitpicking and removal of iwx_wait_tx_queues_empty

2023-10-18 Thread Mikhail Pchelin
Inlined patch does the following:

- removes functions in initialization per style(9) (splnet)
- removes double definition of IWX_NVM_GET_INFO
- removes unused 'ring' variable from iwx_sta_tx_agg_start
- removes unused 'iwx_tid_to_ac' array
- removes iwx_wait_tx_queues_empty function

Regarding the last thing - in our testing of a derivative product we've
found that sometimes the queue has some queued packets, the function has
to wait until all frames will be de-queued, but there are two problems:

1. There is no corresponding wakeup() for a ring, so this tsleep()
should always fail with EWOULDBLOCK, if we have queued frams. This leads
to the situation when needed softc flags aren't cleared, and on the next
join we get a panic with "MAC already added".

2. Linux driver does the flushes a slightly differently:

https://github.com/torvalds/linux/blob/master/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c#L5592

[...]
if (drop) {
if (iwl_mvm_flush_sta(mvm, mvmsta, false))
IWL_ERR(mvm, "flush request fail\n");
} else {
if (iwl_mvm_has_new_tx_api(mvm))
iwl_mvm_wait_sta_queues_empty(mvm, mvmsta);
else /* only used for !iwl_mvm_has_new_tx_api() below */
msk |= mvmsta->tfd_queue_msk;
}
[...]

'drop' var is from Linux 80211 API - "If the parameter drop is set to
true, pending frames may be dropped." per
https://www.kernel.org/doc/html/v6.5/driver-api/80211/mac80211.html

As I can see OpenBSD driver tries to do both things, I propose to drop
this iwx_wait_tx_queues_empty function and follow 'drop = true' case
from the linux driver. Removal of this function has fixed panics for us.

Debugging and code have been sponsored by Serenity Cybersecurity, LLC.

diff /usr/src
commit - a8cd26ec98e16ab36053bf0a84ae2e4d88c286ea
path + /usr/src
blob - 612b68e3c6982a8ed4c0bcaca01d8c4e0a81d115
file + sys/dev/pci/if_iwx.c
--- sys/dev/pci/if_iwx.c
+++ sys/dev/pci/if_iwx.c
@@ -402,7 +402,6 @@ voidiwx_tx_update_byte_tbl(struct iwx_softc *, 
struct
uint16_t, uint16_t);
 intiwx_tx(struct iwx_softc *, struct mbuf *, struct ieee80211_node *);
 intiwx_flush_sta_tids(struct iwx_softc *, int, uint16_t);
-intiwx_wait_tx_queues_empty(struct iwx_softc *);
 intiwx_drain_sta(struct iwx_softc *sc, struct iwx_node *, int);
 intiwx_flush_sta(struct iwx_softc *, struct iwx_node *);
 intiwx_beacon_filter_send_cmd(struct iwx_softc *,
@@ -2792,18 +2791,6 @@ iwx_nic_init(struct iwx_softc *sc)
return 0;
 }
 
-/* Map a TID to an ieee80211_edca_ac category. */
-const uint8_t iwx_tid_to_ac[IWX_MAX_TID_COUNT] = {
-   EDCA_AC_BE,
-   EDCA_AC_BK,
-   EDCA_AC_BK,
-   EDCA_AC_BE,
-   EDCA_AC_VI,
-   EDCA_AC_VI,
-   EDCA_AC_VO,
-   EDCA_AC_VO,
-};
-
 /* Map ieee80211_edca_ac categories to firmware Tx FIFO. */
 const uint8_t iwx_ac_to_tx_fifo[] = {
IWX_GEN2_EDCA_TX_FIFO_BE,
@@ -3548,8 +3535,10 @@ iwx_mac_ctxt_task(void *arg)
struct iwx_softc *sc = arg;
struct ieee80211com *ic = >sc_ic;
struct iwx_node *in = (void *)ic->ic_bss;
-   int err, s = splnet();
+   int err, s;
 
+   s = splnet();
+
if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) ||
ic->ic_state != IEEE80211_S_RUN) {
refcnt_rele_wake(>task_refs);
@@ -3575,8 +3564,10 @@ iwx_phy_ctxt_task(void *arg)
struct iwx_node *in = (void *)ic->ic_bss;
struct ieee80211_node *ni = >in_ni;
uint8_t chains, sco, vht_chan_width;
-   int err, s = splnet();
+   int err, s;
 
+   s = splnet();
+
if ((sc->sc_flags & IWX_FLAG_SHUTDOWN) ||
ic->ic_state != IEEE80211_S_RUN ||
in->in_phyctxt == NULL) {
@@ -3668,7 +3659,6 @@ iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee
struct ieee80211com *ic = >sc_ic;
struct ieee80211_tx_ba *ba;
int err, qid;
-   struct iwx_tx_ring *ring;
 
/* Ensure we can map this TID to an aggregation queue. */
if (tid >= IWX_MAX_TID_COUNT)
@@ -3711,7 +3701,6 @@ iwx_sta_tx_agg_start(struct iwx_softc *sc, struct ieee
 
ba->ba_winend = (ba->ba_winstart + ba->ba_winsize - 1) & 0xfff;
 
-   ring = >txq[qid];
ba->ba_timeout_val = 0;
ieee80211_addba_resp_accept(ic, ni, tid);
sc->aggqid[tid] = qid;
@@ -3723,9 +3712,11 @@ iwx_ba_task(void *arg)
struct iwx_softc *sc = arg;
struct ieee80211com *ic = >sc_ic;
struct ieee80211_node *ni = ic->ic_bss;
-   int s = splnet();
+   int s;
int tid;
 
+   s = splnet();
+
for (tid = 0; tid < IWX_MAX_TID_COUNT; tid++) {
if (sc->sc_flags & IWX_FLAG_SHUTDOWN)
break;
@@ -6429,31 +6420,7 @@ out:
return err;
 }
 
-#define IWX_FLUSH_WAIT_MS  2000
-
 int

bgpd session.c convert to new ibuf API

2023-10-18 Thread Claudio Jeker
This is a bit overdue. Convert session.c to also use the new ibuf API.
This simplifies some code since there is no need for local variables.
Also kill the struct msg_header and especially msg_open. The are of very
little use.

Regress passes so I think this should be fine :)
-- 
:wq Claudio

Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.450
diff -u -p -r1.450 session.c
--- session.c   17 Oct 2023 17:58:15 -  1.450
+++ session.c   18 Oct 2023 15:09:29 -
@@ -1324,24 +1324,26 @@ session_capa_add(struct ibuf *opb, uint8
 {
int errs = 0;
 
-   errs += ibuf_add(opb, _code, sizeof(capa_code));
-   errs += ibuf_add(opb, _len, sizeof(capa_len));
+   errs += ibuf_add_n8(opb, capa_code);
+   errs += ibuf_add_n8(opb, capa_len);
return (errs);
 }
 
 int
 session_capa_add_mp(struct ibuf *buf, uint8_t aid)
 {
-   uint8_t  safi, pad = 0;
uint16_t afi;
+   uint8_t  safi;
int  errs = 0;
 
-   if (aid2afi(aid, , ) == -1)
-   fatalx("session_capa_add_mp: bad afi/safi pair");
-   afi = htons(afi);
-   errs += ibuf_add(buf, , sizeof(afi));
-   errs += ibuf_add(buf, , sizeof(pad));
-   errs += ibuf_add(buf, , sizeof(safi));
+   if (aid2afi(aid, , ) == -1) {
+   log_warn("session_capa_add_afi: bad AID");
+   return (-1);
+   }
+
+   errs += ibuf_add_n16(buf, afi);
+   errs += ibuf_add_zero(buf, 1);
+   errs += ibuf_add_n8(buf, safi);
 
return (errs);
 }
@@ -1356,13 +1358,12 @@ session_capa_add_afi(struct peer *p, str
 
if (aid2afi(aid, , )) {
log_warn("session_capa_add_afi: bad AID");
-   return (1);
+   return (-1);
}
 
-   afi = htons(afi);
-   errs += ibuf_add(b, , sizeof(afi));
-   errs += ibuf_add(b, , sizeof(safi));
-   errs += ibuf_add(b, , sizeof(flags));
+   errs += ibuf_add_n16(b, afi);
+   errs += ibuf_add_n8(b, safi);
+   errs += ibuf_add_n8(b, flags);
 
return (errs);
 }
@@ -1370,21 +1371,19 @@ session_capa_add_afi(struct peer *p, str
 struct bgp_msg *
 session_newmsg(enum msg_type msgtype, uint16_t len)
 {
+   u_char   marker[MSGSIZE_HEADER_MARKER];
struct bgp_msg  *msg;
-   struct msg_headerhdr;
struct ibuf *buf;
int  errs = 0;
 
-   memset(, 0xff, sizeof(hdr.marker));
-   hdr.len = htons(len);
-   hdr.type = msgtype;
+   memset(marker, 0xff, sizeof(marker));
 
if ((buf = ibuf_open(len)) == NULL)
return (NULL);
 
-   errs += ibuf_add(buf, , sizeof(hdr.marker));
-   errs += ibuf_add(buf, , sizeof(hdr.len));
-   errs += ibuf_add(buf, , sizeof(hdr.type));
+   errs += ibuf_add(buf, marker, sizeof(marker));
+   errs += ibuf_add_n16(buf, len);
+   errs += ibuf_add_n8(buf, msgtype);
 
if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
ibuf_free(buf);
@@ -1472,8 +1471,7 @@ session_open(struct peer *p)
 {
struct bgp_msg  *buf;
struct ibuf *opb;
-   struct msg_open  msg;
-   uint16_t len, optparamlen = 0;
+   uint16_t len, optparamlen = 0, holdtime;
uint8_t  i, op_type;
int  errs = 0, extlen = 0;
int  mpcapa = 0;
@@ -1501,10 +1499,8 @@ session_open(struct peer *p)
p->conf.role != ROLE_NONE &&
(p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
mpcapa == 0)) {
-   uint8_t val;
-   val = role2capa(p->conf.role);
errs += session_capa_add(opb, CAPA_ROLE, 1);
-   errs += ibuf_add(opb, , 1);
+   errs += ibuf_add_n8(opb, role2capa(p->conf.role));
}
 
/* graceful restart and End-of-RIB marker, RFC 4724 */
@@ -1520,19 +1516,14 @@ session_open(struct peer *p)
/* Only set the R-flag if no graceful restart is ongoing */
if (!rst)
hdr |= CAPA_GR_R_FLAG;
-   hdr = htons(hdr);
-
errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
-   errs += ibuf_add(opb, , sizeof(hdr));
+   errs += ibuf_add_n16(opb, hdr);
}
 
/* 4-bytes AS numbers, RFC6793 */
if (p->capa.ann.as4byte) {  /* 4 bytes data */
-   uint32_tnas;
-
-   nas = htonl(p->conf.local_as);
-   errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
-   errs += ibuf_add(opb, , sizeof(nas));
+   errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
+   errs += ibuf_add_n32(opb, 

Re: smtpd: implement nullmx RFC 7505

2023-10-18 Thread Philipp
[2023-10-18 11:42] Omar Polo 
> On 2023/10/18 08:40:14 +0100, Stuart Henderson  wrote:
> > On 2023/10/17 22:27, Philipp wrote:
> > > [2023-10-17 17:32] Omar Polo 
> > > > [...]
>
> > > But I don't think your proposed patch is a good solution, because the
> > > result depend on the order of the RR in the repsonse. The problem is
> > > when the first entry in the response is a Null MX your patch truncate
> > > the other results and a bounce is generated. But when the Null MX is
> > > later in the response the other entries are used to deliver the mail.
>
> that's true; I fell for a shorter diff.  I've attached below a version
> that doesn't depend on the order.

Your patch looks mostly good. I only have a small nitpick style comment:

> +
> + if (rr.rr.mx.preference == 0 && !strcmp(buf, "")) {

strcmp doesn't return a bool like value. Something like:

if (rr.rr.mx.preference == 0 && strcmp(buf, "") == 0)

is imho better to read.



Re: bgpd session.c convert to new ibuf API

2023-10-18 Thread Theo Buehler
On Wed, Oct 18, 2023 at 05:19:29PM +0200, Claudio Jeker wrote:
> This is a bit overdue. Convert session.c to also use the new ibuf API.
> This simplifies some code since there is no need for local variables.
> Also kill the struct msg_header and especially msg_open. The are of very
> little use.
> 
> Regress passes so I think this should be fine :)

Can't spot anything wrong with the conversion, two log nits below.

> -- 
> :wq Claudio
> 
> Index: session.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> retrieving revision 1.450
> diff -u -p -r1.450 session.c
> --- session.c 17 Oct 2023 17:58:15 -  1.450
> +++ session.c 18 Oct 2023 15:09:29 -
> @@ -1324,24 +1324,26 @@ session_capa_add(struct ibuf *opb, uint8
>  {
>   int errs = 0;
>  
> - errs += ibuf_add(opb, _code, sizeof(capa_code));
> - errs += ibuf_add(opb, _len, sizeof(capa_len));
> + errs += ibuf_add_n8(opb, capa_code);
> + errs += ibuf_add_n8(opb, capa_len);
>   return (errs);
>  }
>  
>  int
>  session_capa_add_mp(struct ibuf *buf, uint8_t aid)
>  {
> - uint8_t  safi, pad = 0;
>   uint16_t afi;
> + uint8_t  safi;
>   int  errs = 0;
>  
> - if (aid2afi(aid, , ) == -1)
> - fatalx("session_capa_add_mp: bad afi/safi pair");
> - afi = htons(afi);
> - errs += ibuf_add(buf, , sizeof(afi));
> - errs += ibuf_add(buf, , sizeof(pad));
> - errs += ibuf_add(buf, , sizeof(safi));
> + if (aid2afi(aid, , ) == -1) {
> + log_warn("session_capa_add_afi: bad AID");

wrong function name. Should this be

log_warn("%s: bad AID", __func__);

> + return (-1);
> + }
> +
> + errs += ibuf_add_n16(buf, afi);
> + errs += ibuf_add_zero(buf, 1);
> + errs += ibuf_add_n8(buf, safi);
>  
>   return (errs);
>  }
> @@ -1356,13 +1358,12 @@ session_capa_add_afi(struct peer *p, str
>  
>   if (aid2afi(aid, , )) {
>   log_warn("session_capa_add_afi: bad AID");

use __func__?

> - return (1);
> + return (-1);

fun

>   }
>  
> - afi = htons(afi);
> - errs += ibuf_add(b, , sizeof(afi));
> - errs += ibuf_add(b, , sizeof(safi));
> - errs += ibuf_add(b, , sizeof(flags));
> + errs += ibuf_add_n16(b, afi);
> + errs += ibuf_add_n8(b, safi);
> + errs += ibuf_add_n8(b, flags);
>  
>   return (errs);
>  }
> @@ -1370,21 +1371,19 @@ session_capa_add_afi(struct peer *p, str
>  struct bgp_msg *
>  session_newmsg(enum msg_type msgtype, uint16_t len)
>  {
> + u_char   marker[MSGSIZE_HEADER_MARKER];
>   struct bgp_msg  *msg;
> - struct msg_headerhdr;
>   struct ibuf *buf;
>   int  errs = 0;
>  
> - memset(, 0xff, sizeof(hdr.marker));
> - hdr.len = htons(len);
> - hdr.type = msgtype;
> + memset(marker, 0xff, sizeof(marker));
>  
>   if ((buf = ibuf_open(len)) == NULL)
>   return (NULL);
>  
> - errs += ibuf_add(buf, , sizeof(hdr.marker));
> - errs += ibuf_add(buf, , sizeof(hdr.len));
> - errs += ibuf_add(buf, , sizeof(hdr.type));
> + errs += ibuf_add(buf, marker, sizeof(marker));
> + errs += ibuf_add_n16(buf, len);
> + errs += ibuf_add_n8(buf, msgtype);
>  
>   if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
>   ibuf_free(buf);
> @@ -1472,8 +1471,7 @@ session_open(struct peer *p)
>  {
>   struct bgp_msg  *buf;
>   struct ibuf *opb;
> - struct msg_open  msg;
> - uint16_t len, optparamlen = 0;
> + uint16_t len, optparamlen = 0, holdtime;
>   uint8_t  i, op_type;
>   int  errs = 0, extlen = 0;
>   int  mpcapa = 0;
> @@ -1501,10 +1499,8 @@ session_open(struct peer *p)
>   p->conf.role != ROLE_NONE &&
>   (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
>   mpcapa == 0)) {
> - uint8_t val;
> - val = role2capa(p->conf.role);
>   errs += session_capa_add(opb, CAPA_ROLE, 1);
> - errs += ibuf_add(opb, , 1);
> + errs += ibuf_add_n8(opb, role2capa(p->conf.role));
>   }
>  
>   /* graceful restart and End-of-RIB marker, RFC 4724 */
> @@ -1520,19 +1516,14 @@ session_open(struct peer *p)
>   /* Only set the R-flag if no graceful restart is ongoing */
>   if (!rst)
>   hdr |= CAPA_GR_R_FLAG;
> - hdr = htons(hdr);
> -
>   errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
> - errs += ibuf_add(opb, , sizeof(hdr));
> + errs += ibuf_add_n16(opb, hdr);
>   }
>  
>   /* 4-bytes AS numbers, RFC6793 */
>   if (p->capa.ann.as4byte) {  /* 4 bytes data */
> 

Re: IPv4 on ix(4) slow/nothing - 7.4

2023-10-18 Thread Mischa Peters


> On Oct 18, 2023, at 15:44, Hrvoje Popovski  wrote:
> 
> On 18.10.2023. 15:35, Mischa wrote:
>> Hi All,
>> 
>> Just upgraded a couple of machines to 7.4. smooth as always!!
>> 
>> I am however seeing issues with IPv4, slowness or no throughput at all.
>> The machines I have upgraded are using an Intel X540-T network card and
>> is connected on 10G.
>> 
>> ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues,
>> address b8:ca:3a:62:ee:40
>> ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues,
>> address b8:ca:3a:62:ee:42
>> 
>> root@n2:~ # sysctl kern.version
>> kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 2023
>> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
>> 
>> There are a bunch of VMs running on top of it, as soon as I want to
>> fetch something with ftp, for example, I don't get anything over IPv4,
>> with IPv6 everything is normal.
>> 
>> mischa@www2:~ $ ftp -4
>> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso
>> Trying 46.23.88.18...
>> Requesting
>> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso
>>   0% | |   512 KB  - stalled
>> -^C
>> 
>> A trace on mirror / n2:
>> 
>> n2:~ # tcpdump -i vport880 host 46.23.88.32
>> tcpdump: listening on vport880, link-type EN10MB
>> 15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S
>> 2182224746:2182224746(0) win 16384 > 6,nop,nop,timestamp 2899683458 0> (DF)
>> 15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl
>> 15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11
>> 15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S
>> 633749938:633749938(0) ack 2182224747 win 16384 > 1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106
>> 2899683458> (DF)
>> 15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256
>>  (DF)
>> 15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack
>> 1 win 256  (DF)
>> 15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack
>> 312 win 271  (DF)
>> 15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack
>> 312 win 271  (DF)
>> 15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32)
>> ack 312 win 271  (DF)
>> 15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448)
>> ack 312 win 271  (DF)
>> 15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: .
>> 1614:3062(1448) ack 312 win 271 > 2899683510> (DF)
>> 15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741)
>> ack 312 win 271  (DF)
>> 15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win
>> 255  (DF)
>> 15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542)
>> ack 312 win 271  (DF)
>> 15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58)
>> ack 312 win 271  (DF)
>> 15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win
>> 256 >> (DF)
>> 15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win
>> 256 >> (DF)
>> 15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448)
>> ack 312 win 271  (DF)
>> 15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win
>> 233 >> (DF)
>> 15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: .
>> 1614:3062(1448) ack 312 win 271 > 2899684519> (DF)
>> 15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P
>> 3062:4403(1341) ack 312 win 271 > 2899684519> (DF)
>> 15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: .
>> 1614:3062(1448) ack 312 win 271 > 2899684519> (DF)
>> 15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win
>> 233 >> (DF)
>> 15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P
>> 3062:4403(1341) ack 312 win 271 > 2899685520> (DF)
>> 15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win
>> 235  (DF)
>> 15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack
>> 4403 win 256  (DF)
>> 15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win
>> 271  (DF)
>> 15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209)
>> ack 4403 win 256  (DF)
>> 15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656)
>> ack 527 win 271  (DF)
>> 15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: .
>> 5059:6507(1448) ack 527 win 271 > 2899685730> (DF)
>> 15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: .
>> 6507:7955(1448) ack 527 win 271 > 2899685730> (DF)
>> 15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win
>> 256  (DF)
>> 15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: .
>> 7955:9403(1448) ack 527 win 271 > 2899685924> (DF)
>> 
>> 
>> Running a trace on www2 I am seeing:
>> 
>> www2:~ # tcpdump -i vio0 host 46.23.88.18
>> tcpdump: listening on vio0, link-type EN10MB
>> 15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S
>> 2182224746:2182224746(0) win 16384 > 6,nop,nop,timestamp 

TSO for ixl(4)

2023-10-18 Thread Jan Klemkow
Hi,

This diff implements TCP Segmentation Offloading for ixl(4).  I tested
it successfully on amd64 and sparc64 with Intel X710.  It should
increase the TCP bulk performance to 10 Gbit/s.  On sparc64 I got an
increase from 600 MBit/s to 2.000 Gbit/s.

Further testing is welcome.

bye,
Jan

Index: dev/pci/if_ixl.c
===
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.89
diff -u -p -r1.89 if_ixl.c
--- dev/pci/if_ixl.c29 Sep 2023 19:44:47 -  1.89
+++ dev/pci/if_ixl.c18 Oct 2023 15:15:30 -
@@ -71,6 +71,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #if NBPFILTER > 0
@@ -85,6 +86,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -827,6 +830,10 @@ struct ixl_tx_desc {
 #define IXL_TX_DESC_BSIZE_MASK \
(IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT)
 
+#define IXL_TX_CTX_DESC_CMD_TSO0x10
+#define IXL_TX_CTX_DESC_TLEN_SHIFT 30
+#define IXL_TX_CTX_DESC_MSS_SHIFT  50
+
 #define IXL_TX_DESC_L2TAG1_SHIFT   48
 } __packed __aligned(16);
 
@@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 {
uint64_tqword3;
 } __packed __aligned(16);
 
-#define IXL_TX_PKT_DESCS   8
+#define IXL_TX_PKT_DESCS   32
 #define IXL_TX_QUEUE_ALIGN 128
 #define IXL_RX_QUEUE_ALIGN 128
 
 #define IXL_HARDMTU9712 /* 9726 - ETHER_HDR_LEN */
+#define IXL_TSO_SIZE   ((255 * 1024) - 1)
+#define IXL_MAX_DMA_SEG_SIZE   ((16 * 1024) - 1)
+
+/*
+ * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES.
+ * This interface could not handle packets greater than IXL_TSO_SIZE.
+ */
+CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE);
 
 #define IXL_PCIREG PCI_MAPREG_START
 
@@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct
ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
+   ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 
ifmedia_init(>sc_media, 0, ixl_media_change, ixl_media_status);
 
@@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
txm = [i];
 
if (bus_dmamap_create(sc->sc_dmat,
-   IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0,
+   MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
>txm_map) != 0)
goto uncreate;
@@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
 }
 
 static uint64_t
-ixl_tx_setup_offload(struct mbuf *m0)
+ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr,
+unsigned int prod)
 {
struct ether_extracted ext;
uint64_t hlen;
@@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0)
}
 
if (!ISSET(m0->m_pkthdr.csum_flags,
-   M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
+   M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO))
return (offload);
 
ether_extract_headers(m0, );
@@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0)
offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
}
 
+   if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
+   if (ext.tcp) {
+   struct ixl_tx_desc *ring, *txd;
+   uint64_t cmd = 0;
+
+   hlen += ext.tcp->th_off << 2;
+   ring = IXL_DMA_KVA(>txr_mem);
+   txd = [prod];
+
+   cmd |= IXL_TX_DESC_DTYPE_CONTEXT;
+   cmd |= IXL_TX_CTX_DESC_CMD_TSO;
+   cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN
+   - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT;
+   cmd |= (uint64_t)(m0->m_pkthdr.ph_mss)
+   << IXL_TX_CTX_DESC_MSS_SHIFT;
+
+   htolem64(>addr, 0);
+   htolem64(>cmd, cmd);
+   } else
+   tcpstat_inc(tcps_outbadtso);
+   }
+
return (offload);
 }
 
@@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq)
mask = sc->sc_tx_ring_ndescs - 1;
 
for (;;) {
-   if (free <= IXL_TX_PKT_DESCS) {
+   /* We need one extra descriptor for TSO packets. */
+   if (free <= (IXL_TX_PKT_DESCS + 1)) {
ifq_set_oactive(ifq);
break;
}
@@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq)
if (m == NULL)
break;
 
-   offload = ixl_tx_setup_offload(m);
+   offload = ixl_tx_setup_offload(m, txr, prod);
 
txm = >txr_maps[prod];
map = 

Re: smtpd: implement nullmx RFC 7505

2023-10-18 Thread Omar Polo
On 2023/10/18 08:40:14 +0100, Stuart Henderson  wrote:
> On 2023/10/17 22:27, Philipp wrote:
> > [2023-10-17 17:32] Omar Polo 
> > > There is one part of the RFC7505 that I'd like to quote and discuss
> > > with you however.  The last paragraph of the section 3 says:
> > >
> > > : A domain that advertises a null MX MUST NOT advertise any other MX
> > > : RR.
> > >
> > > Your implementation handles the case where there are more than one MX
> > > by setting the `nullmx' field in the shared struct when we encounter
> > > one, and then don't process the other MXs when that field is set.
> > > This is fine.
> > >
> > > However, I think we can simplify here by not honouring the Null MX
> > > when there are other MX records.  We're not violating the RFC.  See
> > > diff below to see what I mean.  The only difference is in dns.c, the
> > > rest is the same with yours.
> > 
> > My reasaning for that was: When you set Null MX you explicitly opt out
> > from reciving mail even when you violate the spec. But if you want
> > it diffrent I can change it.
> 
> I think in that situation, the recipient's DNS admin has *not* set a
> valid nullmx, and delivery to other MXes should still be attempted.

That's what I though too.

> > But I don't think your proposed patch is a good solution, because the
> > result depend on the order of the RR in the repsonse. The problem is
> > when the first entry in the response is a Null MX your patch truncate
> > the other results and a bounce is generated. But when the Null MX is
> > later in the response the other entries are used to deliver the mail.

that's true; I fell for a shorter diff.  I've attached below a version
that doesn't depend on the order.

> > > So far I've only compile-tested the code.  I don't have a spare domain
> > > to test and don't know of anyone using a Null MX.
> > 
> > To test Null MX you can use example.com. To test the localhost patch a
> > friend of me has set up mxtest.yannikenss.de.

thanks!

> [...]
> > [1] I still belive OpenSMTPD should only connect to public routable
> > addresses for relay. So don't connect to something like 127.0.0.1/8,
> > ::1/128, RFC1918 addresses, ULA, ...
> > But this is independent of this patch.
> 
> Support for nullmx makes sense to me. Perhaps also the localhost
> handling. But some intranet setups may require connections to
> RFC1918/ULA addresses - I don't think an MTA should prevent these.

Completely agree.



diff 2d025d839f99dc09ee525c11a4ed09a0f3bbe7d0 
02bb94351d3865e61483023cab9fa02bcac2970d
commit - 2d025d839f99dc09ee525c11a4ed09a0f3bbe7d0
commit + 02bb94351d3865e61483023cab9fa02bcac2970d
blob - 4cf5d23d1d14b5400c6f4429dae0a4f6490073d4
blob + 552a5cf9115401b4fac3d131d6e5c022ebb8b7fd
--- usr.sbin/smtpd/dns.c
+++ usr.sbin/smtpd/dns.c
@@ -232,10 +232,10 @@ dns_dispatch_mx(struct asr_result *ar, void *arg)
struct dns_rrrr;
char buf[512];
size_t   found;
+   int  nullmx = 0;
 
if (ar->ar_h_errno && ar->ar_h_errno != NO_DATA &&
ar->ar_h_errno != NOTIMP) {
-
m_create(s->p,  IMSG_MTA_DNS_HOST_END, 0, 0, -1);
m_add_id(s->p, s->reqid);
if (ar->ar_rcode == NXDOMAIN)
@@ -259,13 +259,29 @@ dns_dispatch_mx(struct asr_result *ar, void *arg)
unpack_rr(, );
if (rr.rr_type != T_MX)
continue;
+
print_dname(rr.rr.mx.exchange, buf, sizeof(buf));
buf[strlen(buf) - 1] = '\0';
+
+   if (rr.rr.mx.preference == 0 && !strcmp(buf, "")) {
+   nullmx = 1;
+   continue;
+   }
+
dns_lookup_host(s, buf, rr.rr.mx.preference);
found++;
}
free(ar->ar_data);
 
+   if (nullmx && found == 0) {
+   m_create(s->p, IMSG_MTA_DNS_HOST_END, 0, 0, -1);
+   m_add_id(s->p, s->reqid);
+   m_add_int(s->p, DNS_NULLMX);
+   m_close(s->p);
+   free(s);
+   return;
+   }
+
/* fallback to host if no MX is found. */
if (found == 0)
dns_lookup_host(s, s->name, 0);
blob - c0d0fc02931b90409441035d2744923af9e42df1
blob + 25e158b68a88c8485428a2476c1c557f8c60404d
--- usr.sbin/smtpd/mta.c
+++ usr.sbin/smtpd/mta.c
@@ -1088,6 +1088,10 @@ mta_on_mx(void *tag, void *arg, void *data)
else
relay->failstr = "No MX found for domain";
break;
+   case DNS_NULLMX:
+   relay->fail = IMSG_MTA_DELIVERY_PERMFAIL;
+   relay->failstr = "Domain does not accept mail";
+   break;
default:
fatalx("bad DNS lookup error code");
break;
blob - 6781286928da45e6159bce81ff2437011ebdef72
blob + d5cbe88feeaf9e91eca119b31dda957aca68c031
--- usr.sbin/smtpd/smtpd.h
+++ usr.sbin/smtpd/smtpd.h
@@ 

Re: IPv4 on ix(4) slow/nothing - 7.4

2023-10-18 Thread Hrvoje Popovski
On 18.10.2023. 15:35, Mischa wrote:
> Hi All,
> 
> Just upgraded a couple of machines to 7.4. smooth as always!!
> 
> I am however seeing issues with IPv4, slowness or no throughput at all.
> The machines I have upgraded are using an Intel X540-T network card and
> is connected on 10G.
> 
> ix0 at pci5 dev 0 function 0 "Intel X540T" rev 0x01, msix, 16 queues,
> address b8:ca:3a:62:ee:40
> ix1 at pci5 dev 0 function 1 "Intel X540T" rev 0x01, msix, 16 queues,
> address b8:ca:3a:62:ee:42
> 
> root@n2:~ # sysctl kern.version
> kern.version=OpenBSD 7.4 (GENERIC.MP) #1397: Tue Oct 10 09:02:37 MDT 2023
>     dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> 
> There are a bunch of VMs running on top of it, as soon as I want to
> fetch something with ftp, for example, I don't get anything over IPv4,
> with IPv6 everything is normal.
> 
> mischa@www2:~ $ ftp -4
> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso
> Trying 46.23.88.18...
> Requesting
> https://mirror.openbsd.amsterdam/pub/OpenBSD/7.4/amd64/install74.iso
>   0% | |   512 KB  - stalled
> -^C
> 
> A trace on mirror / n2:
> 
> n2:~ # tcpdump -i vport880 host 46.23.88.32
> tcpdump: listening on vport880, link-type EN10MB
> 15:16:08.730274 www2.high5.nl.1828 > n2.high5.nl.https: S
> 2182224746:2182224746(0) win 16384  6,nop,nop,timestamp 2899683458 0> (DF)
> 15:16:08.730297 arp who-has www2.high5.nl tell n2.high5.nl
> 15:16:08.731535 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11
> 15:16:08.731540 n2.high5.nl.https > www2.high5.nl.1828: S
> 633749938:633749938(0) ack 2182224747 win 16384  1460,nop,nop,sackOK,nop,wscale 6,nop,nop,timestamp 3129955106
> 2899683458> (DF)
> 15:16:08.732017 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1 win 256
>  (DF)
> 15:16:08.785752 www2.high5.nl.1828 > n2.high5.nl.https: P 1:312(311) ack
> 1 win 256  (DF)
> 15:16:08.786092 n2.high5.nl.https > www2.high5.nl.1828: P 1:128(127) ack
> 312 win 271  (DF)
> 15:16:08.786376 n2.high5.nl.https > www2.high5.nl.1828: P 128:134(6) ack
> 312 win 271  (DF)
> 15:16:08.786396 n2.high5.nl.https > www2.high5.nl.1828: P 134:166(32)
> ack 312 win 271  (DF)
> 15:16:08.786455 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448)
> ack 312 win 271  (DF)
> 15:16:08.786457 n2.high5.nl.https > www2.high5.nl.1828: .
> 1614:3062(1448) ack 312 win 271  2899683510> (DF)
> 15:16:08.786460 n2.high5.nl.https > www2.high5.nl.1828: P 3062:3803(741)
> ack 312 win 271  (DF)
> 15:16:08.786943 www2.high5.nl.1828 > n2.high5.nl.https: . ack 134 win
> 255  (DF)
> 15:16:08.796534 n2.high5.nl.https > www2.high5.nl.1828: P 3803:4345(542)
> ack 312 win 271  (DF)
> 15:16:08.796577 n2.high5.nl.https > www2.high5.nl.1828: P 4345:4403(58)
> ack 312 win 271  (DF)
> 15:16:08.797518 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win
> 256 > (DF)
> 15:16:08.797522 www2.high5.nl.1828 > n2.high5.nl.https: . ack 166 win
> 256 > (DF)
> 15:16:09.790297 n2.high5.nl.https > www2.high5.nl.1828: . 166:1614(1448)
> ack 312 win 271  (DF)
> 15:16:09.790902 www2.high5.nl.1828 > n2.high5.nl.https: . ack 1614 win
> 233 > (DF)
> 15:16:09.790917 n2.high5.nl.https > www2.high5.nl.1828: .
> 1614:3062(1448) ack 312 win 271  2899684519> (DF)
> 15:16:09.790923 n2.high5.nl.https > www2.high5.nl.1828: P
> 3062:4403(1341) ack 312 win 271  2899684519> (DF)
> 15:16:10.790299 n2.high5.nl.https > www2.high5.nl.1828: .
> 1614:3062(1448) ack 312 win 271  2899684519> (DF)
> 15:16:10.791204 www2.high5.nl.1828 > n2.high5.nl.https: . ack 3062 win
> 233 > (DF)
> 15:16:10.791223 n2.high5.nl.https > www2.high5.nl.1828: P
> 3062:4403(1341) ack 312 win 271  2899685520> (DF)
> 15:16:10.791692 www2.high5.nl.1828 > n2.high5.nl.https: . ack 4403 win
> 235  (DF)
> 15:16:10.802647 www2.high5.nl.1828 > n2.high5.nl.https: P 312:318(6) ack
> 4403 win 256  (DF)
> 15:16:11.000297 n2.high5.nl.https > www2.high5.nl.1828: . ack 318 win
> 271  (DF)
> 15:16:11.001162 www2.high5.nl.1828 > n2.high5.nl.https: P 318:527(209)
> ack 4403 win 256  (DF)
> 15:16:11.001860 n2.high5.nl.https > www2.high5.nl.1828: P 4403:5059(656)
> ack 527 win 271  (DF)
> 15:16:11.001989 n2.high5.nl.https > www2.high5.nl.1828: .
> 5059:6507(1448) ack 527 win 271  2899685730> (DF)
> 15:16:11.001992 n2.high5.nl.https > www2.high5.nl.1828: .
> 6507:7955(1448) ack 527 win 271  2899685730> (DF)
> 15:16:11.195431 www2.high5.nl.1828 > n2.high5.nl.https: . ack 5059 win
> 256  (DF)
> 15:16:11.195447 n2.high5.nl.https > www2.high5.nl.1828: .
> 7955:9403(1448) ack 527 win 271  2899685924> (DF)
> 
> 
> Running a trace on www2 I am seeing:
> 
> www2:~ # tcpdump -i vio0 host 46.23.88.18
> tcpdump: listening on vio0, link-type EN10MB
> 15:16:08.729974 www2.high5.nl.1828 > n2.high5.nl.https: S
> 2182224746:2182224746(0) win 16384  6,nop,nop,timestamp 2899683458 0> (DF)
> 15:16:08.731114 arp who-has www2.high5.nl tell n2.high5.nl
> 15:16:08.731229 arp reply www2.high5.nl is-at fe:51:bb:1e:12:11
> 15:16:08.731631 n2.high5.nl.https > 

snmpd_metrics: differentiate between hrSWRunName and hrSWRunPath

2023-10-18 Thread Martijn van Duren
Right now we return the same value for both hrSWRunName and hrSWRunPath.
hrSWRunPath should return the full path of the binary, and hrSWRunName
a description of the running software.

Afaik there's no proper way to retrieve the full path of the running
binary, However, in a lot of cases argv[0] can contain the full or
relative path. But if argv[0] gets overwritten (like most of our
daemons' children) it gives a more descriptive name, which is more in
line with hrSWRunName.

netsnmp's snmpd uses argv[0] for hrSWRunPath and kinfo_proc's p_comm for
hrSWRunName, and snmptop defaults to hrSWRunName. top(1) also defaults
to p_comm, but contrary to top(1), snmptop allows us to switch between
hrSWRunName, and hrSWRunPath and toggling of hrSWRunParameters
independently, where top(1) toggles between p_comm and argv[].

So there's an argument to be made either way, but for this diff I stuck
with netsnmp's choices.

While here, change the buffer length from 128 to 129. HOST-RESOURCES-MIB
allows up to 128 characters in the response, so make room for the
terminating NUL.

Thoughts? OK?

martijn@

Index: mib.c
===
RCS file: /cvs/src/libexec/snmpd/snmpd_metrics/mib.c,v
retrieving revision 1.4
diff -u -p -r1.4 mib.c
--- mib.c   4 Jul 2023 11:34:19 -   1.4
+++ mib.c   18 Oct 2023 11:47:00 -
@@ -103,7 +103,9 @@ int  kinfo_proc_comp(const void *, const
 int kinfo_proc(u_int32_t, struct kinfo_proc **);
 voidkinfo_timer_cb(int, short, void *);
 voidkinfo_proc_free(void);
-int kinfo_args(struct kinfo_proc *, char **);
+int kinfo_args(struct kinfo_proc *, char ***);
+int kinfo_path(struct kinfo_proc *, char **);
+int kinfo_parameters(struct kinfo_proc *, char **);
 
 /* IF-MIB */
 struct agentx_index *ifIdx;
@@ -669,13 +671,21 @@ mib_hrswrun(struct agentx_varbind *vb)
 
if (obj == hrSWRunIndex)
agentx_varbind_integer(vb, kinfo->p_pid);
-   else if (obj == hrSWRunName || obj == hrSWRunPath)
+   else if (obj == hrSWRunName)
agentx_varbind_string(vb, kinfo->p_comm);
-   else if (obj == hrSWRunID)
+   else if (obj == hrSWRunPath) {
+   if (kinfo_path(kinfo, ) == -1) {
+   log_warn("kinfo_path");
+   agentx_varbind_error(vb);
+   return;
+   }
+   
+   agentx_varbind_string(vb, s);
+   } else if (obj == hrSWRunID)
agentx_varbind_oid(vb, AGENTX_OID(0, 0));
else if (obj == hrSWRunParameters) {
-   if (kinfo_args(kinfo, ) == -1) {
-   log_warn("kinfo_args");
+   if (kinfo_parameters(kinfo, ) == -1) {
+   log_warn("kinfo_parameters");
agentx_varbind_error(vb);
return;
}
@@ -811,25 +821,22 @@ kinfo_proc_free(void)
 }
 
 int
-kinfo_args(struct kinfo_proc *kinfo, char **s)
+kinfo_args(struct kinfo_proc *kinfo, char ***s)
 {
-   static char  str[128];
static char *buf = NULL;
static size_tbuflen = 128;
 
int  mib[] = { CTL_KERN, KERN_PROC_ARGS,
kinfo->p_pid, KERN_PROC_ARGV };
-   char*nbuf, **argv;
+   char*nbuf;
 
+   *s = NULL;
if (buf == NULL) {
buf = malloc(buflen);
if (buf == NULL)
return (-1);
}
 
-   str[0] = '\0';
-   *s = str;
-
while (sysctl(mib, nitems(mib), buf, , NULL, 0) == -1) {
if (errno != ENOMEM) {
/* some errors are expected, dont get too upset */
@@ -844,11 +851,41 @@ kinfo_args(struct kinfo_proc *kinfo, cha
buflen += 128;
}
 
-   argv = (char **)buf;
-   if (argv[0] == NULL)
-   return (0);
+   *s = (char **)buf;
+   return (0);
+}
+
+int
+kinfo_path(struct kinfo_proc *kinfo, char **s)
+{
+   static char  str[129];
+   char**argv;
+
+   if (kinfo_args(kinfo, ) == -1)
+   return (-1);
 
+   str[0] = '\0';
+   *s = str;
+   if (argv != NULL && argv[0] != NULL)
+   strlcpy(str, argv[0], sizeof(str));
+   return (0);
+}
+
+int
+kinfo_parameters(struct kinfo_proc *kinfo, char **s)
+{
+   static char  str[129];
+   char**argv;
+
+   if (kinfo_args(kinfo, ) == -1)
+   return (-1);
+
+   str[0] = '\0';
+   *s = str;
+   if (argv == NULL || argv[0] == NULL)
+   return (0);
argv++;
+
while (*argv != NULL) {
strlcat(str, *argv, sizeof(str));
argv++;



Re: log.c use buffered IO

2023-10-18 Thread Claudio Jeker
On Tue, Oct 17, 2023 at 10:06:54AM +0200, Sebastian Benoit wrote:
> Theo Buehler(t...@theobuehler.org) on 2023.10.17 09:13:15 +0200:
> > On Mon, Oct 16, 2023 at 12:19:17PM +0200, Claudio Jeker wrote:
> > > I dislike how log.c does all these asprintf() calls with dubious
> > > workaround calls in case asprintf() fails.
> > 
> > You're not alone.
> > 
> > > IMO it is easier to use the stdio provided buffers and fflush() to get
> > > "atomic" writes on stderr. At least from my understanding this is the
> > > reason to go all this lenght to do a single fprintf call.
> > 
> > This makes sense, but I don't know the history here.
> 
> as far as i can remember, it was done so it would still be able to work
> somewhat when out of memeory.
>  

After some input off-list here another idea.
Require that logit() is called with a \n and by that simplify a lot of
code around it. vsyslog() handles both so having the \n should not cause
any breakage.

Now logit() is mostly used internally but in bgpd it is also used in
logmsg.c and parse.y. Fixing those is simple.

Also this uses a stack buffer for all the log_* cases now. This should
make the code more thread safe.

Also this removes vlog() from the API.  I had a quick look at all the
other log.c users and apart from ldapd this can be added to all of them
without much issues. Nothing else uses vlog() and the logit() is also very
minimal (mostly the same parse.y change as below).

-- 
:wq Claudio

Index: log.c
===
RCS file: /cvs/src/usr.sbin/bgpd/log.c,v
retrieving revision 1.64
diff -u -p -r1.64 log.c
--- log.c   21 Mar 2017 12:06:55 -  1.64
+++ log.c   18 Oct 2023 07:10:32 -
@@ -26,6 +26,8 @@
 
 #include "log.h"
 
+#define MAX_LOGLEN 4096
+
 static int  debug;
 static int  verbose;
 static const char  *log_procname;
@@ -68,30 +70,15 @@ void
 logit(int pri, const char *fmt, ...)
 {
va_list ap;
+   int saved_errno = errno;
 
va_start(ap, fmt);
-   vlog(pri, fmt, ap);
-   va_end(ap);
-}
-
-void
-vlog(int pri, const char *fmt, va_list ap)
-{
-   char*nfmt;
-   int  saved_errno = errno;
-
if (debug) {
-   /* best effort in out of mem situations */
-   if (asprintf(, "%s\n", fmt) == -1) {
-   vfprintf(stderr, fmt, ap);
-   fprintf(stderr, "\n");
-   } else {
-   vfprintf(stderr, nfmt, ap);
-   free(nfmt);
-   }
+   vfprintf(stderr, fmt, ap);
fflush(stderr);
} else
vsyslog(pri, fmt, ap);
+   va_end(ap);
 
errno = saved_errno;
 }
@@ -99,26 +86,18 @@ vlog(int pri, const char *fmt, va_list a
 void
 log_warn(const char *emsg, ...)
 {
-   char*nfmt;
-   va_list  ap;
-   int  saved_errno = errno;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
/* best effort to even work in out of memory situations */
if (emsg == NULL)
-   logit(LOG_ERR, "%s", strerror(saved_errno));
+   logit(LOG_ERR, "%s\n", strerror(saved_errno));
else {
va_start(ap, emsg);
-
-   if (asprintf(, "%s: %s", emsg,
-   strerror(saved_errno)) == -1) {
-   /* we tried it... */
-   vlog(LOG_ERR, emsg, ap);
-   logit(LOG_ERR, "%s", strerror(saved_errno));
-   } else {
-   vlog(LOG_ERR, nfmt, ap);
-   free(nfmt);
-   }
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_ERR, "%s: %s\n", fmtbuf, strerror(saved_errno));
}
 
errno = saved_errno;
@@ -127,53 +106,65 @@ log_warn(const char *emsg, ...)
 void
 log_warnx(const char *emsg, ...)
 {
-   va_list  ap;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
va_start(ap, emsg);
-   vlog(LOG_ERR, emsg, ap);
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_ERR, "%s\n", fmtbuf);
+
+   errno = saved_errno;
 }
 
 void
 log_info(const char *emsg, ...)
 {
-   va_list  ap;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
va_start(ap, emsg);
-   vlog(LOG_INFO, emsg, ap);
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_INFO, "%s\n", fmtbuf);
+
+   errno = saved_errno;
 }
 
 void
 log_debug(const char *emsg, ...)
 {
-   va_list  ap;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
 
if (verbose) {
va_start(ap, emsg);
-   vlog(LOG_DEBUG, emsg, ap);
+   

dwge(4): don't panic on truncated input packet

2023-10-18 Thread Miod Vallat
I had the misfortune of hitting a KASSERT in dwge:

panic: kernel diagnostic assertion "len > 0" failed: file "/usr/src/sys/dev/fdt
/if_dwge.c", line 1102
Stopped at  panic+0x106:addia0,zero,256TIDPIDUID PR
FLAGS PFLAGS  CPU  COMMAND
*405136  98879   1500 0x3  00  git
 301471  67731  0 0x14000  0x2001  softnet0
panic() at panic+0x106
panic() at panic
dwge_rx_proc() at dwge_rx_proc+0x1d4
dwge_intr() at dwge_intr+0x4e
plic_irq_dispatch() at plic_irq_dispatch+0xec
plic_irq_handler() at plic_irq_handler+0x56
riscv_cpu_intr() at riscv_cpu_intr+0x22
cpu_exception_handler_supervisor() at cpu_exception_handler_supervisor+0x7a
pmap_copy_page() at pmap_copy_page+0x94
uvm_pagerealloc_multi() at uvm_pagerealloc_multi+0x24e
buf_realloc_pages() at buf_realloc_pages+0xa0
buf_flip_high() at buf_flip_high+0x64
bufcache_recover_dmapages() at bufcache_recover_dmapages+0x13a
buf_get() at buf_get+0xec
end trace frame: 0xffc04d9ac870, count: 0

The following diff should count the error and skirt the panic.

Index: if_dwge.c
===
RCS file: /OpenBSD/src/sys/dev/fdt/if_dwge.c,v
retrieving revision 1.19
diff -u -p -r1.19 if_dwge.c
--- if_dwge.c   15 Aug 2023 08:27:30 -  1.19
+++ if_dwge.c   18 Oct 2023 16:37:59 -
@@ -1099,13 +1101,15 @@ dwge_rx_proc(struct dwge_softc *sc)
 
/* Strip off CRC. */
len -= ETHER_CRC_LEN;
-   KASSERT(len > 0);
-
-   m = rxb->tb_m;
-   rxb->tb_m = NULL;
-   m->m_pkthdr.len = m->m_len = len;
+   if (len <= 0) {
+   ifp->if_ierrors++;
+   } else {
+   m = rxb->tb_m;
+   rxb->tb_m = NULL;
+   m->m_pkthdr.len = m->m_len = len;
 
-   ml_enqueue(, m);
+   ml_enqueue(, m);
+   }
 
put++;
if (sc->sc_rx_cons == (DWGE_NRXDESC - 1))



Re: better fix for Term::Cap

2023-10-18 Thread Andrew Hewus Fresh
On Wed, Oct 18, 2023 at 10:25:59AM +0200, Marc Espie wrote:
> Instead of an archaic limit, just keep track of tc's.
> This is actually slightly faster, because the termcap links is a tree,
> not a list.

I like it, but I don't want to carry this patch against upstream.  I
will happily pull in this change from there as soon as they merge it
though.

Changing the 32 to 64 is an easy patch to keep up on and to solve merge
conflicts with when upgrading.  This larger change is much less so.

That said, there is so much in this file that could be improved, why
stop here?

> 
> Please test.
> 
> Index: Cap.pm
> ===
> RCS file: /cvs/src/gnu/usr.bin/perl/cpan/Term-Cap/Cap.pm,v
> retrieving revision 1.3
> diff -u -p -r1.3 Cap.pm
> --- Cap.pm18 Oct 2023 01:49:26 -  1.3
> +++ Cap.pm18 Oct 2023 08:24:54 -
> @@ -280,7 +280,7 @@ sub Tgetent
>  
>  $first = 0;# first entry (keeps term name)
>  
> -$max = 64; # max :tc=...:'s
> +my $seen = {};   # keep track of :tc=...:s
>  
>  if ($entry)
>  {
> @@ -291,6 +291,7 @@ sub Tgetent
>  if ( $entry =~ s/:tc=([^:]+):/:/ )
>  {
>  $tmp_term = $1;
> + $seen->{$tmp_term} = 1;
>  
>  # protect any pattern metacharacters in $tmp_term
>  $termpat = $tmp_term;
> @@ -332,10 +333,7 @@ sub Tgetent
>  }
>  else
>  {
> -
>  # do the same file again
> -# prevent endless recursion
> -$max-- || croak "failed termcap loop at $tmp_term";
>  $state = 1;# ok, maybe do a new file next time
>  }
>  
> @@ -345,11 +343,20 @@ sub Tgetent
>  close TERMCAP;
>  
>  # If :tc=...: found then search this file again
> -$entry =~ s/:tc=([^:]+):/:/ && ( $tmp_term = $1, $state = 2 );
> +while ($entry =~ s/:tc=([^:]+):/:/) {
> + $tmp_term = $1;
> + if ($seen->{$tmp_term}) {
> + # XXX first version of this croaked, but we can actually
> + # get several intermediate entries with the same tc !
> + next;
> + }
> + $seen->{$tmp_term} = 1;
> + $state = 2;
>  
> -# protect any pattern metacharacters in $tmp_term
> -$termpat = $tmp_term;
> -$termpat =~ s/(\W)/\\$1/g;
> + # protect any pattern metacharacters in $tmp_term
> + $termpat = $tmp_term;
> + $termpat =~ s/(\W)/\\$1/g;
> + }
>  }
>  
>  croak "Can't find $term" if $entry eq '';
> 

-- 
andrew

"Programming today is a race between software engineers striving to
build bigger and better idiot-proof programs, and the Universe
trying to produce bigger and better idiots. So far, the Universe is
winning." -- Rich Cook



Re: TSO for ixl(4)

2023-10-18 Thread Alexander Bluhm
On Wed, Oct 18, 2023 at 05:29:41PM +0200, Jan Klemkow wrote:
> This diff implements TCP Segmentation Offloading for ixl(4).  I tested
> it successfully on amd64 and sparc64 with Intel X710.  It should
> increase the TCP bulk performance to 10 Gbit/s.  On sparc64 I got an
> increase from 600 MBit/s to 2.000 Gbit/s.
> 
> Further testing is welcome.

tested on amd64
OK bluhm@

> Index: dev/pci/if_ixl.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
> retrieving revision 1.89
> diff -u -p -r1.89 if_ixl.c
> --- dev/pci/if_ixl.c  29 Sep 2023 19:44:47 -  1.89
> +++ dev/pci/if_ixl.c  18 Oct 2023 15:15:30 -
> @@ -71,6 +71,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #if NBPFILTER > 0
> @@ -85,6 +86,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  #include 
>  #include 
>  
> @@ -827,6 +830,10 @@ struct ixl_tx_desc {
>  #define IXL_TX_DESC_BSIZE_MASK   \
>   (IXL_TX_DESC_BSIZE_MAX << IXL_TX_DESC_BSIZE_SHIFT)
>  
> +#define IXL_TX_CTX_DESC_CMD_TSO  0x10
> +#define IXL_TX_CTX_DESC_TLEN_SHIFT   30
> +#define IXL_TX_CTX_DESC_MSS_SHIFT50
> +
>  #define IXL_TX_DESC_L2TAG1_SHIFT 48
>  } __packed __aligned(16);
>  
> @@ -893,11 +900,19 @@ struct ixl_rx_wb_desc_32 {
>   uint64_tqword3;
>  } __packed __aligned(16);
>  
> -#define IXL_TX_PKT_DESCS 8
> +#define IXL_TX_PKT_DESCS 32
>  #define IXL_TX_QUEUE_ALIGN   128
>  #define IXL_RX_QUEUE_ALIGN   128
>  
>  #define IXL_HARDMTU  9712 /* 9726 - ETHER_HDR_LEN */
> +#define IXL_TSO_SIZE ((255 * 1024) - 1)
> +#define IXL_MAX_DMA_SEG_SIZE ((16 * 1024) - 1)
> +
> +/*
> + * Our TCP/IP Stack could not handle packets greater than MAXMCLBYTES.
> + * This interface could not handle packets greater than IXL_TSO_SIZE.
> + */
> +CTASSERT(MAXMCLBYTES < IXL_TSO_SIZE);
>  
>  #define IXL_PCIREG   PCI_MAPREG_START
>  
> @@ -1958,6 +1973,7 @@ ixl_attach(struct device *parent, struct
>   ifp->if_capabilities |= IFCAP_CSUM_IPv4 |
>   IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
>   IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
>  
>   ifmedia_init(>sc_media, 0, ixl_media_change, ixl_media_status);
>  
> @@ -2603,7 +2619,7 @@ ixl_txr_alloc(struct ixl_softc *sc, unsi
>   txm = [i];
>  
>   if (bus_dmamap_create(sc->sc_dmat,
> - IXL_HARDMTU, IXL_TX_PKT_DESCS, IXL_HARDMTU, 0,
> + MAXMCLBYTES, IXL_TX_PKT_DESCS, IXL_MAX_DMA_SEG_SIZE, 0,
>   BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
>   >txm_map) != 0)
>   goto uncreate;
> @@ -2787,7 +2803,8 @@ ixl_load_mbuf(bus_dma_tag_t dmat, bus_dm
>  }
>  
>  static uint64_t
> -ixl_tx_setup_offload(struct mbuf *m0)
> +ixl_tx_setup_offload(struct mbuf *m0, struct ixl_tx_ring *txr,
> +unsigned int prod)
>  {
>   struct ether_extracted ext;
>   uint64_t hlen;
> @@ -2800,7 +2817,7 @@ ixl_tx_setup_offload(struct mbuf *m0)
>   }
>  
>   if (!ISSET(m0->m_pkthdr.csum_flags,
> - M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT))
> + M_IPV4_CSUM_OUT|M_TCP_CSUM_OUT|M_UDP_CSUM_OUT|M_TCP_TSO))
>   return (offload);
>  
>   ether_extract_headers(m0, );
> @@ -2833,6 +2850,28 @@ ixl_tx_setup_offload(struct mbuf *m0)
>   offload |= (sizeof(*ext.udp) >> 2) << IXL_TX_DESC_L4LEN_SHIFT;
>   }
>  
> + if (ISSET(m0->m_pkthdr.csum_flags, M_TCP_TSO)) {
> + if (ext.tcp) {
> + struct ixl_tx_desc *ring, *txd;
> + uint64_t cmd = 0;
> +
> + hlen += ext.tcp->th_off << 2;
> + ring = IXL_DMA_KVA(>txr_mem);
> + txd = [prod];
> +
> + cmd |= IXL_TX_DESC_DTYPE_CONTEXT;
> + cmd |= IXL_TX_CTX_DESC_CMD_TSO;
> + cmd |= (uint64_t)(m0->m_pkthdr.len - ETHER_HDR_LEN
> + - hlen) << IXL_TX_CTX_DESC_TLEN_SHIFT;
> + cmd |= (uint64_t)(m0->m_pkthdr.ph_mss)
> + << IXL_TX_CTX_DESC_MSS_SHIFT;
> +
> + htolem64(>addr, 0);
> + htolem64(>cmd, cmd);
> + } else
> + tcpstat_inc(tcps_outbadtso);
> + }
> +
>   return (offload);
>  }
>  
> @@ -2873,7 +2912,8 @@ ixl_start(struct ifqueue *ifq)
>   mask = sc->sc_tx_ring_ndescs - 1;
>  
>   for (;;) {
> - if (free <= IXL_TX_PKT_DESCS) {
> + /* We need one extra descriptor for TSO packets. */
> + if (free <= (IXL_TX_PKT_DESCS + 1)) {
>   ifq_set_oactive(ifq);
>   break;
>   }
> @@ -2882,10 +2922,16 @@ ixl_start(struct ifqueue *ifq)
>   

bt(5), btrace(8): execute END probe and print maps after exit() statement

2023-10-18 Thread Scott Cheloha
Hi,

A bt(5) exit() statement causes the btrace(8) interpreter to exit(3)
immediately.

A BPFtrace exit() statement is more nuanced: the END probe is executed
and the contents of all maps are printed before the interpreter exits.

This patch adds a halting check after the execution of each bt(5)
statement.  If a statement causes the program to halt, the halt
bubbles up to the top-level rule evaluation loop and terminates
execution.  rules_teardown() then runs, just as if the program had
received SIGTERM.

Two edge-like cases:

1. You can exit() from BEGIN.  rules_setup() returns non-zero if this
   happens so the main loop knows to halt immediately.

2. You can exit() from END.  This is just an early-return: the END probe
   doesn't run again.

Thoughts?

$ btrace -e '
BEGIN {
@[probe] = "reached";
exit();
@[probe] = "not reached";
}
END {
@[probe] = "reached";
exit();
@[probe] = "not reached";
}'

Index: btrace.c
===
RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v
retrieving revision 1.79
diff -u -p -r1.79 btrace.c
--- btrace.c12 Oct 2023 15:16:44 -  1.79
+++ btrace.c18 Oct 2023 17:54:16 -
@@ -71,10 +71,10 @@ struct dtioc_probe_info *dtpi_get_by_val
  * Main loop and rule evaluation.
  */
 voidrules_do(int);
-voidrules_setup(int);
-voidrules_apply(int, struct dt_evt *);
+int rules_setup(int);
+int rules_apply(int, struct dt_evt *);
 voidrules_teardown(int);
-voidrule_eval(struct bt_rule *, struct dt_evt *);
+int rule_eval(struct bt_rule *, struct dt_evt *);
 voidrule_printmaps(struct bt_rule *);
 
 /*
@@ -84,7 +84,7 @@ uint64_t   builtin_nsecs(struct dt_evt *
 const char *builtin_kstack(struct dt_evt *);
 const char *builtin_arg(struct dt_evt *, enum bt_argtype);
 struct bt_arg  *fn_str(struct bt_arg *, struct dt_evt *, char *);
-voidstmt_eval(struct bt_stmt *, struct dt_evt *);
+int stmt_eval(struct bt_stmt *, struct dt_evt *);
 voidstmt_bucketize(struct bt_stmt *, struct dt_evt *);
 voidstmt_clear(struct bt_stmt *);
 voidstmt_delete(struct bt_stmt *, struct dt_evt *);
@@ -405,6 +405,7 @@ void
 rules_do(int fd)
 {
struct sigaction sa;
+   int halt = 0;
 
memset(, 0, sizeof(sa));
sigemptyset(_mask);
@@ -415,9 +416,9 @@ rules_do(int fd)
if (sigaction(SIGTERM, , NULL))
err(1, "sigaction");
 
-   rules_setup(fd);
+   halt = rules_setup(fd);
 
-   while (!quit_pending && g_nprobes > 0) {
+   while (!quit_pending && !halt && g_nprobes > 0) {
static struct dt_evt devtbuf[64];
ssize_t rlen;
size_t i;
@@ -434,8 +435,11 @@ rules_do(int fd)
if ((rlen % sizeof(struct dt_evt)) != 0)
err(1, "incorrect read");
 
-   for (i = 0; i < rlen / sizeof(struct dt_evt); i++)
-   rules_apply(fd, [i]);
+   for (i = 0; i < rlen / sizeof(struct dt_evt); i++) {
+   halt = rules_apply(fd, [i]);
+   if (halt)
+   break;
+   }
}
 
rules_teardown(fd);
@@ -484,7 +488,7 @@ rules_action_scan(struct bt_stmt *bs)
return evtflags;
 }
 
-void
+int
 rules_setup(int fd)
 {
struct dtioc_probe_info *dtpi;
@@ -493,7 +497,7 @@ rules_setup(int fd)
struct bt_probe *bp;
struct bt_stmt *bs;
struct bt_arg *ba;
-   int dokstack = 0, on = 1;
+   int dokstack = 0, halt = 0, on = 1;
uint64_t evtflags;
 
TAILQ_FOREACH(r, _rules, br_next) {
@@ -553,7 +557,7 @@ rules_setup(int fd)
clock_gettime(CLOCK_REALTIME, _devt.dtev_tsp);
 
if (rbegin)
-   rule_eval(rbegin, _devt);
+   halt = rule_eval(rbegin, _devt);
 
/* Enable all probes */
TAILQ_FOREACH(r, _rules, br_next) {
@@ -571,9 +575,14 @@ rules_setup(int fd)
if (ioctl(fd, DTIOCRECORD, ))
err(1, "DTIOCRECORD");
}
+
+   return halt;
 }
 
-void
+/*
+ * Returns non-zero if the program should halt.
+ */
+int
 rules_apply(int fd, struct dt_evt *dtev)
 {
struct bt_rule *r;
@@ -586,9 +595,11 @@ rules_apply(int fd, struct dt_evt *dtev)
continue;
 
dtai_cache(fd, _dtpis[dtev->dtev_pbn - 1]);
-   rule_eval(r, dtev);
+   if (rule_eval(r, dtev))
+   return 1;
}
}
+   return 0;
 }
 
 void
@@ -637,7 +648,10 @@ rules_teardown(int fd)