Re: vmm(4): use monotonic base for pvclock

2021-06-01 Thread Mike Larkin
On Tue, Jun 01, 2021 at 08:03:43PM -0500, Scott Cheloha wrote:
> The documentation for the Linux pvclock is pretty sparse but I am
> pretty sure we want to use a monotonic base for ti_system_time.  We
> also have a function for converting a timespec into a 64-bit count of
> nanoseconds we can use.
>
> We may as well also use rdtsc_lfence() to ensure consistent behavior.
>
> ... this is still not quite right because the VM expects the pvclock
> to have a fixed frequency, but we have no interface to reading a raw
> timestamp.  Something to add in the future, maybe.
>
> Index: vmm.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.284
> diff -u -p -r1.284 vmm.c
> --- vmm.c 18 May 2021 00:05:20 -  1.284
> +++ vmm.c 2 Jun 2021 00:57:31 -
> @@ -7294,8 +7294,8 @@ vmm_init_pvclock(struct vcpu *vcpu, padd
>  int
>  vmm_update_pvclock(struct vcpu *vcpu)
>  {
> + struct timespec now;
>   struct pvclock_time_info *pvclock_ti;
> - struct timespec tv;
>   struct vm *vm = vcpu->vc_parent;
>   paddr_t pvclock_hpa, pvclock_gpa;
>
> @@ -7309,10 +7309,9 @@ vmm_update_pvclock(struct vcpu *vcpu)
>   pvclock_ti->ti_version =
>   (++vcpu->vc_pvclock_version << 1) | 0x1;
>
> - pvclock_ti->ti_tsc_timestamp = rdtsc();
> - nanotime();
> - pvclock_ti->ti_system_time =
> - tv.tv_sec * 10L + tv.tv_nsec;
> + pvclock_ti->ti_tsc_timestamp = rdtsc_lfence();
> + nanouptime();
> + pvclock_ti->ti_system_time = TIMESPEC_TO_NSEC();
>   pvclock_ti->ti_tsc_shift = 12;
>   pvclock_ti->ti_tsc_to_system_mul =
>   vcpu->vc_pvclock_system_tsc_mul;
>

This probably needs to be tested on a wide variety (and versions) of Linux
guests. I've found in the past that different kernel versions do different
things and behave differently.

Did you test a few Linux guest VMs? Did this work across all of them?

-ml



let ipv4_check remember if the ip checksum was good

2021-06-01 Thread David Gwynne
if a bridge checked the ipv4 checksum and it was good, we can avoid
checking it again in ip_input.

ok?

Index: ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.361
diff -u -p -r1.361 ip_input.c
--- ip_input.c  2 Jun 2021 00:09:57 -   1.361
+++ ip_input.c  2 Jun 2021 01:07:35 -
@@ -287,8 +287,8 @@ ipv4_check(struct ifnet *ifp, struct mbu
}
}
 
-   if ((m->m_pkthdr.csum_flags & M_IPV4_CSUM_IN_OK) == 0) {
-   if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_IN_BAD) {
+   if (!ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK)) {
+   if (ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_BAD)) {
ipstat_inc(ips_badsum);
goto bad;
}
@@ -298,6 +298,8 @@ ipv4_check(struct ifnet *ifp, struct mbu
ipstat_inc(ips_badsum);
goto bad;
}
+
+   SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK);
}
 
/* Retrieve the packet length. */



vmm(4): use monotonic base for pvclock

2021-06-01 Thread Scott Cheloha
The documentation for the Linux pvclock is pretty sparse but I am
pretty sure we want to use a monotonic base for ti_system_time.  We
also have a function for converting a timespec into a 64-bit count of
nanoseconds we can use.

We may as well also use rdtsc_lfence() to ensure consistent behavior.

... this is still not quite right because the VM expects the pvclock
to have a fixed frequency, but we have no interface to reading a raw
timestamp.  Something to add in the future, maybe.

Index: vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.284
diff -u -p -r1.284 vmm.c
--- vmm.c   18 May 2021 00:05:20 -  1.284
+++ vmm.c   2 Jun 2021 00:57:31 -
@@ -7294,8 +7294,8 @@ vmm_init_pvclock(struct vcpu *vcpu, padd
 int
 vmm_update_pvclock(struct vcpu *vcpu)
 {
+   struct timespec now;
struct pvclock_time_info *pvclock_ti;
-   struct timespec tv;
struct vm *vm = vcpu->vc_parent;
paddr_t pvclock_hpa, pvclock_gpa;
 
@@ -7309,10 +7309,9 @@ vmm_update_pvclock(struct vcpu *vcpu)
pvclock_ti->ti_version =
(++vcpu->vc_pvclock_version << 1) | 0x1;
 
-   pvclock_ti->ti_tsc_timestamp = rdtsc();
-   nanotime();
-   pvclock_ti->ti_system_time =
-   tv.tv_sec * 10L + tv.tv_nsec;
+   pvclock_ti->ti_tsc_timestamp = rdtsc_lfence();
+   nanouptime();
+   pvclock_ti->ti_system_time = TIMESPEC_TO_NSEC();
pvclock_ti->ti_tsc_shift = 12;
pvclock_ti->ti_tsc_to_system_mul =
vcpu->vc_pvclock_system_tsc_mul;



Re: vmd: Fix grammar for random lladdr

2021-06-01 Thread Dave Voutila


Martin Vahlensieck writes:

> Hi
>
> The grammar for lladdr of interfaces is according to the manpage:
>
>   [locked] lladdr [etheraddr]
>
> This implies that `locked lladdr' is OK but looking at parse.y this
> does not seem to be the case.  Making it optional would lead to a
> `lladdr' all by itself being valid, which I find weird.  So I copied
> the way ifconfig does it and now the syntax is:
>
>   [locked] lladdr etheraddr|random
>
> so to have a random locked lladdr one would have to write
>
>   locked lladdr random
>
> Is this a good approach?

Part of me thinks just specifying:

  locked lladdr

should give you a random address but enable the source mac
filtering. Having to specify "random" seems odd to me. Thoughts?

I see what you're saying with how ifconfig(8) does it, but it's a bit
different in this context as there's the "locked" modifier, so it's not
exactly the same.

I'm not sure about the man page changes regardless. Will need another
set of eyes on the syntax.

>
> Best,
>
> Martin
>
> Index: parse.y
> ===
> RCS file: /cvs/src/usr.sbin/vmd/parse.y,v
> retrieving revision 1.56
> diff -u -p -r1.56 parse.y
> --- parse.y   23 Sep 2020 19:18:18 -  1.56
> +++ parse.y   22 May 2021 07:55:18 -
> @@ -685,14 +685,16 @@ string  : STRING string {
>  lladdr   : STRING{
>   struct ether_addr *ea;
>
> - if ((ea = ether_aton($1)) == NULL) {
> + if (strcmp($1, "random") == 0) {
> + memset($$, 0, ETHER_ADDR_LEN);
> + } else if ((ea = ether_aton($1)) != NULL) {
> + memcpy($$, ea, ETHER_ADDR_LEN);
> + } else {
>   yyerror("invalid address: %s\n", $1);
>   free($1);
>   YYERROR;
>   }
>   free($1);
> -
> - memcpy($$, ea, ETHER_ADDR_LEN);
>   }
>   ;
>
> Index: vm.conf.5
> ===
> RCS file: /cvs/src/usr.sbin/vmd/vm.conf.5,v
> retrieving revision 1.56
> diff -u -p -r1.56 vm.conf.5
> --- vm.conf.5 1 Mar 2021 14:27:44 -   1.56
> +++ vm.conf.5 22 May 2021 07:55:18 -
> @@ -237,10 +237,12 @@ The
>  must not be longer than 15 characters or end with a digit,
>  as described in
>  .Xr ifconfig 8 .
> -.It Oo Cm locked Oc Cm lladdr Op Ar etheraddr
> +.It Oo Cm locked Oc Cm lladdr Ar etheraddr Ns | Ns Cm random
>  Change the link layer address (MAC address) of the interface on the
>  VM guest side.
> -If not specified, a randomized address will be assigned by
> +If
> +.Cm random
> +is specified, a randomized address will be assigned by
>  .Xr vmd 8 .
>  If the
>  .Cm locked



Re: Enable pool cache on knote_pool

2021-06-01 Thread Visa Hankala
On Tue, Jun 01, 2021 at 08:23:24AM +1000, David Gwynne wrote:
> 
> > On 1 Jun 2021, at 02:58, Visa Hankala  wrote:
> > 
> > This patch enables the pool cache feature on the knote pool to reduce
> > the overhead of knote management.
> > 
> > Profiling done by mpi@ and bluhm@ indicate that the potentially needless
> > allocation of knotes in kqueue_register() causes slowdown with
> > kqueue-based poll(2) and select(2).
> > 
> > One approach to fix this is to reverse the function's initial guess
> > about knote: Try without allocation first. Then allocate and retry if
> > the knote is missing from the kqueue and EV_ADD is given.
> > 
> > Another option is to cache free knotes so that the shared knote pool
> > would be accessed less frequently.
> > 
> > The following diff takes the second approach. The caching is implemented
> > simply by enabling the pool cache feature. This makes use of existing
> > code and does not complicate kqueue_register(). The feature also helps
> > if there is heavy knote churn.
> > 
> > I think the most substantial part of the diff is that it extends pool
> > cache usage beyond mbufs. Is this change acceptable?
> 
> absolutely.
> 
> > Note the cache is not particularly useful without kqueue-based poll(2)
> > and select(2). The pool view of systat(1) shows that there are pools
> > that would benefit more than knote_pool from caching, at least in terms
> > of request frequencies. The relative frequencies are dependent on system
> > workload, though. Kqpoll would definitely make knote pool more heavily
> > used.
> 
> ok.
> 
> separate to this diff, at some point maybe we should have a task list/dohook 
> thing for "per cpu init" like mountroot or startup?

At first I fumbled with a deferring version of pool_cache_init() but the
idea felt overly specific. However, a more general dohook might indeed
make sense.

It is a bit unfortunate that the deferring is needed at all, though.
The number of different boot-time deferral options gets larger as well.

The diff below omits a manual page.

Index: kern/init_main.c
===
RCS file: src/sys/kern/init_main.c,v
retrieving revision 1.306
diff -u -p -r1.306 init_main.c
--- kern/init_main.c8 Feb 2021 10:51:01 -   1.306
+++ kern/init_main.c1 Jun 2021 15:29:05 -
@@ -432,6 +432,9 @@ main(void *framep)
prof_init();
 #endif
 
+   /* Run hooks that require the presence of all CPUs. */
+   dopercpuhooks();
+
mbcpuinit();/* enable per cpu mbuf data */
uvm_init_percpu();
 
Index: kern/kern_subr.c
===
RCS file: src/sys/kern/kern_subr.c,v
retrieving revision 1.50
diff -u -p -r1.50 kern_subr.c
--- kern/kern_subr.c29 Apr 2018 17:26:31 -  1.50
+++ kern/kern_subr.c1 Jun 2021 15:29:05 -
@@ -198,6 +198,8 @@ hashfree(void *hash, int elements, int t
  * "startup hook" types, functions, and variables.
  */
 
+struct hook_desc_head percpuhook_list =
+TAILQ_HEAD_INITIALIZER(percpuhook_list);
 struct hook_desc_head startuphook_list =
 TAILQ_HEAD_INITIALIZER(startuphook_list);
 
Index: sys/systm.h
===
RCS file: src/sys/sys/systm.h,v
retrieving revision 1.153
diff -u -p -r1.153 systm.h
--- sys/systm.h 28 Apr 2021 09:42:04 -  1.153
+++ sys/systm.h 1 Jun 2021 15:29:05 -
@@ -283,6 +283,9 @@ voidwdog_register(int (*)(void *, int),
 void   wdog_shutdown(void *);
 
 /*
+ * Per-CPU hooks are functions that are run after all CPUs have been detected
+ * but before the scheduler has started.
+ *
  * Startup hooks are functions running after the scheduler has started
  * but before any threads have been created or root has been mounted.
  */
@@ -294,6 +297,7 @@ struct hook_desc {
 };
 TAILQ_HEAD(hook_desc_head, hook_desc);
 
+extern struct hook_desc_head percpuhook_list;
 extern struct hook_desc_head startuphook_list;
 
 void   *hook_establish(struct hook_desc_head *, int, void (*)(void *), void *);
@@ -303,6 +307,12 @@ void   dohooks(struct hook_desc_head *, in
 #define HOOK_REMOVE0x01
 #define HOOK_FREE  0x02
 
+#define percpuhook_establish(fn, arg) \
+   hook_establish(_list, 1, (fn), (arg))
+#define percpuhook_disestablish(vhook) \
+   hook_disestablish(_list, (vhook))
+#define dopercpuhooks() dohooks(_list, HOOK_REMOVE|HOOK_FREE)
+
 #define startuphook_establish(fn, arg) \
hook_establish(_list, 1, (fn), (arg))
 #define startuphook_disestablish(vhook) \



Re: Ryzen 5800X hw.setperf vs hw.cpuspeed

2021-06-01 Thread Otto Moerbeek
On Mon, May 31, 2021 at 10:24:01PM +0200, Josh wrote:

> thanks Otto for the dmesg.
> 
> I'd like to get one B550 mobo as well. Which version of Gigabyte B550
> AORUS ELITE do you have exactly? ATX? mATX ?
> Most of them listed here[1] have either RLT8118 or RLT8125 chipset and
> re(4) doesn't list them...
> 
> Can't find any reference to your model there[1] (RTL8168 chipset) "re0
> at pci5 dev 0 function 0 "Realtek 8168" rev 0x06: RTL8168E/8111E
> (0x2c00), msi, address 64:70:02:01:db:3c
> rgephy0 at re0 phy 7: RTL8169S/8110S/8211 PHY, rev. 4"
> 
> Could it be this one[2]?

Yes, that the mb.

The re(4) is a random pci express card I plugged in initially, the
on-board rge(4) did not work properly: interrupt storm and no WOL,
both issues are fixed now.

I'm not using power management atm.

-Otto
> 
> Cheers
> 
> [1] https://www.gigabyte.com/Motherboard/AORUS-Gaming
> [2] https://www.gigabyte.com/Motherboard/B550-AORUS-ELITE-rev-10/sp#sp
> 
> On Fri, Nov 20, 2020 at 9:28 AM Otto Moerbeek  wrote:
> >
> > Hi,
> >
> > I got a new Ryzen machine, dmesg below. What I'm observing might be a
> > issue with hw.setperf.
> >
> > On startsup it shows:
> >
> > hw.cpuspeed=3800
> > hw.setperf=100
> >
> > If I lower hw.setperf to zero, the new state is reflect immediately in
> > hw.cpuspeed:
> >
> > hw.cpuspeed=2200
> > hw.setperf=0
> >
> > And also sha256 -t becomes slower as expected.
> >
> > But If I raise hw.setperf to 100 I'm seeing:
> >
> > hw.cpuspeed=2200
> > hw.setperf=100
> >
> > and sha256 -t is still slow. Only after some time passes (lets say a
> > couple of tens of seconds) it does show:
> >
> > hw.cpuspeed=3800
> > hw.setperf=100
> >
> > and sha256 -t is fast again.
> >
> > This behaviour is different from my old machine, where setting
> > hs.setperf was reflected in hs.cpuspeed immediately both ways
> >
> > Any clue?
> >
> > -Otto
> >
> > OpenBSD 6.8-current (GENERIC.MP) #1: Thu Nov 19 21:01:06 CET 2020
> > o...@lou.intra.drijf.net:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> > real mem = 34286964736 (32698MB)
> > avail mem = 33232543744 (31693MB)
> > random: good seed from bootblocks
> > mpath0 at root
> > scsibus0 at mpath0: 256 targets
> > mainbus0 at root
> > bios0 at mainbus0: SMBIOS rev. 3.3 @ 0xe8d60 (55 entries)
> > bios0: vendor American Megatrends Inc. version "F11d" date 10/29/2020
> > bios0: Gigabyte Technology Co., Ltd. B550 AORUS ELITE
> > acpi0 at bios0: ACPI 6.0
> > acpi0: sleep states S0 S3 S4 S5
> > acpi0: tables DSDT FACP SSDT SSDT SSDT SSDT FIDT MCFG HPET BGRT IVRS PCCT 
> > SSDT CRAT CDIT SSDT SSDT SSDT SSDT WSMT APIC SSDT SSDT SSDT FPDT
> > acpi0: wakeup devices GPP0(S4) GP12(S4) GP13(S4) XHC0(S4) GP30(S4) GP31(S4) 
> > GPP2(S4) GPP3(S4) GPP8(S4) GPP1(S4)
> > acpitimer0 at acpi0: 3579545 Hz, 32 bits
> > acpimcfg0 at acpi0
> > acpimcfg0: addr 0xf000, bus 0-127
> > acpihpet0 at acpi0: 14318180 Hz
> > acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> > cpu0 at mainbus0: apid 0 (boot processor)
> > cpu0: AMD Ryzen 7 5800X 8-Core Processor, 3793.35 MHz, 19-21-00
> > cpu0: 
> > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,PKU,IBPB,IBRS,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> > cpu0: 32KB 64b/line 8-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 
> > 64b/line 8-way L2 cache, 32MB 64b/line disabled L3 cache
> > cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully 
> > associative
> > cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully 
> > associative
> > cpu0: smt 0, core 0, package 0
> > mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
> > cpu0: apic clock running at 99MHz
> > cpu0: mwait min=64, max=64, C-substates=1.1, IBE
> > cpu1 at mainbus0: apid 2 (application processor)
> > cpu1: AMD Ryzen 7 5800X 8-Core Processor, 3792.89 MHz, 19-21-00
> > cpu1: 
> > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,HTT,SSE3,PCLMUL,MWAIT,SSSE3,FMA3,CX16,SSE4.1,SSE4.2,MOVBE,POPCNT,AES,XSAVE,AVX,F16C,RDRAND,NXE,MMXX,FFXSR,PAGE1GB,RDTSCP,LONG,LAHF,CMPLEG,SVM,EAPICSP,AMCR8,ABM,SSE4A,MASSE,3DNOWP,OSVW,IBS,SKINIT,TCE,TOPEXT,CPCTR,DBKP,PCTRL3,MWAITX,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,PQM,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,SHA,UMIP,PKU,IBPB,IBRS,STIBP,SSBD,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> > cpu1: 32KB 64b/line 8-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 
> > 64b/line 8-way L2 cache, 32MB 64b/line disabled L3 cache
> > cpu1: ITLB 64 4KB entries fully associative, 64 4MB entries fully 
> > associative
> > cpu1: DTLB 64 4KB entries fully associative, 

Re: vmd: Fix grammar for random lladdr

2021-06-01 Thread Martin Vahlensieck
Ping.

On Sat, May 22, 2021 at 09:58:46AM +0200, Martin Vahlensieck wrote:
> Hi
> 
> The grammar for lladdr of interfaces is according to the manpage:
> 
>   [locked] lladdr [etheraddr]
> 
> This implies that `locked lladdr' is OK but looking at parse.y this
> does not seem to be the case.  Making it optional would lead to a
> `lladdr' all by itself being valid, which I find weird.  So I copied
> the way ifconfig does it and now the syntax is:
> 
>   [locked] lladdr etheraddr|random
> 
> so to have a random locked lladdr one would have to write
> 
>   locked lladdr random
> 
> Is this a good approach?
> 
> Best,
> 
> Martin
> 
> Index: parse.y
> ===
> RCS file: /cvs/src/usr.sbin/vmd/parse.y,v
> retrieving revision 1.56
> diff -u -p -r1.56 parse.y
> --- parse.y   23 Sep 2020 19:18:18 -  1.56
> +++ parse.y   22 May 2021 07:55:18 -
> @@ -685,14 +685,16 @@ string  : STRING string {
>  lladdr   : STRING{
>   struct ether_addr *ea;
>  
> - if ((ea = ether_aton($1)) == NULL) {
> + if (strcmp($1, "random") == 0) {
> + memset($$, 0, ETHER_ADDR_LEN);
> + } else if ((ea = ether_aton($1)) != NULL) {
> + memcpy($$, ea, ETHER_ADDR_LEN);
> + } else {
>   yyerror("invalid address: %s\n", $1);
>   free($1);
>   YYERROR;
>   }
>   free($1);
> -
> - memcpy($$, ea, ETHER_ADDR_LEN);
>   }
>   ;
>  
> Index: vm.conf.5
> ===
> RCS file: /cvs/src/usr.sbin/vmd/vm.conf.5,v
> retrieving revision 1.56
> diff -u -p -r1.56 vm.conf.5
> --- vm.conf.5 1 Mar 2021 14:27:44 -   1.56
> +++ vm.conf.5 22 May 2021 07:55:18 -
> @@ -237,10 +237,12 @@ The
>  must not be longer than 15 characters or end with a digit,
>  as described in
>  .Xr ifconfig 8 .
> -.It Oo Cm locked Oc Cm lladdr Op Ar etheraddr
> +.It Oo Cm locked Oc Cm lladdr Ar etheraddr Ns | Ns Cm random
>  Change the link layer address (MAC address) of the interface on the
>  VM guest side.
> -If not specified, a randomized address will be assigned by
> +If
> +.Cm random
> +is specified, a randomized address will be assigned by
>  .Xr vmd 8 .
>  If the
>  .Cm locked
> 



Re: Kill SS_ASYNC

2021-06-01 Thread Vitaliy Makkoveev
On Sat, May 29, 2021 at 10:33:17AM +0200, Martin Pieuchot wrote:
> The socket flag SS_ASYNC is redundant with the socket buffer flag
> SB_ASYNC.  Both are set & unset via FIOASYNC.  So the diff below gets
> rid of SS_ASYNC.
> 
> Checking states on socket buffers will help reduce the scope of the
> NET_LOCK() when we'll introduce a socket buffer lock.
> 
> ok?
> 

ok mvs@

> Index: kern/sys_socket.c
> ===
> RCS file: /cvs/src/sys/kern/sys_socket.c,v
> retrieving revision 1.45
> diff -u -p -r1.45 sys_socket.c
> --- kern/sys_socket.c 22 Feb 2020 11:58:29 -  1.45
> +++ kern/sys_socket.c 29 May 2021 07:53:49 -
> @@ -96,11 +96,9 @@ soo_ioctl(struct file *fp, u_long cmd, c
>   case FIOASYNC:
>   s = solock(so);
>   if (*(int *)data) {
> - so->so_state |= SS_ASYNC;
>   so->so_rcv.sb_flags |= SB_ASYNC;
>   so->so_snd.sb_flags |= SB_ASYNC;
>   } else {
> - so->so_state &= ~SS_ASYNC;
>   so->so_rcv.sb_flags &= ~SB_ASYNC;
>   so->so_snd.sb_flags &= ~SB_ASYNC;
>   }
> Index: kern/uipc_socket2.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.110
> diff -u -p -r1.110 uipc_socket2.c
> --- kern/uipc_socket2.c   26 May 2021 08:28:34 -  1.110
> +++ kern/uipc_socket2.c   29 May 2021 07:53:41 -
> @@ -409,7 +409,7 @@ sbunlock(struct socket *so, struct sockb
>  /*
>   * Wakeup processes waiting on a socket buffer.
>   * Do asynchronous notification via SIGIO
> - * if the socket has the SS_ASYNC flag set.
> + * if the socket buffer has the SB_ASYNC flag set.
>   */
>  void
>  sowakeup(struct socket *so, struct sockbuf *sb)
> @@ -421,7 +421,7 @@ sowakeup(struct socket *so, struct sockb
>   sb->sb_flags &= ~SB_WAIT;
>   wakeup(>sb_cc);
>   }
> - if (so->so_state & SS_ASYNC)
> + if (sb->sb_flags & SB_ASYNC)
>   pgsigio(>so_sigio, SIGIO, 0);
>   selwakeup(>sb_sel);
>  }
> Index: sys/socketvar.h
> ===
> RCS file: /cvs/src/sys/sys/socketvar.h,v
> retrieving revision 1.97
> diff -u -p -r1.97 socketvar.h
> --- sys/socketvar.h   21 May 2021 10:59:02 -  1.97
> +++ sys/socketvar.h   29 May 2021 07:54:27 -
> @@ -147,7 +147,6 @@ struct socket {
>  #define  SS_ISDISCONNECTED   0x800   /* socket disconnected from 
> peer */
>  
>  #define  SS_PRIV 0x080   /* privileged for broadcast, 
> raw... */
> -#define  SS_ASYNC0x200   /* async i/o notify */
>  #define  SS_CONNECTOUT   0x1000  /* connect, not accept, at this 
> end */
>  #define  SS_ISSENDING0x2000  /* hint for lower layer */
>  #define  SS_DNS  0x4000  /* created using SOCK_DNS 
> socket(2) */
> 



Kill SS_ASYNC

2021-06-01 Thread Martin Pieuchot
The socket flag SS_ASYNC is redundant with the socket buffer flag
SB_ASYNC.  Both are set & unset via FIOASYNC.  So the diff below gets
rid of SS_ASYNC.

Checking states on socket buffers will help reduce the scope of the
NET_LOCK() when we'll introduce a socket buffer lock.

ok?

Index: kern/sys_socket.c
===
RCS file: /cvs/src/sys/kern/sys_socket.c,v
retrieving revision 1.45
diff -u -p -r1.45 sys_socket.c
--- kern/sys_socket.c   22 Feb 2020 11:58:29 -  1.45
+++ kern/sys_socket.c   29 May 2021 07:53:49 -
@@ -96,11 +96,9 @@ soo_ioctl(struct file *fp, u_long cmd, c
case FIOASYNC:
s = solock(so);
if (*(int *)data) {
-   so->so_state |= SS_ASYNC;
so->so_rcv.sb_flags |= SB_ASYNC;
so->so_snd.sb_flags |= SB_ASYNC;
} else {
-   so->so_state &= ~SS_ASYNC;
so->so_rcv.sb_flags &= ~SB_ASYNC;
so->so_snd.sb_flags &= ~SB_ASYNC;
}
Index: kern/uipc_socket2.c
===
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.110
diff -u -p -r1.110 uipc_socket2.c
--- kern/uipc_socket2.c 26 May 2021 08:28:34 -  1.110
+++ kern/uipc_socket2.c 29 May 2021 07:53:41 -
@@ -409,7 +409,7 @@ sbunlock(struct socket *so, struct sockb
 /*
  * Wakeup processes waiting on a socket buffer.
  * Do asynchronous notification via SIGIO
- * if the socket has the SS_ASYNC flag set.
+ * if the socket buffer has the SB_ASYNC flag set.
  */
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
@@ -421,7 +421,7 @@ sowakeup(struct socket *so, struct sockb
sb->sb_flags &= ~SB_WAIT;
wakeup(>sb_cc);
}
-   if (so->so_state & SS_ASYNC)
+   if (sb->sb_flags & SB_ASYNC)
pgsigio(>so_sigio, SIGIO, 0);
selwakeup(>sb_sel);
 }
Index: sys/socketvar.h
===
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.97
diff -u -p -r1.97 socketvar.h
--- sys/socketvar.h 21 May 2021 10:59:02 -  1.97
+++ sys/socketvar.h 29 May 2021 07:54:27 -
@@ -147,7 +147,6 @@ struct socket {
 #defineSS_ISDISCONNECTED   0x800   /* socket disconnected from 
peer */
 
 #defineSS_PRIV 0x080   /* privileged for broadcast, 
raw... */
-#defineSS_ASYNC0x200   /* async i/o notify */
 #defineSS_CONNECTOUT   0x1000  /* connect, not accept, at this 
end */
 #defineSS_ISSENDING0x2000  /* hint for lower layer */
 #defineSS_DNS  0x4000  /* created using SOCK_DNS 
socket(2) */



Re: sdmmc(4): check and retry bus width change

2021-06-01 Thread Patrick Wildt
Am Mon, Feb 22, 2021 at 08:10:21PM +0100 schrieb Patrick Wildt:
> Hi,
> 
> it seems like some eMMCs are not capable of doing 8-bit operation,
> even if the controller supports it.  I was questioning our drivers
> first, but it looks like it's the same on Linux.  In the case that
> 8-bit doesn't work, they seem to fall back to lower values to make
> that HW work.
> 
> This diff implements a mechanism that tries 8-bit, if available,
> then 4-bit and in the end falls back to 1-bit.  This makes my HW
> work, but I would like to have this tested by a broader audience.
> 
> Apparently there's a "bus test" command, but it isn't implemented
> on all host controllers.  Hence I simply try to read the EXT_CSD
> to make sure the transfer works.
> 
> For testing, a print like
> 
> printf("%s: using %u-bit width\n", DEVNAME(sc), width);
> 
> could be added at line 928.
> 
> What could possible regressions be?  The width could become smaller
> then previously.  This would reduce the read/write transfer speed.
> Also it's possible that eMMCs are not recognized/initialized anymore.
> 
> What could possible improvements be?  eMMCs that previously didn't
> work now work, with at least 1-bit or 4-bit wide transfers.
> 
> Please note that this only works for eMMCs.  SD cards are *not* using
> this code path.  SD cards have a different initialization code path.
> 
> Please report any changes or non-changes.  If nothing changes, that's
> perfect.
> 
> Patrick

Anyone want to give this a try?  It's basically relevant for all
ARM machines with eMMC ('soldered SD cards'), and they should work
as well as before.

> diff --git a/sys/dev/sdmmc/sdmmc_mem.c b/sys/dev/sdmmc/sdmmc_mem.c
> index 59bcb1b4a11..5856b9bb1b3 100644
> --- a/sys/dev/sdmmc/sdmmc_mem.c
> +++ b/sys/dev/sdmmc/sdmmc_mem.c
> @@ -56,6 +56,8 @@ int sdmmc_mem_signal_voltage(struct sdmmc_softc *, int);
>  
>  int  sdmmc_mem_sd_init(struct sdmmc_softc *, struct sdmmc_function *);
>  int  sdmmc_mem_mmc_init(struct sdmmc_softc *, struct sdmmc_function *);
> +int  sdmmc_mem_mmc_select_bus_width(struct sdmmc_softc *,
> + struct sdmmc_function *, int);
>  int  sdmmc_mem_single_read_block(struct sdmmc_function *, int, u_char *,
>   size_t);
>  int  sdmmc_mem_read_block_subr(struct sdmmc_function *, bus_dmamap_t,
> @@ -908,31 +910,20 @@ sdmmc_mem_mmc_init(struct sdmmc_softc *sc, struct 
> sdmmc_function *sf)
>   ext_csd[EXT_CSD_CARD_TYPE]);
>   }
>  
> - if (ISSET(sc->sc_caps, SMC_CAPS_8BIT_MODE)) {
> + if (ISSET(sc->sc_caps, SMC_CAPS_8BIT_MODE) &&
> + sdmmc_mem_mmc_select_bus_width(sc, sf, 8) == 0)
>   width = 8;
> - value = EXT_CSD_BUS_WIDTH_8;
> - } else if (ISSET(sc->sc_caps, SMC_CAPS_4BIT_MODE)) {
> + else if (ISSET(sc->sc_caps, SMC_CAPS_4BIT_MODE) &&
> + sdmmc_mem_mmc_select_bus_width(sc, sf, 4) == 0)
>   width = 4;
> - value = EXT_CSD_BUS_WIDTH_4;
> - } else {
> - width = 1;
> - value = EXT_CSD_BUS_WIDTH_1;
> - }
> -
> - if (width != 1) {
> - error = sdmmc_mem_mmc_switch(sf, EXT_CSD_CMD_SET_NORMAL,
> - EXT_CSD_BUS_WIDTH, value);
> - if (error == 0)
> - error = sdmmc_chip_bus_width(sc->sct,
> - sc->sch, width);
> - else {
> + else {
> + error = sdmmc_mem_mmc_select_bus_width(sc, sf, 1);
> + if (error != 0) {
>   DPRINTF(("%s: can't change bus width"
>   " (%d bit)\n", DEVNAME(sc), width));
>   return error;
>   }
> -
> - /* : need bus test? (using by CMD14 & CMD19) */
> - sdmmc_delay(1);
> + width = 1;
>   }
>  
>   if (timing != SDMMC_TIMING_LEGACY) {
> @@ -1041,6 +1032,59 @@ sdmmc_mem_mmc_init(struct sdmmc_softc *sc, struct 
> sdmmc_function *sf)
>   return error;
>  }
>  
> +int
> +sdmmc_mem_mmc_select_bus_width(struct sdmmc_softc *sc, struct sdmmc_function 
> *sf,
> +int width)
> +{
> + u_int8_t ext_csd[512];
> + int error, value;
> +
> + switch (width) {
> + case 8:
> + value = EXT_CSD_BUS_WIDTH_8;
> + break;
> + case 4:
> + value = EXT_CSD_BUS_WIDTH_4;
> + break;
> + case 1:
> + value = EXT_CSD_BUS_WIDTH_1;
> + break;
> + default:
> + printf("%s: invalid bus width\n", DEVNAME(sc));
> + return EINVAL;
> + }
> +
> + error = sdmmc_mem_mmc_switch(sf, EXT_CSD_CMD_SET_NORMAL,
> + EXT_CSD_BUS_WIDTH, value);
> + if (error != 0) {
> +