Re: PR review process?

2023-07-30 Thread Nick Hudson

On 29/07/2023 10:43, Andreas Hecht wrote:

Recently, I have reported a problem (kern/57518) with a corresponding
fix regarding split transaction scheduling in the ehci driver. The
problem report was submitted on the 10th of July but I haven't heard
anything about it in the meantime.


I'll take a look.

Thanks,
Nick




MI boot option helper functions

2023-02-25 Thread Nick Hudson
Any objections / improvements to this diff to add MI boot option helper 
functions?


Thanks,
Nickdiff --git a/sys/kern/subr_optstr.c b/sys/kern/subr_optstr.c
index c557dc71e341..0e7cf76d27ef 100644
--- a/sys/kern/subr_optstr.c
+++ b/sys/kern/subr_optstr.c
@@ -43,13 +43,15 @@ __KERNEL_RCSID(0, "$NetBSD: subr_optstr.c,v 1.7 2022/11/19 15:30:12 skrll Exp $"
  * with a maximum of bufsize bytes.  If the key is found, returns true;
  * otherwise FALSE.
  */
-bool
-optstr_get(const char *optstr, const char *key, char *buf, size_t bufsize)
+
+
+static bool
+optstr_get_pointer(const char *optstr, const char *key, const char **result)
 {
 	bool found = false;
 
-	/* Skip any initial spaces until we find a word. */
-	while (*optstr == ' ')
+	/* Skip any initial spaces or tabs until we find a word. */
+	while (*optstr == ' ' || *optstr == '\t')
 		optstr++;
 
 	/* Search for the given key within the option string. */
@@ -76,17 +78,118 @@ optstr_get(const char *optstr, const char *key, char *buf, size_t bufsize)
 		}
 	}
 
-	/* If the key was found; copy its value to the target buffer. */
 	if (found) {
-		const char *lastbuf;
+		optstr++; /* Skip '='. */
+		*result = optstr;
+	}
 
-		lastbuf = buf + (bufsize - 1);
+	return found;
+}
 
-		optstr++; /* Skip '='. */
-		while (buf != lastbuf && *optstr != ' ' && *optstr != '\0')
-			*buf++ = *optstr++;
+bool
+optstr_get(const char *optstr, const char *key, char *buf, size_t bufsize)
+{
+	const char *data;
+	bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		const char *lastbuf = buf + (bufsize - 1);
+
+		while (buf != lastbuf && *data != ' ' && *data != '\0')
+			*buf++ = *data++;
 		*buf = '\0';
 	}
+	return found;
+}
+
+
+bool
+optstr_get_string(const char *optstr, const char *key, const char **result)
+{
+	const char *data;
+	const bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		*result = data;
+	}
+	return found;
+}
+
+bool
+optstr_get_number(const char *optstr, const char *key, unsigned long *result)
+{
+	const char *data;
+	const bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		char *ep;
+		const unsigned long ulval = strtoul(data, , 10);
+		if (ep == data)
+			return false;
+		*result = ulval;
+		return true;
+	}
+	return false;
+}
+
+bool
+optstr_get_number_hex(const char *optstr, const char *key,
+unsigned long *result)
+{
+	const char *data;
+	const bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		char *ep;
+		const unsigned long ulval = strtoul(data, , 16);
+		if (ep == data)
+			return false;
+		*result = ulval;
+		return true;
+	}
+	return false;
+}
+
+bool
+optstr_get_number_binary(const char *optstr, const char *key,
+unsigned long *result)
+{
+	const char *data;
+	const bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		char *ep;
+		const unsigned long ulval = strtoul(data, , 2);
+		if (ep == data)
+			return false;
+		*result = ulval;
+		return true;
+	}
+	return false;
+}
 
+
+#if NETHER > 0
+bool
+optstr_get_macaddr(const char *optstr, const char *key,
+uint8_t result[ETHER_ADDR_LEN])
+{
+	const char *data;
+	const bool found = optstr_get_pointer(optstr, key, );
+
+	/* If the key was found; copy its value to the target buffer. */
+	if (found) {
+		uint8_t temp[ETHER_ADDR_LEN];
+		int error = ether_aton_r(temp, sizeof(temp), data);
+		if (error)
+			return false;
+		memcpy(result, temp, sizeof(temp));
+	}
 	return found;
 }
+#endif
diff --git a/sys/sys/optstr.h b/sys/sys/optstr.h
index bca2780228df..4a4bb757df3e 100644
--- a/sys/sys/optstr.h
+++ b/sys/sys/optstr.h
@@ -29,12 +29,29 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#if !defined(_SYS_OPTSTR_H_)
+#ifndef _SYS_OPTSTR_H_
 #define _SYS_OPTSTR_H_
 
+#include "ether.h"
+
+#include 
+
+#if NETHER > 0
+#include 
+#endif
+
 /*
  * Prototypes for functions defined in sys/kern/subr_optstr.c.
  */
 bool optstr_get(const char *, const char *, char *, size_t);
 
-#endif /* !defined(_SYS_OPTSTR_H_) */
+bool optstr_get_string(const char *, const char *, const char **);
+bool optstr_get_number(const char *, const char *, unsigned long *);
+bool optstr_get_number_hex(const char *, const char *, unsigned long *);
+bool optstr_get_number_binary(const char *, const char *, unsigned long *);
+
+#if NETHER > 0
+bool optstr_get_macaddr(const char *, const char *, uint8_t [ETHER_ADDR_LEN]);
+#endif
+
+#endif /* _SYS_OPTSTR_H_ */


Re: ci_want_resched bits vs. MD ci_data.cpu_softints bits

2022-12-04 Thread Nick Hudson

On 05/12/2022 01:52, Chuck Silvers wrote:

On Sat, Dec 03, 2022 at 03:40:13PM +0100, Martin Husemann wrote:

On Thu, Jul 07, 2022 at 09:22:42PM +0200, Martin Husemann wrote:

I guess in older times ci->ci_want_resched was used as a boolean flag, so only
0 or !0 did matter - but nowadays it is a flag word of various bits:

#define RESCHED_REMOTE  0x01/* request is for a remote CPU */
#define RESCHED_IDLE0x02/* idle LWP observed */
#define RESCHED_UPREEMPT0x04/* immediate user ctx switch */
#define RESCHED_KPREEMPT0x08/* immediate kernel ctx switch */


The MD usage of ci_data.cpu_softints on powerpc is a bitmask of pending
softint IPLs, which could easily collide with above flags.


Even if this (currently) does not seem to cause issues, mixing bits from
different bitsets is wrong, so I'd like to commit the originaly proposed
fix (only ever clear all bits).


I agree this seems best.


I agree as well.





Additionaly I'd like to commit something like the second change below, fixing
an issue when creating new lwps (that neither should have ASTs pending nor
own the altivec psu). I am not sure if the PSL_VEC is the only bit that
should be cleared here.


the only bits in md_flags that can be set are PSL_VEC and PSL_SE,
but nothing ever looks at the PSL_SE bit in this field.
so you skip copying the whole l_md structure and just set both
md_flags and md_astpending to zero.  the code in process_machdep.c
that sets and clears PSL_SE in md_flags could also be removed.


l_md is already zeroed by the memset(>l_startzero, 0, ... ) calls

https://nxr.netbsd.org/search?q=l_startzero=src

I've taken to just asserts that md_astpending is zero.

Nick


Re: boothowto(9) options with Raspberry Pi

2022-10-06 Thread Nick Hudson

On 05/10/2022 13:27, Michael van Elst wrote:

stephan...@googlemail.com (Stephan) writes:


Hello,



I am re-asking this question here because I have not received a reply
on port-arm@, surprisingly.



I am doing some experimentation with the Raspberry Pi for which I have
set up a serial connection to another computer. Now I=C2=B4d like to
prevent certain drivers from being loaded at boot using the
userconf(4) prompt. However, I wasn=C2=B4t able to find out how to pass the
corresponding boothowto(9) parameter (-c) to the kernel (I tried
several variants in cmdline.txt).


Nothing there

This patch adds the missing boot options.


Why not use BOOT_FLAG (as well as the get_bootconf_option for the long
format)?

Nick


Re: libsa and 4K devices

2022-04-23 Thread Nick Hudson




On 23/04/2022 15:25, Tobias Nygren wrote:

On Sat, 23 Apr 2022 14:17:09 - (UTC)
Michael van Elst  wrote:


nick.hud...@gmx.co.uk (Nick Hudson) writes:


To enable efiboot to work from Apple M1 nvme I had to apply this diff so
that libsa picks up the fs_fshift based FFS_FSBTODB.



Is this correct or does it mean the FS has an incorrect fs_fsbtodb? (and
there's a bug in mkfs somewhere)



The bug is probably in the arm efi code.


See port-evbarm/56356 for some of the bugs and how to fix them.


https://github.com/skrll/src/commit/a5432c0ce71ea2fd1b7ad22ff6c26d01f4dca71a

With this commit efiboot finds the super block...

Nick


libsa and 4K devices

2022-04-22 Thread Nick Hudson

Hi,

To enable efiboot to work from Apple M1 nvme I had to apply this diff so
that libsa picks up the fs_fshift based FFS_FSBTODB.

Is this correct or does it mean the FS has an incorrect fs_fsbtodb? (and
there's a bug in mkfs somewhere)

Thanks,
Nick
Index: sys/ufs/ffs/fs.h
===
RCS file: /cvsroot/src/sys/ufs/ffs/fs.h,v
retrieving revision 1.69
diff -u -p -r1.69 fs.h
--- sys/ufs/ffs/fs.h	18 Sep 2021 03:05:20 -	1.69
+++ sys/ufs/ffs/fs.h	22 Apr 2022 08:16:04 -
@@ -616,7 +616,7 @@ struct ocg {
  * Turn file system block numbers into disk block addresses.
  * This maps file system blocks to device size blocks.
  */
-#if defined (_KERNEL)
+#if defined (_KERNEL) || defined(_STANDALONE)
 #define	FFS_FSBTODB(fs, b)	((b) << ((fs)->fs_fshift - DEV_BSHIFT))
 #define	FFS_DBTOFSB(fs, b)	((b) >> ((fs)->fs_fshift - DEV_BSHIFT))
 #else


Re: uhidev(9) improvements

2022-01-25 Thread Nick Hudson

On 25/01/2022 01:09, Taylor R Campbell wrote:

The attached patch series partially fixes a bug in the uhidev(9) API
(uhidev_stop didn't do anything at all; now it is merely racy).

But more importantly, the patch series makes struct uhidev_softc
opaque, so the uhidev(9) API can have a more stable ABI.  This will
give us better latitude to fix this race -- and potentially others --
later on and pull them up to netbsd-10 after it branches.



Looks good to me.

+   /* XXX Can this just use sc->sc_udev, or am I mistaken?  */
+   usbd_interface2device_handle(sc->sc_iface, );

I think yes.



I have tested uhid(4) and ukbd(4).  I haven't tested ucycom(4), which
uses uhidev(9) in a way that nothing else does -- to do async xfers on
the output pipe.  Can anyone review and/or test ucycom(4)?



ucycom(4) works as well as it did before the change.

Nick


Re: uscanner

2021-06-25 Thread Nick Hudson

On 25/06/2021 12:10, nia wrote:

hi tech-kern,

The uscanner driver only exists for compatibility with userspace Linux
software that uses an interface that was already deprecated by Linux
in 2004. All scanner software these days uses ugen.

I think we should remove this driver in case someone mistakenly
enables it thinking it will help them scan things when in reality
enabling the driver will just break scanning.



Fine by me.

Nick


Re: Check in cpu_switchto triggering crash with PARANOIA on

2021-02-21 Thread Nick Hudson

On 22/02/2021 04:15, Alan Fisher wrote:

Hello,

I've been trying to get the evbmips port working on a new chip recently,
and in the process I've tried building the kernel with PARANOIA enabled.
This has resulted in a crash on startup, and I am wondering if it is
surfacing a bug. Here is what's happening:

Some code under an #ifdef PARANOIA in cpu_switchto checks whether the
IPL is IPL_SCHED, and if not, throws a trap. According to the manpage
for cpu_switchto(9), the current IPL level being IPL_SCHED is a
precondition for cpu_switchto(), so this check seems to make sense. The
callstack looks like this:

cpu_switchto  - this causes a trap when the check fails - manpage says
IPL must be IPL_SCHED
mi_switch - manpage says IPL must be IPL_SCHED
yield - manpage doesn't say anything about IPL_SCHED, and IPL is not
changed in this routine



276 yield(void)
277 {
278 struct lwp *l = curlwp;
279
280 KERNEL_UNLOCK_ALL(l, >l_biglocks);
281 lwp_lock(l);

lwp_lock will raise the IPL to IPL_SCHED. spc_{lock,mutex} are used by
lwp_lock (maybe others)
HTH,

Nick


Re: USB lockup

2020-11-26 Thread Nick Hudson

On 26/11/2020 20:35, Edgar Fuß wrote:

Add a check to ohci_softintr to see if the list goes circular and enter
ddb / dump usbhist when it does...

I already did add a panic and it fired.

I'm still trying to find out how that happens.

What I'm seeing (dumped by device_ctrl_start()) is a chain of four TDs
(named here after their addresses' three least significant nybbles):
E20->EE0->FA0->F40->0
which are linked in that sense by both nexttd and td.td_nexttd.

Then, in ohci_softint(), the done queue is (as linked by td.nexttd):
FA0->EE0->E20->FA0->...
and, as expected, the nexttd links are as before.
Absent the E20->FA0 link, that's exactly what one would expect if the first
three TDs have been handled (the done list is most recently done first);
the big question is where that additinal link comes from.

I've added code to ohci_hash_add_td() to catch a TD being added with a
physical address already present in the hash list, but that didn't fire.




Really hard to help without seeing the full ohcidebug usbhist log. I
guess the E20 TD got written out with incorrect next_td, or some other
error condition caused the mixup.

The change I referred to was

Revision 1.254.2.76 / (download) - annotate - [select for diffs], Mon
May 30 06:46:50 2016 UTC (4 years, 5 months ago) by skrll
Branch: nick-nhusb
Changes since 1.254.2.75: +181 -48 lines
Diff to previous 1.254.2.75 (colored) to branchpoint 1.254 (colored)

Restructure the abort code for TD based transfers (ctrl, bulk, intr).

In PR/22646 some TDs can be on the done queue when the abort start and,
if this is the case, they need to processed after the WDH interrupt.
Instead of waiting for WDH we release TDs that have been touched by the
HC and replace them with new ones.  Once WDH happens the floating TDs
will be returned to the free list.


is something being aborted?

Nick


Re: USB lockup

2020-11-26 Thread Nick Hudson

On 24/11/2020 16:30, Edgar Fuß wrote:

so the td list must have gone circular, no?

It's indeed circular (in the td_nexttd sense), as addionally inserted
debugging output revealed. It also happens in uniprocessor (boot -1) mode.




Add a check to ohci_softintr to see if the list goes circular and enter
ddb / dump usbhist when it does...

I had a fix on my nick-nhusb branch that might help here, but other
updates broke it and I've not looked as to why.

Nick



Re: Scheduling problem - need some help here

2020-07-28 Thread Nick Hudson

On 28/06/2020 16:11, Anders Magnusson wrote:

Hi,

there is a problem (on vax) that I do not really understand.  Greg Oster
filed a PR on it (#55415).

A while ago ad@ removed the  "(ci)->ci_want_resched = 1;" from
cpu_need_resched() in vax/include/cpu.h.
And as I read the code (in kern_runq.c) it shouldn't be needed,
ci_want_resched should be set already when the macro cpu_need_resched()
is invoked.

But; without setting cpu_need_resched=1 the vax performs really bad (as
described in the PR).

cpu_need_resched may have multiple values nowadays, setting it to 1 will
effectively clear out other flags, which is probably what makes it work.

Anyone know what os going on here (and can explain it to me)?


I'm no expert here, but I think the expectation is that each platform
has its own method to signal "ast pending" and eventually call userret
(and preempt) when it's set - see setsoftast/aston.

As I don't understand vax I don't know what

197 #define cpu_signotify(l) mtpr(AST_OK,PR_ASTLVL)

is expected to do, but somehow it should result in userret() being called.

Other points are:

- vax cpu_need_resched doesn't seem to differentiate between locally
  running lwp and an lwp running on another cpu.

- I can't see how hardclock would result in userret being called, but
  like I said - I don't know vax.


http://src.illumos.org/source/xref/netbsd-src/sys/arch/vax/vax/intvec.S#311

I believe ci_want_resched is an MI variable for the scheduler which is
why its use in vax cpu_need_resched got removed.

End of ramblings...

Nick


Re: style change: explicitly permit braces for single statements

2020-07-11 Thread Nick Hudson

On 12/07/2020 04:01, Luke Mewburn wrote:

On 20-07-12 10:01, Luke Mewburn wrote:
   | I propose that the NetBSD C style guide in to /usr/share/misc/style
   | is reworded to more explicitly permit braces around single statements,
   | instead of the current discourgement.
   |
   | IMHO, permitting braces to be consistently used:
   | - Adds to clarity of intent.
   | - Aids code review.
   | - Avoids gotofail: 
https://en.wikipedia.org/wiki/Unreachable_code#goto_fail_bug
   |
   | regards,
   | Luke.

I was asked to CC this thread to tech-kern@, so I'm doing that.



yes please

Nick


Re: vmspace refcnt refactor patch

2020-05-16 Thread Nick Hudson



On 16/05/2020 12:45, Kamil Rytarowski wrote:

On 16.05.2020 07:48, Nick Hudson wrote:

On 15/05/2020 17:35, Kamil Rytarowski wrote:

I propose the following patch:

http://netbsd.org/~kamil/patch-00253-refactor-vmspace-refcnt.txt

Does it look good?

Nick, does it fix the hppa locking problem?


Thanks for working on this.

Unfortunately, it doesn't fix all the problems...


Please check this:

http://netbsd.org/~kamil/patch-00254-remove-lwp_lock_in_sstep.txt


I can run the ptrace tests now without triggering LOCKDEBUG asserts.


Thanks,

Nick



Re: vmspace refcnt refactor patch

2020-05-15 Thread Nick Hudson

On 15/05/2020 17:35, Kamil Rytarowski wrote:

I propose the following patch:

http://netbsd.org/~kamil/patch-00253-refactor-vmspace-refcnt.txt

Does it look good?

Nick, does it fix the hppa locking problem?



Thanks for working on this.

Unfortunately, it doesn't fix all the problems...


setstep1: [ 209.5497837] Mutex error: assert_sleepable,78: spin
lock held

[ 209.5497837] lock address : 0x3ffccf00 type :  spin
[ 209.5497837] initialized  : 0x007d6f78
[ 209.5497837] shared holds :  0 exclusive: 1
[ 209.5497837] shares wanted:  0 exclusive: 0
[ 209.5497837] relevant cpu :  0 last held: 0
[ 209.5497837] relevant lwp : 0x3fc36d00 last held:
0x3fc36d00
[ 209.5497837] last locked* : 0x0084a760 unlocked :
0x007d5134
[ 209.5497837] owner field  : 0xff10 wait/spin:   0/1

[ 209.5497837] panic: LOCKDEBUG: Mutex error: assert_sleepable,78: spin
lock held
[ 209.5497837] cpu0: Begin traceback...
[ 209.5497837] vpanic() at netbsd:vpanic+0x1b8
[ 209.5497837] panic() at netbsd:panic+0x38
[ 209.5497837] lockdebug_abort1() at netbsd:lockdebug_abort1+0x170
[ 209.5497837] lockdebug_barrier() at netbsd:lockdebug_barrier+0x114
[ 209.5497837] assert_sleepable() at netbsd:assert_sleepable+0xa0
[ 209.5497837] pool_cache_get_paddr() at netbsd:pool_cache_get_paddr+0x254
[ 209.5497837] uvm_mapent_alloc() at netbsd:uvm_mapent_alloc+0x150
[ 209.5497837] uvm_map_enter() at netbsd:uvm_map_enter+0x848
[ 209.5497837] uvm_map() at netbsd:uvm_map+0xd4
[ 209.5497837] uvm_map_reserve() at netbsd:uvm_map_reserve+0x240
[ 209.5497837] uvm_map_extract() at netbsd:uvm_map_extract+0x6fc
[ 209.5497837] uvm_io() at netbsd:uvm_io+0xf8
[ 209.5497837] process_domem() at netbsd:process_domem+0x94
[ 209.5497837] ss_get_value() at netbsd:ss_get_value+0x74
[ 209.5497837] process_sstep() at netbsd:process_sstep+0x74
[ 209.5497837] do_ptrace() at netbsd:do_ptrace+0xe00
[ 209.5497837] sys_ptrace() at netbsd:sys_ptrace+0x4c
[ 209.5497837] syscall() at netbsd:syscall+0x480
[ 209.5497837] -- syscall #26(7, 141a, 1, 0, ...) (0xd3131000)
[ 209.5497837] cpu0: End traceback...

nick@zoom:/netbsd/hppa/obj.hppa/sys/arch/hppa/compile/GENERIC$
hppa--netbsd-addr2line -e netbsd.gdb  -f 0x007d6f78
sched_cpuattach
/netbsd/hppa/src/sys/kern/kern_runq.c:147
nick@zoom:/netbsd/hppa/obj.hppa/sys/arch/hppa/compile/GENERIC$
hppa--netbsd-addr2line -e netbsd.gdb  -f 0x0084a760
lwp_lock
/netbsd/hppa/src/sys/sys/lwp.h:412
nick@zoom:/netbsd/hppa/obj.hppa/sys/arch/hppa/compile/GENERIC$
hppa--netbsd-addr2line -e netbsd.gdb  -f 0x007d5134
calcru
/netbsd/hppa/src/sys/kern/kern_resource.c:506 (discriminator 2)
nick@zoom:/netbsd/hppa/obj.hppa/sys/arch/hppa/compile/GENERIC$

Nick


Re: [PATCH] xhci: Reduce ring memory usage

2020-04-02 Thread Nick Hudson

On 30/03/2020 08:20, sc.dy...@gmail.com wrote:

Hello.

  Most devices use a few endpoints but current xhci code allocates all of
31 endpoints in the slot when a device is connected.
  This patch defers ring memory allocation to when usbd_open_pipe opens
the endpoint, and it allocates one ring for an endpoint.

  For example, a ordinary network device uses 4 endpoints.
It requires 192296 bytes in old code on amd64, it requires
25096 bytes in new code.

Old:
sizeof xhci_slot 1832
ring dma buf 4096 (per endpoint) x 31
xr_cookie 2048 (per endpoint) x 31

1832 + (4096 + 2048) x 32 = 192296

New:
sizeof xhci_slot 296
sizeof xhci_ring 56 (per endpoint) x 4
ring dma buf 4096 (per endpoint) x 4
xr_cookie 2048 (per endpoint) x 4

296 + (56 + 4096 + 2048) x 4 = 25096

  I've replaced xhci_endpoint in xhci_slot with xhci_ring *[], as there are
no other structures other than xhci_ring in xhci_endpoint.




Committed.

Thanks,
Nick


Re: panic: softint screwup

2020-02-10 Thread Nick Hudson
On 04/02/2020 23:17, Andrew Doran wrote:
> On Tue, Feb 04, 2020 at 07:03:28AM -0400, Jared McNeill wrote:
>
>> First time seeing this one.. an arm64 board sitting idle at the login prompt
>> rebooted itself with this panic. Unfortunately the default ddb.onpanic=0
>> strikes again and I can't get any more information than this:
>
> I added this recently to replace a vague KASSERT.  Thanks for grabbing the
> output.
>
>> [ 364.3342263] curcpu=0, spl=4 curspl=7
>> [ 364.3342263] onproc=0x00237f743080 => l_stat=7 l_flag=2201 l_cpu=0
>> [ 364.3342263] curlwp=0x00237f71e580 => l_stat=1 l_flag=0200 l_cpu=0
>> [ 364.3342263] pinned=0x00237f71e100 => l_stat=7 l_flag=0200 l_cpu=0
>> [ 364.3342263] panic: softint screwup
>> [ 364.3342263] cpu0: Begin traceback...
>> [ 364.3342263] trace fp ffc101da7be0
>> [ 364.3342263] fp ffc101da7c00 vpanic() at ffc0004ad728 
>> netbsd:vpanic+0x160
>> [ 364.3342263] fp ffc101da7c70 panic() at ffc0004ad81c 
>> netbsd:panic+0x44
>> [ 364.3342263] fp ffc101da7d40 softint_dispatch() at ffc00047bda4 
>> netbsd:softint_dispatch+0x5c4
>> [ 364.3342263] fp ffc101d9fc30 cpu_switchto_softint() at 
>> ffc85198 netbsd:cpu_switchto_softint+0x68
>> [ 364.3342263] fp ffc101d9fc80 splx() at ffc040d4 
>> netbsd:splx+0xbc
>> [ 364.3342263] fp ffc101d9fcb0 callout_softclock() at ffc000489e04 
>> netbsd:callout_softclock+0x36c
>> [ 364.3342263] fp ffc101d9fd40 softint_dispatch() at ffc00047b8dc 
>> netbsd:softint_dispatch+0xfc
>> [ 364.3342263] fp ffc101d3fcc0 cpu_switchto_softint() at 
>> ffc85198 netbsd:cpu_switchto_softint+0x68
>> [ 364.3342263] fp ffc101d3fdf8 cpu_idle() at ffc86128 
>> netbsd:cpu_idle+0x58
>> [ 364.3342263] fp ffc101d3fe40 idle_loop() at ffc0004546a4 
>> netbsd:idle_loop+0x174
>
> Something has cleared the LW_RUNNING flag on softclk/0 between where it is
> set (unlocked) at line 884 of kern_softint.c and callout_softclock().

Isn't it the case that softclk/0 is the victim/interrupted LWP for a 
soft{serial,net,bio}.
That's certainly how I read the FP values.

the callout handler blocked and softclk/0 became a victim as well maybe?

http://src.illumos.org/source/xref/netbsd-src/sys/kern/kern_synch.c#687

a soft{serial,net,bio} happends before curlwp is changed away from the blocking 
softint thread


Nick


Re: [PATCH] Address USB abort races

2019-12-23 Thread Nick Hudson

On 17/12/2019 04:34, Taylor R Campbell wrote:

The attached change set aims to fix a number of races in the USB
transfer complete/abort/timeout protocol by consolidating the logic to
synchronize it into a few helper subroutines.  (Details below.)
Bonus: It is no longer necessary to sleep (other than on an adaptive
mutex) when aborting.


Thanks for working on this.

Some questions/comments if I may

- Any reason that dwc2 and ahci didn't get the conversion? At least dwc2
  should be done - I'm not sure ahci ever worked, but I've tried to keep
  it up-to-date in the past.  slhci probably needs some love too.

- It appears to me that upm_abort could be removed in favour of
  ubm_abortx and all the xfer cancel logic contained there, but that
  could be a future change

- Off list you mentioned ohci getting stuck during testing and I would
  agree it's unlikely that these diffs caused the issue.  I have some
  changes to ohci aborting on nhusb which I'll try and fixup when this
  diff goes in.

Thanks,
Nick


Re: evbarm hang

2019-04-19 Thread Nick Hudson

On 19/04/2019 10:10, Manuel Bouyer wrote:

Overnight lockdebug did find something:
login: [ 1908.3939406] Mutex error: mutex_vector_enter,504: spinout

[ 1908.3939406] lock address : 0x90b79074 type :   spin
[ 1908.3939406] initialized  : 0x8041601c
[ 1908.3939406] shared holds :  0 exclusive:  1
[ 1908.3939406] shares wanted:  0 exclusive:  1
[ 1908.3939406] current cpu  :  0 last held:  1
[ 1908.3939406] current lwp  : 0x91fc3760 last held: 0x91fc26e0
[ 1908.3939406] last locked* : 0x80416668 unlocked : 0x804169e8
[ 1908.3939406] owner field  : 0x00010500 wait/spin:0/1

[ 1908.4626458] panic: LOCKDEBUG: Mutex error: mutex_vector_enter,504: spinout
[ 1908.4626458] cpu0: Begin traceback...
[ 1908.4626458] 0x9e4a192c: netbsd:db_panic+0x14
[ 1908.4626458] 0x9e4a1944: netbsd:vpanic+0x194
[ 1908.4626458] 0x9e4a195c: netbsd:snprintf
[ 1908.4626458] 0x9e4a199c: netbsd:lockdebug_more
[ 1908.4626458] 0x9e4a19d4: netbsd:lockdebug_abort+0xc0
[ 1908.4626458] 0x9e4a19f4: netbsd:mutex_abort+0x34
[ 1908.4626458] 0x9e4a1a64: netbsd:mutex_enter+0x580
[ 1908.4626458] 0x9e4a1abc: netbsd:pool_get+0x70
[ 1908.4626458] 0x9e4a1b0c: netbsd:pool_cache_get_slow+0x1f4
[ 1908.4626458] 0x9e4a1b5c: netbsd:pool_cache_get_paddr+0x288
[ 1908.4626458] 0x9e4a1b7c: netbsd:m_clget+0x34
[ 1908.4626458] 0x9e4a1bdc: netbsd:dwc_gmac_intr+0x194
[ 1908.4626458] 0x9e4a1bf4: netbsd:gic_fdt_intr+0x2c
[ 1908.4626458] 0x9e4a1c1c: netbsd:pic_dispatch+0x110
[ 1908.4626458] 0x9e4a1c7c: netbsd:armgic_irq_handler+0xf4
[ 1908.4626458] 0x9e4a1db4: netbsd:irq_entry+0x68
[ 1908.4626458] 0x9e4a1dec: netbsd:tcp_send_wrapper+0x9c
[ 1908.4626458] 0x9e4a1e84: netbsd:sosend+0x6fc
[ 1908.4626458] 0x9e4a1eac: netbsd:soo_write+0x3c
[ 1908.4626458] 0x9e4a1f04: netbsd:dofilewrite+0x7c
[ 1908.4626458] 0x9e4a1f34: netbsd:sys_write+0x5c
[ 1908.4626458] 0x9e4a1fac: netbsd:syscall+0x12c
[ 1908.4626458] cpu0: End traceback...



Does show event tell you if dwc_gmac interrupts are being distributed to
both cpus? I think we need to prevent or protect against this.

Nick


Re: evbarm hang

2019-04-19 Thread Nick Hudson

On 19/04/2019 10:10, Manuel Bouyer wrote:
[snip]


Did you see my suggestion for getting the backtrace from the lwp on the
"other" cpu?


db{0}> mach cpu 1
kdb_trap: switching to cpu1
Stopped in pid 21532.1 (gcc) at netbsd:_kernel_lock+0x19c:

So cpu 1 is indeed running the LWP hodling the spin lock, and it looks
like it's itself waiting for a mutex.
Now I have to find why "mach cpu 1" hangs, and how to avoid it ...



The kdb_trap code is racey and needs to look more like the mips version.
Feel free to beat me to fixing it.

Nick


Re: evbarm hang

2019-04-18 Thread Nick Hudson

On 18/04/2019 15:32, Manuel Bouyer wrote:

Hello,
I got several hard hang while building packages on an allwinner a20 (lime2)
I use distcc so there is moderate network traffic in addition to
local CPU load.

most of the time I can't enter ddb from serial console, I just get
Stopped in pid 25136.1 (cc1) at netbsd:cpu_Debugger+0x4:

On 2 occasion, I could enter ddb, and both time the stack trace on CPU 0
was similar:

0x9cea7c0c: netbsd:pool_get+0x70 <-- hang here
0x9cea7c5c: netbsd:pool_cache_get_slow+0x1f4
0x9cea7cac: netbsd:pool_cache_get_paddr+0x288
0x9cea7ccc: netbsd:m_clget+0x34
0x9cea7d2c: netbsd:dwc_gmac_intr+0x194
0x9cea7d44: netbsd:gic_fdt_intr+0x2c
0x9cea7d6c: netbsd:pic_dispatch+0x110
0x9cea7dcc: netbsd:armgic_irq_handler+0xf4
0x9cea7e3c: netbsd:irq_entry+0x68
0x9cea7e7c: netbsd:uvm_unmap_detach+0x80
0x9cea7eac: netbsd:uvmspace_free+0x100
0x9cea7f14: netbsd:exit1+0x190
0x9cea7f34: netbsd:sys_exit+0x3c
0x9cea7fac: netbsd:syscall+0x12c


(the common point is the dwc_gmac_intr() call, which ends up in pool_get().
pool_get() seems to spin on the mutex_enter(>pr_lock); call
just before startover:.
Unfortunably I can't get a stack trace for CPU 1:
db{0}> mach cpu 1
kdb_trap: switching to cpu1
Stopped in pid 26110.1 (gcc) at netbsd:_kernel_lock+0xc4:


Maybe you can get it by looking for the lwp on the other cpu using ps/l
and using bt/a?


Nick


Re: Multiple outstanding transfer vs xhci / ehci (Re: CVS commit: src/sys/dev/usb)

2019-02-15 Thread Nick Hudson

On 15/02/2019 13:44, Rin Okuyama wrote:

On 2019/02/15 21:57, Jonathan A. Kollasch wrote:

On Wed, Feb 13, 2019 at 06:35:14PM +0900, Rin Okuyama wrote:

Hi,

On 2019/02/13 3:54, Nick Hudson wrote:

On 12/02/2019 16:02, Rin Okuyama wrote:

Hi,

The system freezes indefinitely with xhci(4) or ehci(4), when NIC with
multiple outstanding transfers [axen(4), mue(4), and ure(4)] is 
stopped

by "ifconfig down" or detached.

As discussed in the previous message, this is due to infinite loop in
usbd_ar_pipe(); xfers with USBD_NOT_STARTED remain in a queue forever
because upm_abort [= xhci_device_bulk_abort() etc.] cannot remove 
them.



Why not the attached patch instead?

Nick


Thank you so much for your prompt reply!

It works if s/ux_state/ux_status/ here:

+    if (xfer->ux_state == USBD_NOT_STARTED) {

Can I commit the revised patch?



The revised patch results in
https://nxr.netbsd.org/xref/src/sys/dev/usb/ehci.c?r=1.265#1566
firing upon `drvctl detach -d bwfm0` on Pinebook.

Jonathan Kollasch


IMO, this is because NOT_STARTED queues are removed in a way that is
unexpected to ehci. For my old amd64 box, the attached patch fixes
assertion failures when devices are detached from ehci. I would like
to commit it together with the previous patch.



ux_state has probably out lived its usefulness.

Other/All HCs do the same ux_state check. So, either all need to be 
updated or we do the same XFER_ONQU to XFER_BUSY transition in usbd_ar_pipe



Nick


Re: Multiple outstanding transfer vs xhci / ehci (Re: CVS commit: src/sys/dev/usb)

2019-02-12 Thread Nick Hudson

On 12/02/2019 16:02, Rin Okuyama wrote:

Hi,

The system freezes indefinitely with xhci(4) or ehci(4), when NIC with
multiple outstanding transfers [axen(4), mue(4), and ure(4)] is stopped
by "ifconfig down" or detached.

As discussed in the previous message, this is due to infinite loop in
usbd_ar_pipe(); xfers with USBD_NOT_STARTED remain in a queue forever
because upm_abort [= xhci_device_bulk_abort() etc.] cannot remove them.



Why not the attached patch instead?

Nick
? sys/dev/usb/cscope.out
Index: sys/dev/usb/usbdi.c
===
RCS file: /cvsroot/src/sys/dev/usb/usbdi.c,v
retrieving revision 1.181
diff -u -p -r1.181 usbdi.c
--- sys/dev/usb/usbdi.c 10 Jan 2019 22:13:07 -  1.181
+++ sys/dev/usb/usbdi.c 12 Feb 2019 18:51:28 -
@@ -884,9 +884,13 @@ usbd_ar_pipe(struct usbd_pipe *pipe)
USBHIST_LOG(usbdebug, "pipe = %#jx xfer = %#jx "
"(methods = %#jx)", (uintptr_t)pipe, (uintptr_t)xfer,
(uintptr_t)pipe->up_methods, 0);
-   /* Make the HC abort it (and invoke the callback). */
-   pipe->up_methods->upm_abort(xfer);
-   /* XXX only for non-0 usbd_clear_endpoint_stall(pipe); */
+   if (xfer->ux_state == USBD_NOT_STARTED) {
+   SIMPLEQ_REMOVE_HEAD(>up_queue, ux_next);
+   } else {
+   /* Make the HC abort it (and invoke the callback). */
+   pipe->up_methods->upm_abort(xfer);
+   /* XXX only for non-0 usbd_clear_endpoint_stall(pipe); 
*/
+   }
}
pipe->up_aborting = 0;
return USBD_NORMAL_COMPLETION;


Re: USB error checking

2018-12-29 Thread Nick Hudson

On 28/12/2018 17:38, Robert Swindells wrote:


I posted a few weeks ago that I could reboot my Pinebook by doing:

% cat /dev/video0 > foo.jpg

I have now got it to drop into DDB and found that it is triggering an
assertion in sys/arch/arm/arm32/bus_dma.c:_bus_dmamap_sync().

 KASSERTMSG(len > 0 && offset + len <= map->dm_mapsize,
 "len %lu offset %lu mapsize %lu",
 len, offset, map->dm_mapsize);

with len = 0.



The attached should avoid the KASSERT. It's interesting as something 
must be requesting a ZLP (or there's a bug).  An ECHI_DEBUG/ehcidebug=1 
log when it happens would be interesting.


Thanks,
Nick

Index: sys/dev/usb/ehci.c
===
RCS file: /cvsroot/src/sys/dev/usb/ehci.c,v
retrieving revision 1.265
diff -u -p -r1.265 ehci.c
--- sys/dev/usb/ehci.c  18 Sep 2018 02:00:06 -  1.265
+++ sys/dev/usb/ehci.c  29 Dec 2018 09:42:25 -
@@ -4025,8 +4025,9 @@ ehci_device_bulk_done(struct usbd_xfer *
 
KASSERT(sc->sc_bus.ub_usepolling || mutex_owned(>sc_lock));
 
-   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
-   rd ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
+   if (xfer->ux_length)
+   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
+   rd ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
 
DPRINTF("length=%jd", xfer->ux_actlen, 0, 0, 0);
 }
@@ -4242,8 +4243,9 @@ ehci_device_intr_done(struct usbd_xfer *
 
endpt = epipe->pipe.up_endpoint->ue_edesc->bEndpointAddress;
isread = UE_GET_DIR(endpt) == UE_DIR_IN;
-   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
-   isread ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
+   if (xfer->ux_length)
+   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
+   isread ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
 }
 
 //
@@ -4608,8 +4610,9 @@ ehci_device_fs_isoc_done(struct usbd_xfe
epipe->isoc.cur_xfers--;
ehci_remove_sitd_chain(sc, exfer->ex_itdstart);
 
-   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
-   BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
+   if (xfer->ux_length)
+   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
+   BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
 }
 
 
@@ -5000,6 +5003,7 @@ ehci_device_isoc_done(struct usbd_xfer *
 
epipe->isoc.cur_xfers--;
ehci_remove_itd_chain(sc, exfer->ex_sitdstart);
-   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
-   BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
+   if (xfer->ux_length)
+   usb_syncmem(>ux_dmabuf, 0, xfer->ux_length,
+   BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
 }


Re: fdtbus_intr_establish

2018-10-20 Thread Nick Hudson

On 20/10/2018 16:55, yarl-bau...@mailoo.org wrote:

Am I wrong?


diff --git a/sys/dev/fdt/fdt_intr.c b/sys/dev/fdt/fdt_intr.c
index fb0e56f76f6b..282d2ceb2710 100644
--- a/sys/dev/fdt/fdt_intr.c
+++ b/sys/dev/fdt/fdt_intr.c
@@ -184,7 +184,7 @@ fdtbus_intr_disestablish(int phandle, void *cookie)
  }
  }

-    if (ic != NULL)
+    if (ic == NULL)
  panic("%s: interrupt handle not valid", __func__);

  return ic->ic_funcs->disestablish(ic->ic_dev, cookie);



Fixed.

Thanks,
Nick


Re: Reboot resistant USB bug

2018-10-16 Thread Nick Hudson

On 11/10/2018 15:11, Emmanuel Dreyfus wrote:

Hello

On both netbsd-8 and -current, I have a problem with USB devices that
get stuck in a non-functionning state even after a reboot.

This happens after interrupting transfer with different NFC readers
from different vendors, and the only way to recover the device is
to power-cycle it. I wonder if there could be a missing step in the
way we initialize USB devices that could explain that situation.

Once I have a device in bad state, on reboot I get (usb0 chilren omitted):
xhci0 at pci0 dev 20 function 0: vendor 8086 product a36d (rev. 0x10)
xhci0: interrupting at msi0 vec 0
xhci0: xHCI version 1.10 not known to be supported
usb0 at xhci0: USB revision 3.0
usb1 at xhci0: USB revision 2.0
uhub1 at usb1: vendor 8086 (0x8086) xHCI Root Hub (), class 9/0, rev 
2.00/1.00, addr 0
uhub1: 16 ports with 16 removable, self powered
uhub1: port 5, set config at addr 2 failed
uhub1: device problem, disabling port 5

I do not think the problem is specific to xhci, since I also observed it
on a uhci based system.

The calling stack leading to the set config failed is:
usbd_reattach_device() at netbsd:usbd_reattach_device
xhci_new_device() at netbsd:xhci_new_device+0xfb2
usbd_new_device() at netbsd:usbd_new_device+0x83
uhub_explore() at netbsd:uhub_explore+0xca3
usb_discover() at netbsd:usb_discover+0x82
usb_event_thread() at netbsd:usb_event_thread+0x55

I have a huge trace of usb/uhub/xhci debug output, where I can see
that the kernale is able to pull descriptors from the device:
04.188673 usbd_reload_device_desc#4@1: bLength18
04.188673 usbd_reload_device_desc#4@1: bDescriptorType 1
04.188673 usbd_reload_device_desc#4@1: bcdUSB   2.00
04.188673 usbd_reload_device_desc#4@1: bDeviceClass0
04.188673 usbd_reload_device_desc#4@1: bDeviceSubClass 0
04.188673 usbd_reload_device_desc#4@1: bDeviceProtocol 0
04.188673 usbd_reload_device_desc#4@1: bMaxPacketSize0 8
04.188673 usbd_reload_device_desc#4@1: idVendor0xcc 0x4
04.188673 usbd_reload_device_desc#4@1: idProduct   0x33 0x25
04.188673 usbd_reload_device_desc#4@1: bcdDevice1.00
04.188673 usbd_reload_device_desc#4@1: iManufacturer   1
04.188673 usbd_reload_device_desc#4@1: iProduct2
04.188673 usbd_reload_device_desc#4@1: iSerial 0
04.188673 usbd_reload_device_desc#4@1: bNumConfigurations  1

But the trace finishes with:
04.195189 usbd_get_config_desc#4@1: confidx=0, bad desc len=120 type=120
04.195189 usbd_set_config_index#4@1: get_config_desc=4
04.195189 usbd_probe_and_attach#2@1: port 5, set config at addr 2 failed, er
ror=4

Any hint?


Can you make the trace available somewhere, please?

Nick



Re: Reboot resistant USB bug

2018-10-16 Thread Nick Hudson

On 16/10/18 16:00, Christos Zoulas wrote:

In article <20181016122719.gi16...@homeworld.netbsd.org>,
Emmanuel Dreyfus   wrote:

On Thu, Oct 11, 2018 at 02:11:22PM +, Emmanuel Dreyfus wrote:

On both netbsd-8 and -current, I have a problem with USB devices that
get stuck in a non-functionning state even after a reboot.


I investigated a lot: in my example, the pn533 chip seems to corrupts
its USB config, interface and endpoint descriptors. They contain
garbage, and on reboot the kernel cannot figure enough  about the
device, and disable the USB port.

But if I detect the condition in usbd_get_desc() and inject
fake descriptors (see below), the device is correctly attached
on reboot, and it works again. Is it an acceptable workaround?

[snip]



Isn't there a way to reset it?


Doesn't the hub power the port down? The bug is probably in uhub.c



christos




Nick


Re: mutex vs turnstile

2018-01-18 Thread Nick Hudson

On 01/09/18 03:30, Mateusz Guzik wrote:

Some time ago I wrote about performance problems when doing high -j
build.sh and made few remarks about mutex implementation.

TL;DR for that one was mostly that cas returns the found value, so
explicit re-reads can be avoided. There are also avoidable explicit
barriers (yes, I know about the old Opteron errata).

I had another look and have a remark definitely worth looking at (and
easy to fix). Unfortunately I lost my test jig so can't benchmark.

That said, here it is:

After it is is concluded that lock owner sleeps:


itym... after it is concluded that the thread should sleep as the lock 
is owned (by another lwp) and the owner is not on cpu.




 ts = turnstile_lookup(mtx);
 /*
  * Once we have the turnstile chain interlock, mark the
  * mutex has having waiters.  If that fails, spin again:
  * chances are that the mutex has been released.
  */
 if (!MUTEX_SET_WAITERS(mtx, owner)) {
 turnstile_exit(mtx);
 owner = mtx->mtx_owner;
 continue;
 }

Note that the lock apart from being free, can be:
1. owned by the same owner, which is now running

In this case the bit is set spuriously and triggers slow path
unlock.


Recursive locks aren't allowed so I don't follow what you mean here.

    544         if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
    545             MUTEX_ABORT(mtx, "locking against myself");
    546


2. owned by a different owner, which is NOT running


Is this still a possibility given the above?

Nick



Re: struct ifnet locking [was Re: IFEF_MPSAFE]

2017-12-21 Thread Nick Hudson

On 12/19/17 08:21, Ryota Ozaki wrote:

On Tue, Dec 19, 2017 at 4:52 PM, Nick Hudson <nick.hud...@gmx.co.uk> wrote:

On 19/12/2017 03:43, Ryota Ozaki wrote:



BTW I committed a change that disables IFEF_MPSAFE by default on
all interfaces because it seems that IFEF_MPSAFE requires additional
changes to work safely with it. We should enable it by default if an
interface is guaranteed to be safe.

What additional changes?

For example, avoid updating if_flags (and reading it and depending on
the result) in Tx/Rx paths and using if_watchdog.


Ah, I see.

Do we have a model driver yet?

Thanks,
NIck


Re: xhci spec (mis?)interpretation on hubs

2017-12-20 Thread Nick Hudson

On 12/20/17 12:18, Sprow wrote:

In article <3ef1728a-54fb-6c89-5634-e39ca9808...@netbsd.org>,
    Nick Hudson <sk...@netbsd.org> wrote:




How so?

It used to read

#define IS_TTHUB(dd) \
 ((dd)->bDeviceProtocol == UDPROTO_HSHUBSTT || \
  (dd)->bDeviceProtocol == UDPROTO_HSHUBMTT)

then I applied

-#define IS_TTHUB(dd) \
-((dd)->bDeviceProtocol == UDPROTO_HSHUBSTT || \
+#define IS_MTTHUB(dd) \

leaving me with

#define IS_MTTHUB(dd) \
  (dd)->bDeviceProtocol == UDPROTO_HSHUBMTT)

which is missing a bracket.

I must have sent you a bad patch... Here's what's committed...

1.83 
<http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/dev/usb/xhci.c?only_with_tag=MAIN#rev1.83> ! skrll3188: #define IS_MTTHUB(dd) \

!  3189:      ((dd)->bDeviceProtocol == UDPROTO_HSHUBMTT)

Nick




Re: xhci spec (mis?)interpretation on hubs

2017-12-20 Thread Nick Hudson

On 12/19/17 20:32, Sprow wrote:

In article <5fc73ccd-26a2-7919-17e3-8098ccb3f...@netbsd.org>,
    Nick Hudson <sk...@netbsd.org> wrote:

On 12/17/17 10:40, Nick Hudson wrote:

On 12/08/17 18:25, Sprow wrote:

I've been having a look on a USB analyser at a class of hubs that
don't seem to work on a driver based on
src/sys/dev/usb/xhci.c

Comparing the dialogue for a non XHCI controller I see the issue is
that the cascade of hubs is HS->FS->LS. The GetDescriptor of the LS
device never makes it through.

Can you try this patch?

I think there's a missing '(' on the IS_MTTHUB() macro.


How so?


The check around line 3201 still needs the '!ishub' qualifier in my view,
because the else could be reached if the speed/IS_MTTHUB checks fail. Even if
it's logically redundant it certainly makes it easier to read, and this is
single use non speed critical code.


The "FS/LS" device can be a hub so it's incorrect to test for !ishub, I 
think.





Functionally though the problematic class of HS->FS->LS devices I have to
hand all work now, so it's a nice improvement,


Good stuff.

Pullup #459 submitted for netbsd-8. I'll do one for netbsd-7 too.

Nick


Re: xhci spec (mis?)interpretation on hubs

2017-12-20 Thread Nick Hudson

On 12/17/17 10:40, Nick Hudson wrote:

On 12/08/17 18:25, Sprow wrote:

Hi,
I've been having a look on a USB analyser at a class of hubs that 
don't seem

to work on a driver based on
   src/sys/dev/usb/xhci.c

Comparing the dialogue for a non XHCI controller I see the issue is 
that the
cascade of hubs is HS->FS->LS. The GetDescriptor of the LS device 
never makes
it through. It happens to be a KVM which presents a fake 
keyboard/mouse HID
plugged into port 3 of a 4 port (FS) hub, which I've then plugged 
into a HS
hub, which is itself attached to the root hub (ie. the XHCI root hub 
is in

USB2 mode).


Can you try this patch?

Thanks,
Nick
Index: sys/dev/usb/xhci.c
===
RCS file: /cvsroot/src/sys/dev/usb/xhci.c,v
retrieving revision 1.82
diff -u -p -r1.82 xhci.c
--- sys/dev/usb/xhci.c	19 Dec 2017 16:04:27 -	1.82
+++ sys/dev/usb/xhci.c	19 Dec 2017 16:08:27 -
@@ -3138,6 +3138,7 @@ xhci_setup_tthub(struct usbd_pipe *pipe,
 	struct usbd_port *myhsport = dev->ud_myhsport;
 	usb_device_descriptor_t * const dd = >ud_ddesc;
 	uint32_t speed = dev->ud_speed;
+	uint8_t rhaddr = dev->ud_bus->ub_rhaddr;
 	uint8_t tthubslot, ttportnum;
 	bool ishub;
 	bool usemtt;
@@ -3159,8 +3160,8 @@ xhci_setup_tthub(struct usbd_pipe *pipe,
 	 *   parent hub is not HS hub ||
 	 *   attached to root hub.
 	 */
-	if (myhsport && myhub && myhub->ud_depth &&
-	myhub->ud_speed == USB_SPEED_HIGH &&
+	if (myhsport &&
+	myhsport->up_parent->ud_addr != rhaddr &&
 	(speed == USB_SPEED_LOW || speed == USB_SPEED_FULL)) {
 		ttportnum = myhsport->up_portno;
 		tthubslot = myhsport->up_parent->ud_addr;
@@ -3185,29 +3186,30 @@ xhci_setup_tthub(struct usbd_pipe *pipe,
 		DPRINTFN(4, "nports=%jd ttt=%jd", hd->bNbrPorts, ttt, 0, 0);
 	}
 
-#define IS_TTHUB(dd) \
-((dd)->bDeviceProtocol == UDPROTO_HSHUBSTT || \
+#define IS_MTTHUB(dd) \
  (dd)->bDeviceProtocol == UDPROTO_HSHUBMTT)
 
 	/*
 	 * MTT flag is set if
-	 * 1. this is HS hub && MTT is enabled
-	 *  or
-	 * 2. this is not hub && this is LS or FS device &&
-	 *MTT of parent HS hub (and its parent, too) is enabled
+	 * 1. this is HS hub && MTTs are supported and enabled;  or
+	 * 2. this is LS or FS device && there is a parent HS hub where MTTs
+	 *are supported and enabled.
+	 *
+	 * XXX enabled is not tested yet
 	 */
-	if (ishub && speed == USB_SPEED_HIGH && IS_TTHUB(dd))
+	if (ishub && speed == USB_SPEED_HIGH && IS_MTTHUB(dd))
 		usemtt = true;
-	else if (!ishub && (speed == USB_SPEED_LOW || speed == USB_SPEED_FULL) &&
-	 myhub && myhub->ud_depth && myhub->ud_speed == USB_SPEED_HIGH &&
-	 myhsport && IS_TTHUB(>up_parent->ud_ddesc))
+	else if ((speed == USB_SPEED_LOW || speed == USB_SPEED_FULL) &&
+	myhsport &&
+	myhsport->up_parent->ud_addr != rhaddr &&
+	IS_MTTHUB(>up_parent->ud_ddesc))
 		usemtt = true;
 	else
 		usemtt = false;
 	DPRINTFN(4, "class %ju proto %ju ishub %jd usemtt %jd",
 	dd->bDeviceClass, dd->bDeviceProtocol, ishub, usemtt);
 
-#undef IS_TTHUB
+#undef IS_MTTHUB
 
 	cp[0] |=
 	XHCI_SCTX_0_HUB_SET(ishub ? 1 : 0) |




Re: struct ifnet locking [was Re: IFEF_MPSAFE]

2017-12-18 Thread Nick Hudson

On 19/12/2017 03:43, Ryota Ozaki wrote:



BTW I committed a change that disables IFEF_MPSAFE by default on
all interfaces because it seems that IFEF_MPSAFE requires additional
changes to work safely with it. We should enable it by default if an
interface is guaranteed to be safe.

What additional changes?

Thanks,
Nick


Re: xhci spec (mis?)interpretation on hubs

2017-12-17 Thread Nick Hudson

On 12/08/17 18:25, Sprow wrote:

Hi,
I've been having a look on a USB analyser at a class of hubs that don't seem
to work on a driver based on
   src/sys/dev/usb/xhci.c

Comparing the dialogue for a non XHCI controller I see the issue is that the
cascade of hubs is HS->FS->LS. The GetDescriptor of the LS device never makes
it through. It happens to be a KVM which presents a fake keyboard/mouse HID
plugged into port 3 of a 4 port (FS) hub, which I've then plugged into a HS
hub, which is itself attached to the root hub (ie. the XHCI root hub is in
USB2 mode).

Looking in the XHCI spec I think I see the problem. The text in table 59 is
badly worded, it says
   "If this device is LS/FS and connected through a HS hub, then this field
contains...of the parent HS hub"

The 'parent' isn't the immediately adjacent (as in parent/child relationship)

hub, I think the operative word is *through*, and it really means the first
ancestor which is HS, but not necessarily adjacent. The footnote (a) sort of
says that, by defining the 'parent' as the point where there's a transition
from HS to FS/LS signalling.

For example, with
   HS(1)->HS(2)->FS(3)->FS(4)->FS(5)->LS
it would be hub 2, not 5.

Looking at the driver we have:

if (myhsport && myhub && myhub->ud_depth &&
myhub->ud_speed == USB_SPEED_HIGH &&
(speed == USB_SPEED_LOW || speed == USB_SPEED_FULL)) {
ttportnum = myhsport->up_portno;
tthubslot = myhsport->up_parent->ud_addr;
} else {
ttportnum = 0;
tthubslot = 0;
}

The check of myhub->ud_speed looks wrong, because that's checking the
adjacent hub (5 in my example). As a result the expression evaluates to false
and the ttportnum & tthubslot are 0, which blocks the controller from routing
the GetDescriptor properly.


I didn't look too closely yet, but I'm pretty sure the upstream HS hub 
is required here as you stated.


Nick


Re: struct ifnet locking [was Re: IFEF_MPSAFE]

2017-12-14 Thread Nick Hudson



On 14/12/2017 10:48, Ryota Ozaki wrote:

On Fri, Dec 8, 2017 at 7:53 PM, Nick Hudson <sk...@netbsd.org> wrote:




Not sure I follow this. I think we agree that the driver should not use
if_flags in the rx/tx path (anymore).

Yes. Drivers should provide their own method.


Great.



Anyway I updated the document that reflects recent changes:
   http://www.netbsd.org/~ozaki-r/ifnet-locks.v2.diff


Some wording improvement suggestions...

@@ -391,11 +429,33 @@ typedef struct ifnet {
 #defineIFEF_NO_LINK_STATE_CHANGE   __BIT(1)/* doesn't use 
link state interrupts */
 
 /*

- * The following if_XXX() handlers don't take KERNEL_LOCK if the interface
- * is set IFEF_MPSAFE:
- *   - if_start
- *   - if_output
- *   - if_ioctl
+ * The guidline to enable IFEF_MPSAFE on an interface

The guidelines for converting an interface to IFEF_MPSAFE are as follows

+ * Enabling IFEF_MPSAFE on an interface suppress taking KERNEL_LOCK
+ * on the following handlers:

Enabling IFEF_MPSAFE on an interface suppresses taking KERNEL_LOCK when
calling the following handlers:

Thanks for working on this.

Nick


Re: struct ifnet locking [was Re: IFEF_MPSAFE]

2017-12-08 Thread Nick Hudson

On 12/06/17 11:11, Ryota Ozaki wrote:

On Tue, Nov 28, 2017 at 6:40 PM, Ryota Ozaki <ozaki.ry...@gmail.com> wrote:

On Mon, Nov 27, 2017 at 12:24 AM, Nick Hudson <sk...@netbsd.org> wrote:

On 11/17/17 07:44, Ryota Ozaki wrote:



[snip]

Hi,

(Sorry for late replying. I was sick in bed for days...)


Can you document the protection requirements of the struct ifnet members,
e.g. if_flags.

Ideally by annotating it like struct proc

http://src.illumos.org/source/xref/netbsd-src/sys/sys/proc.h#230

OK. I'm trying to add annotations like that.


It's my turn to say sorry for a late reply... Sorry and thanks for working on 
this.



Some more changes are needed though, I fixed some race conditions on ifnet
and wrote the lock requirements of ifnet:
http://www.netbsd.org/~ozaki-r/ifnet-locks.diff

There remain some unsafe members. Those for carp, agr and pf should be fixed
when making them MP-safe. Another remainder is a set of statistic counters,
which will be MP-safe (by making them per-CPU counters) sooner or later.

if_flags should be modified with holding if_ioct_lock. If an interface enables
IEFE_MPSAFE, the interface must meet the contract, which enforces the
interface to update if_flags only in XXX_ioctl and also the interface has to
give up using IFF_OACTIVE flag which doesn't suit for multi-core systems
and hardware multi-queue NICs.


I'd pretty much come to the same conclusion, but wanted to make sure we 
shared the same understanding.



I'm not sure yet we have to do
synchronization between updates and reads on if_flags. (Anyway we should
NOT do that because the synchronization will hurt performance.)


Not sure I follow this. I think we agree that the driver should not use 
if_flags in the rx/tx path (anymore).


Thanks,
Nick




Re: kernel condvars: how to use?

2017-12-08 Thread Nick Hudson

On 12/08/17 09:04, Nick Hudson wrote:

On 12/08/17 03:26, Mouse wrote:


[Brian Buhrow]

1.  [...].  Mutexes that use spin locks can't be used in interrupt
context.

Sure you don't have that backwards?  I _think_ mutex(9) says that spin
mutexes are the only ones that _can_ be used from an interrupt.


Brain did get it backwards. To quote mutex(9)...


My brain got Brian backwards...



   IPL_VM, IPL_SCHED, IPL_HIGH

 A spin mutex will be returned.  Spin mutexes provide 
mutual

 exclusion between LWPs, and between LWPs and interrupt
handlers.

Nick






Re: kernel condvars: how to use?

2017-12-08 Thread Nick Hudson

On 12/08/17 03:26, Mouse wrote:


[Brian Buhrow]

1.  [...].  Mutexes that use spin locks can't be used in interrupt
context.

Sure you don't have that backwards?  I _think_ mutex(9) says that spin
mutexes are the only ones that _can_ be used from an interrupt.


Brain did get it backwards. To quote mutex(9)...

   IPL_VM, IPL_SCHED, IPL_HIGH

 A spin mutex will be returned.  Spin mutexes provide 
mutual

 exclusion between LWPs, and between LWPs and interrupt
handlers.

Nick



struct ifnet locking [was Re: IFEF_MPSAFE]

2017-11-26 Thread Nick Hudson

On 11/17/17 07:44, Ryota Ozaki wrote:

On Wed, Nov 15, 2017 at 1:53 PM, Ryota Ozaki  wrote:

On Fri, Nov 10, 2017 at 6:35 PM, Ryota Ozaki  wrote:

Hi,

http://www.netbsd.org/~ozaki-r/IFEF_MPSAFE.diff

I'm going to commit the above change that integrates
IFEF_OUTPUT_MPSAFE and IFEF_START_MPSAFE flags into
IFEF_MPSAFE.

The motivation is to not waste if_extflags bits. I'm now
trying to make if_ioctl() hold KERNEL_LOCK selectively
for some reasons as well as if_start() and if_output().

BTW this is a patch for this plan:
   http://www.netbsd.org/~ozaki-r/if_ioctl-no-KERNEL_LOCK.diff

It removes KERNEL_LOCK for if_ioctl from soo_ioctl and
selectively takes it in doifioctl. To this end, some fine-grain
KERNEL_LOCKs have to be added where calling components/functions
that aren't MP-safe.

Revised rebased on latest NET_MPSAFE macros. The new patch also provides
similar macros:
   http://www.netbsd.org/~ozaki-r/if_ioctl-no-KERNEL_LOCK.revised.diff

   ozaki-r


Hi,

Can you document the protection requirements of the struct ifnet 
members, e.g. if_flags.


Ideally by annotating it like struct proc

http://src.illumos.org/source/xref/netbsd-src/sys/sys/proc.h#230

Thanks,

Nick



Re: kmodule: absolute symbols

2017-11-04 Thread Nick Hudson

Hi,


Index: sys/sys/kobj.h
===
RCS file: /cvsroot/src/sys/sys/kobj.h,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -p -r1.16 -r1.17
--- sys/sys/kobj.h  13 Aug 2011 21:04:07 -  1.16
+++ sys/sys/kobj.h  3 Nov 2017 09:59:07 -   1.17
@@ -44,7 +44,7 @@ int   kobj_stat(kobj_t, vaddr_t *, size_t
 int    kobj_find_section(kobj_t, const char *, void **, size_t *);

 /* MI-MD interface. */
-uintptr_t  kobj_sym_lookup(kobj_t, uintptr_t);
+int    kobj_sym_lookup(kobj_t, uintptr_t, uintptr_t *);
 int    kobj_reloc(kobj_t, uintptr_t, const void *, bool, bool);
 int    kobj_machdep(kobj_t, void *, size_t, bool);

This broke the arm builds

Why uintptr_t * and not Elf_Addr *?

Nick



Re: USB fixes hopefully getting in to -8

2017-10-03 Thread Nick Hudson

On 10/03/17 00:51, John Klos wrote:

Hi,

I have a 2 TB drive connected via USB to a Raspberry Pi 2 which is 
running netbsd-8. I tried five times, unsuccessfully, to copy a 200 
gigabyte file via scp to the drive. Each time the Pi locked up, either 
while just copying or while doing things in other ssh sessions. It 
seemed to coincide at least twice with either logging out or trying to 
log in.


Since ethernet is on USB on the Pi along with the disk, I decided to 
try an 8.99.3 kernel compiled from yesterday's sources. The scp worked 
without any issue at all, even with other things going on like cvs 
updates and compiling.


Does anyone who might know what's been fixed know if the changes are 
going to be pulled in to -8?


There are no changes in sys/dev/usb worth note. Maybe git bisect to find 
out what fixed it?


Nick


Re: vrele vs. syncer deadlock

2016-12-19 Thread Nick Hudson

On 12/12/16 09:55, J. Hannken-Illjes wrote:

On 11 Dec 2016, at 22:33, Nick Hudson <sk...@netbsd.org> wrote:

On 12/11/16 21:05, J. Hannken-Illjes wrote:

On 11 Dec 2016, at 21:01, David Holland <dholland-t...@netbsd.org> wrote:

On a low-memory machine Nick ran into the following deadlock:

  (a) rename -> vrele on child -> inactive -> truncate -> getblk ->
  no memory in buffer pool -> wait for syncer
  (b) syncer waiting for locked parent vnode from the rename



Where is the syncer waiting for the parent?

db>  bt/a  8ff28060
pid  0.37  at  0x98041096
0x980410961bb0:  kernel_text+dc  (0,0,0,0)  ra  803ad484  sz  0
0x980410961bb0:  mi_switch+1c4  (0,0,0,0)  ra  803a9ef8  sz  96
0x980410961c10:  sleepq_block+b0  (0,0,0,0)  ra  803b8edc  sz  64
0x980410961c50:  turnstile_block+2e4  (0,0,0,0)  ra  803a487c  sz  
96
0x980410961cb0:  rw_enter+17c  (0,0,0,0)  ra  8044862c  sz  112
0x980410961d20:  genfs_lock+8c  (0,0,0,0)  ra  8043fd60  sz  48
0x980410961d50:  VOP_LOCK+30  (8049d4c8,2,0,0)  ra  
80436c8c  sz  48
0x980410961d80:  vn_lock+94  (8049d4c8,2,0,0)  ra  803367d8 
 sz  64
0x980410961dc0:  ffs_sync+c8  (8049d4c8,2,0,0)  ra  
80428f4c  sz  112
0x980410961e30:  sched_sync+1c4  (8049d4c8,2,0,0)  ra  
80228dd0  sz  112
0x980410961ea0:  mips64r2_lwp_trampoline+18  (8049d4c8,2,0,0)  ra  
0  sz  32




Which file system?

ffs

Looks like a bug introduced by myself.  Calling ffs_sync() from the
syncer (MNT_LAZY set) will write back modified inodes only, fsync
is handled by individual synclist entries.

Some time ago I unconditionally removed LK_NOWAIT from vn_lock().
Suppose we need this patch:

RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.341
diff -p -u -2 -r1.341 ffs_vfsops.c
--- ffs_vfsops.c20 Oct 2016 19:31:32 -  1.341
+++ ffs_vfsops.c12 Dec 2016 09:45:17 -
@@ -1918,5 +1918,6 @@ ffs_sync(struct mount *mp, int waitfor,
 while ((vp = vfs_vnode_iterator_next(marker, ffs_sync_selector, )))
 {
-   error = vn_lock(vp, LK_EXCLUSIVE);
+   error = vn_lock(vp, LK_EXCLUSIVE |
+   (waitfor == MNT_LAZY ? LK_NOWAIT : 0));
 if (error) {
 vrele(vp);

Is it reproducible so you can test it?


I can't reproduce it easily. Others (mrg@ and roy@) seem to have more 
luck finding a workload to trigger the page freelist problem on erlite


Also, I saw the dh@ thought more was needed?



--
J. Hannken-Illjes - hann...@eis.cs.tu-bs.de - TU Braunschweig (Germany)



Nick


Re: vrele vs. syncer deadlock

2016-12-11 Thread Nick Hudson

On 12/11/16 21:05, J. Hannken-Illjes wrote:

On 11 Dec 2016, at 21:01, David Holland  wrote:

On a low-memory machine Nick ran into the following deadlock:

  (a) rename -> vrele on child -> inactive -> truncate -> getblk ->
  no memory in buffer pool -> wait for syncer
  (b) syncer waiting for locked parent vnode from the rename

Do you have full backtrace?


0x98041097f540:  kernel_text+dc  (0,0,0,0)  ra  803ad484  sz  0
0x98041097f540:  mi_switch+1c4  (0,0,0,0)  ra  803a9f20  sz  96
0x98041097f5a0:  sleepq_block+d8  (0,0,0,0)  ra  80377784  sz  64
0x98041097f5e0:  cv_timedwait+114  (0,0,0,0)  ra  8041bdcc  sz  64
0x98041097f620:  allocbuf+344  (0,0,0,0)  ra  8041c504  sz  128
0x98041097f6a0:  getblk+1d4  (0,0,0,0)  ra  80332b40  sz  80
0x98041097f6f0:  ffs_getblk+48  (0,0,0,0)  ra  8032eec4  sz  80
0x98041097f740:  ffs_indirtrunc+b4  (0,0,539ac0,)  ra  
8033057c  sz  192
0x98041097f800:  ffs_truncate+b44  (0,0,539ac0,)  ra  
8033bd54  sz  320
0x98041097f940:  ufs_truncate_retry+c4  (0,0,539ac0,)  ra  
8033bf24  sz  80
0x98041097f990:  ufs_inactive+184  (0,0,539ac0,)  ra  
8043fce8  sz  48
0x98041097f9c0:  VOP_INACTIVE+30  
(8049d538,98041097f9f0,539ac0,)  ra  
804338ec  sz  48
0x98041097f9f0:  vrelel+534  
(8049d538,98041097f9f0,539ac0,)  ra  
804449a8  sz  96
0x98041097fa50:  genfs_rename_exit+108  
(8049d538,98041097f9f0,539ac0,)  ra  
80445c3c  sz  48
0x98041097fa80:  genfs_sane_rename+414  
(8049d538,98041097f9f0,539ac0,98041097fb68)  ra  
8033da84  sz  192
0x98041097fb40:  ufs_sane_rename+44  
(8049d538,98041097f9f0,539ac0,98041097fb68)  ra  
80445600  sz  80
0x98041097fb90:  genfs_insane_rename+168  
(8049d538,98041097f9f0,539ac0,98041097fb68)  ra  
8043fa28  sz  96
0x98041097fbf0:  VOP_RENAME+40  
(8049d6c8,8006d678,98041097fd08,8fd57690)  ra  
8042b8cc  sz  80
0x98041097fc40:  do_sys_renameat+454  
(8049d6c8,8006d678,98041097fd08,8fd57690)  ra  
80243e5c  sz  336
0x98041097fd90:  netbsd32_rename+24  
(8049d6c8,8006d678,98041097fd08,8fd57690)  ra  
80234494  sz  32
0x98041097fdb0:  syscall+114  
(8049d6c8,8006d678,98041097fd08,785ea604)  ra  
802289f4  sz  240
0x98041097fea0:  mips64r2_systemcall+d4  
(8049d6c8,8006d678,98041097fd08,785ea604)  ra  785ea604  sz 
 0
PC  0x785ea604:  not  in  kernel  space




Where is the syncer waiting for the parent?


db>  bt/a  8ff28060
pid  0.37  at  0x98041096
0x980410961bb0:  kernel_text+dc  (0,0,0,0)  ra  803ad484  sz  0
0x980410961bb0:  mi_switch+1c4  (0,0,0,0)  ra  803a9ef8  sz  96
0x980410961c10:  sleepq_block+b0  (0,0,0,0)  ra  803b8edc  sz  64
0x980410961c50:  turnstile_block+2e4  (0,0,0,0)  ra  803a487c  sz  
96
0x980410961cb0:  rw_enter+17c  (0,0,0,0)  ra  8044862c  sz  112
0x980410961d20:  genfs_lock+8c  (0,0,0,0)  ra  8043fd60  sz  48
0x980410961d50:  VOP_LOCK+30  (8049d4c8,2,0,0)  ra  
80436c8c  sz  48
0x980410961d80:  vn_lock+94  (8049d4c8,2,0,0)  ra  803367d8 
 sz  64
0x980410961dc0:  ffs_sync+c8  (8049d4c8,2,0,0)  ra  
80428f4c  sz  112
0x980410961e30:  sched_sync+1c4  (8049d4c8,2,0,0)  ra  
80228dd0  sz  112
0x980410961ea0:  mips64r2_lwp_trampoline+18  (8049d4c8,2,0,0)  ra  
0  sz  32




Which file system?


ffs


--
J. Hannken-Illjes - hann...@eis.cs.tu-bs.de - TU Braunschweig (Germany)


Nick


Re: pmap attempts at copying to executable pages both on mips and powerpc/booke

2016-09-30 Thread Nick Hudson

On 09/28/16 16:13, Rin Okuyama wrote:

On 2016/09/28 20:40, Nick Hudson wrote:



[snip]


if the mapping changes PA then the resident_count gets reduced by 
pmap_{,pte_}remove, but not increased again.


I'm working on a fix.


Thank you very much for your analysis. I'm looking forward to hearing
good news from you!

Rin


Fixed in sys/uvm/pmap/pmap.c:1.23

Nick


Re: pmap attempts at copying to executable pages both on mips and powerpc/booke

2016-09-28 Thread Nick Hudson

On 09/20/16 12:34, Rin Okuyama wrote:

[snip...]

However, unfortunately, something is still wrong. top(1) reports 
resources

of some processes are negative:

  % top
  ...
PID USERNAME PRI NICE   SIZE   RES STATE  TIME   WCPU CPU COMMAND
  ...
573 root  850  4304K -3832K nanoslp0:01  0.00% 0.00% cron
  ...



The problem is here...

https://nxr.netbsd.org/xref/src/sys/uvm/pmap/pmap.c#1275

   1275 if (pte_valid_p(opte) && pte_to_paddr(opte) != pa) {
   1276 pmap_remove(pmap, va, va + NBPG);
   1277 PMAP_COUNT(user_mappings_changed);
   1278 }
   1279
   1280 KASSERT(pte_valid_p(npte));
   1281 const bool resident = pte_valid_p(opte);
   1282 if (resident) {
   1283 update_flags |= PMAP_TLB_NEED_IPI;
   1284 } else {
   1285 pmap->pm_stats.resident_count++;
   1286 }

if the mapping changes PA then the resident_count gets reduced by 
pmap_{,pte_}remove, but not increased again.


I'm working on a fix.

Nick





Re: pmap attempts at copying to executable pages both on mips and powerpc/booke

2016-09-16 Thread Nick Hudson

On 09/16/16 14:19, Rin Okuyama wrote:

I reported a reproducible failure of KASSERT on powerpc/booke in which
destination of pmap_copy_page(9) is executable:

https://mail-index.netbsd.org/port-powerpc/2016/09/11/msg003498.html

By adding the similar KASSERTs to mips kernel, I observed the same
failure on ERLITE (evbmips64-eb):

  # uname -mpr
  7.99.38 evbmips mips64eb
  # cd /usr/pkgsrc/lang/perl5; make
  (snip)
  => Checking for portability problems in extracted files
  (snip)
  Use which C compiler? [gcc]
  execve_loadvm: check exec failed 8
  execve_loadvm: check exec failed 8
  execve_loadvm: check exec failed 8
  execve_loadvm: check exec failed 8
  Checking for GNU cc in disguise and/or its version number...
  panic: kernel diagnostic assertion 
"!VM_PAGEMD_EXECPAGE_P(VM_PAGE_TO_MD(dst_pg))" failed: file 
"/var/build/src/sys/arch/mips/mips/pmap_machdep.c", line 628

  kernel: breakpoint trap
  Stopped in pid 2328.1 (sed) at  netbsd:cpu_Debugger+0x4: jr  ra
  bdslot: nop


 Matt fixed it with src/sys/uvm/pmap/pmap.c:1.22

Nick


Re: [patch] xhci patch 20160809

2016-08-18 Thread Nick Hudson

On 08/09/16 11:09, Takahiro Hayashi wrote:

Hello,

I'll post xhci patches for HEAD.


x-addrendian.diff
+ Fix endian issue of device address.
x-linktrb2.diff
+ Put Link TRB always at the end of ring.
  Should fix ctrl xfer problem on Intel xHC.


Thanks,

Committed.

Thanks,
Nick


Re: CVS commit: src/sys

2016-07-25 Thread Nick Hudson

On 07/20/16 08:38, Martin Husemann wrote:

On Wed, Jul 20, 2016 at 12:42:47PM +0900, Ryota Ozaki wrote:

If we cannot fix the issue easily, we can disable workqueue unless
NET_MPSAFE for now.

I think it is only exposing some older mips bug, we should analyze it
(but I am currently out of ideas).


src/sys/arch/mips/mips/spl.S:1.11 helps in most places.

My cobalt (real hw) still has a problem here.




Martin



Nick


Re: CVS commit: src/sys

2016-07-14 Thread Nick Hudson

On 07/14/16 11:58, Martin Husemann wrote:

On Mon, Jul 11, 2016 at 07:37:00AM +, Ryota Ozaki wrote:

Module Name:src
Committed By:   ozaki-r
Date:   Mon Jul 11 07:37:00 UTC 2016

Modified Files:
src/sys/net: route.c
src/sys/netinet: ip_flow.c
src/sys/netinet6: ip6_flow.c nd6.c

Log Message:
Run timers in workqueue

No idea why, but this change breaks booting my ERLIT-3 with root on NFS.
The machine hangs hard after:

timecounter: Timecounter "clockinterrupt" frequency 100 Hz quality 0
timecounter: Timecounter "mips3_cp0_counter" frequency 5 Hz quality 100

(that is just after going !cold, I guess)


It also causes me a problem on my cobalt

Copyright  (c)  1996,  1997,  1998,  1999,  2000,  2001,  2002,  2003,  2004,  2005,   
2006,  2007,  2008,  2009,  2010,  2011,  2012,  2013,  2014,  2015,  2016
The  NetBSD  Foundation,  Inc.   All  rights  reserved.   
Copyright  (c)  1982,  1986,  1989,  1991,  1993  
The  Regents  of  the  University  of  California.   All  rights  reserved.  
   
NetBSD  7.99.34  (GENERIC)  #8:  Thu  Jul  14  06:19:34  BST  2016   
nick@zoom:/wrk/binary/iij/obj.cobalt/wrk/binary/iij/netbsd-src/sys/arch/cobalt/compile/GENERIC  
Cobalt  RaQ  2
total  memory  =  256  MB

avail  memory  =  247  MB
mainbus0  (root)
com0  at  mainbus0  addr  0x1c80  level  3:  st16650a,  working  fifo
com0:  console
cpu0  at  mainbus0:  QED  RM5200  CPU  (0x28a0)  Rev.  10.0  with  built-in  
FPU  Rev.  10.0
cpu0:  48  TLB  entries,  16MB  max  page  size
cpu0:  32KB/32B  2-way  set-associative  L1  instruction  cache
cpu0:  32KB/32B  2-way  set-associative  write-back  L1  data  cache
mcclock0  at  mainbus0  addr  0x1070:  mc146818  compatible  time-of-day  
clock
panel0  at  mainbus0  addr  0x1f00
gt0  at  mainbus0  addr  0x1400
pci0  at  gt0
pchb0  at  pci0  dev  0  function  0:  Galileo  GT-64111  System  Controller,  
rev  1
tlp0  at  pci0  dev  7  function  0:  DECchip  21143  Ethernet,  pass  4.1
tlp0:  interrupting  at  level  1
tlp0:  Ethernet  address  00:10:e0:00:5c:a6
lxtphy0  at  tlp0  phy  1:  LXT970  10/100  media  interface,  rev.  3
lxtphy0:  10baseT,  10baseT-FDX,  100baseTX,  100baseTX-FDX,  auto
pcib0  at  pci0  dev  9  function  0
pcib0:  vendor  1106  product  0586,  rev  39
viaide0  at  pci0  dev  9  function  1
viaide0:  VIA  Technologies  VT82C586  (Apollo  VP)  ATA33  controller
viaide0:  primary  channel  interrupting  at  irq  14
atabus0  at  viaide0  channel  0
viaide0:  secondary  channel  interrupting  at  irq  15
atabus1  at  viaide0  channel  1
vendor  1106  product  3038  (USB  serial  bus,  UHCI,  revision  0x02)  at  
pci0  dev  9  function  2  not  configured

telnet>  send  brk
kernel:  breakpoint  trap
Stopped  in  pid  0.2  (system)  at   netbsd:cpu_Debugger+0x4: jr   
ra
bdslot:  nop
db>  set  $lines=0
$lines   18  =  0
db>  set  $maxwidth=0
$maxwidth50  =  0
db>  ps/l
PID LID  S  CPU  FLAGSSTRUCT  LWP  *NAME  WAIT
1 1  30  08fef0300init  lbolt
030  302008fef0020   unpgc  unpgc
029  302008feabd00   nd6_timer  
nd6_timer
028  302008feaba20rt_timer  rt_timer
027  302008feab740 vmem_rehash  
vmem_rehash
018  302008fef0e80 atabus1  atainitq
017  302008fef1160 atabus0  atarst
016  302008fef1440   cryptoret  crypto_w
015  302008fef1720  pmfsuspend  
pmfsuspend
014  302008fef1a00pmfevent  pmfevent
013  302008fef1ce0  sopendfree  sopendfr
012  302008ff62000nfssilly  nfssilly
011  302008ff622e0 cachegc  cachegc
010  302008ff625c0   

Re: SOSEND_LOAN problems in MIPS

2016-07-12 Thread Nick Hudson

On 07/12/16 16:07, Thor Lancelot Simon wrote:

On Sun, Jul 10, 2016 at 05:56:24PM -0700, Matt Thomas wrote:

It's very CPU dependent.  Maybe the CPUs used on your sgimips
don't have the problem.  It's related to virtual cache aliasing.

Do I understand correctly that if I build a 16K page kernel, the problem
will go away -- though I'll lose a little bit of efficiency?


yes.

That said, I think the pmap deals with virtual cache aliasing correctly now.




Thor


Nick


Re: device-major question

2016-05-11 Thread Nick Hudson

On 05/11/16 11:52, Kimihiro Nonaka wrote:

2016-05-11 15:41 GMT+09:00 Nick Hudson<sk...@netbsd.org>:


I'd be happy (as a tegra owner/user) to move hdmicec to 206. I don't think
hdmicec is use anywhere else yet.

char 206 is already used by seeprom.

- sys/conf/majors
device-major seeprom char 206 seeprom
-


heh, no idea where I got 206 from. I meant 210 - see diff.

Nick
Index: sys/conf/majors
===
RCS file: /cvsroot/src/sys/conf/majors,v
retrieving revision 1.71
diff -u -p -r1.71 majors
--- sys/conf/majors	11 May 2016 06:42:06 -	1.71
+++ sys/conf/majors	11 May 2016 10:58:32 -
@@ -55,12 +55,10 @@ device-major seeprom   char 206		   seep
 device-major dtracechar 207		   dtrace
 device-major spiflash  char 208 block 208  spiflash
 device-major lua   char 209lua
+device-major hdmicec   char 210hdmicec
 
 # 210-219 reserved for MI ws devices
 # 220-239 reserved for MI usb devices
 # 240-259 reserved for MI "std" devices
 # 260-269 reserved for MI tty devices
 # 310-339 reserved for MI storage devices
-
-# XXX conflicts with tty
-device-major hdmicec   char 260hdmicec



Re: device-major question

2016-05-11 Thread Nick Hudson

On 05/11/16 04:21, Kimihiro Nonaka wrote:

Hi,

I found hdmicec and MI com use same device-major "char 260".
Would it be acceptable?

- sys/conf/majors
device-major hdmicec   char 260hdmicec
-

- sys/conf/majors.tty
device-majorcom char 260com
-

Regards,


I'd be happy (as a tegra owner/user) to move hdmicec to 206.  I don't 
think hdmicec is use anywhere else yet.


Anyone else have an opinion?

NIck


Re: nick-nhusb merge coming soon

2016-04-14 Thread Nick Hudson

On 04/13/16 21:22, Takahiro Hayashi wrote:

Hi,


xhci(4) is still imcomplete and experimental.
It needs more work.

I summarise known problems:

+ KASSERT(sc->sc_command_addr == 0) in xhci_do_command() may fail.
+ My ASM1042 card does not work at all.  No interrupts.
  It works on FreeBSD and OpenBSD.



http://nxr.netbsd.org/xref/src-freebsd/sys/dev/usb/controller/xhci.c#1226

Is that relevant here?



+ Implementation of upm_abort is imcomplete.

I'll try and look into this soon.


+ Link Power management is not implemented.
+ Isochronous transfer is not implemented.
+ Stream protocol is not supported.


Takers? :)


+ Address of root hub is 0 (but it works anyway).


I'll also try and look into this


+ On the xhci of Intel PCH:
  + The address assigned to device increases when reattaching device.
  + HS hub in 3.0 hub under 3.0 port is disconnected and reconnected 
every

several minutes repeatedly.


ENOHARDWARE




regards,
--
t-hash


Thanks,
Nick


Re: nick-nhusb merge coming soon

2016-04-13 Thread Nick Hudson

On 04/13/16 11:58, Dave Tyson wrote:

On Wednesday 13 Apr 2016 07:59:20 Nick Hudson wrote:

Hi,

I think the first phase of nick-nhusb is in a state ready to be
merged. I'd like to merge it in the next few days, but in the meantime
I've put some kernels for testing here:

  http://www.netbsd.org/~skrll/nick-nhusb


Please test and report success or failure.

I can provide kernels to other platforms on request.

Thanks,
Nick

Hi Nick,

I've done some testing with a couple of USB video cameras on amd64. I wanted
to see if PR 48308 could be closed.


The code is restructure to specifically avoid the type of problem 
described in PR/48308 and I

intend to close it once I've merged.


[snip]


Both cameras worked fine under nick-usb, but they also worked fine under
current 7.99.27 (GENERIC.201604110150Z) from a couple of days ago :-) so
something must have changed in current since PR 48308 was filed as I couldn't
reproduce it now.


The problem still exists in HEAD, but is hard to trigger.



I could trigger a panic if I unplugged a camera while mplayer was running:

video0: detached
uvideo0: detached
uvideo0: at uhub4 port 6 (addr 2) disconnected
audio1: detached
uaudio0: detached
uaudio0: at uhub4 port 6 (addr 2) disconnected
usb_disconnect_port: no device
panic: kernel diagnostic assertion "uvm_page_locked_p(pg)" failed: file
"/wrk/nhusb/src/sys/arch/x86/x86/pmap.c", line 3315
cpu1: Begin traceback...
vpanic() at netbsd:vpanic+0x13c
valid_user_selector() at netbsd:valid_user_selector
pmap_remove_pte() at netbsd:pmap_remove_pte+0x311
pmap_remove() at netbsd:pmap_remove+0x1ff
uvm_unmap_remove() at netbsd:uvm_unmap_remove+0x210
sys_munmap() at netbsd:sys_munmap+0x8a
syscall() at netbsd:syscall+0xbe
--- syscall (number 73) ---
7f7feb10d80a:
cpu1: End traceback...

I guess a panic in this situation is not unexpected. I have the core dump if
you want it.


sure, I'll take a look.



I'll try and get around to testing USB scanners and printers in a few days.


Great. Thanks.



Cheers,
Dave



Nick


Re: nick-nhusb merge coming soon

2016-04-13 Thread Nick Hudson

On 04/13/16 08:15, Paul Goyette wrote:

On Wed, 13 Apr 2016, Nick Hudson wrote:


Hi,

I think the first phase of nick-nhusb is in a state ready to be
merged. I'd like to merge it in the next few days, but in the meantime
I've put some kernels for testing here:

   http://www.netbsd.org/~skrll/nick-nhusb


Please test and report success or failure.

I can provide kernels to other platforms on request.


Kewl!

Does this include xhci support for USB3 (ie, removal of the 
"experimental" tag)?


There is some support for USB3 which has come with the patches from 
Takahiro HAYASHI.


xhci(4) still needs quite a bit of love to be fully ready.

Thanks for all the hard work on this - it appears to have been a 
rather herculean effort...  :)



:)

Nick


Re: nick-nhusb merge coming soon

2016-04-13 Thread Nick Hudson

On 04/13/16 08:05, Taylor R Campbell wrote:

Date: Wed, 13 Apr 2016 07:59:20 +0100
From: Nick Hudson <sk...@netbsd.org>

I think the first phase of nick-nhusb is in a state ready to be
merged. I'd like to merge it in the next few days, but in the meantime
I've put some kernels for testing here:

 http://www.netbsd.org/~skrll/nick-nhusb

Cool!

Can you write a brief summary of what changed in the USB driver API?


The main change is to replace

   void *usbd_alloc_buffer(struct usbd_xfer *, u_int32_t);
   void usbd_free_buffer(struct usbd_xfer *);

   struct usbd_xfer *usbd_alloc_xfer(struct usbd_device *dev);
   usbd_status usbd_free_xfer(struct usbd_xfer *xfer);

with

   int usbd_create_xfer(struct usbd_pipe *pipe, size_t len, unsigned 
int flags,

  unsigned int nframes, struct usbd_xfer **xp)

void usbd_destroy_xfer(struct usbd_xfer *xfer)

and the pipe argument is dropped from the usbd_setup_* functions

The idea being that transfers are linked to the pipe/endpoint when 
created and the
HCD can, if required, allocate all resources for the transfer before 
being started.



Have any man pages been updated in your branch?

I have local changes I'll commit at merge time.

Nick



nick-nhusb merge coming soon

2016-04-13 Thread Nick Hudson

Hi,

I think the first phase of nick-nhusb is in a state ready to be
merged. I'd like to merge it in the next few days, but in the meantime
I've put some kernels for testing here:

http://www.netbsd.org/~skrll/nick-nhusb


Please test and report success or failure.

I can provide kernels to other platforms on request.

Thanks,
Nick


Re: asynchronous ugen(4) bulk transfers

2016-04-05 Thread Nick Hudson

On 04/04/16 08:45, Nick Hudson wrote:

On 04/04/16 05:25, Brian Buhrow wrote:

hello.  I did a lot of work on this under NetBSD-5  to allow the
libimobiledevice tools to work with iOS based Apple devices.  I have 
some

simple patches for libusb1 and a re-working of the ugen(4) driver to
address the issues you note below.  My plan is to port these fixes to
-current, but I hadn't finished that process yet.  My changes make the
libimobiledevice tools work reliably with Apple products, modulo a 
bug in

the ehci(4) and ohci(4) drivers that preclude the USB stack from sending
zero-length packets.


Not that I think ehci(4) has a problem in HEAD


Does this diff help ehci+zlp for you? Thanks to Colin Granville for the 
hint.


Nick
Index: sys/dev/usb/ehci.c
===
RCS file: /cvsroot/src/sys/dev/usb/ehci.c,v
retrieving revision 1.248
diff -u -p -r1.248 ehci.c
--- sys/dev/usb/ehci.c	13 Mar 2016 07:01:43 -	1.248
+++ sys/dev/usb/ehci.c	5 Apr 2016 06:57:13 -
@@ -2975,7 +2975,7 @@ ehci_alloc_sqtd_chain(struct ehci_pipe *
 
 		/* adjust the toggle based on the number of packets in this
 		   qtd */
-		if (((curlen + mps - 1) / mps) & 1) {
+		if (curlen == 0 || (((curlen + mps - 1) / mps) & 1)) {
 			tog ^= 1;
 			qtdstatus ^= EHCI_QTD_TOGGLE_MASK;
 		}


Re: asynchronous ugen(4) bulk transfers

2016-04-04 Thread Nick Hudson

On 04/04/16 05:25, Brian Buhrow wrote:

hello.  I did a lot of work on this under NetBSD-5  to allow the
libimobiledevice tools to work with iOS based Apple devices.  I have some
simple patches for libusb1 and a re-working of the ugen(4) driver to
address the issues you note below.  My plan is to port these fixes to
-current, but I hadn't finished that process yet.  My changes make the
libimobiledevice tools work reliably with Apple products, modulo a bug in
the ehci(4) and ohci(4) drivers that preclude the USB stack from sending
zero-length packets.


Not that I think ehci(4) has a problem in HEAD, but I fixed ohci(4) on 
my branch.



I discussed these fixes on tech-kern with the subject:

Potential problem with reading data from usb devices with ugen(4)


The two major fixes I implemented were:

1.  Make poll(2) work with ugen(4) so  you know when data is ready to be
read from or written to a device asynchronously.

2.  Ensure that each read or write call to a ugen(4) device results in a
matching USB transaction to or from the device.

If you'd like my patches to port to -current, let me know.


Please send your patches.


-thanks
-Brian


Thanks,
Nick



Re: Potential problem with reading data from usb devices with ugen(4)

2016-01-27 Thread Nick Hudson

On 01/27/16 19:10, Brian Buhrow wrote:

hello.  Although I've been silent on this issue for a while, I've
continued to make progress in getting the ugen(4) driver into a state where
it works well with the libimobiledevice tools and can reliably be used with
Apple products.  The problem I've run into now is the one that's been
troubling me for a while, but which I now have a much better handle on.
Specifically, it seems to be impossible to ask the ehci(4) or ohci(4)
drivers to generate zero-length packets.  The uhci(4) driver works
beautifully.  To clarify, both the ehci(4) and ohci(4) drivers seem to have
fixes which allow them to generate zero-length packets in the case where a
write request is equal to the exact size of an USB packet.  In that case,
they generate the required zero-length packet following the initial write
request.  The problem is that if an upper layer usb driver wants to
generate a zero-length packet as its first write request, as the ugen(4)
driver would in response to a call from a user-level process, the ehci(4)
driver, and I believe the ohci(4) driver, will fail.  the issue appears to
be how these drivers allocate memory forpassing the data to the chips
themselves.  The allocator for these two drivers appears to assume that the
length parameter for all requests will be greater than 0, so they calculate
the amount of memory required for a given request by performing a number of
arithmetic operations against the length parameter to get the required
number of buffers to service the request.  The issue is that if the length
parameter is 0, those operations result in a negative number, or a very
large number in this case, since the allocator assumes the parameter is an
unsigned integer.  As a result, the ehci(4) and ohci(4) drivers fail when
asked to send zero-length packets with a USBD_NOMEM error.
I believe the problem lies around line 2931 of sys/dev/usb/ehci.c, 
V1.238.
For completenesS I should say that I've not tested the ohci() driver
directly, but code inspection leads me to believe it has the same issue as
the ehci(4) driver, which I have tested.

I am having trouble making sense of the ehci(4) allocation code and my
attempts to come up with a patch to work around the issue have been
unsuccessful so far.  Perhaps someone can look at this section of the
ehci.c code and provide a little guidance on what's going on.  I'll
continue to  poke at it as well, but another set of eyes looking at the
issue would be much appreciated.


I see the bug(s) and will try and cook up diffs soon. They'll be against 
-current, though




-thanks
-Brian



Nick


Re: arm/arm32/pmap.c assert

2016-01-20 Thread Nick Hudson

On 01/20/16 16:16, Frank Zerangue wrote:

Thanks Nick,

Here are the pertinent options in my kernel config:
options CPU_ARM1136 # Support the ARM1136 core
options CPU_ARM11   # Support the ARM11 core
options FPU_VFP # Support the ARM1136 vfp

Build options are:
makeoptions CPUFLAGS= "-march=armv6 -mtune=arm1136j-s -mfpu=vfp 
-mfloat-abi=hard”

The IMX31 has an ARM11 r0p4 core which does not support  armv6k extensions, 
i.e. LDREX(B,H,D), STREX(B,H,D), CLREX. To proceed I implemented atomic ops for 
1 byte, 2 byte, and 4 byte operations using LDREX and STREX and do not support 
8 byte atomic ops.

I’ve discovered so far that pmap.c assumes PAGE_SIZE = 8096 in pmap_alloc_l1(), 
pmap_delete_l1(), as they use PAGE_SIZE to allocate L1 half table for user 
space. Since I specify PAGE_SIZE = 4096, what happens is that L1[0..1023] = 0 
and L1[1024..2047] = 0x thus the failed assert. When I change the 
allocation and deallocation to L1_TABLE_SIZE/2 pmap_enter() succeeds but I get 
data aborts.


Why did you change PAGE_SIZE to 4096?

The code is written assuming the PAGE_SIZE is L1PT size which is 8KB.

Nick


Re: On softints, softnet_lock and sleeping (aka ipv6 vs USB network interfaces)

2016-01-01 Thread Nick Hudson

On 01/01/16 15:21, Frank Kardel wrote:

Hi !

Is there any progress on this? I see also PR/49065 being still 
existent on an RPI2.
Also running named with 4 threads on an RPI2 together with vtund doing 
"ifconfig tunX ..."
is a sure killer. Runinng named with only one thread gets you over it 
(maybe just most of the time).


You might like to try

 http://www.netbsd.org/~skrll/usb.softint.diff

as fixing everything to not hold softnet_lock while doing USB transfers 
is a lot of
work.  The patch should help in the meantime and is probably correct 
regardless.


Changing the USB callbacks to run at IPL_SOFTSERIAL really means that the
USB callbacks should be re-checked that they provide the right protection
when calling into the subsystems they access.

Let me know if the patch fixes things for you.

Thanks,
Nick


Re: Potential problem with reading data from usb devices with ugen(4)

2015-12-29 Thread Nick Hudson

On 27/12/2015 20:00, Brian Buhrow wrote:

hello Nick.  I've been doing this work under NetBSD-5 and the ehci(4)
driver definitely doesn't behave well when sent a zero-length packet under
that branch.


Define "doesn't behave well"


  I also checked out your USB branch, as well as the -current
branch from cvs, and looking through the cvs logs, I don't see specific
notes about where the ehci(4) , uhci(4) or ohci(4) drivers were taught to
handle zero length packets.  Could you point me at the specific commits
that you  think address this issue?


http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/dev/usb/ehci.c.diff?r1=1.101=1.102=h
http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/dev/usb/ehci.c?rev=1.154.4.3=text/x-cvsweb-markup_with_tag=netbsd-5



  I plan to port these changes to the
-current tree when I'm done, but I want to get things working with some
confidence before I do that.

-thanks
-Brian


Nick


Re: On softints, softnet_lock and sleeping (aka ipv6 vs USB network interfaces)

2015-12-07 Thread Nick Hudson

On 12/06/15 16:01, Taylor R Campbell wrote:

Date: Sun, 6 Dec 2015 09:22:05 +
From: Nick Hudson <sk...@netbsd.org>

It seems to me that nd6_timer is either expecting too much of
the USB stack by expecting a synchronous interface to changing
multicast filters that doesn't sleep; or the USB stack should
provide an asynchronous update method and any failure should be
handled elsewhere.

One quick fix might be to change nd6_timer to call in6_purgeaddr in a
workqueue (or...task, if we had that).  It seems to me that
in6_purgeaddr is a relatively expensive operation (and I think there's
a bug: it calls callout_stop via nd6_dad_stop/nd6_dad_stoptimer when
it should probably call callout_halt), so calling it from a callout
doesn't seem right anyway.


Sure, but the "sleeping with softnet_lock held" problem still exists.


Another problem in the PR is that

1) CPU N (not 0) takes softnet_lock and requests a USB control transfer
(which will sleep for completion)

2) CPU 0 takes clock interrupt and nd6_timer expires.  nd6_timer starts and
tries to take softnet lock and blocks

3) CPU 0 also runs ipintr (not sure why) which takes softnet lock and locks

Aside: This is probably because ipintr gets scheduled on a specified
target CPU, not on the local CPU, in pktq_enqueue...and apparently
every caller, except for bridges, specifies CPU 0.

4) CPU 0 receives USB HC interrupt for completed control transfer from CPU N
and schedules softint process (at IPL_SOFTNET) which never runs as the lwp
is blocked in step 3)

Maybe

 290 #define IPL_SOFTUSB IPL_SOFTNET

http://nxr.netbsd.org/xref/src/sys/dev/usb/usbdi.h#290

should be changed to IPL_SOFTBIO?

As a practical matter, I don't see how that would help -- IPL_SOFTUSB
is only ever used as a mutex ipl, and IPL_SOFT* is equivalent to
IPL_NONE for the practical purposes of mutex(9).


Why do IPL_SOFT* exist? If the intention is to show usage then IPL_SOFT is
enough, right?

That aside, can softints even interrupt softints, or are the
priorities only about who goes first if two softints are scheduled
`simultaneously' (as far as softint_dispatch can discern)?


The latter, iiuc.

If so, even then, using SOFTINT_BIO instead of SOFTINT_NET wouldn't
help here -- SOFTINT_BIO is even lower-priority than SOFTINT_NET, but
you need to allow the USB HC interrupt to run (I assume you mean,
e.g., ehci_softintr?) faster than SOFTINT_NET in order to wake
usbd_transfer while a SOFTINT_NET lwp is blocked on softnet_lock.


Looking at spl(9) and the code again I meant SOFTINT_SERIAL. The idea 
would indeed be
that the USB HC softint (e.g. ehci_softintr) would run before 
SOFTINT_NET handlers.




It seems to me the deeper problem is that we ever sleeping with
softnet_lock held at all, which (a) is wrong and (b) means it is wrong
to acquire softnet_lock in a softint.


Fixes welcome :)

Thanks,
Nick


Re: Potential problem with reading data from usb devices with ugen(4)

2015-12-06 Thread Nick Hudson

On 12/05/15 02:53, Brian Buhrow wrote:

Hello.  I'm making a lot of progress over here on this improved
driver.  I have a question that I am not finding the answer to easily and
I'm hoping someone can point me in the right direction.

It's possible for USB transfers to be 16384 bytes in length.  Our USB
system seems to prefer breaking these down into 1024 byte transfers.  My
question is this:


USB transfers can be greater than 1024.

http://nxr.netbsd.org/search?q=UGEN_BBSIZE=src

I think we should look to other ugen drivers to make sure we behave 
similarly


Nick




On softints, softnet_lock and sleeping (aka ipv6 vs USB network interfaces)

2015-12-06 Thread Nick Hudson

Hi,

PR/50491 raises some questions that I need some guidance on.
Take this stack trace...

 Setting date via ntp.
 panic: assert_sleepable: softint caller=0x802e2014
 cpu2: Begin traceback...
 0xbada3c3c: netbsd:db_panic+0xc
 0xbada3c6c: netbsd:vpanic+0x1b0
 0xbada3c84: netbsd:snprintf
 0xbada3cbc: netbsd:assert_sleepable+0xb4
 0xbada3d0c: netbsd:usbd_do_request_flags_pipe+0x28
 0xbada3d34: netbsd:usbd_do_request+0x38
 0xbada3d64: netbsd:smsc_write_reg+0x60
 0xbada3d8c: netbsd:smsc_setmulti+0x100
 0xbada3dbc: netbsd:smsc_ioctl+0x124
 0xbada3e64: netbsd:if_mcast_op+0x50
 0xbada3eb4: netbsd:in6_delmulti+0x154
 0xbada3ecc: netbsd:in6_leavegroup+0x20
 0xbada3ef4: netbsd:in6_purgeaddr+0x6c
 0xbada3f2c: netbsd:nd6_timer+0x108
 0xbada3f64: netbsd:callout_softclock+0x194
 0xbada3fac: netbsd:softint_dispatch+0xd4

It seems to me that nd6_timer is either expecting too much of
the USB stack by expecting a synchronous interface to changing
multicast filters that doesn't sleep; or the USB stack should
provide an asynchronous update method and any failure should be
handled elsewhere.

Another problem in the PR is that

1) CPU N (not 0) takes softnet_lock and requests a USB control transfer
(which will sleep for completion)

2) CPU 0 takes clock interrupt and nd6_timer expires.  nd6_timer starts and
tries to take softnet lock and blocks

3) CPU 0 also runs ipintr (not sure why) which takes softnet lock and locks

4) CPU 0 receives USB HC interrupt for completed control transfer from CPU N
and schedules softint process (at IPL_SOFTNET) which never runs as the lwp
is blocked in step 3)

Maybe

290 #define IPL_SOFTUSB IPL_SOFTNET

http://nxr.netbsd.org/xref/src/sys/dev/usb/usbdi.h#290

should be changed to IPL_SOFTBIO?

Thoughts?

Nick


Re: RPI2: Mutex related KASSERT when entering ddb(4)

2015-10-04 Thread Nick Hudson

On 10/02/15 12:30, Stephan wrote:

Hmm, I grabbed a recent HEAD kernel from nyftp.netbsd.org, but I saw
the same issue.


I don't see how - DIAGNOSTIC is not enabled in HEAD.


  I may have to build my own kernel to be sure the
change to usbdi.c is present.


Probably best :)



Does it work for you?


There are almost certainly other KASSERTs that need adapting to polling.

Nick



Re: Potential problem with reading data from usb devices with ugen(4)

2015-09-26 Thread Nick Hudson

On 09/24/15 21:35, Greg Troxel wrote:

Brian Buhrow  writes:


hello.  I don't necessarily need the read ahead functionality, though
it might be useful when I start doing large transfers to and from the Apple
devices.  However, what I want is the non-blocking functionality which,
according to the manual, requires I use the read ahead code to get.  It
appears, however, that the non-blocking functionality is minimally tested
and may not work as advertised.  Also, it looks like the timeout
functionality may not be quite right either, though I've not investigated
that as thoroughly yet.

So perhaps we should step back and ask why non-blocking doesn't work.
Part of it is that reading from a USB device causes bus transactions, so
even reading is an active step.  The next question is if it makes sense
within USB to have a pending read, or if you can try to read and either
succeed or find nothing.


USB requires that the host controller query and endpoint for data. If the
endpoint isn't ready to provide data then it will respond with NAK.

That is, the USB stack needs an active transfer to an endpoint for any data
to be returned.

The host controller drivers are a little inconsistent in their handling 
of the

USBD_SHORT_XFER flag, but without it a short transfer will always
result in a callback status of USBD_SHORT_XFER

http://nxr.netbsd.org/xref/src/sys/dev/usb/usbdi.c#883

Nick



Re: RPI2: Mutex related KASSERT when entering ddb(4)

2015-09-26 Thread Nick Hudson

On 09/22/15 08:01, Stephan wrote:

Hi

I´m running a pretty recent image of 7.0_RC3 on my RPI2. When I hit
Strg+Alt+Esc in order to enter ddb(4), a mutex related assertion
fires, ending up in a crash.

The assertion is in sys/dev/usb/usbdi.c

KASSERT(mutex_owned(pipe->device->bus->lock)); (line 950, in usbd_start_next())


The stack indicates an USB transfer in polling mode, originating
somewhere from ukbd(4). The top of the stack is this:

vpanic()
__udivmoddi4()
usbd_start_next()
dwc2_softintr()
...
dwc2_poll()
usbd_dopoll()
ukbd_cngetc()
...

>From looking at the code, I would expect to see usb_tranfer_complete()
being called from dwc2_softintr(), which then calls usbd_start_next().
usb_transfer_complete() is however missing on the stack trace and I´m
not sure why that is.

I suspect this branch was taken in usb_transfer_complete() which
doesn´t deal with the bus lock, and usbd_start_next() expects it being
held:

 if (!repeat) {
 /* XXX should we stop the queue on all errors? */
 if (erred && pipe->iface != NULL)/* not control pipe */
 pipe->running = 0;
 else
 usbd_start_next(pipe);
 }

I´m not an USB guy so I don´t really understand what this all is. I
hope someone can fix this.


cvs update and try again. The locking is not required when polling.

Nick


Re: Potential problem with reading data from usb devices with ugen(4)

2015-09-24 Thread Nick Hudson

On 09/24/15 08:07, Brian Buhrow wrote:

hello folks.   Perhaps someone on this list can enlighten me as to
where I'm going wrong, but I think there may be an issue with ugen(4) when
using the bulk read ahead and write behind code in conjunction with file
descriptors which are in O_NONBLOCK mode.  This is with NetBSD/i386 5.2
with the UGEN_BULK_RA_WB code ifdef'd into the ugen(4) code.  I've looked
at -current and it looks like the problem is still there.  Let me explain:

I'm trying to get the usbmuxd and libimobiledevice code working under
NetBSD using the libusb1-1.0.19 package.  The usbmuxd code scans the
available ugen devices, identifies which ones are Apple devices, and then
rescans the devices it identified as being of interest.  With the default
libusb1 code, this functionality doesn't work because the usbmuxd code
expects to be able to do this over and over again but the ugen(4) driver
locks the process in "ugenrb" when it checks to see if a given  device has
the proper bulk endpoints.  I modified the libusb1 NetBSD backend module to
enable the read ahead code  and write behind code, as well as to put the
file descriptor associated with each of the endpoints into non-blocking
mode with fcntl(2).Now, the first pass usbmuxd makes works, but it doesn't
generate any requests of the Apple devices which cause them to respond with
data on the bulk endpoints.  That's fine, but when usbmuxd comes around to
begin conversing with the "interesting" devices again, it's told there's
nothing to read because the ugen(4) driver, which has read ahead enabled
already, doesn't check for new input from the USB device when read requests
are made and there's no data available.  Specifically, when this code is
enabled, I think data is read from the bulk end points of a device using
the function ugen_bulkra_intr().  In theory it's supposed to be a self
perpetuating function that keeps checking for input, but it seems to only
get called from itself or when the read ahead  code is enabled via the
ioctl(2) call.  The scenario looks like this to me:

1.  Enable the read ahead code with the ioctl(2) on a device bulk endpoint
and also set the file descriptor into non-blocking mode.  The function
ugen_bulkra_intr() gets called once, but there's nothing to read so
the function doesn't set things up to call itself again.

2.  The next call to read(2) by the user process doesn't rigger a call to
ugen_bulkra_intr() because the file descriptor is in non-blocking mode and
there's no data to pass to the process so the driver just returnes and
EWOULDBLOCK.

3.  This repeats over and over again without ever actually querying the
device to see if it has anything to say.


Are you sure about this? I'm not sure why ugen_bulkra_intr doesn't start 
a new transfer, but see below.


I wonder if

http://nxr.netbsd.org/xref/src/sys/dev/usb/ugen.c#697
http://nxr.netbsd.org/xref/src/sys/dev/usb/ugen.c#1221

needs USBD_SHORT_XFER_OK adding to the flags arg.

UGEN_DEBUG output will help?

Nick


Re: Potential problem with reading data from usb devices with ugen(4)

2015-09-24 Thread Nick Hudson

On 09/24/15 15:52, Brian Buhrow wrote:

On Sep 24, 12:00pm, Nick Hudson wrote:
} Are you sure about this? I'm not sure why ugen_bulkra_intr doesn't start
} a new transfer, but see below.
}
} I wonder if
}
}  http://nxr.netbsd.org/xref/src/sys/dev/usb/ugen.c#697
}  http://nxr.netbsd.org/xref/src/sys/dev/usb/ugen.c#1221
}
} needs USBD_SHORT_XFER_OK adding to the flags arg.
}
} UGEN_DEBUG output will help?
}
} Nick

I forgot to mention the libusb1 calls do request the
USB_SET_SHORT_XFER via ioctl(2).
This doesn't matter / help here as UGEN_SHORT_OK is ignored for 
UGEN_BULK_{RA,WB}


Can you try the idea?

Thanks,
Nick


Re: [patch] xhci patch 20150911

2015-09-13 Thread Nick Hudson

On 09/11/15 11:07, Takahiro Hayashi wrote:

Hello,

Here is xhci patches for nick-nhusb branch.


nhusb-xhci-lock.diff
+ Fix lock error.  Same as
https://mail-index.netbsd.org/tech-kern/2015/07/15/msg019170.html

nhusb-xhci-evh.diff
+ Split xhci_handle_event() into 3 functions.  Whitespace.

nhusb-xhci-evh2.diff
+ Improve xhci_configure_endpoint().
+ Split off maxburst and interval calculation.
+ Start interval calculation with 10, not 11.
+ Put correct maxburst value.
+ Split off constructing endpoint context.
+ Improve xhci_event_transfer()
+ Remove case of unlikely completion codes I added.
+ Clear xr_cookies too when xhci_set_dequeue() clears xr_trb.
+ Return USBD_NO_ADDR if xhci_address_device fails with
  XHCI_TRB_ERROR_NO_SLOTS.
+ Add aprint_debug xhci capability registers.
+ Add & update comments.



Hi,

I applied there with some amendments - please check my changes.

Thanks,
Nick


Re: [patch] xhci patch 20150911

2015-09-11 Thread Nick Hudson

On 09/11/15 11:07, Takahiro Hayashi wrote:

Hello,

Here is xhci patches for nick-nhusb branch.


I'll look into this.



nhusb-xhci-lock.diff
+ Fix lock error.  Same as
https://mail-index.netbsd.org/tech-kern/2015/07/15/msg019170.html


iirc, I wanted to do this differently



nhusb-xhci-evh.diff
+ Split xhci_handle_event() into 3 functions.  Whitespace.

nhusb-xhci-evh2.diff
+ Improve xhci_configure_endpoint().
+ Split off maxburst and interval calculation.
+ Start interval calculation with 10, not 11.
+ Put correct maxburst value.
+ Split off constructing endpoint context.
+ Improve xhci_event_transfer()
+ Remove case of unlikely completion codes I added.
+ Clear xr_cookies too when xhci_set_dequeue() clears xr_trb.
+ Return USBD_NO_ADDR if xhci_address_device fails with
  XHCI_TRB_ERROR_NO_SLOTS.
+ Add aprint_debug xhci capability registers.
+ Add & update comments.



thanks for working on this.

Nick


Re: Very slow transfers to/from micro SD card on a RPi B+

2015-08-18 Thread Nick Hudson

On 08/17/15 19:08, Stephan wrote:

I have just rebooted with WAPBL enabled. Some quick notes:

-Sequential write speed is a little lower, around 5,4 MB/s.

-Creating 1000 files takes 0,25 sec. while almost no xfers happen. (It just
goes to the log I guess).

-When creating more files (say 10.000), a known issue comes to light. One
CPU core gets utilized 100% in kernel mode while there are almost no xfers.
It takes ages until the operation completes. In contrast, a non-WAPBL
mounted FS experiences many xfers until the drive limit gets hit (100
xfers/sec.).

I am pretty certain that there is a regression in WAPBL. Can you tell me
how I can extract a kernel for the Pi using objdump, so I can conduct some
experimentation?


What do you want to do with objdump?

Nick



Re: USB, NetBSD 6.1.5/amd64: freezes when 2 umass connected

2015-07-02 Thread Nick Hudson

On 07/02/15 10:07, tlaro...@polynum.com wrote:

Hello,

On an NetBSD 6.1.5/amd64, when I connect a second USB connected disk to
the machine, NetBSD freezes. Unable to connect remotely; hard reboot
required.

Questions:

1) The machine has two usb ports, with uhub0 and uhub1 first attached
resp. to these ones; the uhub2 cascading from uhub0 and uhub3 from
uhub1.
uhub2 has 6 ports removable;
uhub3 has 8 ports removable;
Since in /dev/ there are only 8 devices (from usb0 to usb7) could this
be the problem? (6 + 8 = 14, even if I have only one USB device---first
disk---and the second disk is only the second device; but how are the
device nodes assigned to one USB port?)

2) The two USB disks are from the same vendor (Western Digital) but not
exactly the same model (not the same capacity). Could the USB driver be
confused by two similar devices connected to the same(?) USB tree?

3) Physically, on the machine, there are USB ports on the rear, and USB
ports on the front. Does somebody know if front ports could be
duplicating rear ports, that is slots on the front be in fact
connected to the same ports as the rear ones causing conflict?

I'm trying to find what is causing this misbehavior. And a freeze is
rather annoying for a node that is mainly supposed to be administrated
from remote...

TIA,


Can you try netbsd-7 or better still -current?

Thanks,
Nick


Re: x96 cannot build without MULTIPROCESSOR

2015-07-01 Thread Nick Hudson

On 07/01/15 09:01, Emmanuel Dreyfus wrote:

Hi

In -current, x86 seems to be unable to build withotu MUTLIPROCESSOR:
src/sys/kern/kern_stub.c:195:5: error: #error __HAVE_PREEMPTION requires 
MULTIPROCESSOR

__HAVE_PREEMPTION is defined uncondtionaly in src/sys/x86/include/intr.h

Disabling SMP can be useful, sometime you get a computer model where
it crashes. The only workaround I see is to build with
MEMORY_DISK_RBFLAGS=0x1000  # RB_AUTOBOOT | RB_MD1

This is how you're supposed to do it, I believe. The boot loader helps 
as well.


Nick


Re: xhci patch 20150623

2015-06-23 Thread Nick Hudson

On 06/23/15 07:43, takahiro hayashi wrote:

Hello,

Here is xhci patch for nick-nhusb branch.


nh-xhci-check.diff
+ Add port range check in xhci_rhpsc().
+ Add sanity check if xfer-ux_pipe != NULL in xhci_handle_event().
nh-xhci-prc.diff
+ Remove SET_FEATURE C_PORT_RESET(PRC) -- what would happen?
nh-xhci-methods1.diff
+ Sync return type of xhci_close_pipe() with return type of
  upm_close method.
nh-xhci-abort.diff
+ Add routines for aborting xfer and command.
nh-xhci-comments.diff
+ Update comments.
nh-usb_subr.diff
+ Don't give up doing SET_CONFIG in usbd_set_config_index()
  even if it fails to get BOS descriptor.
  In this case ud_bdesc will be NULL.


Thanks, I'll look them over.



Known problems:

+ HS hub in 3.0 hub under 3.0 port is disconnected and reconnected every
   several minutes repeatedly.
   I don't know what is culprit yet.
+ Detaching hubs or devices might cause panic.
   Especially when the hub that hangs many devices is disconnected.

This is a general problem that has been around forever. I have a plan.


+ KASSERT(sc-sc_command_addr == 0) in xhci_do_command() may fail.
   If two or more threads run xhci_do_command(), all of them except one
   should be blocked at mutex_enter. But one of blocked thread can get
   sc_lock with sc_command_addr != 0 when cv_timedwait() unlocks sc_lock.
+ xhci.c cannot handle cross-64k transfer yet.
   (It works anyway though.)
+ Power management is not implemented.
+ USB keyboard interaction is laggy and annoying.


Any idea why keyboard is laggy?

+ ASM1042 and some Intel PCH do not work at all.
   No interrupts.
+ Fresco1100 does not report CSC if the device is connected at boot.
   Only PortResetChange is set in change bits.
   uhub ignores ports without CSC bit, so cannot detect devices.
+ SS part of asm107x hub under hudson2 xhci roothub is not recognized.
   It drops Port Enable Disable (PED) bit after port reset.
   rhpsc: 0x21203CSC,PIC=0x0,XSPEED=0x4=SS,PP,PLS=0x0=U0,PED,CCS
   after reset: 
0x6202c0PLC,PRC,CSC,PIC=0x0,XSPEED=0x0=NONE,PP,PLS=0x6=SS_INA


Not sure what you mean by hudson2 xhci roothub here.



+ axen does not work after interface up - down - up.
   When the interface goes down, axen driver closes both of RX and TX 
pipes.
   That causes transtion slot state of this device to ADDRESSED from 
CONFIGURED.

+ xhci.c does not snoop SET_CONFIG request and does not transtion the
   slot state to CONFIGURED from ADDRESSED even if SET_CONFIG is issued.
+ Isochronous transfer is not implemented.
+ Stream protocol is not supported.
+ Conexant CX93010 umodem is not recognized (XACT in ADDRESS_DEVICE).
   It can be recognized by inserting 150ms delay before port reset
   while enumeration.
+ usbd_clear_endpoint_stall{,_async}() does not work on xhci to clear 
stall

   condition because this function does not issue RESET_ENDPOINT command.
   However, xhci.c detects whether xfer is stalled and issues 
RESET_ENDPOINT

   automatically.
+ Address of root hub is 0 (but it works anyway).
+ Not sure it work on big endian machines.

+ usbdevs(8) does not report correctly if num of ports  16.
   Size of udi_ports in struct usb_device_info should be
   expanded from 16 to 256. Needs compat treatment.
   (currently usbdevs is not part of nick-nhusb branch.)


I can change this :)

Nick



Re: usbd_do_request_flags_pipe diagnostic panic

2015-06-07 Thread Nick Hudson

On 06/07/15 14:33, takahiro hayashi wrote:

Hi,

On 2015/06/07 19:50, Nick Hudson wrote:

On 06/06/15 05:39, takahiro hayashi wrote:

Hello,

On nick-nhusb branch kernel panics in usbd_transfer() when the
zero-length request gets stalled.

This happens when the uhidev driver issues usbd_set_idle to my
USB keyboard, one of its uhidevs returns stall for SET_IDLE request.

While usbd_do_request_flags_pipe() processes the xfer for SET_IDLE,
it tries to read endpoint's status and clear stall condition
if the endpoint is stalled.
It reuses the xfer to store GET_STATUS request, but ux_buf of xfer
is not allocated, then KASSERT at line 298 in usbd_transfer() fails.

Should usbd_do_request*() allocate ux_buf even if ux_length is 0?
Should I file PR this prob?


How about this diff?


Does KASSERT(xfer-ux_buf == NULL) in usbd_alloc_buffer() fail
when valid buffer (data) is given and the request causes stall?



Oh, good point. I will update and allocate a fresh xfer.

Nick


Re: usbd_do_request_flags_pipe diagnostic panic

2015-06-07 Thread Nick Hudson

On 06/06/15 05:39, takahiro hayashi wrote:

Hello,

On nick-nhusb branch kernel panics in usbd_transfer() when the
zero-length request gets stalled.

This happens when the uhidev driver issues usbd_set_idle to my
USB keyboard, one of its uhidevs returns stall for SET_IDLE request.

While usbd_do_request_flags_pipe() processes the xfer for SET_IDLE,
it tries to read endpoint's status and clear stall condition
if the endpoint is stalled.
It reuses the xfer to store GET_STATUS request, but ux_buf of xfer
is not allocated, then KASSERT at line 298 in usbd_transfer() fails.

Should usbd_do_request*() allocate ux_buf even if ux_length is 0?
Should I file PR this prob?


How about this diff?

Nick
Index: dev/usb/usbdi.c
===
RCS file: /cvsroot/src/sys/dev/usb/usbdi.c,v
retrieving revision 1.162.2.26
diff -u -p -r1.162.2.26 usbdi.c
--- dev/usb/usbdi.c	30 Mar 2015 12:09:30 -	1.162.2.26
+++ dev/usb/usbdi.c	7 Jun 2015 10:49:55 -
@@ -1062,32 +1062,34 @@ usbd_do_request_flags_pipe(struct usbd_d
 		 * any halt condition.
 		 */
 		usb_device_request_t treq;
-		usb_status_t status;
-		uint16_t s;
 		usbd_status nerr;
 
-		treq.bmRequestType = UT_READ_ENDPOINT;
-		treq.bRequest = UR_GET_STATUS;
-		USETW(treq.wValue, 0);
-		USETW(treq.wIndex, 0);
-		USETW(treq.wLength, sizeof(usb_status_t));
-		usbd_setup_default_xfer(xfer, dev, 0, USBD_DEFAULT_TIMEOUT,
-	   treq, status,sizeof(usb_status_t),
-	   0, 0);
-		nerr = usbd_sync_transfer(xfer);
-		if (nerr)
-			goto bad;
-		s = UGETW(status.wStatus);
-		USBHIST_LOG(usbdebug, status = 0x%04x, s, 0, 0, 0);
-		if (!(s  UES_HALT))
-			goto bad;
+		void *buf = usbd_alloc_buffer(xfer, sizeof(usb_status_t));
+		if (buf) {
+			usb_status_t status;
+			treq.bmRequestType = UT_READ_ENDPOINT;
+			treq.bRequest = UR_GET_STATUS;
+			USETW(treq.wValue, 0);
+			USETW(treq.wIndex, 0);
+			USETW(treq.wLength, sizeof(usb_status_t));
+			usbd_setup_default_xfer(xfer, dev, 0,
+			USBD_DEFAULT_TIMEOUT, treq, status,
+			sizeof(usb_status_t), 0, 0);
+			nerr = usbd_sync_transfer(xfer);
+			if (nerr)
+goto bad;
+			uint16_t s = UGETW(status.wStatus);
+			USBHIST_LOG(usbdebug, status = 0x%04x, s, 0, 0, 0);
+			if (!(s  UES_HALT))
+goto bad;
+		}
 		treq.bmRequestType = UT_WRITE_ENDPOINT;
 		treq.bRequest = UR_CLEAR_FEATURE;
 		USETW(treq.wValue, UF_ENDPOINT_HALT);
 		USETW(treq.wIndex, 0);
 		USETW(treq.wLength, 0);
 		usbd_setup_default_xfer(xfer, dev, 0, USBD_DEFAULT_TIMEOUT,
-	   treq, status, 0, 0, 0);
+		treq, NULL, 0, 0, 0);
 		nerr = usbd_sync_transfer(xfer);
 		if (nerr)
 			goto bad;


Re: xhci patch 20150606

2015-06-07 Thread Nick Hudson

On 06/06/15 18:51, takahiro hayashi wrote:

On 2015/06/07 01:51, takahiro hayashi wrote:

On 2015/06/07 01:39, Nick Hudson wrote:

I don't think XHCI_* defines should appear in usb.h.


Oh that's my fault.
I'll fix it...


New diff of usb.h attached.



Thanks.

All committed now.

Nick


Re: xhci patch 20150606

2015-06-06 Thread Nick Hudson

On 06/06/15 06:20, takahiro hayashi wrote:

Hi,

I've committed most of the diffs.

Just nh-ssp.dif, I think...


--- src/sys/dev/usb/usb.h.orig  2015-05-28 15:54:12.0 +0900
+++ src/sys/dev/usb/usb.h   2015-06-02 11:38:22.0 +0900
@@ -395,15 +395,25 @@ typedef struct {
  } UPACKED usb_devcap_platform_descriptor_t;
  #define USB_DEVCAP_PLATFORM_DESCRIPTOR_SIZE 20
  
+/* usb 3.1 ch 9.6.2.4 */

  typedef struct {
uByte   bLength;
uByte   bDescriptorType;
uByte   bDevCapabilityType;
uByte   bReserved;
uDWord  bmAttributes;
+#defineXHCI_DEVCAP_SSP_SSAC(x) __SHIFTOUT(x, 
__BITS(4,0))
+#defineXHCI_DEVCAP_SSP_SSIC(x) __SHIFTOUT(x, 
__BITS(8,5))
uWord   wFunctionalitySupport;
+#defineXHCI_DEVCAP_SSP_SSID(x) __SHIFTOUT(x, 
__BITS(3,0))
+#defineXHCI_DEVCAP_SSP_MIN_RXLANE_COUNT(x) __SHIFTOUT(x, 
__BITS(11,8))
+#defineXHCI_DEVCAP_SSP_MIN_TXLANE_COUNT(x) __SHIFTOUT(x, 
__BITS(15,12))
uWord   wReserved;
uDWord  bmSublinkSpeedAttr[0];
+#defineXHCI_DEVCAP_SSP_LSE(x)  __SHIFTOUT(x, 
__BITS(5,4))
+#defineXHCI_DEVCAP_SSP_ST(x)   __SHIFTOUT(x, 
__BITS(7,6))
+#defineXHCI_DEVCAP_SSP_LP(x)   __SHIFTOUT(x, 
__BITS(15,14))
+#defineXHCI_DEVCAP_SSP_LSM(x)  __SHIFTOUT(x, 
__BITS(31,16))
  } UPACKED usb_devcap_ssp_descriptor_t;
  #define USB_DEVCAP_SSP_DESCRIPTOR_SIZE 12 /* variable length */
  

I don't think XHCI_* defines should appear in usb.h.

Nick


Re: patch: 3.0 hub support for xhci

2015-04-06 Thread Nick Hudson

On 04/06/15 00:15, Takahiro HAYASHI wrote:

Hello,

On 2015/04/02 21:10, Nick Hudson wrote:

On 03/24/15 13:30, Robert Sprowson wrote:
b) some experimental USB 3 work that might break it for some people, 
this

should probably be on a private branch


I'm happy to do this on my nick-nhusb branch, but maybe this isn't 
what you want.


Can/Should I post the patch for nick-nhusb branch?



Sure.

I'll merge the usb changes in HEAD into my branch to help.

Nick


Re: Brainy: Set of 11 potential bugs

2015-04-04 Thread Nick Hudson

On 04/04/15 10:12, Maxime Villard wrote:

Hi,
here is a new list of 11 potential bugs:

http://m00nbsd.net/ae123a9bae03f7dde5c6d654412daf5a.html#Report-5

Please carefully test/investigate these bugs before fixing them. Also,
please include the keyword Brainy in your commit messages so that I
can easily keep track of what is/isn't fixed - or send me a mail directly.


The  sys/dev/usb/umass_isdata.c report looks like a false +ve to me.



Found by The Brainy Code Scanner.

Regards,
Maxime



Nick


Re: Brainy: Set of 11 potential bugs

2015-04-04 Thread Nick Hudson

On 04/04/15 16:15, Nick Hudson wrote:

On 04/04/15 10:12, Maxime Villard wrote:

Hi,
here is a new list of 11 potential bugs:

http://m00nbsd.net/ae123a9bae03f7dde5c6d654412daf5a.html#Report-5

Please carefully test/investigate these bugs before fixing them. Also,
please include the keyword Brainy in your commit messages so that I
can easily keep track of what is/isn't fixed - or send me a mail 
directly.


The  sys/dev/usb/umass_isdata.c report looks like a false +ve to me.


but I'm blind... ignore me.

Nick




Re: patch: 3.0 hub support for xhci

2014-11-18 Thread Nick Hudson

On 10/31/14 03:59, Takahiro HAYASHI wrote:

hello,

This patch tries to support 3.0 hubs for xhci.

still xhci is at best experimental.


Thanks for working on this. Are you tracking other BSDs?

Can you update the diff to -current?

Thanks,
Nick


Re: Patch: xhci controller driver improvements

2014-09-20 Thread Nick Hudson

On 08/12/14 17:20, Takahiro HAYASHI wrote:

On 08/12/14 23:07, Nick Hudson wrote:

On 08/12/14 10:42, Takahiro HAYASHI wrote:

On 08/12/14 07:45, Takahiro HAYASHI wrote:

Please send diff :)


Wait for a while plz.


lessprf.diff
  replaces most of device_printf.
  I define new macros DPRINTD and DPRINTDF. former prints
  args with device_xname(sc-sc_dev), latter prints args
  with device name and function name.


Can you check that this compiles with all the combinations of 
USB_DEBUG, XHCI_DEBUG and DIAGNOSTIC please?


Sorry, some declrations was reported unused.
New patch is attached.


Hi,

Can you convert the DPRINTFs to USBHIST_LOG, please?

Thanks,
Nick



Re: Patch: xhci controller driver improvements

2014-08-12 Thread Nick Hudson

On 08/12/14 10:42, Takahiro HAYASHI wrote:

On 08/12/14 07:45, Takahiro HAYASHI wrote:

Please send diff :)


Wait for a while plz.


lessprf.diff
  replaces most of device_printf.
  I define new macros DPRINTD and DPRINTDF. former prints
  args with device_xname(sc-sc_dev), latter prints args
  with device name and function name.


Can you check that this compiles with all the combinations of USB_DEBUG, 
XHCI_DEBUG and DIAGNOSTIC please?




I'll dump my local misc patches too.

usb3.diff
  tries to make usb stack recognize super speed and
  make usbdevs(8) print super speed.
  This patch also adds XHCI_DEBUG flag to opt_usb.h.
I committed most of this apart from the memset as I don't think it's 
required.




lockmore.diff
  adds lock with sc_intr_lock to xhci_intr and xhci_poll.

committed


lsmps.diff
  makes use 8 as wMaxPacketSize for LS when addressing device.
http://mail-index.netbsd.org/source-changes/2013/03/20/msg042367.html


Committed.




Thanks,


Nick


Re: Patch: xhci controller driver improvements

2014-08-12 Thread Nick Hudson

On 08/12/14 15:07, Nick Hudson wrote:

On 08/12/14 10:42, Takahiro HAYASHI wrote:




I'll dump my local misc patches too.

usb3.diff
  tries to make usb stack recognize super speed and
  make usbdevs(8) print super speed.
  This patch also adds XHCI_DEBUG flag to opt_usb.h.
I committed most of this apart from the memset as I don't think it's 
required.


UPS_SUPER_SPEED should be done differently in the long run by adding 
super speed hub support properly


Nick


Re: Patch: xhci controller driver improvements

2014-08-11 Thread Nick Hudson

On 08/10/14 12:54, Takahiro HAYASHI wrote:

Hi,


Hi,


On 08/10/14 17:06, Nick Hudson wrote:

Some comments...




@@ -2889,6 +3028,7 @@ xhci_timeout(void *addr)
  struct xhci_softc * const sc = 
xfer-pipe-device-bus-hci_private;

  if (sc-sc_dying) {
+xhci_abort_xfer(xfer, USBD_TIMEOUT);
  return;
  }


This looks very strange.


Does it need sc_lock and unlock?
or it's strange that xhci_abort_xfer is called
from callout(softclock) interrupt context?


Calling xhci_abort_xfer while dying.



The driver makes far too much use of device_printf and all USB should 
move to KERNHIST.


I didn't know about KERNHIST, thanks for notifying.
I've replaced device_printf with DPRINTF or DPRINTFN in
my local tree.



Please send diff :)

Nick



Re: Patch: xhci controller driver improvements

2014-08-10 Thread Nick Hudson

On 08/10/14 04:27, Takahiro HAYASHI wrote:

Hello,

(05/11/14 02:57), Rajasekhar Pulluru wrote:

Hi,

The attached patch addresses following points.

- With some usb devices, often some endpoints gets stalled. Resetting 
the

endpoint helps recover usb transactions.

- When a usb device is removed while data is being copied to/from, 
all the

queued transfer requests needs to be aborted gracefully so that transfer
status is reported up to the application.

- Some boards require an additional interrupt acknowledgement 
specific to
the architecture. Added a function pointer that needs to be set 
during that

board arch-specific initialization. If this is not required, function
pointer needs to be set to NULL.

- Added debug functions to dump xhci registers that could help us in 
case

of any issue.

Note: I have manually patched the code changes from my older
version(verified) of code to the latest as of today in MAIN branch. 
And the
code is not compiled either, sorry about that. If anyone is 
interested to

patch this and verify, it would be really helpful.


I reformatted whitespace and made several changes:

- avoid lock error when calling xhci_do_command from
  xhci_stop_endpoint
- assume usb_uncallout(a,b,c) is callout_stop(a)
- comment out checking xfer-device-bus-intr_context
- declare gsc used in debug code (would be set in ddb?)

It should build at least, but still experimental.




Some comments...


--- src/sys/dev/usb/xhci.c.orig 2014-08-05 22:24:27.0 +0900
+++ src/sys/dev/usb/xhci.c  2014-08-07 17:35:16.0 +0900
@@ -55,6 +55,8 @@ __KERNEL_RCSID(0, $NetBSD: xhci.c,v 1.2
  #include dev/usb/xhcivar.h
  #include dev/usb/usbroothub_subr.h
  
+#include uvm/uvm.h /* for vtophys on arm */

+


Shouldn't be needed at all - the hexdump use is questionable.



@@ -1127,6 +1165,81 @@ xhci_open(usbd_pipe_handle pipe)
  }
  
  static void

+xhci_abort_xfer(usbd_xfer_handle xfer, usbd_status status)


This function needs usbmp-ifiction, i.e. updating for -current by 
removing spl, tsleep, wakeup, etc.



@@ -2889,6 +3028,7 @@ xhci_timeout(void *addr)
struct xhci_softc * const sc = xfer-pipe-device-bus-hci_private;
  
  	if (sc-sc_dying) {

+   xhci_abort_xfer(xfer, USBD_TIMEOUT);
return;
}
  


This looks very strange.

The driver makes far too much use of device_printf and all USB should 
move to KERNHIST.


Nick


Re: CVS commit: src/sys/kern

2014-07-22 Thread Nick Hudson

On 07/22/14 12:49, Greg Troxel wrote:

Maxime Villard m...@netbsd.org writes:


Module Name:src
Committed By:   maxv
Date:   Tue Jul 22 07:38:41 UTC 2014

Modified Files:
src/sys/kern: subr_kmem.c

Log Message:
Enable KMEM_REDZONE on DIAGNOSTIC. It will try to catch overflows.

No comment on tech-kern@

I didn't see this on tech-kern (nor did I see anything about defining
KMEM_POISON), and now that I'm aware I object.

DIAGNOSTIC, by longstanding tradition, is a lightweight and not have a
significant performance hit.  Basically it's about KASSERT of things
that must be true.  This is changing the size of memory allocations,
which is far more far-reaching.


options(4) says this

 options DIAGNOSTIC
 Adds code to the kernel that does internal consistency checks. 
This code
 will cause the kernel to panic if corruption of internal data 
structures

 is detected.  These checks can decrease performance up to 15%.

I'd guess that KMEM_REDZONE add much less than 15%.

Nick


Re: one time crash in usb_allocmem_flags

2014-02-10 Thread Nick Hudson

On 02/09/14 19:48, Alexander Nasonov wrote:

Hi,

I was running current amd64 (last updated few weeks ago) when I got
a random crash shortly after switching to X mode. If my analysis is
correct, it crashed in usb_allocmem_flags inside this loop:

 LIST_FOREACH(f, usb_frag_freelist, next) {
 KDASSERTMSG(usb_valid_block_p(f-block, usb_blk_fraglist),
 %s: usb frag %p: unknown block pointer %p,
  __func__, f, f-block);
 if (f-block-tag == tag)
 break;
 }

It couldn't access f-block-tag. I wasn't actively using any of
the usb devices at that time. I wonder if it's a known problem or
should I file a PR? Details of the analysis is below.


Please fill a PR so it doesn't get forgotten about.

At first glance it doesn't look like that usb_frag_freelist isn't 
protected correctly. I looks more like random corruption. What was the 
value of %edx?


Thanks,
Nick



Re: Sudden reboot

2013-12-02 Thread Nick Hudson

On 12/03/13 07:49, David Holland wrote:

On Tue, Dec 03, 2013 at 08:13:29AM +0100, Jan Danielsson wrote:
  I'm running netbsd-6 (a month old or so) on a Soekris net6501
   (NetBSD/amd64), and tonight I had an unexpected reboot.
   /var/log/messages contains:
  
   Dec  3 03:15:46 aria syslogd[188]: restart
   Dec  3 03:15:46 aria /netbsd: uvm_fault(0x8071b4d8, 0x0, 1) - e
   Dec  3 03:15:46 aria /netbsd: fatal page fault in supervisor mode
   Dec  3 03:15:46 aria /netbsd: trap type 6 code 0 rip 8031162f cs
   8 rflags 10283 cr2  60 cpl 4 rsp fe8004cf9b98
   Dec  3 03:15:46 aria /netbsd: panic: trap
   Dec  3 03:15:46 aria /netbsd: cpu1: Begin traceback...
  
  Thinking back, I recall noticing that I have had unexpected reboots
   once or twice before, but it's very rare -- been running this system for
   a year or so.
  
  The entry before syslogd restarting is simply a dhcp client renewal
   (againt my ISP), which occured 03:14:04.
  
  Trap 6 = illegal op-code? Should I start worrying about broken RAM
   modules?

from sys/arch/x86/include/trap.h:
#define T_PAGEFLT6  /* page fault */

and %cr2 contains 60, so in all probability it's just a null pointer
dereference.


Definitely null pointer deref - the second arg to uvm_fault is the 
virtual address.


uvm_fault(0x8071b4d8, 0x0, 1) - e



If you use objdump -d or nm -n or gdb or whatever to find the code
that's at 0x8031162f in your kernel, you'll probably get a
fairly good idea of what broke.


backtrace is always useful as well.

Nick


Re: Netbsd /avr32 update.

2013-09-30 Thread Nick Hudson

On 09/30/13 09:06, Martin Husemann wrote:

On Sun, Sep 29, 2013 at 08:36:15PM -0300, Tomas Niño Kehoe wrote:

The next step, besides continuing / fixing the actual port is to start
working on the toolchain. Particularly an llvm backend for NetBSD/avr32 seems
the way to go.

For those of us not knowing the details: isn't avr32 supported by stock gcc?
If not, why? If only in later versions, which?


http://gcc.gnu.org/gcc-4.7/changes.html

Says a lot about AVR

Nick




Re: __read_mostly and __mp_friendly annotations

2010-05-27 Thread Nick Hudson
On Thursday 27 May 2010 18:03:19 Mindaugas Rasiukevicius wrote:
[...]

 P.S. Does the very last object (both of __read_mostly and __cacheline
 areas) gets enough padded space?

Only in some of the scripts. I guess this needs fixing.

Nick