Re: EHCI on armv6 with Write-Back caches
Hi, Please try the attached patch for 10-current. The patch is not tested yet, only compiles. I will try to test more later today. Let me know if you see any issues. --HPS === dev/usb/serial/usb_serial.c == --- dev/usb/serial/usb_serial.c (revision 244051) +++ dev/usb/serial/usb_serial.c (local) @@ -797,10 +797,14 @@ DPRINTF(tp=%p\n, tp); if (ttydisc_can_bypass(tp) != 0 || - (sc-sc_flag UCOM_FLAG_HL_READY) == 0) { + (sc-sc_flag UCOM_FLAG_HL_READY) == 0 || + (sc-sc_flag UCOM_FLAG_INWAKEUP) != 0) { return; } + /* prevent recursion */ + sc-sc_flag |= UCOM_FLAG_INWAKEUP; + pos = sc-sc_jitterbuf_out; while (sc-sc_jitterbuf_in != pos) { @@ -821,6 +825,8 @@ if ((sc-sc_jitterbuf_in == pos) (sc-sc_flag UCOM_FLAG_RTS_IFLOW)) ucom_rts(sc, 0); + + sc-sc_flag = ~UCOM_FLAG_INWAKEUP; } static int === dev/usb/serial/usb_serial.h == --- dev/usb/serial/usb_serial.h (revision 244051) +++ dev/usb/serial/usb_serial.h (local) @@ -183,6 +183,7 @@ #define UCOM_FLAG_CONSOLE 0x80 /* set if device is a console */ #define UCOM_FLAG_WAIT_REFS 0x0100 /* set if we must wait for refs */ #define UCOM_FLAG_FREE_UNIT 0x0200 /* set if we must free the unit */ +#define UCOM_FLAG_INWAKEUP0x0400 /* set if we are in the tty_inwakeup callback */ uint8_t sc_lsr; uint8_t sc_msr; uint8_t sc_mcr; === dev/usb/storage/ustorage_fs.c == --- dev/usb/storage/ustorage_fs.c (revision 244051) +++ dev/usb/storage/ustorage_fs.c (local) @@ -74,7 +74,7 @@ /* Define some limits */ #ifndef USTORAGE_FS_BULK_SIZE -#define USTORAGE_FS_BULK_SIZE (1UL 17) /* bytes */ +#define USTORAGE_FS_BULK_SIZE (1U 17) /* bytes */ #endif #ifndef USTORAGE_FS_MAX_LUN @@ -85,8 +85,6 @@ #define USTORAGE_QDATA_MAX 40 /* bytes */ #endif -#define sc_cmd_data sc_cbw.CBWCDB - /* * The SCSI ID string must be exactly 28 characters long * exluding the terminating zero. @@ -176,8 +174,9 @@ struct ustorage_fs_softc { - ustorage_fs_bbb_cbw_t sc_cbw; /* Command Wrapper Block */ - ustorage_fs_bbb_csw_t sc_csw; /* Command Status Block */ + ustorage_fs_bbb_cbw_t *sc_cbw; /* Command Wrapper Block */ + ustorage_fs_bbb_csw_t *sc_csw; /* Command Status Block */ + void *sc_dma_ptr; /* Main data buffer */ struct mtx sc_mtx; @@ -275,7 +274,6 @@ .endpoint = UE_ADDR_ANY, .direction = UE_DIR_OUT, .bufsize = sizeof(ustorage_fs_bbb_cbw_t), - .flags = {.ext_buffer = 1,}, .callback = ustorage_fs_t_bbb_command_callback, .usb_mode = USB_MODE_DEVICE, }, @@ -295,7 +293,7 @@ .endpoint = UE_ADDR_ANY, .direction = UE_DIR_OUT, .bufsize = USTORAGE_FS_BULK_SIZE, - .flags = {.proxy_buffer = 1,.short_xfer_ok = 1,.ext_buffer = 1}, + .flags = {.proxy_buffer = 1,.short_xfer_ok = 1}, .callback = ustorage_fs_t_bbb_data_read_callback, .usb_mode = USB_MODE_DEVICE, }, @@ -315,7 +313,7 @@ .endpoint = UE_ADDR_ANY, .direction = UE_DIR_IN, .bufsize = sizeof(ustorage_fs_bbb_csw_t), - .flags = {.short_xfer_ok = 1,.ext_buffer = 1,}, + .flags = {.short_xfer_ok = 1}, .callback = ustorage_fs_t_bbb_status_callback, .usb_mode = USB_MODE_DEVICE, }, @@ -409,6 +407,14 @@ transfers, %s\n, usbd_errstr(err)); goto detach; } + + sc-sc_cbw = usbd_xfer_get_frame_buffer(sc-sc_xfer[ + USTORAGE_FS_T_BBB_COMMAND], 0); + sc-sc_csw = usbd_xfer_get_frame_buffer(sc-sc_xfer[ + USTORAGE_FS_T_BBB_STATUS], 0); + sc-sc_dma_ptr = usbd_xfer_get_frame_buffer(sc-sc_xfer[ + USTORAGE_FS_T_BBB_DATA_READ], 0); + /* start Mass Storage State Machine */ mtx_lock(sc-sc_mtx); @@ -518,44 +524,44 @@ switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: - tag = UGETDW(sc-sc_cbw.dCBWSignature); + tag = UGETDW(sc-sc_cbw-dCBWSignature); if (tag != CBWSIGNATURE) { /* do nothing */ DPRINTF(invalid signature 0x%08x\n, tag); break; } - tag = UGETDW(sc-sc_cbw.dCBWTag); + tag = UGETDW(sc-sc_cbw-dCBWTag); /* echo back tag */ - USETDW(sc-sc_csw.dCSWTag, tag); + USETDW(sc-sc_csw-dCSWTag, tag); /* reset status */ - sc-sc_csw.bCSWStatus = 0; + sc-sc_csw-bCSWStatus = 0; /* reset data offset, data length and data remainder */ sc-sc_transfer.offset = 0; sc-sc_transfer.data_rem = - UGETDW(sc-sc_cbw.dCBWDataTransferLength); + UGETDW(sc-sc_cbw-dCBWDataTransferLength); /* reset data flags */ sc-sc_transfer.data_short = 0; /* extract LUN */ - sc-sc_transfer.lun = sc-sc_cbw.bCBWLUN; + sc-sc_transfer.lun = sc-sc_cbw-bCBWLUN; if (sc-sc_transfer.data_rem == 0) { sc-sc_transfer.cbw_dir = DIR_NONE; } else { - if (sc-sc_cbw.bCBWFlags CBWFLAGS_IN) { + if (sc-sc_cbw-bCBWFlags CBWFLAGS_IN) { sc-sc_transfer.cbw_dir = DIR_WRITE; } else { sc-sc_transfer.cbw_dir = DIR_READ; } } - sc-sc_transfer.cmd_len =
Re: EHCI on armv6 with Write-Back caches
Hi, I've run some basic tests over here (x86) which passed after some patch modifications. Please test and verify for your ARM targets: http://svnweb.freebsd.org/changeset/base/244500 http://svnweb.freebsd.org/changeset/base/244503 Please also verify that upgt and uwrt and uath still works like expected. --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On 12/20/2012 10:46 AM, Hans Petter Selasky wrote: Hi, I've run some basic tests over here (x86) which passed after some patch modifications. Please test and verify for your ARM targets: http://svnweb.freebsd.org/changeset/base/244500 http://svnweb.freebsd.org/changeset/base/244503 Please also verify that upgt and uwrt and uath still works like expected. Thanks! I'll test umass on my ARM devices. EHCI driver suffers from this too. QH, QTD and other structures mixes DMA data and non-DMA fields. Splitting them would be a right thing to do too. Though I believe EHCI vs. WB caches issue is more complicated then just this. ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Tuesday 18 December 2012 21:32:46 Warner Losh wrote: On Dec 18, 2012, at 12:44 PM, Hans Petter Selasky wrote: On Tuesday 18 December 2012 20:12:29 Andrew Turner wrote: On Tue, 18 Dec 2012 13:06:44 +0100 Hans Petter Selasky hsela...@c2i.net wrote: Hi Andrew, The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Yes, but when I allocate memory from the USB stack, then I want that memory to not be cached. It is simply not that useful to have that memory cached. I didn't check the latest state of busdma, but if I'm not mistaken, if the BUS_DMA_COHERENT flag is set on the DMA tag, the flush/invalidate will simply return and do nothing. Maybe that is the problem ... The exact meaning of BUS_DMA_COHERENT depends on the architecture. The code is still required to call bus_dmamap_sync with a coherent map but when the flag is implemented the cost of the operation will be reduced. It doesn't guarantee the memory is uncached, it may be implemented that way but the USB code can't rely on it. Andrew The USB code doesn't rely on this flag. I'm wondering if BUSDMA by accident sets this flag, so that the cache sync ops are not called. No. It doesn't. Hi, Does the USB code still touch bytes in the same cache line while the DMA is still outstanding? Yes, only for data. The busdma API must handle this case properly as discussed before. --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
Hi again, Different vendors use different naming conventions about sync operations. Maybe we should start defining some names and agree about that first? usb_pc_cpu_flush: This function is a system abstraction which is supposed to ensure that all CPU cached data for the given buffer is written to RAM before this function returns. usb_pc_cpu_invalidate: This function is a system abstraction which is supposed to ensure that all CPU cached data is cleared for the given buffer. These functions have been carefully added in all the USB drivers using DMA. Atomicity: I understand that the ARM hardware is not always compatible to this approach. 1) Flushing data to RAM is not a problem in any case? Do you agree? 2) Invalidating data is a problem, because invalidation can cause nearby data to be cleared aswell. So basically for those systems which are not handling this, flushing data means a lock of all CPU's until the flush/invalidate sequence is complete? Any dispute about this? If the CPU does not support certain features we cannot have an efficient system. It is like having a CPU which doesn't support switching off interrupt levels, like an 8-bit AVR. Then no matter how you twist it, you cannot postpone an interrupt to a software thread. Same goes for DMA support. If their DMA engine doesn't support byte granularity and possibility to flush/invalidate on a per-byte basis, then implement a global system lock to flush/invalidate data like I suggest, if this is not doable in the hardware by the CPU instruction set. The approach that I was recommended several years ago, is that I can pass a pointer to a buffer, which then can be transferred by the USB engine. This pointer can be any pointer except NULL. 3) As per my knowledge, using busdma to allocate a separate buffer for a 13- byte buffer, results in having a buffer of PAGE_SIZE bytes allocated. 4) BUSDMA experts: Please verify that the flags passed to bus_dmamap_sync() causes the exact behaviour has listed on top of this e-mail to occur in the following two functions in sys/dev/usb/usb_busdma.c void usb_pc_cpu_invalidate(struct usb_page_cache *pc) { if (pc-page_offset_end == pc-page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } /* * TODO: We currently do XXX_POSTREAD and XXX_PREREAD at the * same time, but in the future we should try to isolate the * different cases to optimise the code. --HPS */ bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_PREREAD); } void usb_pc_cpu_flush(struct usb_page_cache *pc) { if (pc-page_offset_end == pc-page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_PREWRITE); } --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Dec 19, 2012, at 1:56 AM, Hans Petter Selasky wrote: Hi again, Different vendors use different naming conventions about sync operations. The different names are often for subtly different types of cache flushing. Maybe we should start defining some names and agree about that first? usb_pc_cpu_flush: This function is a system abstraction which is supposed to ensure that all CPU cached data for the given buffer is written to RAM before this function returns. First off, why can't you just use the normal busdma interface? Second, does this also invalidate the cache lines? There's many kinds of flushes as well, depending on the cache coherency model for MP.. usb_pc_cpu_invalidate: This function is a system abstraction which is supposed to ensure that all CPU cached data is cleared for the given buffer. So does this throw the data that's in the cache away? Or does it write back before discarding it? These functions have been carefully added in all the USB drivers using DMA. Atomicity: I understand that the ARM hardware is not always compatible to this approach. Yes, since it fails to capture the subtly of the ARM hardware which the busdma abstraction captures. 1) Flushing data to RAM is not a problem in any case? Do you agree? As long as there's no DMA outstanding to the memory addressed by the cache line being flushed, yes. 2) Invalidating data is a problem, because invalidation can cause nearby data to be cleared aswell. So basically for those systems which are not handling this, flushing data means a lock of all CPU's until the flush/invalidate sequence is complete? Any dispute about this? Yes. You can generally only invalidate to the cache line level. That's why I was asking about the subtly in what you mean by invalidate. ARM can and does flush data properly, but this might not match your usage. You forgot 3) Touching data in the same cache line while the DMA is going on. This is a complete no-no and must never be done. Cache lines must not be polluted while the DMA is happening. If the CPU does not support certain features we cannot have an efficient system. It is like having a CPU which doesn't support switching off interrupt levels, like an 8-bit AVR. Most RISC CPUs don't support a coherent cache, it is true. However, they do support efficient and succinct cache operations. The busdma abtraction handles this nicely for all other kinds of devices, why is usb so different? Then no matter how you twist it, you cannot postpone an interrupt to a software thread. Same goes for DMA support. If their DMA engine doesn't support byte granularity and possibility to flush/invalidate on a per-byte basis, then implement a global system lock to flush/invalidate data like I suggest, if this is not doable in the hardware by the CPU instruction set. The DMA engine support it. You can do byte-aligned transfers all day long. However, the cache does not. You must segregate data to get proper operations. There's nothing that you can do about that. You must not mix accesses to memory within a cache line. To do so will cause problems, as evidence by the problems we are seeing. The approach that I was recommended several years ago, is that I can pass a pointer to a buffer, which then can be transferred by the USB engine. This pointer can be any pointer except NULL. 3) As per my knowledge, using busdma to allocate a separate buffer for a 13- byte buffer, results in having a buffer of PAGE_SIZE bytes allocated. Not with Ian's slab allocator... 4) BUSDMA experts: Please verify that the flags passed to bus_dmamap_sync() causes the exact behaviour has listed on top of this e-mail to occur in the following two functions in sys/dev/usb/usb_busdma.c void usb_pc_cpu_invalidate(struct usb_page_cache *pc) { if (pc-page_offset_end == pc-page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } /* * TODO: We currently do XXX_POSTREAD and XXX_PREREAD at the * same time, but in the future we should try to isolate the * different cases to optimise the code. --HPS */ bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_PREREAD); } This is almost certainly wrong. POSTREAD should be called right after the read had completed. PREREAD should be called immediately before starting a DMA operation after all adjacent memory accesses have stopped. void usb_pc_cpu_flush(struct usb_page_cache *pc) { if (pc-page_offset_end == pc-page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_PREWRITE); } This is likely correct, but insufficient. There's no POSTWRITE sync that's done after all the data has been transferred. Warner
Re: EHCI on armv6 with Write-Back caches
BTW: This commit looks exactly like my X-mas present :-) http://svnweb.freebsd.org/changeset/base/244471 --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Wednesday 19 December 2012 20:12:55 Warner Losh wrote: On Dec 19, 2012, at 1:56 AM, Hans Petter Selasky wrote: Hi again, Different vendors use different naming conventions about sync operations. The different names are often for subtly different types of cache flushing. Maybe we should start defining some names and agree about that first? usb_pc_cpu_flush: This function is a system abstraction which is supposed to ensure that all CPU cached data for the given buffer is written to RAM before this function returns. Hi, First off, why can't you just use the normal busdma interface? Because the API was confusing when I wrote the USB code. It wasn't obvious what the different functions were doing. Second, does this also invalidate the cache lines? There's many kinds of flushes as well, depending on the cache coherency model for MP.. No, flush as used in the USB code does not require cache line invalidation. usb_pc_cpu_invalidate: This function is a system abstraction which is supposed to ensure that all CPU cached data is cleared for the given buffer. So does this throw the data that's in the cache away? Or does it write back before discarding it? It should trow away all data in the cache for the given range. No write back first. These functions have been carefully added in all the USB drivers using DMA. Atomicity: I understand that the ARM hardware is not always compatible to this approach. Yes, since it fails to capture the subtly of the ARM hardware which the busdma abstraction captures. 1) Flushing data to RAM is not a problem in any case? Do you agree? As long as there's no DMA outstanding to the memory addressed by the cache line being flushed, yes. 2) Invalidating data is a problem, because invalidation can cause nearby data to be cleared aswell. So basically for those systems which are not handling this, flushing data means a lock of all CPU's until the flush/invalidate sequence is complete? Any dispute about this? Yes. You can generally only invalidate to the cache line level. That's why I was asking about the subtly in what you mean by invalidate. ARM can and does flush data properly, but this might not match your usage. You forgot 3) Touching data in the same cache line while the DMA is going on. This is a complete no-no and must never be done. Cache lines must not be polluted while the DMA is happening. If the CPU does not support certain features we cannot have an efficient system. It is like having a CPU which doesn't support switching off interrupt levels, like an 8-bit AVR. Most RISC CPUs don't support a coherent cache, it is true. However, they do support efficient and succinct cache operations. The busdma abtraction handles this nicely for all other kinds of devices, why is usb so different? Then no matter how you twist it, you cannot postpone an interrupt to a software thread. Same goes for DMA support. If their DMA engine doesn't support byte granularity and possibility to flush/invalidate on a per-byte basis, then implement a global system lock to flush/invalidate data like I suggest, if this is not doable in the hardware by the CPU instruction set. The DMA engine support it. You can do byte-aligned transfers all day long. However, the cache does not. You must segregate data to get proper operations. There's nothing that you can do about that. You must not mix accesses to memory within a cache line. To do so will cause problems, as evidence by the problems we are seeing. The approach that I was recommended several years ago, is that I can pass a pointer to a buffer, which then can be transferred by the USB engine. This pointer can be any pointer except NULL. 3) As per my knowledge, using busdma to allocate a separate buffer for a 13- byte buffer, results in having a buffer of PAGE_SIZE bytes allocated. Not with Ian's slab allocator... 4) BUSDMA experts: Please verify that the flags passed to bus_dmamap_sync() causes the exact behaviour has listed on top of this e-mail to occur in the following two functions in sys/dev/usb/usb_busdma.c void usb_pc_cpu_invalidate(struct usb_page_cache *pc) { if (pc-page_offset_end == pc-page_offset_buf) { /* nothing has been loaded into this page cache! */ return; } /* * TODO: We currently do XXX_POSTREAD and XXX_PREREAD at the * same time, but in the future we should try to isolate the * different cases to optimise the code. --HPS */ bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(pc-tag, pc-map, BUS_DMASYNC_PREREAD); } This is almost certainly wrong. POSTREAD should be called right after the read had completed. PREREAD should be called immediately before
Re: EHCI on armv6 with Write-Back caches
On Tuesday 18 December 2012 08:49:31 Andrew Turner wrote: Hello, Oleksandr and myself have been looking into why when we enable the write-back cache on the PandaBoard there are kernel panics with USB. We have tracked it down to an issue appending the ehci_qh_t to the list at the end of ehci_setup_standard_chain(). I have a patch at [1] that allows me to run sha256 on a 40MB file over NSF using the built in smsc USB ethernet chip. The problem is I have had to place a call to DELAY before EHCI_APPEND_QH. This is obviously not the correct solution. Is anyone able to help me narrow down what is missing? It appears to be a missing cache invalidate or flush somewhere but I haven't been able to track down what cache function the DELAY is working around. Andrew [1] http://fubar.geek.nz/files/freebsd/ehci_4.diff Hi, Can you dump the DMA tag belonging to the QH via and check wether it is mapped coherent or not. Thes QH- and TD- structures should not be cache mapped. Else cache has not been disabled on those pages. qh-page_cache-tag --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Tue, 18 Dec 2012 09:22:22 +0100 Hans Petter Selasky hsela...@c2i.net wrote: On Tuesday 18 December 2012 08:49:31 Andrew Turner wrote: Hello, Oleksandr and myself have been looking into why when we enable the write-back cache on the PandaBoard there are kernel panics with USB. We have tracked it down to an issue appending the ehci_qh_t to the list at the end of ehci_setup_standard_chain(). I have a patch at [1] that allows me to run sha256 on a 40MB file over NSF using the built in smsc USB ethernet chip. The problem is I have had to place a call to DELAY before EHCI_APPEND_QH. This is obviously not the correct solution. Is anyone able to help me narrow down what is missing? It appears to be a missing cache invalidate or flush somewhere but I haven't been able to track down what cache function the DELAY is working around. Andrew [1] http://fubar.geek.nz/files/freebsd/ehci_4.diff Hi, Can you dump the DMA tag belonging to the QH via and check wether it is mapped coherent or not. Thes QH- and TD- structures should not be cache mapped. Else cache has not been disabled on those pages. qh-page_cache-tag The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Is there a reason the QH and TD structures shouldn't be cache mapped? Andrew ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Tuesday 18 December 2012 12:02:13 Andrew Turner wrote: On Tue, 18 Dec 2012 09:22:22 +0100 Hans Petter Selasky hsela...@c2i.net wrote: On Tuesday 18 December 2012 08:49:31 Andrew Turner wrote: Hello, Oleksandr and myself have been looking into why when we enable the write-back cache on the PandaBoard there are kernel panics with USB. We have tracked it down to an issue appending the ehci_qh_t to the list at the end of ehci_setup_standard_chain(). I have a patch at [1] that allows me to run sha256 on a 40MB file over NSF using the built in smsc USB ethernet chip. The problem is I have had to place a call to DELAY before EHCI_APPEND_QH. This is obviously not the correct solution. Is anyone able to help me narrow down what is missing? It appears to be a missing cache invalidate or flush somewhere but I haven't been able to track down what cache function the DELAY is working around. Andrew [1] http://fubar.geek.nz/files/freebsd/ehci_4.diff Hi, Can you dump the DMA tag belonging to the QH via and check wether it is mapped coherent or not. Thes QH- and TD- structures should not be cache mapped. Else cache has not been disabled on those pages. qh-page_cache-tag Hi Andrew, The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Yes, but when I allocate memory from the USB stack, then I want that memory to not be cached. It is simply not that useful to have that memory cached. I didn't check the latest state of busdma, but if I'm not mistaken, if the BUS_DMA_COHERENT flag is set on the DMA tag, the flush/invalidate will simply return and do nothing. Maybe that is the problem ... Is there a reason the QH and TD structures shouldn't be cache mapped? Because they are frequently used for setting up data operations, and the flush/invalidate operate on the whole page where multiple tags can be present, notably for the same USB transfer, under the same mutex. So it slows down everything. --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Tue, 18 Dec 2012 13:06:44 +0100 Hans Petter Selasky hsela...@c2i.net wrote: Hi Andrew, The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Yes, but when I allocate memory from the USB stack, then I want that memory to not be cached. It is simply not that useful to have that memory cached. I didn't check the latest state of busdma, but if I'm not mistaken, if the BUS_DMA_COHERENT flag is set on the DMA tag, the flush/invalidate will simply return and do nothing. Maybe that is the problem ... The exact meaning of BUS_DMA_COHERENT depends on the architecture. The code is still required to call bus_dmamap_sync with a coherent map but when the flag is implemented the cost of the operation will be reduced. It doesn't guarantee the memory is uncached, it may be implemented that way but the USB code can't rely on it. Andrew ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Tuesday 18 December 2012 20:12:29 Andrew Turner wrote: On Tue, 18 Dec 2012 13:06:44 +0100 Hans Petter Selasky hsela...@c2i.net wrote: Hi Andrew, The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Yes, but when I allocate memory from the USB stack, then I want that memory to not be cached. It is simply not that useful to have that memory cached. I didn't check the latest state of busdma, but if I'm not mistaken, if the BUS_DMA_COHERENT flag is set on the DMA tag, the flush/invalidate will simply return and do nothing. Maybe that is the problem ... The exact meaning of BUS_DMA_COHERENT depends on the architecture. The code is still required to call bus_dmamap_sync with a coherent map but when the flag is implemented the cost of the operation will be reduced. It doesn't guarantee the memory is uncached, it may be implemented that way but the USB code can't rely on it. Andrew The USB code doesn't rely on this flag. I'm wondering if BUSDMA by accident sets this flag, so that the cache sync ops are not called. --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Dec 18, 2012, at 1:22 AM, Hans Petter Selasky wrote: On Tuesday 18 December 2012 08:49:31 Andrew Turner wrote: Hello, Oleksandr and myself have been looking into why when we enable the write-back cache on the PandaBoard there are kernel panics with USB. We have tracked it down to an issue appending the ehci_qh_t to the list at the end of ehci_setup_standard_chain(). I have a patch at [1] that allows me to run sha256 on a 40MB file over NSF using the built in smsc USB ethernet chip. The problem is I have had to place a call to DELAY before EHCI_APPEND_QH. This is obviously not the correct solution. Is anyone able to help me narrow down what is missing? It appears to be a missing cache invalidate or flush somewhere but I haven't been able to track down what cache function the DELAY is working around. Andrew [1] http://fubar.geek.nz/files/freebsd/ehci_4.diff Hi, Can you dump the DMA tag belonging to the QH via and check wether it is mapped coherent or not. Thes QH- and TD- structures should not be cache mapped. Else cache has not been disabled on those pages. busdma doesn't work that way. The coherent flag is just a hint Warner qh-page_cache-tag --HPS ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org
Re: EHCI on armv6 with Write-Back caches
On Dec 18, 2012, at 12:44 PM, Hans Petter Selasky wrote: On Tuesday 18 December 2012 20:12:29 Andrew Turner wrote: On Tue, 18 Dec 2012 13:06:44 +0100 Hans Petter Selasky hsela...@c2i.net wrote: Hi Andrew, The BUS_DMA_COHERENT flag does nothing on armv6 as we need the cache enabled for atomic operations to work correctly and we would have to disable the cache on the entire page. This is acceptable behaviour from the description of the flag in the busdma man page. Yes, but when I allocate memory from the USB stack, then I want that memory to not be cached. It is simply not that useful to have that memory cached. I didn't check the latest state of busdma, but if I'm not mistaken, if the BUS_DMA_COHERENT flag is set on the DMA tag, the flush/invalidate will simply return and do nothing. Maybe that is the problem ... The exact meaning of BUS_DMA_COHERENT depends on the architecture. The code is still required to call bus_dmamap_sync with a coherent map but when the flag is implemented the cost of the operation will be reduced. It doesn't guarantee the memory is uncached, it may be implemented that way but the USB code can't rely on it. Andrew The USB code doesn't rely on this flag. I'm wondering if BUSDMA by accident sets this flag, so that the cache sync ops are not called. No. It doesn't. Does the USB code still touch bytes in the same cache line while the DMA is still outstanding? Warner ___ freebsd-usb@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/freebsd-usb To unsubscribe, send any mail to freebsd-usb-unsubscr...@freebsd.org