Re: [PATCH 7/7] media: uvcvideo: Use dma_alloc_noncontiguous API
Tue, 2 Feb 2021 10:51:10 +0100 > From: Ricardo Ribalda > > On architectures where the is no coherent caching such as ARM use the > dma_alloc_noncontiguos API and handle manually the cache flushing using > dma_sync_sgtable(). > > With this patch on the affected architectures we can measure up to 20x > performance improvement in uvc_video_copy_data_work(). > > Eg: aarch64 with an external usb camera > > NON_CONTIGUOUS > frames: 999 > packets: 999 > empty: 0 (0 %) > errors: 0 > invalid: 0 > pts: 0 early, 0 initial, 999 ok > scr: 0 count ok, 0 diff ok > sof: 2048 <= sof <= 0, freq 0.000 kHz > bytes 67034480 : duration 33303 > FPS: 29.99 > URB: 523446/4993 uS/qty: 104.836 avg 132.532 std 13.230 min 831.094 max (uS) > header: 76564/4993 uS/qty: 15.334 avg 15.229 std 3.438 min 186.875 max (uS) > latency: 468945/4992 uS/qty: 93.939 avg 132.577 std 9.531 min 824.010 max (uS) > decode: 54161/4993 uS/qty: 10.847 avg 6.313 std 1.614 min 111.458 max (uS) > raw decode speed: 9.931 Gbits/s > raw URB handling speed: 1.025 Gbits/s > throughput: 16.102 Mbits/s > URB decode CPU usage 0.162600 % > > COHERENT > frames: 999 > packets: 999 > empty: 0 (0 %) > errors: 0 > invalid: 0 > pts: 0 early, 0 initial, 999 ok > scr: 0 count ok, 0 diff ok > sof: 2048 <= sof <= 0, freq 0.000 kHz > bytes 54683536 : duration 33302 > FPS: 29.99 > URB: 1478135/4000 uS/qty: 369.533 avg 390.357 std 22.968 min 3337.865 max (uS) > header: 79761/4000 uS/qty: 19.940 avg 18.495 std 1.875 min 336.719 max (uS) > latency: 281077/4000 uS/qty: 70.269 avg 83.102 std 5.104 min 735.000 max (uS) > decode: 1197057/4000 uS/qty: 299.264 avg 318.080 std 1.615 min 2806.667 max > (uS) > raw decode speed: 365.470 Mbits/s > raw URB handling speed: 295.986 Mbits/s > throughput: 13.136 Mbits/s > URB decode CPU usage 3.594500 % > > Signed-off-by: Ricardo Ribalda > Signed-off-by: Christoph Hellwig > --- > drivers/media/usb/uvc/uvc_video.c | 79 ++- > drivers/media/usb/uvc/uvcvideo.h | 4 +- > 2 files changed, 60 insertions(+), 23 
deletions(-) > > diff --git a/drivers/media/usb/uvc/uvc_video.c > b/drivers/media/usb/uvc/uvc_video.c > index a6a441d92b9488..0a7d287dc41528 100644 > --- a/drivers/media/usb/uvc/uvc_video.c > +++ b/drivers/media/usb/uvc/uvc_video.c > @@ -6,11 +6,13 @@ > * Laurent Pinchart (laurent.pinch...@ideasonboard.com) > */ > > +#include > #include > #include > #include > #include > #include > +#include > #include > #include > #include > @@ -1097,6 +1099,26 @@ static int uvc_video_decode_start(struct uvc_streaming > *stream, > return data[0]; > } > > +static inline struct device *stream_to_dmadev(struct uvc_streaming *stream) > +{ > + return bus_to_hcd(stream->dev->udev->bus)->self.sysdev; > +} > + > +static void uvc_urb_dma_sync(struct uvc_urb *uvc_urb, bool for_device) > +{ > + struct device *dma_dev = dma_dev = stream_to_dmadev(uvc_urb->stream); > + > + if (for_device) { > + dma_sync_sgtable_for_device(dma_dev, uvc_urb->sgt, > + DMA_FROM_DEVICE); > + } else { > + dma_sync_sgtable_for_cpu(dma_dev, uvc_urb->sgt, > + DMA_FROM_DEVICE); > + invalidate_kernel_vmap_range(uvc_urb->buffer, > + uvc_urb->stream->urb_size); > + } > +} > + > /* > * uvc_video_decode_data_work: Asynchronous memcpy processing > * > @@ -1118,6 +1140,8 @@ static void uvc_video_copy_data_work(struct work_struct > *work) > uvc_queue_buffer_release(op->buf); > } > > + uvc_urb_dma_sync(uvc_urb, true); > + > ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL); > if (ret < 0) > uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n", > @@ -1539,10 +1563,12 @@ static void uvc_video_complete(struct urb *urb) >* Process the URB headers, and optionally queue expensive memcpy tasks >* to be deferred to a work queue. >*/ > + uvc_urb_dma_sync(uvc_urb, false); > stream->decode(uvc_urb, buf, buf_meta); > > /* If no async work is needed, resubmit the URB immediately. 
*/ > if (!uvc_urb->async_operations) { > + uvc_urb_dma_sync(uvc_urb, true); > ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC); > if (ret < 0) > uvc_printk(KERN_ERR, > @@ -1559,24 +1585,46 @@ static void uvc_video_complete(struct urb *urb) > */ > static void uvc_free_urb_buffers(struct uvc_streaming *stream) > { > + struct device *dma_dev = dma_dev = stream_to_dmadev(stream); > struct uvc_urb *uvc_urb; > > for_each_uvc_urb(uvc_urb, stream) { > if (!uvc_urb->buffer) > continue; > > -#ifndef CONFIG_DMA_NONCOHERENT > - usb_free_coherent(stream->dev->udev, stream->urb_size, > - uvc_urb->buffer, uvc_urb->dma); > -#else > -
Re: [PATCH 4/8] dma-direct: use __GFP_ZERO in dma_direct_alloc_pages
On Wed, 30 Sep 2020 18:09:13 Christoph Hellwig wrote: > > Prepare for supporting the DMA_ATTR_NO_KERNEL_MAPPING flag in > dma_alloc_pages. > > Signed-off-by: Christoph Hellwig > --- > kernel/dma/direct.c | 9 - > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c > index b5f20781d3a96f..b5d56810130b22 100644 > --- a/kernel/dma/direct.c > +++ b/kernel/dma/direct.c > @@ -296,9 +296,10 @@ struct page *dma_direct_alloc_pages(struct device *dev, > size_t size, > dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) > { > struct page *page; > - void *ret; > > if (dma_should_alloc_from_pool(dev, gfp, 0)) { > + void *ret; > + > page = dma_alloc_from_pool(dev, size, &ret, gfp, > dma_coherent_ok); > if (!page) > @@ -306,7 +307,7 @@ struct page *dma_direct_alloc_pages(struct device *dev, > size_t size, > goto done; > } > > - page = __dma_direct_alloc_pages(dev, size, gfp); > + page = __dma_direct_alloc_pages(dev, size, gfp | __GFP_ZERO); > if (!page) > return NULL; > if (PageHighMem(page)) { > @@ -320,13 +321,11 @@ struct page *dma_direct_alloc_pages(struct device *dev, > size_t size, > goto out_free_pages; > } > > - ret = page_address(page); > if (force_dma_unencrypted(dev)) { > - if (set_memory_decrypted((unsigned long)ret, > + if (set_memory_decrypted((unsigned long)page_address(page), > 1 << get_order(size))) > goto out_free_pages; > } > - memset(ret, 0, size); Not sure this works without changes in cma_alloc(). > done: > *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); > return page; > -- > 2.28.0 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] vfio: type1: fix kthread use case
On Tue, 7 Jul 2020 08:31:41 +0800 Yan Zhao wrote: > > I think "current->mm == NULL" in itself implies kthread and not use_mm, > as a user thread is not able to have "current->mm == NULL", right? Nope, it's the fix target as checking the current mm does not make sense without current being kthread. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] vfio: type1: fix kthread use case
On Mon, 6 Jul 2020 16:04:13 +0200 Markus Elfring wrote: > > Can another design approach make sense here? > > + bool thread_use_mm = ((current->flags & PF_KTHREAD) && !current->mm); > + if (thread_use_mm) Good. See below. > > > > kthread_use_mm(mm); --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2798,7 +2798,7 @@ static int vfio_iommu_type1_dma_rw_chunk struct mm_struct *mm; unsigned long vaddr; struct vfio_dma *dma; - bool kthread = current->mm == NULL; + bool kthread_load_mm; size_t offset; *copied = 0; @@ -2812,11 +2812,12 @@ static int vfio_iommu_type1_dma_rw_chunk return -EPERM; mm = get_task_mm(dma->task); - if (!mm) return -EPERM; - if (kthread) + kthread_load_mm = current->flags & PF_KTHREAD && + current->mm == NULL; + if (kthread_load_mm) kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2843,7 +2844,7 @@ static int vfio_iommu_type1_dma_rw_chunk } else *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; - if (kthread) + if (kthread_load_mm) kthread_unuse_mm(mm); out: mmput(mm); ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] vfio: type1: fix kthread use case
On Mon, 6 Jul 2020 15:33:58 +0200 Markus Elfring wrote: > > I would prefer the following variable declarations then. > > + bool kthread, use_mm; > > > > size_t offset; > > > > + kthread =3D current->flags & PF_KTHREAD; > > + use_mm =3D current->mm =3D=3D NULL; > > I propose to move such assignments directly before the corresponding check= > . Got it. See below. > > > if (!mm) > > return -EPERM; > > > + kthread =3D current->flags & PF_KTHREAD; > + use_mm =3D !current->mm; > > > - if (kthread) > > + if (kthread && use_mm) > > kthread_use_mm(mm); --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2798,7 +2798,7 @@ static int vfio_iommu_type1_dma_rw_chunk struct mm_struct *mm; unsigned long vaddr; struct vfio_dma *dma; - bool kthread = current->mm == NULL; + bool kthread, use_mm; size_t offset; *copied = 0; @@ -2812,11 +2812,13 @@ static int vfio_iommu_type1_dma_rw_chunk return -EPERM; mm = get_task_mm(dma->task); - if (!mm) return -EPERM; - if (kthread) + kthread = current->flags & PF_KTHREAD; + use_mm = current->mm == NULL; + + if (kthread && use_mm) kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2843,7 +2845,7 @@ static int vfio_iommu_type1_dma_rw_chunk } else *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; - if (kthread) + if (kthread && use_mm) kthread_unuse_mm(mm); out: mmput(mm); ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH] vfio: type1: fix kthread use case
On Mon, 6 Jul 2020 13:32:18 +0200 Markus Elfring wrote: > =E2=80=A6 > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -2798,7 +2798,8 @@ static int vfio_iommu_type1_dma_rw_chunk > =E2=80=A6 > > - bool kthread =3D current->mm =3D=3D NULL; > > + bool kthread =3D current->flags & PF_KTHREAD; > > + bool use_mm =3D current->mm =3D=3D NULL; > =E2=80=A6 > > Can it be helpful to convert initialisations for these variables > into later assignments? Perhaps. Then it looks like the below. --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2798,9 +2798,12 @@ static int vfio_iommu_type1_dma_rw_chunk struct mm_struct *mm; unsigned long vaddr; struct vfio_dma *dma; - bool kthread = current->mm == NULL; + bool kthread; + bool use_mm; size_t offset; + kthread = current->flags & PF_KTHREAD; + use_mm = current->mm == NULL; *copied = 0; dma = vfio_find_dma(iommu, user_iova, 1); @@ -2812,11 +2815,10 @@ static int vfio_iommu_type1_dma_rw_chunk return -EPERM; mm = get_task_mm(dma->task); - if (!mm) return -EPERM; - if (kthread) + if (kthread && use_mm) kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2843,7 +2845,7 @@ static int vfio_iommu_type1_dma_rw_chunk } else *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; - if (kthread) + if (kthread && use_mm) kthread_unuse_mm(mm); out: mmput(mm); ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RFC PATCH] vfio: type1: fix kthread use case
It's incorrect to tell out if a task is kthread without checking PF_KTHREAD at all. What's also fixed, if no need to be in a seperate patch, is to invoke kthread_use_mm() without checking the current mm first as the kthread may hold a mm struct atm and it's not the right place to switch mm. Fixes: 8d46c0cca5f4 ("vfio: introduce vfio_dma_rw to read/write a range of IOVAs") Cc: Yan Zhao Cc: Markus Elfring Cc: Christoph Hellwig Signed-off-by: Hillf Danton --- --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2798,7 +2798,8 @@ static int vfio_iommu_type1_dma_rw_chunk struct mm_struct *mm; unsigned long vaddr; struct vfio_dma *dma; - bool kthread = current->mm == NULL; + bool kthread = current->flags & PF_KTHREAD; + bool use_mm = current->mm == NULL; size_t offset; *copied = 0; @@ -2812,11 +2813,10 @@ static int vfio_iommu_type1_dma_rw_chunk return -EPERM; mm = get_task_mm(dma->task); - if (!mm) return -EPERM; - if (kthread) + if (kthread && use_mm) kthread_use_mm(mm); else if (current->mm != mm) goto out; @@ -2843,7 +2843,7 @@ static int vfio_iommu_type1_dma_rw_chunk } else *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; - if (kthread) + if (kthread && use_mm) kthread_unuse_mm(mm); out: mmput(mm); -- ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v3 07/34] iommu: Add probe_device() and release_device() call-backs
On Wed, 29 Apr 2020 15:36:45 +0200 Joerg Roedel wrote: > > +static int __iommu_probe_device(struct device *dev) > +{ > + const struct iommu_ops *ops = dev->bus->iommu_ops; > + struct iommu_device *iommu_dev; > + struct iommu_group *group; > + int ret; > + > + iommu_dev = ops->probe_device(dev); > + if (IS_ERR(iommu_dev)) > + return PTR_ERR(iommu_dev); > + > + dev->iommu->iommu_dev = iommu_dev; > + > + group = iommu_group_get_for_dev(dev); > + if (!IS_ERR(group)) { Typo? > + ret = PTR_ERR(group); > + goto out_release; > + } > + iommu_group_put(group); > + > + iommu_device_link(iommu_dev, dev); > + > + return 0; > + > +out_release: > + ops->release_device(dev); > + > + return ret; > +} ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [rfc v2 3/6] dma-pool: dynamically expanding atomic pools
On Wed, 8 Apr 2020 14:21:06 -0700 (PDT) David Rientjes wrote: > > When an atomic pool becomes fully depleted because it is now relied upon > for all non-blocking allocations through the DMA API, allow background > expansion of each pool by a kworker. > > When an atomic pool has less than the default size of memory left, kick > off a kworker to dynamically expand the pool in the background. The pool > is doubled in size, up to MAX_ORDER-1. If memory cannot be allocated at > the requested order, smaller allocation(s) are attempted. > What is proposed looks like a path of single lane without how to dynamically shrink the pool taken into account. Thus the risk may rise in corner cases where pools are over-expanded in long run after one-off peak allocation requests. Is it worth the complexity of expander + shrinker at the first place? > This allows the default size to be kept quite low when one or more of the > atomic pools is not used. > > This also allows __dma_atomic_pool_init to return a pointer to the pool > to make initialization cleaner. > > Also switch over some node ids to the more appropriate NUMA_NO_NODE. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 04/28] dma-mapping: use vmap instead of reimplementing it
On Wed, 8 Apr 2020 13:59:02 +0200 Christoph Hellwig wrote: > > @@ -62,24 +42,20 @@ void *dma_common_pages_remap(struct page **pages, size_t > size, > void *dma_common_contiguous_remap(struct page *page, size_t size, > pgprot_t prot, const void *caller) > { > - int i; > + int count = size >> PAGE_SHIFT; > struct page **pages; > - struct vm_struct *area; > + void *vaddr; > + int i; > > - pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL); > + pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL); Is it making sense to vmalloc pages as long as array size is bigger than PAGE_SIZE? > if (!pages) > return NULL; > - > - for (i = 0; i < (size >> PAGE_SHIFT); i++) > + for (i = 0; i < count; i++) > pages[i] = nth_page(page, i); > - > - area = __dma_common_pages_remap(pages, size, prot, caller); > - > + vaddr = vmap(pages, count, VM_DMA_COHERENT, prot); > kfree(pages); > > - if (!area) > - return NULL; > - return area->addr; > + return vaddr; > } ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 02/28] staging: android: ion: use vmap instead of vm_map_ram
On Wed, 8 Apr 2020 13:59:00 +0200 > > vm_map_ram can keep mappings around after the vm_unmap_ram. Using that > with non-PAGE_KERNEL mappings can lead to all kinds of aliasing issues. > > Signed-off-by: Christoph Hellwig > --- > drivers/staging/android/ion/ion_heap.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/staging/android/ion/ion_heap.c > b/drivers/staging/android/ion/ion_heap.c > index 473b465724f1..a2d5c6df4b96 100644 > --- a/drivers/staging/android/ion/ion_heap.c > +++ b/drivers/staging/android/ion/ion_heap.c > @@ -99,12 +99,12 @@ int ion_heap_map_user(struct ion_heap *heap, struct > ion_buffer *buffer, > > static int ion_heap_clear_pages(struct page **pages, int num, pgprot_t > pgprot) > { > - void *addr = vm_map_ram(pages, num, -1, pgprot); > + void *addr = vmap(pages, num, VM_MAP); A merge glitch? void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) > > if (!addr) > return -ENOMEM; > memset(addr, 0, PAGE_SIZE * num); > - vm_unmap_ram(addr, num); > + vunmap(addr); > > return 0; > } > -- > 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: regression in ath10k dma allocation
On Tue, 20 Aug 2019 05:05:14 +0200 Christoph Hellwig wrote: > > Tobias, plase try this patch: > A minute! > -- > >From 88c590a2ecafc8279388f25bfbe1ead8ea3507a6 Mon Sep 17 00:00:00 2001 > From: Christoph Hellwig > Date: Tue, 20 Aug 2019 11:45:49 +0900 > Subject: dma-direct: fix zone selection after an unaddressable CMA allocation > > The new dma_alloc_contiguous hides if we allocate CMA or regular > pages, and thus fails to retry a ZONE_NORMAL allocation if the CMA > allocation succeeds but isn't addressable. That means we either fail > outright or dip into a small zone that might not succeed either. > > Thanks to Hillf Danton for debugging this issue. > > Fixes: b1d2dc009dec ("dma-contiguous: add dma_{alloc,free}_contiguous() > helpers") > Reported-by: Tobias Klausmann > Signed-off-by: Christoph Hellwig > --- > drivers/iommu/dma-iommu.c | 3 +++ > include/linux/dma-contiguous.h | 5 + > kernel/dma/contiguous.c| 9 +++-- > kernel/dma/direct.c| 7 ++- > 4 files changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index d991d40f797f..f68a62c3c32b 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -965,10 +965,13 @@ static void *iommu_dma_alloc_pages(struct device *dev, > size_t size, > { > bool coherent = dev_is_dma_coherent(dev); > size_t alloc_size = PAGE_ALIGN(size); > + int node = dev_to_node(dev); > struct page *page = NULL; > void *cpu_addr; > > page = dma_alloc_contiguous(dev, alloc_size, gfp); > + if (!page) > + page = alloc_pages_node(node, gfp, get_order(alloc_size)); > if (!page) > return NULL; > > diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h > index c05d4e661489..03f8e98e3bcc 100644 > --- a/include/linux/dma-contiguous.h > +++ b/include/linux/dma-contiguous.h > @@ -160,10 +160,7 @@ bool dma_release_from_contiguous(struct device *dev, > struct page *pages, > static inline struct page *dma_alloc_contiguous(struct device *dev, size_t > size, 
> gfp_t gfp) > { > - int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; > - size_t align = get_order(PAGE_ALIGN(size)); > - > - return alloc_pages_node(node, gfp, align); > + return NULL; > } > > static inline void dma_free_contiguous(struct device *dev, struct page *page, > diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c > index 2bd410f934b3..e6b450fdbeb6 100644 > --- a/kernel/dma/contiguous.c > +++ b/kernel/dma/contiguous.c > @@ -230,9 +230,7 @@ bool dma_release_from_contiguous(struct device *dev, > struct page *pages, > */ > struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) > { > - int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; > - size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; > - size_t align = get_order(PAGE_ALIGN(size)); > + size_t count = size >> PAGE_SHIFT; > struct page *page = NULL; > struct cma *cma = NULL; > > @@ -243,14 +241,12 @@ struct page *dma_alloc_contiguous(struct device *dev, > size_t size, gfp_t gfp) > > /* CMA can be used only in the context which permits sleeping */ > if (cma && gfpflags_allow_blocking(gfp)) { > + size_t align = get_order(size); > size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); > > page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); > } > > - /* Fallback allocation of normal pages */ > - if (!page) > - page = alloc_pages_node(node, gfp, align); > return page; > } > > @@ -258,6 +254,7 @@ struct page *dma_alloc_contiguous(struct device *dev, > size_t size, gfp_t gfp) > * dma_free_contiguous() - release allocated pages > * @dev: Pointer to device for which the pages were allocated. > * @page: Pointer to the allocated pages. > + int node = dev ? dev_to_node(dev) : NUMA_NO_NODE; > * @size: Size of allocated pages. > * > * This function releases memory allocated by dma_alloc_contiguous(). 
As the > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c > index 795c9b095d75..d82d184463ce 100644 > --- a/kernel/dma/direct.c > +++ b/kernel/dma/direct.c > @@ -85,6 +85,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t > phys, size_t size) > struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, > dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) > { > + size_t alloc_size = PAGE_ALIGN(size)
Re: regression in ath10k dma allocation
On Tue, 20 Aug 2019 10:15:01 +0800 Christoph Hellwig wrote: > On Mon, Aug 19, 2019 at 06:58:52PM -0700, Nicolin Chen wrote: > > Right...the condition was in-between. However, not every caller > > of dma_alloc_contiguous() is supposed to have a coherent check. > > So we either add a 'bool coherent_ok' to the API or revert the > > dma-direct part back to the original. Probably former option is > > better? > > > > Thank you for the debugging. I have been a bit distracted, may > > not be able to submit a fix very soon. Would you like to help? > > Yeah, it turns out that while the idea for the dma_alloc_contiguous > helper was neat it didn't work out at all, and me pushing Nicolin > down that route was not a very smart idea. Sorry for causing this > mess. > > I think we'll just need to open code it for dma-direct for 5.3. > Hillf do you want to cook up a patch or should I do it? Feel free to do that, Sir. Thanks Hillf ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: regression in ath10k dma allocation
On Sat, 17 Aug 2019 00:42:48 +0200 Tobias Klausmann wrote: >Hi Nicolin, > >On 17.08.19 00:25, Nicolin Chen wrote: >> Hi Tobias >> >> On Fri, Aug 16, 2019 at 10:16:45PM +0200, Tobias Klausmann wrote: do you have CONFIG_DMA_CMA set in your config? If not please make sure you have this commit in your testing tree, and if the problem still persists it would be a little odd and we'd have to dig deeper: commit dd3dcede9fa0a0b661ac1f24843f4a1b1317fdb6 Author: Nicolin Chen Date: Wed May 29 17:54:25 2019 -0700 dma-contiguous: fix !CONFIG_DMA_CMA version of dma_{alloc, free}_contiguous() >>> yes CONFIG_DMA_CMA is set (=y, see attached config), the commit you mention >>> above is included, if you have any hints how to go forward, please let me >>> know! >> For CONFIG_DMA_CMA=y, by judging the log with error code -12, I >> feel this one should work for you. Would you please check if it >> is included or try it out otherwise? >> >> dma-contiguous: do not overwrite align in dma_alloc_contiguous() >> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=c6622a425acd1d2f3a443cd39b490a8777b622d7 > > >Thanks for the hint, yet the commit is included and does not fix the >problem! > Hi Tobias Two minor diffs below in hope that they might make sense. 1, fallback unless dma coherent ok. 
--- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -246,6 +246,10 @@ struct page *dma_alloc_contiguous(struct size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_free_contiguous(dev, page, size); + page = NULL; + } } /* Fallback allocation of normal pages */ -- 2, cleanup: cma unless contiguous --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -234,18 +234,13 @@ struct page *dma_alloc_contiguous(struct size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT; size_t align = get_order(PAGE_ALIGN(size)); struct page *page = NULL; - struct cma *cma = NULL; - - if (dev && dev->cma_area) - cma = dev->cma_area; - else if (count > 1) - cma = dma_contiguous_default_area; /* CMA can be used only in the context which permits sleeping */ - if (cma && gfpflags_allow_blocking(gfp)) { + if (count > 1 && gfpflags_allow_blocking(gfp)) { size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT); - page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN); + page = cma_alloc(dev_get_cma_area(dev), count, cma_align, + gfp & __GFP_NOWARN); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { dma_free_contiguous(dev, page, size); page = NULL; -- ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH V5 3/5] iommu/dma-iommu: Handle deferred devices
On Thu, 15 Aug 2019 12:09:41 +0100 Tom Murphy wrote: > > Handle devices which defer their attach to the iommu in the dma-iommu api > > Signed-off-by: Tom Murphy > --- > drivers/iommu/dma-iommu.c | 27 ++- > 1 file changed, 26 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index 2712fbc68b28..906b7fa14d3c 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -22,6 +22,7 @@ > #include > #include > #include > +#include > > struct iommu_dma_msi_page { > struct list_headlist; > @@ -351,6 +352,21 @@ static int iommu_dma_init_domain(struct iommu_domain > *domain, dma_addr_t base, > return iova_reserve_iommu_regions(dev, domain); > } > > +static int handle_deferred_device(struct device *dev, > + struct iommu_domain *domain) > +{ > + const struct iommu_ops *ops = domain->ops; > + > + if (!is_kdump_kernel()) > + return 0; > + > + if (unlikely(ops->is_attach_deferred && > + ops->is_attach_deferred(domain, dev))) > + return iommu_attach_device(domain, dev); > + > + return 0; > +} > + > /** > * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU > API > *page flags. > @@ -463,6 +479,9 @@ static dma_addr_t __iommu_dma_map(struct device *dev, > phys_addr_t phys, > size_t iova_off = iova_offset(iovad, phys); > dma_addr_t iova; > > + if (unlikely(handle_deferred_device(dev, domain))) > + return DMA_MAPPING_ERROR; > + > size = iova_align(iovad, size + iova_off); > > iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); iommu_map_atomic() is applied to __iommu_dma_map() in 2/5. Is it an atomic context currently given the mutex_lock() in iommu_attach_device()? ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu