On 5/13/25 14:30, wangtao wrote:
>> -----Original Message-----
>> From: Christian König <christian.koe...@amd.com>
>> Sent: Tuesday, May 13, 2025 7:32 PM
>> To: wangtao <tao.wang...@honor.com>; sumit.sem...@linaro.org;
>> benjamin.gaign...@collabora.com; brian.star...@arm.com;
>> jstu...@google.com; tjmerc...@google.com
>> Cc: linux-me...@vger.kernel.org; dri-devel@lists.freedesktop.org; linaro-
>> mm-...@lists.linaro.org; linux-ker...@vger.kernel.org;
>> wangbintian(BintianWang) <bintian.w...@honor.com>; yipengxiang
>> <yipengxi...@honor.com>; liulu 00013167 <liulu....@honor.com>; hanfeng
>> 00012985 <feng....@honor.com>
>> Subject: Re: [PATCH 2/2] dmabuf/heaps: implement
>> DMA_BUF_IOCTL_RW_FILE for system_heap
>>
>> On 5/13/25 11:28, wangtao wrote:
>>> Support direct file I/O operations for system_heap dma-buf objects.
>>> Implementation includes:
>>> 1. Convert sg_table to bio_vec
>>
>> That is usually illegal for DMA-bufs.
> [wangtao] The term 'convert' is misleading in this context. The appropriate 
> phrasing should be: Construct bio_vec from sg_table.

Well, it doesn't matter what you call it. Touching the pages inside an sg table
of a DMA-buf is illegal; we even have code to actively prevent that.

Once more: This approach was already rejected multiple times! Please use 
udmabuf instead!

The hack you came up with here is simply not necessary.

Regards,
Christian.


> Appreciate your feedback.
>>
>> Regards,
>> Christian.
>>
>>> 2. Set IOCB_DIRECT when O_DIRECT is supported
>>> 3. Invoke vfs_iocb_iter_read()/vfs_iocb_iter_write() for actual I/O
>>>
>>> Performance metrics (UFS 4.0 device @4GB/s, Arm64 CPU @1GHz):
>>>
>>> | Metric             |    1MB |    8MB |    64MB |   1024MB |   3072MB |
>>> |--------------------|-------:|-------:|--------:|---------:|---------:|
>>> | Buffer Read (us)   |   1658 |   9028 |   69295 |  1019783 |  2978179 |
>>> | Direct Read (us)   |    707 |   2647 |   18689 |   299627 |   937758 |
>>> | Buffer Rate (MB/s) |    603 |    886 |     924 |     1004 |     1032 |
>>> | Direct Rate (MB/s) |   1414 |   3022 |    3425 |     3418 |     3276 |
>>>
>>> Signed-off-by: wangtao <tao.wang...@honor.com>
>>> ---
>>>  drivers/dma-buf/heaps/system_heap.c | 118
>>> ++++++++++++++++++++++++++++
>>>  1 file changed, 118 insertions(+)
>>>
>>> diff --git a/drivers/dma-buf/heaps/system_heap.c
>>> b/drivers/dma-buf/heaps/system_heap.c
>>> index 26d5dc89ea16..f7b71b9843aa 100644
>>> --- a/drivers/dma-buf/heaps/system_heap.c
>>> +++ b/drivers/dma-buf/heaps/system_heap.c
>>> @@ -20,6 +20,8 @@
>>>  #include <linux/scatterlist.h>
>>>  #include <linux/slab.h>
>>>  #include <linux/vmalloc.h>
>>> +#include <linux/bvec.h>
>>> +#include <linux/uio.h>
>>>
>>>  static struct dma_heap *sys_heap;
>>>
>>> @@ -281,6 +283,121 @@ static void system_heap_vunmap(struct dma_buf
>> *dmabuf, struct iosys_map *map)
>>>     iosys_map_clear(map);
>>>  }
>>>
>>> +static struct bio_vec *system_heap_init_bvec(struct
>> system_heap_buffer *buffer,
>>> +                   size_t offset, size_t len, int *nr_segs) {
>>> +   struct sg_table *sgt = &buffer->sg_table;
>>> +   struct scatterlist *sg;
>>> +   size_t length = 0;
>>> +   unsigned int i, k = 0;
>>> +   struct bio_vec *bvec;
>>> +   size_t sg_left;
>>> +   size_t sg_offset;
>>> +   size_t sg_len;
>>> +
>>> +   bvec = kvcalloc(sgt->nents, sizeof(*bvec), GFP_KERNEL);
>>> +   if (!bvec)
>>> +           return NULL;
>>> +
>>> +   for_each_sg(sgt->sgl, sg, sgt->nents, i) {
>>> +           length += sg->length;
>>> +           if (length <= offset)
>>> +                   continue;
>>> +
>>> +           sg_left = length - offset;
>>> +           sg_offset = sg->offset + sg->length - sg_left;
>>> +           sg_len = min(sg_left, len);
>>> +
>>> +           bvec[k].bv_page = sg_page(sg);
>>> +           bvec[k].bv_len = sg_len;
>>> +           bvec[k].bv_offset = sg_offset;
>>> +           k++;
>>> +
>>> +           offset += sg_len;
>>> +           len -= sg_len;
>>> +           if (len <= 0)
>>> +                   break;
>>> +   }
>>> +
>>> +   *nr_segs = k;
>>> +   return bvec;
>>> +}
>>> +
>>> +static int system_heap_rw_file(struct system_heap_buffer *buffer, bool
>> is_read,
>>> +           bool direct_io, struct file *filp, loff_t file_offset,
>>> +           size_t buf_offset, size_t len)
>>> +{
>>> +   struct bio_vec *bvec;
>>> +   int nr_segs = 0;
>>> +   struct iov_iter iter;
>>> +   struct kiocb kiocb;
>>> +   ssize_t ret = 0;
>>> +
>>> +   if (direct_io) {
>>> +           if (!(filp->f_mode & FMODE_CAN_ODIRECT))
>>> +                   return -EINVAL;
>>> +   }
>>> +
>>> +   bvec = system_heap_init_bvec(buffer, buf_offset, len, &nr_segs);
>>> +   if (!bvec)
>>> +           return -ENOMEM;
>>> +
>>> +   iov_iter_bvec(&iter, is_read ? ITER_DEST : ITER_SOURCE, bvec,
>> nr_segs, len);
>>> +   init_sync_kiocb(&kiocb, filp);
>>> +   kiocb.ki_pos = file_offset;
>>> +   if (direct_io)
>>> +           kiocb.ki_flags |= IOCB_DIRECT;
>>> +
>>> +   while (kiocb.ki_pos < file_offset + len) {
>>> +           if (is_read)
>>> +                   ret = vfs_iocb_iter_read(filp, &kiocb, &iter);
>>> +           else
>>> +                   ret = vfs_iocb_iter_write(filp, &kiocb, &iter);
>>> +           if (ret <= 0)
>>> +                   break;
>>> +   }
>>> +
>>> +   kvfree(bvec);
>>> +   return ret < 0 ? ret : 0;
>>> +}
>>> +
>>> +static int system_heap_dma_buf_rw_file(struct dma_buf *dmabuf,
>>> +                   struct dma_buf_rw_file *back)
>>> +{
>>> +   struct system_heap_buffer *buffer = dmabuf->priv;
>>> +   int ret = 0;
>>> +   __u32 op = back->flags & DMA_BUF_RW_FLAGS_OP_MASK;
>>> +   bool direct_io = back->flags & DMA_BUF_RW_FLAGS_DIRECT;
>>> +   struct file *filp;
>>> +
>>> +   if (op != DMA_BUF_RW_FLAGS_READ && op !=
>> DMA_BUF_RW_FLAGS_WRITE)
>>> +           return -EINVAL;
>>> +   if (direct_io) {
>>> +           if (!PAGE_ALIGNED(back->file_offset) ||
>>> +                   !PAGE_ALIGNED(back->buf_offset) ||
>>> +                   !PAGE_ALIGNED(back->buf_len))
>>> +           return -EINVAL;
>>> +   }
>>> +   if (!back->buf_len || back->buf_len > dmabuf->size ||
>>> +           back->buf_offset >= dmabuf->size ||
>>> +           back->buf_offset + back->buf_len > dmabuf->size)
>>> +           return -EINVAL;
>>> +   if (back->file_offset + back->buf_len < back->file_offset)
>>> +           return -EINVAL;
>>> +
>>> +   filp = fget(back->fd);
>>> +   if (!filp)
>>> +           return -EBADF;
>>> +
>>> +   mutex_lock(&buffer->lock);
>>> +   ret = system_heap_rw_file(buffer, op ==
>> DMA_BUF_RW_FLAGS_READ, direct_io,
>>> +                   filp, back->file_offset, back->buf_offset, back-
>>> buf_len);
>>> +   mutex_unlock(&buffer->lock);
>>> +
>>> +   fput(filp);
>>> +   return ret;
>>> +}
>>> +
>>>  static void system_heap_dma_buf_release(struct dma_buf *dmabuf)  {
>>>     struct system_heap_buffer *buffer = dmabuf->priv; @@ -308,6
>> +425,7
>>> @@ static const struct dma_buf_ops system_heap_buf_ops = {
>>>     .mmap = system_heap_mmap,
>>>     .vmap = system_heap_vmap,
>>>     .vunmap = system_heap_vunmap,
>>> +   .rw_file = system_heap_dma_buf_rw_file,
>>>     .release = system_heap_dma_buf_release,  };
>>>
> 

Reply via email to