Re: [PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm

2023-04-03 Thread kernel test robot
Hi Danilo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
base:   d36d68fd1925d33066d52468b7c7c6aca6521248
patch link:
https://lore.kernel.org/r/20230404012741.116502-14-dakr%40redhat.com
patch subject: [PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw 
ops to manage uvmm
config: arc-randconfig-r043-20230403 
(https://download.01.org/0day-ci/archive/20230404/202304041311.bwxdwpx0-...@intel.com/config)
compiler: arc-elf-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/ff73c969805aef784d47f6bedea6c15c8548d0bf
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
git checkout ff73c969805aef784d47f6bedea6c15c8548d0bf
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=arc olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=arc SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202304041311.bwxdwpx0-...@intel.com/

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h:4,
from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h:5,
from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:22:
   drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c: In function 
'nvkm_uvmm_mthd_raw_map':
>> drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:422:31: warning: cast to 
>> pointer from integer of different size [-Wint-to-pointer-cast]
 422 |   (void *)args->argv, args->argc);
 |   ^
   drivers/gpu/drm/nouveau/include/nvkm/core/memory.h:66:43: note: in 
definition of macro 'nvkm_memory_map'
  66 | (p)->func->map((p),(o),(vm),(va),(av),(ac))
 |   ^~


vim +422 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c

   388  
   389  static int
   390  nvkm_uvmm_mthd_raw_map(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 
*args)
   391  {
   392  struct nvkm_client *client = uvmm->object.client;
   393  struct nvkm_vmm *vmm = uvmm->vmm;
   394  struct nvkm_vma vma = {
   395  .addr = args->addr,
   396  .size = args->size,
   397  .used = true,
   398  .mapref = false,
   399  .no_comp = true,
   400  };
   401  struct nvkm_memory *memory;
   402  u64 handle = args->memory;
   403  u8 refd;
   404  int ret;
   405  
   406  if (!nvkm_vmm_in_managed_range(vmm, args->addr, args->size))
   407  return -EINVAL;
   408  
   409  ret = nvkm_uvmm_page_index(uvmm, args->size, args->shift, 
);
   410  if (ret)
   411  return ret;
   412  
   413  vma.page = vma.refd = refd;
   414  
   415  memory = nvkm_umem_search(client, args->memory);
   416  if (IS_ERR(memory)) {
   417  VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, 
PTR_ERR(memory));
   418  return PTR_ERR(memory);
   419  }
   420  
   421  ret = nvkm_memory_map(memory, args->offset, vmm, ,
 > 422(void *)args->argv, args->argc);
   423  
   424  nvkm_memory_unref();
   425  nvkm_memory_unref();
   426  return ret;
   427  }
   428  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


Re: [PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA mappings

2023-04-03 Thread kernel test robot
Hi Danilo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
base:   d36d68fd1925d33066d52468b7c7c6aca6521248
patch link:
https://lore.kernel.org/r/20230404012741.116502-5-dakr%40redhat.com
patch subject: [PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA 
mappings
config: mips-randconfig-r024-20230403 
(https://download.01.org/0day-ci/archive/20230404/202304041336.bd0g9u85-...@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project 
67409911353323ca5edf2049ef0df54132fa1ca7)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install mips cross compiling tool for clang build
# apt-get install binutils-mipsel-linux-gnu
# 
https://github.com/intel-lab-lkp/linux/commit/c25139e5a168ae8a3a3e5ca0b650c201e5f41367
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
git checkout c25139e5a168ae8a3a3e5ca0b650c201e5f41367
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=mips olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/ drivers/iio/light/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202304041336.bd0g9u85-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/drm_gpuva_mgr.c:1031:25: warning: variable 'prev' set but 
>> not used [-Wunused-but-set-variable]
   struct drm_gpuva *va, *prev = NULL;
  ^
   1 warning generated.


vim +/prev +1031 drivers/gpu/drm/drm_gpuva_mgr.c

  1023  
  1024  static int
  1025  __drm_gpuva_sm_map(struct drm_gpuva_manager *mgr,
  1026 struct drm_gpuva_fn_ops *ops, void *priv,
  1027 u64 req_addr, u64 req_range,
  1028 struct drm_gem_object *req_obj, u64 req_offset)
  1029  {
  1030  DRM_GPUVA_ITER(it, mgr, req_addr);
> 1031  struct drm_gpuva *va, *prev = NULL;
  1032  u64 req_end = req_addr + req_range;
  1033  int ret;
  1034  
  1035  if (unlikely(!drm_gpuva_in_mm_range(mgr, req_addr, req_range)))
  1036  return -EINVAL;
  1037  
  1038  if (unlikely(drm_gpuva_in_kernel_node(mgr, req_addr, 
req_range)))
  1039  return -EINVAL;
  1040  
  1041  drm_gpuva_iter_for_each_range(va, it, req_end) {
  1042  struct drm_gem_object *obj = va->gem.obj;
  1043  u64 offset = va->gem.offset;
  1044  u64 addr = va->va.addr;
  1045  u64 range = va->va.range;
  1046  u64 end = addr + range;
  1047  bool merge = !!va->gem.obj;
  1048  
  1049  if (addr == req_addr) {
  1050  merge &= obj == req_obj &&
  1051   offset == req_offset;
  1052  
  1053  if (end == req_end) {
  1054  ret = op_unmap_cb(ops, , priv, va, 
merge);
  1055  if (ret)
  1056  return ret;
  1057  break;
  1058  }
  1059  
  1060  if (end < req_end) {
  1061  ret = op_unmap_cb(ops, , priv, va, 
merge);
  1062  if (ret)
  1063  return ret;
  1064  goto next;
  1065  }
  1066  
  1067  if (end > req_end) {
  1068  struct drm_gpuva_op_map n = {
  1069  .va.addr = req_end,
  1070  .va.range = range - req_range,
  1071  .gem.obj = obj,
  1072  .gem.offset = offset + 
req_range,
  1073  };
  1074  struct drm_gpuva_op_unmap u = {
  1075  .va = va,
  1076  .keep = merge,
  1077  };
  1078  
  1079   

Re: [Regression] drm/scheduler: track GPU active time per entity

2023-04-03 Thread Luben Tuikov
On 2023-03-28 04:54, Lucas Stach wrote:
> Hi Danilo,
> 
> Am Dienstag, dem 28.03.2023 um 02:57 +0200 schrieb Danilo Krummrich:
>> Hi all,
>>
>> Commit df622729ddbf ("drm/scheduler: track GPU active time per entity") 
>> tries to track the accumulated time that a job was active on the GPU 
>> writing it to the entity through which the job was deployed to the 
>> scheduler originally. This is done within drm_sched_get_cleanup_job() 
>> which fetches a job from the schedulers pending_list.
>>
>> Doing this can result in a race condition where the entity is already 
>> freed, but the entity's newly added elapsed_ns field is still accessed 
>> once the job is fetched from the pending_list.
>>
>> After drm_sched_entity_destroy() being called it should be safe to free 
>> the structure that embeds the entity. However, a job originally handed 
>> over to the scheduler by this entity might still reside in the 
>> schedulers pending_list for cleanup after drm_sched_entity_destroy() 
>> already being called and the entity being freed. Hence, we can run into 
>> a UAF.
>>
> Sorry about that, I clearly didn't properly consider this case.
> 
>> In my case it happened that a job, as explained above, was just picked 
>> from the schedulers pending_list after the entity was freed due to the 
>> client application exiting. Meanwhile this freed up memory was already 
>> allocated for a subsequent client applications job structure again. 
>> Hence, the new jobs memory got corrupted. Luckily, I was able to 
>> reproduce the same corruption over and over again by just using 
>> deqp-runner to run a specific set of VK test cases in parallel.
>>
>> Fixing this issue doesn't seem to be very straightforward though (unless 
>> I miss something), which is why I'm writing this mail instead of sending 
>> a fix directly.
>>
>> Spontaneously, I see three options to fix it:
>>
>> 1. Rather than embedding the entity into driver specific structures 
>> (e.g. tied to file_priv) we could allocate the entity separately and 
>> reference count it, such that it's only freed up once all jobs that were 
>> deployed through this entity are fetched from the schedulers pending list.
>>
> My vote is on this or something in similar vain for the long term. I
> have some hope to be able to add a GPU scheduling algorithm with a bit
> more fairness than the current one sometime in the future, which
> requires execution time tracking on the entities.

Danilo,

Using kref is preferable, i.e. option 1 above.

Lucas, can you shed some light on,

1. In what way the current FIFO scheduling is unfair, and
2. shed some details on this "scheduling algorithm with a bit
more fairness than the current one"? 

Regards,
Luben

> 
>> 2. Somehow make sure drm_sched_entity_destroy() does block until all 
>> jobs deployed through this entity were fetched from the schedulers 
>> pending list. Though, I'm pretty sure that this is not really desirable.
>>
>> 3. Just revert the change and let drivers implement tracking of GPU 
>> active times themselves.
>>
> Given that we are already pretty late in the release cycle and etnaviv
> being the only driver so far making use of the scheduler elapsed time
> tracking I think the right short term solution is to either move the
> tracking into etnaviv or just revert the change for now. I'll have a
> look at this.
> 
> Regards,
> Lucas
> 
>> In the case of just reverting the change I'd propose to also set a jobs 
>> entity pointer to NULL  once the job was taken from the entity, such 
>> that in case of a future issue we fail where the actual issue resides 
>> and to make it more obvious that the field shouldn't be used anymore 
>> after the job was taken from the entity.
>>
>> I'm happy to implement the solution we agree on. However, it might also 
>> make sense to revert the change until we have a solution in place. I'm 
>> also happy to send a revert with a proper description of the problem. 
>> Please let me know what you think.
>>
>> - Danilo
>>
> 



Re: [PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure to dump a DRM GPU VA space

2023-04-03 Thread kernel test robot
Hi Danilo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
base:   d36d68fd1925d33066d52468b7c7c6aca6521248
patch link:
https://lore.kernel.org/r/20230404012741.116502-6-dakr%40redhat.com
patch subject: [PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure 
to dump a DRM GPU VA space
config: xtensa-allyesconfig 
(https://download.01.org/0day-ci/archive/20230404/202304041151.y2wmbgh6-...@intel.com/config)
compiler: xtensa-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/97d8731cc359143f6f790b1c4755d1055a72adb9
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042
git checkout 97d8731cc359143f6f790b1c4755d1055a72adb9
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=xtensa olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=xtensa SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202304041151.y2wmbgh6-...@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/drm_debugfs.c: In function 'drm_debugfs_gpuva_info':
>> drivers/gpu/drm/drm_debugfs.c:213:28: warning: cast from pointer to integer 
>> of different size [-Wpointer-to-int-cast]
 213 |(u64)va->gem.obj, va->gem.offset);
 |^


vim +213 drivers/gpu/drm/drm_debugfs.c

   178  
   179  /**
   180   * drm_debugfs_gpuva_info - dump the given DRM GPU VA space
   181   * @m: pointer to the _file to write
   182   * @mgr: the _gpuva_manager representing the GPU VA space
   183   *
   184   * Dumps the GPU VA mappings of a given DRM GPU VA manager.
   185   *
   186   * For each DRM GPU VA space drivers should call this function from 
their
   187   * _info_list's show callback.
   188   *
   189   * Returns: 0 on success, -ENODEV if the  is not initialized
   190   */
   191  int drm_debugfs_gpuva_info(struct seq_file *m,
   192 struct drm_gpuva_manager *mgr)
   193  {
   194  DRM_GPUVA_ITER(it, mgr, 0);
   195  struct drm_gpuva *va, *kva = >kernel_alloc_node;
   196  
   197  if (!mgr->name)
   198  return -ENODEV;
   199  
   200  seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n",
   201 mgr->name, mgr->mm_start, mgr->mm_start + 
mgr->mm_range);
   202  seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n",
   203 kva->va.addr, kva->va.addr + kva->va.range);
   204  seq_puts(m, "\n");
   205  seq_puts(m, " VAs | start  | range  | 
end| object | object offset\n");
   206  seq_puts(m, 
"-\n");
   207  drm_gpuva_iter_for_each(va, it) {
   208  if (unlikely(va == >kernel_alloc_node))
   209  continue;
   210  
   211  seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx 
| 0x%016llx | 0x%016llx\n",
   212 va->va.addr, va->va.range, va->va.addr + 
va->va.range,
 > 213 (u64)va->gem.obj, va->gem.offset);
   214  }
   215  
   216  return 0;
   217  }
   218  EXPORT_SYMBOL(drm_debugfs_gpuva_info);
   219  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


[PATCH] video/aperture: fix typos

2023-04-03 Thread Sui Jingfeng
 EFI FB, VESA FB, VGA FB, etc. belong to the class of firmware-based
 framebuffer drivers.

Signed-off-by: Sui Jingfeng 
---
 drivers/video/aperture.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c
index 41e77de1ea82..b009468ffdff 100644
--- a/drivers/video/aperture.c
+++ b/drivers/video/aperture.c
@@ -20,7 +20,7 @@
  * driver can be active at any given time. Many systems load a generic
  * graphics drivers, such as EFI-GOP or VESA, early during the boot process.
  * During later boot stages, they replace the generic driver with a dedicated,
- * hardware-specific driver. To take over the device the dedicated driver
+ * hardware-specific driver. To take over the device, the dedicated driver
  * first has to remove the generic driver. Aperture functions manage
  * ownership of framebuffer memory and hand-over between drivers.
  *
@@ -76,7 +76,7 @@
  * generic EFI or VESA drivers, have to register themselves as owners of their
  * framebuffer apertures. Ownership of the framebuffer memory is achieved
  * by calling devm_aperture_acquire_for_platform_device(). If successful, the
- * driveris the owner of the framebuffer range. The function fails if the
+ * driver is the owner of the framebuffer range. The function fails if the
  * framebuffer is already owned by another driver. See below for an example.
  *
  * .. code-block:: c
@@ -126,7 +126,7 @@
  * et al for the registered framebuffer range, the aperture helpers call
  * platform_device_unregister() and the generic driver unloads itself. The
  * generic driver also has to provide a remove function to make this work.
- * Once hot unplugged fro mhardware, it may not access the device's
+ * Once hot unplugged from hardware, it may not access the device's
  * registers, framebuffer memory, ROM, etc afterwards.
  */
 
@@ -203,7 +203,7 @@ static void aperture_detach_platform_device(struct device 
*dev)
 
/*
 * Remove the device from the device hierarchy. This is the right thing
-* to do for firmware-based DRM drivers, such as EFI, VESA or VGA. After
+* to do for firmware-based fb drivers, such as EFI, VESA or VGA. After
 * the new driver takes over the hardware, the firmware device's state
 * will be lost.
 *
-- 
2.25.1



Re: [PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas

2023-04-03 Thread kernel test robot
Hi Andrew,

kernel test robot noticed the following build warnings:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on char-misc/char-misc-next char-misc/char-misc-linus 
soc/for-next pza/reset/next linus/master v6.3-rc5 next-20230403]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:
https://github.com/intel-lab-lkp/linux/commits/Andrew-Davis/misc-sram-Add-DMA-BUF-Heap-exporting-of-SRAM-areas/20230404-032607
patch link:https://lore.kernel.org/r/20230403192433.26648-1-afd%40ti.com
patch subject: [PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas
config: loongarch-allyesconfig 
(https://download.01.org/0day-ci/archive/20230404/202304041144.t5jcogse-...@intel.com/config)
compiler: loongarch64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://github.com/intel-lab-lkp/linux/commit/6fcaa3c7cfbc144dd982f9abaa1c5af50dde24a8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review 
Andrew-Davis/misc-sram-Add-DMA-BUF-Heap-exporting-of-SRAM-areas/20230404-032607
git checkout 6fcaa3c7cfbc144dd982f9abaa1c5af50dde24a8
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=loongarch olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 
O=build_dir ARCH=loongarch SHELL=/bin/bash drivers/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot 
| Link: 
https://lore.kernel.org/oe-kbuild-all/202304041144.t5jcogse-...@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/misc/sram-dma-heap.c:161:17: warning: no previous prototype for 
>> 'sram_dma_heap_allocate' [-Wmissing-prototypes]
 161 | struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap,
 | ^~


vim +/sram_dma_heap_allocate +161 drivers/misc/sram-dma-heap.c

   160  
 > 161  struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap,
   162 unsigned long len,
   163 unsigned long fd_flags,
   164 unsigned long heap_flags)
   165  {
   166  struct sram_dma_heap *sram_dma_heap = 
dma_heap_get_drvdata(heap);
   167  struct sram_dma_heap_buffer *buffer;
   168  
   169  DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
   170  struct dma_buf *dmabuf;
   171  int ret;
   172  
   173  buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
   174  if (!buffer)
   175  return ERR_PTR(-ENOMEM);
   176  buffer->pool = sram_dma_heap->pool;
   177  INIT_LIST_HEAD(>attachments);
   178  mutex_init(>attachments_lock);
   179  buffer->len = len;
   180  
   181  buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, 
buffer->len);
   182  if (!buffer->vaddr) {
   183  ret = -ENOMEM;
   184  goto free_buffer;
   185  }
   186  
   187  buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned 
long)buffer->vaddr);
   188  if (buffer->paddr == -1) {
   189  ret = -ENOMEM;
   190  goto free_pool;
   191  }
   192  
   193  /* create the dmabuf */
   194  exp_info.exp_name = dma_heap_get_name(heap);
   195  exp_info.ops = _dma_heap_buf_ops;
   196  exp_info.size = buffer->len;
   197  exp_info.flags = fd_flags;
   198  exp_info.priv = buffer;
   199  dmabuf = dma_buf_export(_info);
   200  if (IS_ERR(dmabuf)) {
   201  ret = PTR_ERR(dmabuf);
   202  goto free_pool;
   203  }
   204  
   205  return dmabuf;
   206  
   207  free_pool:
   208  gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, 
buffer->len);
   209  free_buffer:
   210  kfree(buffer);
   211  
   212  return ERR_PTR(ret);
   213  }
   214  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests


Re: [PATCH] drm/bridge: ti-sn65dsi83: Do not generate HFP/HBP/HSA and EOT packet

2023-04-03 Thread Laurent Pinchart
Hi Marek,

Thank you for the patch.

On Mon, Apr 03, 2023 at 09:02:42PM +0200, Marek Vasut wrote:
> Do not generate the HS front and back porch gaps, the HSA gap and
> EOT packet, as per "SN65DSI83 datasheet SLLSEC1I - SEPTEMBER 2012
> - REVISED OCTOBER 2020", page 22, these packets are not required.
> This makes the TI SN65DSI83 bridge work with Samsung DSIM on i.MX8MN.
> 
> Signed-off-by: Marek Vasut 

I have successfully used this driver with a Raspberry Pi CM4. The VC4
DSI driver does not seem to support the newly added flags, so this patch
shouldn't have any effect there.

Reviewed-by: Laurent Pinchart 

> ---
> Cc: Andrzej Hajda 
> Cc: Daniel Vetter 
> Cc: David Airlie 
> Cc: Jagan Teki 
> Cc: Jernej Skrabec 
> Cc: Jonas Karlman 
> Cc: Laurent Pinchart 
> Cc: Michael Walle 
> Cc: Neil Armstrong 
> Cc: Robert Foss 
> Cc: dri-devel@lists.freedesktop.org
> ---
>  drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c 
> b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> index 91ecfbe45bf90..b60ae1dc1191d 100644
> --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
> @@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
>  
>   dsi->lanes = dsi_lanes;
>   dsi->format = MIPI_DSI_FMT_RGB888;
> - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST;
> + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
> +   MIPI_DSI_MODE_VIDEO_NO_HFP | 
> MIPI_DSI_MODE_VIDEO_NO_HBP |
> +   MIPI_DSI_MODE_VIDEO_NO_HSA | 
> MIPI_DSI_MODE_NO_EOT_PACKET;
>  
>   ret = devm_mipi_dsi_attach(dev, dsi);
>   if (ret < 0) {

-- 
Regards,

Laurent Pinchart


Re: [PATCH] drm/fbdev-generic: optimize out a redundant assignment clause

2023-04-03 Thread Sui Jingfeng



On 2023/3/29 17:04, Thomas Zimmermann wrote:

(cc'ing Lucas)

Hi

Am 25.03.23 um 08:46 schrieb Sui Jingfeng:

  The assignment already done in drm_client_buffer_vmap(),
  just trival clean, no functional change.

Signed-off-by: Sui Jingfeng <15330273...@189.cn>
---
  drivers/gpu/drm/drm_fbdev_generic.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_fbdev_generic.c 
b/drivers/gpu/drm/drm_fbdev_generic.c

index 4d6325e91565..1da48e71c7f1 100644
--- a/drivers/gpu/drm/drm_fbdev_generic.c
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -282,7 +282,7 @@ static int drm_fbdev_damage_blit(struct 
drm_fb_helper *fb_helper,

   struct drm_clip_rect *clip)
  {
  struct drm_client_buffer *buffer = fb_helper->buffer;
-    struct iosys_map map, dst;
+    struct iosys_map map;
  int ret;
    /*
@@ -302,8 +302,7 @@ static int drm_fbdev_damage_blit(struct 
drm_fb_helper *fb_helper,

  if (ret)
  goto out;
  -    dst = map;
-    drm_fbdev_damage_blit_real(fb_helper, clip, );
+    drm_fbdev_damage_blit_real(fb_helper, clip, );


I see what you're doing and it's probably correct in this case.

But there's a larger issue with this iosys interfaces. Sometimes the 
address has to be modified (see calls of iosys_map_incr()). That can 
prevent incorrect uses of the mapping in other places, especially in 
unmap code.



Yes, I just realized that.

iosys_map_incr() changes the internal state of an opaque structure, which 
is somewhat evil.


If it is non-opaque, then this is an abstraction failure.

You have to worry about it being changed by an accidental call to 
iosys_map_incr() from some other place.


The map should be const; I guess most programmers expect the map to be const.

Please make it const: copy on demand, modify only the copy, and leave the 
original mapping untouched.


Hope this could eliminate the embarrassment.

Sorry for missing the point.

I think it would make sense to consider a separate structure for the 
I/O location. The buffer as a whole would still be represented by 
struct iosys_map.  And that new structure, let's call it struct 
iosys_ptr, would point to an actual location within the buffer's 
memory range. A few locations and helpers would need changes, but 
there are not so many callers that it's an issue.  This would also 
allow for a few debugging tests that ensure that iosys_ptr always 
operates within the bounds of an iosys_map.


I've long considered this idea, but there was no pressure to work on 
it. Maybe now.



I have also gotten some ideas from your idea.

Best regards
Thomas


    drm_client_buffer_vunmap(buffer);




[PATCH v3] dt-bindings: bridge: Convert Samsung MIPI DSIM bridge to yaml

2023-04-03 Thread Fabio Estevam
From: Jagan Teki 

Samsung MIPI DSIM bridge can be found on Exynos and NXP's 
i.MX8M Mini/Nano/Plus SoCs.

Convert exynos_dsim.txt to yaml.

Used the example node from exynos5433.dtsi instead of the one used in
the legacy exynos_dsim.txt.

Signed-off-by: Jagan Teki 
Signed-off-by: Fabio Estevam 
---
Changes since v2:
- Took previous Rob Herring's feedback into account:
https://lore.kernel.org/all/20210712151322.ga1931...@robh.at.kernel.org/
- Handled imx8mn and imx8mp.
- Remove unnecessary #address-cells/size-cells.

 .../display/bridge/samsung,mipi-dsim.yaml | 255 ++
 .../bindings/display/exynos/exynos_dsim.txt   |  92 ---
 MAINTAINERS   |   1 +
 3 files changed, 256 insertions(+), 92 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
 delete mode 100644 
Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt

diff --git 
a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml 
b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
new file mode 100644
index ..55dbec178ea8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
@@ -0,0 +1,255 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/samsung,mipi-dsim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung MIPI DSIM bridge controller
+
+maintainers:
+  - Inki Dae 
+  - Jagan Teki 
+  - Marek Szyprowski 
+
+description: |
+  Samsung MIPI DSIM bridge controller can be found it on Exynos
+  and i.MX8M Mini/Nano/Plus SoC's.
+
+properties:
+  compatible:
+oneOf:
+  - enum:
+  - samsung,exynos3250-mipi-dsi
+  - samsung,exynos4210-mipi-dsi
+  - samsung,exynos5410-mipi-dsi
+  - samsung,exynos5422-mipi-dsi
+  - samsung,exynos5433-mipi-dsi
+  - fsl,imx8mm-mipi-dsim
+  - fsl,imx8mp-mipi-dsim
+  - items:
+  - const: fsl,imx8mn-mipi-dsim
+  - const: fsl,imx8mm-mipi-dsim
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  '#address-cells':
+const: 1
+
+  '#size-cells':
+const: 0
+
+  clocks:
+minItems: 2
+maxItems: 5
+
+  clock-names:
+minItems: 2
+maxItems: 5
+
+  samsung,phy-type:
+$ref: /schemas/types.yaml#/definitions/uint32
+description: phandle to the samsung phy-type
+
+  power-domains:
+maxItems: 1
+
+  samsung,power-domain:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: phandle to the associated samsung power domain
+
+  vddcore-supply:
+description: MIPI DSIM Core voltage supply (e.g. 1.1V)
+
+  vddio-supply:
+description: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V)
+
+  samsung,burst-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM high speed burst mode frequency.
+
+  samsung,esc-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM escape mode frequency.
+
+  samsung,pll-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM oscillator clock frequency.
+
+  phys:
+maxItems: 1
+
+  phy-names:
+const: dsim
+
+  ports:
+$ref: /schemas/graph.yaml#/properties/ports
+
+properties:
+  port@0:
+$ref: /schemas/graph.yaml#/properties/port
+description:
+  Input port node to receive pixel data from the
+  display controller. Exactly one endpoint must be
+  specified.
+
+  port@1:
+$ref: /schemas/graph.yaml#/properties/port
+description:
+  DSI output port node to the panel or the next bridge
+  in the chain.
+
+required:
+  - clock-names
+  - clocks
+  - compatible
+  - interrupts
+  - reg
+  - samsung,burst-clock-frequency
+  - samsung,esc-clock-frequency
+  - samsung,pll-clock-frequency
+
+allOf:
+  - $ref: ../dsi-controller.yaml#
+  - if:
+  properties:
+compatible:
+  contains:
+const: samsung,exynos5433-mipi-dsi
+
+then:
+  properties:
+clocks:
+  minItems: 5
+
+clock-names:
+  items:
+- const: bus_clk
+- const: phyclk_mipidphy0_bitclkdiv8
+- const: phyclk_mipidphy0_rxclkesc0
+- const: sclk_rgb_vclk_to_dsim0
+- const: sclk_mipi
+
+ports:
+  required:
+- port@0
+
+  required:
+- ports
+- vddcore-supply
+- vddio-supply
+
+  - if:
+  properties:
+compatible:
+  contains:
+const: samsung,exynos5410-mipi-dsi
+
+then:
+  properties:
+clocks:
+  minItems: 2
+
+clock-names:
+  items:
+- const: bus_clk
+- const: pll_clk
+
+  required:
+- vddcore-supply
+- 

Re: [PATCH v2] drm/scdc-helper: Pimp SCDC debugs

2023-04-03 Thread Laurent Pinchart
Hi Ville,

Thank you for the patch.

On Tue, Apr 04, 2023 at 01:36:52AM +0300, Ville Syrjala wrote:
> From: Ville Syrjälä 
> 
> Include the device and connector information in the SCDC
> debugs. Makes it easier to figure out who did what.
> 
> v2: Rely on connector->ddc (Maxime)
> 
> Cc: Andrzej Hajda 
> Cc: Neil Armstrong 
> Cc: Robert Foss 
> Cc: Laurent Pinchart 
> Cc: Jonas Karlman 
> Cc: Jernej Skrabec 
> Cc: Thierry Reding 
> Cc: Emma Anholt 
> Cc: Maxime Ripard 
> Cc: intel-...@lists.freedesktop.org
> Cc: linux-te...@vger.kernel.org
> Signed-off-by: Ville Syrjälä 

Reviewed-by: Laurent Pinchart 

> ---
>  drivers/gpu/drm/bridge/synopsys/dw-hdmi.c |  8 ++--
>  drivers/gpu/drm/display/drm_scdc_helper.c | 46 +++
>  drivers/gpu/drm/i915/display/intel_ddi.c  |  4 +-
>  drivers/gpu/drm/i915/display/intel_hdmi.c |  8 +---
>  drivers/gpu/drm/tegra/sor.c   | 15 +++-
>  drivers/gpu/drm/vc4/vc4_hdmi.c| 21 ++-
>  include/drm/display/drm_scdc_helper.h |  7 ++--
>  7 files changed, 59 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c 
> b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> index aa51c61a78c7..603bb3c51027 100644
> --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
> @@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi 
> *hdmi,
>   /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
>   if (dw_hdmi_support_scdc(hdmi, display)) {
>   if (mtmdsclock > HDMI14_MAX_TMDSCLK)
> - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1);
> + drm_scdc_set_high_tmds_clock_ratio(>connector, 1);
>   else
> - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0);
> + drm_scdc_set_high_tmds_clock_ratio(>connector, 0);
>   }
>  }
>  EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
> @@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
>   min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
>  
>   /* Enabled Scrambling in the Sink */
> - drm_scdc_set_scrambling(hdmi->ddc, 1);
> + drm_scdc_set_scrambling(>connector, 1);
>  
>   /*
>* To activate the scrambler feature, you must ensure
> @@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
>   hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
>   hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
>   HDMI_MC_SWRSTZ);
> - drm_scdc_set_scrambling(hdmi->ddc, 0);
> + drm_scdc_set_scrambling(>connector, 0);
>   }
>   }
>  
> diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c 
> b/drivers/gpu/drm/display/drm_scdc_helper.c
> index c3ad4ab2b456..6d2f244e5830 100644
> --- a/drivers/gpu/drm/display/drm_scdc_helper.c
> +++ b/drivers/gpu/drm/display/drm_scdc_helper.c
> @@ -26,6 +26,8 @@
>  #include 
>  
>  #include 
> +#include 
> +#include 
>  #include 
>  
>  /**
> @@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write);
>  
>  /**
>   * drm_scdc_get_scrambling_status - what is status of scrambling?
> - * @adapter: I2C adapter for DDC channel
> + * @connector: connector
>   *
>   * Reads the scrambler status over SCDC, and checks the
>   * scrambling status.
> @@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write);
>   * Returns:
>   * True if the scrambling is enabled, false otherwise.
>   */
> -bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter)
> +bool drm_scdc_get_scrambling_status(struct drm_connector *connector)
>  {
>   u8 status;
>   int ret;
>  
> - ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, );
> + ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, );
>   if (ret < 0) {
> - DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret);
> + drm_dbg_kms(connector->dev,
> + "[CONNECTOR:%d:%s] Failed to read scrambling 
> status: %d\n",
> + connector->base.id, connector->name, ret);
>   return false;
>   }
>  
> @@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
>  
>  /**
>   * drm_scdc_set_scrambling - enable scrambling
> - * @adapter: I2C adapter for DDC channel
> + * @connector: connector
>   * @enable: bool to indicate if scrambling is to be enabled/disabled
>   *
>   * Writes the TMDS config register over SCDC channel, and:
> @@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
>   * Returns:
>   * True if scrambling is set/reset successfully, false otherwise.
>   */
> -bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable)
> +bool drm_scdc_set_scrambling(struct drm_connector *connector,
> +  bool 

Re: [PATCH v3 01/11] dmaengine: Add API function dmaengine_prep_slave_dma_array()

2023-04-03 Thread Hillf Danton
On 3 Apr 2023 17:47:50 +0200 Paul Cercueil 
> This function can be used to initiate a scatter-gather DMA transfer
> where the DMA addresses and lengths are located inside arrays.
> 
> The major difference with dmaengine_prep_slave_sg() is that it supports
> specifying the lengths of each DMA transfer; as trying to override the
> length of the transfer with dmaengine_prep_slave_sg() is a very tedious
> process. The introduction of a new API function is also justified by the
> fact that scatterlists are on their way out.

Given sg's way out and, conceptually, iovec and kvec (in include/linux/uio.h),
what you add should have been dma_vec to ease people making use of it.

struct dma_vec {
dma_addr_t  addr;
size_t  len;
};
> 
> Signed-off-by: Paul Cercueil 
> 
> ---
> v3: New patch
> ---
>  include/linux/dmaengine.h | 16 
>  1 file changed, 16 insertions(+)
> 
> diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
> index c3656e590213..62efa28c009a 100644
> --- a/include/linux/dmaengine.h
> +++ b/include/linux/dmaengine.h
> @@ -912,6 +912,11 @@ struct dma_device {
>   struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
>   struct dma_chan *chan, unsigned long flags);
>  
> + struct dma_async_tx_descriptor *(*device_prep_slave_dma_array)(
> + struct dma_chan *chan, dma_addr_t *addrs,
> + size_t *lengths, size_t nb,
> + enum dma_transfer_direction direction,
> + unsigned long flags);

Then the callback looks like

struct dma_async_tx_descriptor *(*device_prep_slave_vec)(
struct dma_chan *chan,
struct dma_vec *vec,
int nvec,
enum dma_transfer_direction direction,
unsigned long flags);


[PATCH v3 2/2] drm/bridge: fsl-ldb: Add i.MX6SX support

2023-04-03 Thread Fabio Estevam
From: Fabio Estevam 

i.MX6SX has a single LVDS port and shares a similar LDB_CTRL register layout
with i.MX8MP and i.MX93.

There is no LVDS CTRL register on the i.MX6SX, so only write to
this register on the appropriate SoCs.

Add support for the i.MX6SX LDB.

Tested on a imx6sx-sdb board with a Hannstar HSD100PXN1 LVDS panel
and also on a custom i.MX6SX-based board.

Signed-off-by: Fabio Estevam 
Reviewed-by: Neil Armstrong 
Reviewed-by: Marek Vasut 
---
Changes since v2:
- Rename it to 'single_ctrl_reg' to make it clearer that on i.MX6X, there
is a single ctrl register. On the newer SoCs there are two ctrl registers.

Changes since v1:
- None

 drivers/gpu/drm/bridge/fsl-ldb.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c
index 450b352914f4..f8e5d8ab98e3 100644
--- a/drivers/gpu/drm/bridge/fsl-ldb.c
+++ b/drivers/gpu/drm/bridge/fsl-ldb.c
@@ -56,6 +56,7 @@
 #define LVDS_CTRL_VBG_ADJ_MASK GENMASK(19, 17)
 
 enum fsl_ldb_devtype {
+   IMX6SX_LDB,
IMX8MP_LDB,
IMX93_LDB,
 };
@@ -64,9 +65,14 @@ struct fsl_ldb_devdata {
u32 ldb_ctrl;
u32 lvds_ctrl;
bool lvds_en_bit;
+   bool single_ctrl_reg;
 };
 
 static const struct fsl_ldb_devdata fsl_ldb_devdata[] = {
+   [IMX6SX_LDB] = {
+   .ldb_ctrl = 0x18,
+   .single_ctrl_reg = true,
+   },
[IMX8MP_LDB] = {
.ldb_ctrl = 0x5c,
.lvds_ctrl = 0x128,
@@ -202,6 +208,9 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
 
regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, reg);
 
+   if (fsl_ldb->devdata->single_ctrl_reg)
+   return;
+
/* Program LVDS_CTRL */
reg = LVDS_CTRL_CC_ADJ(2) | LVDS_CTRL_PRE_EMPH_EN |
  LVDS_CTRL_PRE_EMPH_ADJ(3) | LVDS_CTRL_VBG_EN;
@@ -228,7 +237,8 @@ static void fsl_ldb_atomic_disable(struct drm_bridge 
*bridge,
regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl,
 LVDS_CTRL_LVDS_EN);
else
-   regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, 0);
+   if (!fsl_ldb->devdata->single_ctrl_reg)
+   regmap_write(fsl_ldb->regmap, 
fsl_ldb->devdata->lvds_ctrl, 0);
regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, 0);
 
clk_disable_unprepare(fsl_ldb->clk);
@@ -355,6 +365,8 @@ static void fsl_ldb_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id fsl_ldb_match[] = {
+   { .compatible = "fsl,imx6sx-ldb",
+ .data = _ldb_devdata[IMX6SX_LDB], },
{ .compatible = "fsl,imx8mp-ldb",
  .data = _ldb_devdata[IMX8MP_LDB], },
{ .compatible = "fsl,imx93-ldb",
-- 
2.34.1



[PATCH v3 1/2] dt-bindings: display: bridge: ldb: Add an i.MX6SX entry

2023-04-03 Thread Fabio Estevam
From: Fabio Estevam 

i.MX6SX has a single LVDS port and shares a similar LDB_CTRL register
layout with i.MX8MP and i.MX93.

Signed-off-by: Fabio Estevam 
Reviewed-by: Krzysztof Kozlowski 
Reviewed-by: Marek Vasut 
---
Changes since v2:
- Collected Reviewed-by tags.
- Improved the Subject by not stating support. (Marek).

Changes since v1:
- Do not duplicate the entire if. (Krzysztof)

 .../devicetree/bindings/display/bridge/fsl,ldb.yaml  | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml 
b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml
index 6e0e3ba9b49e..07388bf2b90d 100644
--- a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml
@@ -17,6 +17,7 @@ description: |
 properties:
   compatible:
 enum:
+  - fsl,imx6sx-ldb
   - fsl,imx8mp-ldb
   - fsl,imx93-ldb
 
@@ -64,7 +65,9 @@ allOf:
   properties:
 compatible:
   contains:
-const: fsl,imx93-ldb
+enum:
+  - fsl,imx6sx-ldb
+  - fsl,imx93-ldb
 then:
   properties:
 ports:
-- 
2.34.1



Re: [RFC PATCH 00/10] Xe DRM scheduler and long running workload plans

2023-04-03 Thread Matthew Brost
On Tue, Apr 04, 2023 at 10:07:48AM +0900, Asahi Lina wrote:
> Hi, thanks for the Cc!
> 

No problem.

> On 04/04/2023 09.22, Matthew Brost wrote:
> > Hello,
> > 
> > As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we
> > have been asked to merge our common DRM scheduler patches first as well
> > as develop a common solution for long running workloads with the DRM
> > scheduler. This RFC series is our first attempt at doing this. We
> > welcome any and all feedback.
> > 
> > This can be thought of as 4 parts, detailed below.
> > 
> > - DRM scheduler changes for 1 to 1 relationship between scheduler and
> > entity (patches 1-3)
> > 
> > In Xe all of the scheduling of jobs is done by a firmware scheduler (the
> > GuC) which is a new paradigm WRT to the DRM scheduler and presents
> > severals problems as the DRM was originally designed to schedule jobs on
> > hardware queues. The main problem being that DRM scheduler expects the
> > submission order of jobs to be the completion order of jobs even across
> > multiple entities. This assumption falls apart with a firmware scheduler
> > as a firmware scheduler has no concept of jobs and jobs can complete out
> > of order. A novel solution was originally thought of by Faith during
> > the initial prototype of Xe: create a 1 to 1 relationship between scheduler
> > and entity. I believe the AGX driver [3] is using this approach and
> > Boris may use this approach as well for the Mali driver [4].
> > 
> > To support a 1 to 1 relationship we move the main execution function
> > from a kthread to a work queue and add a new scheduling mode which
> > bypasses code in the DRM which isn't needed in a 1 to 1 relationship.
> > The new scheduling mode should unify all drivers usage with a 1 to 1
> > relationship and can be thought of as using scheduler as a dependency /
> > inflight job tracker rather than a true scheduler.
> 
> Yup, we're in the exact same situation with drm/asahi, so this is very
> welcome! We've been using the existing scheduler as-is, but this should help
> remove some unneeded complexity in this use case.
>

That's the idea.

> Do you want me to pull in this series into our tree and make sure this all
> works out for us?
>

We tested this in Xe and it definitely works for us but the more testing
the better.

> I also have a couple bugfixes for drm/sched I need to send out, but I think
> the rebase/merge with this series should be trivial. I'll send that out this
> week.
> 
> > - Generic messaging interface for DRM scheduler
> > 
> > Idea is to be able to communicate to the submission backend with in band
> > (relative to main execution function) messages. Messages are backend
> > defined and flexible enough for any use case. In Xe we use these
> > messages to clean up entities, set properties for entities, and suspend /
> > resume execution of an entity [5]. I suspect other drivers can leverage
> > this messaging concept too, as it is a convenient way to avoid races in the
> > backend.
> 
> We haven't needed this so far (mostly by using fine-grained locking and
> refcounting all over the place) but I can see it being useful to simplify
> some of those constructs and maybe avoid potential deadlocks in some places.
> I'm not sure yet whether we can fully get rid of the main queue
> refcounting/locking (our completion/error signaling path doesn't map well to
> DMA fences directly so we still need something there to get from the global
> GPU completion signaling thread to individual queues) but it might be a step
> in the right direction at least!
>

With this messaging interface we essentially have a lockless submission
backend which is really nice compared to what we did in the i915.

Matt

> ~~ Lina
> 


[PATCH v3] dt-bindings: bridge: Convert Samsung MIPI DSIM bridge to yaml

2023-04-03 Thread Fabio Estevam
From: Jagan Teki 

Samsung MIPI DSIM bridge can be found on Exynos and NXP's 
i.MX8M Mini/Nano/Plus SoCs.

Convert exynos_dsim.txt to yaml.

Used the example node from exynos5433.dtsi instead of the one used in
the legacy exynos_dsim.txt.

Signed-off-by: Jagan Teki 
Signed-off-by: Fabio Estevam 
---
Changes since v2:
- Took previous Rob Herring's feedback into account:
https://lore.kernel.org/all/20210712151322.ga1931...@robh.at.kernel.org/
- Handled imx8mn and imx8mp
- Remove unnecessary #address-cells/size-cells.

 .../display/bridge/samsung,mipi-dsim.yaml | 255 ++
 .../bindings/display/exynos/exynos_dsim.txt   |  92 ---
 MAINTAINERS   |   1 +
 3 files changed, 256 insertions(+), 92 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
 delete mode 100644 
Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt

diff --git 
a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml 
b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
new file mode 100644
index ..55dbec178ea8
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml
@@ -0,0 +1,255 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/samsung,mipi-dsim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Samsung MIPI DSIM bridge controller
+
+maintainers:
+  - Inki Dae 
+  - Jagan Teki 
+  - Marek Szyprowski 
+
+description: |
+  Samsung MIPI DSIM bridge controller can be found it on Exynos
+  and i.MX8M Mini/Nano/Plus SoC's.
+
+properties:
+  compatible:
+oneOf:
+  - enum:
+  - samsung,exynos3250-mipi-dsi
+  - samsung,exynos4210-mipi-dsi
+  - samsung,exynos5410-mipi-dsi
+  - samsung,exynos5422-mipi-dsi
+  - samsung,exynos5433-mipi-dsi
+  - fsl,imx8mm-mipi-dsim
+  - fsl,imx8mp-mipi-dsim
+  - items:
+  - const: fsl,imx8mn-mipi-dsim
+  - const: fsl,imx8mm-mipi-dsim
+
+  reg:
+maxItems: 1
+
+  interrupts:
+maxItems: 1
+
+  '#address-cells':
+const: 1
+
+  '#size-cells':
+const: 0
+
+  clocks:
+minItems: 2
+maxItems: 5
+
+  clock-names:
+minItems: 2
+maxItems: 5
+
+  samsung,phy-type:
+$ref: /schemas/types.yaml#/definitions/uint32
+description: phandle to the samsung phy-type
+
+  power-domains:
+maxItems: 1
+
+  samsung,power-domain:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: phandle to the associated samsung power domain
+
+  vddcore-supply:
+description: MIPI DSIM Core voltage supply (e.g. 1.1V)
+
+  vddio-supply:
+description: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V)
+
+  samsung,burst-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM high speed burst mode frequency.
+
+  samsung,esc-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM escape mode frequency.
+
+  samsung,pll-clock-frequency:
+$ref: /schemas/types.yaml#/definitions/uint32
+description:
+  DSIM oscillator clock frequency.
+
+  phys:
+maxItems: 1
+
+  phy-names:
+const: dsim
+
+  ports:
+$ref: /schemas/graph.yaml#/properties/ports
+
+properties:
+  port@0:
+$ref: /schemas/graph.yaml#/properties/port
+description:
+  Input port node to receive pixel data from the
+  display controller. Exactly one endpoint must be
+  specified.
+
+  port@1:
+$ref: /schemas/graph.yaml#/properties/port
+description:
+  DSI output port node to the panel or the next bridge
+  in the chain
+
+required:
+  - clock-names
+  - clocks
+  - compatible
+  - interrupts
+  - reg
+  - samsung,burst-clock-frequency
+  - samsung,esc-clock-frequency
+  - samsung,pll-clock-frequency
+
+allOf:
+  - $ref: ../dsi-controller.yaml#
+  - if:
+  properties:
+compatible:
+  contains:
+const: samsung,exynos5433-mipi-dsi
+
+then:
+  properties:
+clocks:
+  minItems: 5
+
+clock-names:
+  items:
+- const: bus_clk
+- const: phyclk_mipidphy0_bitclkdiv8
+- const: phyclk_mipidphy0_rxclkesc0
+- const: sclk_rgb_vclk_to_dsim0
+- const: sclk_mipi
+
+ports:
+  required:
+- port@0
+
+  required:
+- ports
+- vddcore-supply
+- vddio-supply
+
+  - if:
+  properties:
+compatible:
+  contains:
+const: samsung,exynos5410-mipi-dsi
+
+then:
+  properties:
+clocks:
+  minItems: 2
+
+clock-names:
+  items:
+- const: bus_clk
+- const: pll_clk
+
+  required:
+- vddcore-supply
+- vddio-supply

Re: [PATCH v2 1/2] dt-bindings: display: bridge: ldb: Add i.MX6SX support

2023-04-03 Thread Marek Vasut

On 3/30/23 12:42, Fabio Estevam wrote:

From: Fabio Estevam 

i.MX6SX has a single LVDS port and shares a similar LDB_CTRL register
layout with i.MX8MP and i.MX93.

Signed-off-by: Fabio Estevam 


Nit: you are not adding 'support' for the IP here, you are documenting 
bindings in this patch. The support is added in 2/2 .


Reviewed-by: Marek Vasut 


[PATCH drm-next v3 14/15] drm/nouveau: implement new VM_BIND uAPI

2023-04-03 Thread Danilo Krummrich
This commit provides the implementation for the new uapi motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA
   space managed by the kernel and userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
   UMDs can request the named operations to be processed either
   synchronously or asynchronously. It supports DRM syncobjs
   (incl. timelines) as synchronization mechanism. The management of the
   GPU VA mappings is implemented with the DRM GPU VA manager.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The
   execution happens asynchronously. It supports DRM syncobj (incl.
   timelines) as synchronization mechanism. DRM GEM object locking is
   handled with drm_exec.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, use the DRM
GPU scheduler for the asynchronous paths.

Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/driver-uapi.rst   |3 +
 drivers/gpu/drm/nouveau/Kbuild  |3 +
 drivers/gpu/drm/nouveau/Kconfig |2 +
 drivers/gpu/drm/nouveau/nouveau_abi16.c |   24 +
 drivers/gpu/drm/nouveau/nouveau_abi16.h |1 +
 drivers/gpu/drm/nouveau/nouveau_bo.c|  147 +-
 drivers/gpu/drm/nouveau/nouveau_bo.h|2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c   |   27 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   59 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  |  363 +
 drivers/gpu/drm/nouveau/nouveau_exec.h  |   42 +
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   25 +-
 drivers/gpu/drm/nouveau/nouveau_mem.h   |5 +
 drivers/gpu/drm/nouveau/nouveau_prime.c |2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c |  494 ++
 drivers/gpu/drm/nouveau/nouveau_sched.h |  116 ++
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 1836 +++
 drivers/gpu/drm/nouveau/nouveau_uvmm.h  |   98 ++
 18 files changed, 3184 insertions(+), 65 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.h
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.h
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.h

diff --git a/Documentation/gpu/driver-uapi.rst 
b/Documentation/gpu/driver-uapi.rst
index 9c7ca6e33a68..c08bcbb95fb3 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -13,4 +13,7 @@ drm/nouveau uAPI
 VM_BIND / EXEC uAPI
 ---
 
+.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c
+:doc: Overview
+
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 5e5617006da5..cf6b3a80c0c8 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_exec.o
+nouveau-y += nouveau_sched.o
+nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index a70bd65e1400..c52e8096cca4 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
select DRM_KMS_HELPER
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
+   select DRM_SCHED
select I2C
select I2C_ALGOBIT
select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c 
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 82dab51d8aeb..a112f28681d3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -35,6 +35,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_sched.h"
 
 static struct nouveau_abi16 *
 nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
struct nouveau_abi16_ntfy *ntfy, *temp;
 
+   /* When a client exits without waiting for it's queued up jobs to
+* finish it might happen that we fault the channel. This is due to
+* drm_file_free() calling drm_gem_release() before the postclose()
+* callback. Hence, we can't tear down this scheduler entity before
+* uvmm mappings are unmapped. Currently, we can't detect this case.
+*
+* However, this should be rare and harmless, since the channel isn't
+* needed anymore.
+*/
+   nouveau_sched_entity_fini(>sched_entity);
+
/* wait for all activity to 

[PATCH drm-next v3 15/15] drm/nouveau: debugfs: implement DRM GPU VA debugfs

2023-04-03 Thread Danilo Krummrich
Provide the driver indirection iterating over all DRM GPU VA spaces to
enable the common 'gpuvas' debugfs file for dumping DRM GPU VA spaces.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_debugfs.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c 
b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 2a36d1ca8fda..d5487e655b0c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -202,6 +202,44 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct 
file *file)
return single_open(file, nouveau_debugfs_pstate_get, inode->i_private);
 }
 
+static void
+nouveau_debugfs_gpuva_regions(struct seq_file *m, struct nouveau_uvmm *uvmm)
+{
+   MA_STATE(mas, >region_mt, 0, 0);
+   struct nouveau_uvma_region *reg;
+
+   seq_puts  (m, " VA regions  | start  | range  | 
end\n");
+   seq_puts  (m, 
"\n");
+   mas_for_each(, reg, ULONG_MAX)
+   seq_printf(m, " | 0x%016llx | 0x%016llx | 
0x%016llx\n",
+  reg->va.addr, reg->va.range, reg->va.addr + 
reg->va.range);
+}
+
+static int
+nouveau_debugfs_gpuva(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct nouveau_drm *drm = nouveau_drm(node->minor->dev);
+   struct nouveau_cli *cli;
+
+   mutex_lock(>clients_lock);
+   list_for_each_entry(cli, >clients, head) {
+   struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+
+   if (!uvmm)
+   continue;
+
+   nouveau_uvmm_lock(uvmm);
+   drm_debugfs_gpuva_info(m, >umgr);
+   seq_puts(m, "\n");
+   nouveau_debugfs_gpuva_regions(m, uvmm);
+   nouveau_uvmm_unlock(uvmm);
+   }
+   mutex_unlock(>clients_lock);
+
+   return 0;
+}
+
 static const struct file_operations nouveau_pstate_fops = {
.owner = THIS_MODULE,
.open = nouveau_debugfs_pstate_open,
@@ -213,6 +251,7 @@ static const struct file_operations nouveau_pstate_fops = {
 static struct drm_info_list nouveau_debugfs_list[] = {
{ "vbios.rom",  nouveau_debugfs_vbios_image, 0, NULL },
{ "strap_peek", nouveau_debugfs_strap_peek, 0, NULL },
+   DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL),
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
-- 
2.39.2



[PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm

2023-04-03 Thread Danilo Krummrich
The new VM_BIND UAPI uses the DRM GPU VA manager to manage the VA space.
Hence, we need a way to manipulate the MMU's page tables without going
through the internal range allocator implemented by nvkm/vmm.

This patch adds a raw interface for nvkm/vmm to pass the responsibility
for managing the address space and the corresponding map/unmap/sparse
operations to the upper layers.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/include/nvif/if000c.h |  26 ++-
 drivers/gpu/drm/nouveau/include/nvif/vmm.h|  19 +-
 .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h |  20 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_vmm.c |   4 +-
 drivers/gpu/drm/nouveau/nvif/vmm.c| 100 +++-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c| 213 --
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 197 
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h |  25 ++
 .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c|  16 +-
 .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c|  16 +-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c |  27 ++-
 12 files changed, 566 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h 
b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
index 9c7ff56831c5..a5a182b3c28d 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
@@ -3,7 +3,10 @@
 struct nvif_vmm_v0 {
__u8  version;
__u8  page_nr;
-   __u8  managed;
+#define NVIF_VMM_V0_TYPE_UNMANAGED 0x00
+#define NVIF_VMM_V0_TYPE_MANAGED   0x01
+#define NVIF_VMM_V0_TYPE_RAW   0x02
+   __u8  type;
__u8  pad03[5];
__u64 addr;
__u64 size;
@@ -17,6 +20,7 @@ struct nvif_vmm_v0 {
 #define NVIF_VMM_V0_UNMAP  0x04
 #define NVIF_VMM_V0_PFNMAP 0x05
 #define NVIF_VMM_V0_PFNCLR 0x06
+#define NVIF_VMM_V0_RAW0x07
 #define NVIF_VMM_V0_MTHD(i) ((i) + 
0x80)
 
 struct nvif_vmm_page_v0 {
@@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 {
__u64 addr;
 };
 
+struct nvif_vmm_raw_v0 {
+   __u8 version;
+#define NVIF_VMM_RAW_V0_GET0x0
+#define NVIF_VMM_RAW_V0_PUT0x1
+#define NVIF_VMM_RAW_V0_MAP0x2
+#define NVIF_VMM_RAW_V0_UNMAP  0x3
+#define NVIF_VMM_RAW_V0_SPARSE 0x4
+   __u8  op;
+   __u8  sparse;
+   __u8  ref;
+   __u8  shift;
+   __u32 argc;
+   __u8  pad01[7];
+   __u64 addr;
+   __u64 size;
+   __u64 offset;
+   __u64 memory;
+   __u64 argv;
+};
+
 struct nvif_vmm_pfnmap_v0 {
__u8  version;
__u8  page;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h 
b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
index a2ee92201ace..0ecedd0ee0a5 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
@@ -4,6 +4,12 @@
 struct nvif_mem;
 struct nvif_mmu;
 
+enum nvif_vmm_type {
+   UNMANAGED,
+   MANAGED,
+   RAW,
+};
+
 enum nvif_vmm_get {
ADDR,
PTES,
@@ -30,8 +36,9 @@ struct nvif_vmm {
int page_nr;
 };
 
-int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool 
managed,
- u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *);
+int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass,
+ enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc,
+ struct nvif_vmm *);
 void nvif_vmm_dtor(struct nvif_vmm *);
 int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse,
 u8 page, u8 align, u64 size, struct nvif_vma *);
@@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *);
 int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc,
 struct nvif_mem *, u64 offset);
 int nvif_vmm_unmap(struct nvif_vmm *, u64);
+
+int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift,
+void *argv, u32 argc, struct nvif_mem *mem, u64 offset);
+int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size,
+  u8 shift, bool sparse);
+int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 70e7887ef4b4..2fd2f2433fc7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -17,6 +17,7 @@ 

[PATCH] drm/nouveau/disp: set variable gv100_disp_core_mthd_base storage-class-specifier to static

2023-04-03 Thread Tom Rix
smatch reports
drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c:610:1: warning: symbol
  'gv100_disp_core_mthd_base' was not declared. Should it be static?

This variable is only used in one file so it should be static.

Signed-off-by: Tom Rix 
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
index 115d0997fd62..4ebc030e40d1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
@@ -606,7 +606,7 @@ gv100_disp_curs = {
.user = 73,
 };
 
-const struct nvkm_disp_mthd_list
+static const struct nvkm_disp_mthd_list
 gv100_disp_core_mthd_base = {
.mthd = 0x,
.addr = 0x00,
-- 
2.27.0



[PATCH drm-next v3 12/15] drm/nouveau: chan: provide nouveau_channel_kill()

2023-04-03 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting fences.

If a job times out, we need a way to recover from this situation. For
now, simply kill the channel to unblock all hung up jobs and signal
userspace that the device is dead on the next EXEC or VM_BIND ioctl.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_chan.c | 14 +++---
 drivers/gpu/drm/nouveau/nouveau_chan.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index f47c0363683c..a975f8b0e0e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in 
VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+void
+nouveau_channel_kill(struct nouveau_channel *chan)
+{
+   atomic_set(>killed, 1);
+   if (chan->fence)
+   nouveau_fence_context_kill(chan->fence, -ENODEV);
+}
+
 static int
 nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 {
@@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, 
u32 repc)
struct nouveau_cli *cli = (void *)chan->user.client;
 
NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
-   atomic_set(>killed, 1);
-   if (chan->fence)
-   nouveau_fence_context_kill(chan->fence, -ENODEV);
+
+   if (unlikely(!atomic_read(>killed)))
+   nouveau_channel_kill(chan);
 
return NVIF_EVENT_DROP;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h 
b/drivers/gpu/drm/nouveau/nouveau_chan.h
index e06a8ffed31a..e483f4a254da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -65,6 +65,7 @@ int  nouveau_channel_new(struct nouveau_drm *, struct 
nvif_device *, bool priv,
 u32 vram, u32 gart, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
+void nouveau_channel_kill(struct nouveau_channel *);
 
 extern int nouveau_vram_pushbuf;
 
-- 
2.39.2



[PATCH drm-next v3 11/15] drm/nouveau: fence: fail to emit when fence context is killed

2023-04-03 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting
fences.

If a fence context is killed, e.g. due to a channel fault, jobs which
are already queued for execution might still emit new fences. In such a
case a job would hang forever.

To fix that, fail to emit a new fence on a killed fence context with
-ENODEV to unblock the job.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++
 drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
b/drivers/gpu/drm/nouveau/nouveau_fence.c
index e946408f945b..77c739a55b19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, 
int error)
if (nouveau_fence_signal(fence))
			nvif_event_block(&fctx->event);
}
+   fctx->killed = 1;
spin_unlock_irqrestore(>lock, flags);
 }
 
@@ -229,6 +230,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct 
nouveau_channel *chan)
	dma_fence_get(&fence->base);
	spin_lock_irq(&fctx->lock);
 
+	if (unlikely(fctx->killed)) {
+		spin_unlock_irq(&fctx->lock);
+		dma_fence_put(&fence->base);
+		return -ENODEV;
+	}
+
	if (nouveau_fence_update(chan, fctx))
		nvif_event_block(&fctx->event);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 7c73c7c9834a..2c72d96ef17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,7 +44,7 @@ struct nouveau_fence_chan {
char name[32];
 
struct nvif_event event;
-   int notify_ref, dead;
+   int notify_ref, dead, killed;
 };
 
 struct nouveau_fence_priv {
-- 
2.39.2



[PATCH drm-next v3 10/15] drm/nouveau: fence: separate fence alloc and emit

2023-04-03 Thread Danilo Krummrich
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
in order to emit fences within DMA fence signalling critical sections
(e.g. as typically done in the DRM GPU schedulers run_job() callback) we
need to separate fence allocation and fence emitting.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 -
 drivers/gpu/drm/nouveau/nouveau_bo.c| 52 +++--
 drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
 drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
 drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-
 drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
 7 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x);
PUSH_KICK(push);
 
-   ret = nouveau_fence_new(chan, false, pfence);
+   ret = nouveau_fence_new(pfence);
if (ret)
goto fail;
 
+   ret = nouveau_fence_emit(*pfence, chan);
+   if (ret)
+   goto fail_fence_unref;
+
return 0;
+
+fail_fence_unref:
+   nouveau_fence_unref(pfence);
 fail:
	spin_lock_irqsave(&dev->event_lock, flags);
	list_del(&s->head);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 057bc995f19b..e9cbbf594e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict,
		mutex_lock(&cli->mutex);
	else
		mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
+
ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, 
ctx->interruptible);
-   if (ret == 0) {
-   ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-   if (ret == 0) {
-		ret = nouveau_fence_new(chan, false, &fence);
-   if (ret == 0) {
-   /* TODO: figure out a better solution here
-*
-* wait on the fence here explicitly as going 
through
-* ttm_bo_move_accel_cleanup somehow doesn't 
seem to do it.
-*
-* Without this the operation can timeout and 
we'll fallback to a
-* software copy, which might take several 
minutes to finish.
-*/
-   nouveau_fence_wait(fence, false, false);
-			ret = ttm_bo_move_accel_cleanup(bo,
-							&fence->base,
-							evict, false,
-							new_reg);
-			nouveau_fence_unref(&fence);
-   }
-   }
+   if (ret)
+   goto out_unlock;
+
+   ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+   if (ret)
+   goto out_unlock;
+
+	ret = nouveau_fence_new(&fence);
+   if (ret)
+   goto out_unlock;
+
+   ret = nouveau_fence_emit(fence, chan);
+   if (ret) {
+		nouveau_fence_unref(&fence);
+   goto out_unlock;
}
+
+   /* TODO: figure out a better solution here
+*
+* wait on the fence here explicitly as going through
+* ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+*
+* Without this the operation can timeout and we'll fallback to a
+* software copy, which might take several minutes to finish.
+*/
+   nouveau_fence_wait(fence, false, false);
+	ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
+					new_reg);
+	nouveau_fence_unref(&fence);
+
+out_unlock:
mutex_unlock(>mutex);
return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 1068abe41024..f47c0363683c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
struct nouveau_fence *fence = NULL;
int ret;
 
-	ret = nouveau_fence_new(chan, false, &fence);
+	ret = nouveau_fence_new(&fence);
if (!ret) {
-   ret = nouveau_fence_wait(fence, false, false);
+   ret = nouveau_fence_emit(fence, chan);
+   if (!ret)
+ 

[PATCH drm-next v3 09/15] drm/nouveau: move usercopy helpers to nouveau_drv.h

2023-04-03 Thread Danilo Krummrich
Move the usercopy helpers to a common driver header file to make it
usable for the new API added in subsequent commits.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_drv.h | 26 ++
 drivers/gpu/drm/nouveau/nouveau_gem.c | 26 --
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 81350e685b50..20a7f31b9082 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -130,6 +130,32 @@ nouveau_cli(struct drm_file *fpriv)
return fpriv ? fpriv->driver_priv : NULL;
 }
 
+static inline void
+u_free(void *addr)
+{
+   kvfree(addr);
+}
+
+static inline void *
+u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
+{
+   void *mem;
+   void __user *userptr = (void __force __user *)(uintptr_t)user;
+
+   size *= nmemb;
+
+   mem = kvmalloc(size, GFP_KERNEL);
+   if (!mem)
+   return ERR_PTR(-ENOMEM);
+
+   if (copy_from_user(mem, userptr, size)) {
+   u_free(mem);
+   return ERR_PTR(-EFAULT);
+   }
+
+   return mem;
+}
+
 #include 
 #include 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 08689ced4f6a..4369c8dc8b5b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -613,32 +613,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
return 0;
 }
 
-static inline void
-u_free(void *addr)
-{
-   kvfree(addr);
-}
-
-static inline void *
-u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
-{
-   void *mem;
-   void __user *userptr = (void __force __user *)(uintptr_t)user;
-
-   size *= nmemb;
-
-   mem = kvmalloc(size, GFP_KERNEL);
-   if (!mem)
-   return ERR_PTR(-ENOMEM);
-
-   if (copy_from_user(mem, userptr, size)) {
-   u_free(mem);
-   return ERR_PTR(-EFAULT);
-   }
-
-   return mem;
-}
-
 static int
 nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
struct drm_nouveau_gem_pushbuf *req,
-- 
2.39.2



[PATCH drm-next v3 08/15] drm/nouveau: bo: initialize GEM GPU VA interface

2023-04-03 Thread Danilo Krummrich
Initialize the GEM's DRM GPU VA manager interface in preparation for the
(u)vmm implementation, provided by subsequent commits, to make use of it.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7724fe63067d..057bc995f19b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -215,11 +215,14 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
if (!nvbo)
return ERR_PTR(-ENOMEM);
+
	INIT_LIST_HEAD(&nvbo->head);
	INIT_LIST_HEAD(&nvbo->entry);
	INIT_LIST_HEAD(&nvbo->vma_list);
	nvbo->bo.bdev = &drm->ttm.bdev;
 
+	drm_gem_gpuva_init(&nvbo->bo.base);
+
/* This is confusing, and doesn't actually mean we want an uncached
 * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
 * into in nouveau_gem_new().
-- 
2.39.2



[PATCH drm-next v3 07/15] drm/nouveau: get vmm via nouveau_cli_vmm()

2023-04-03 Thread Danilo Krummrich
Provide a getter function for the client's current vmm context. Since
we'll add a new (u)vmm context for UMD bindings in subsequent commits,
this will keep the code clean.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h  | 9 +
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 6 +++---
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c2ec91cc845d..7724fe63067d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -204,7 +204,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
	struct nvif_mmu *mmu = &cli->mmu;
-	struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm;
+	struct nvif_vmm *vmm = &nouveau_cli_vmm(cli)->vmm;
int i, pi = -1;
 
if (!*size) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index e648ecd0c1a0..1068abe41024 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -148,7 +148,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct 
nvif_device *device,
 
chan->device = device;
chan->drm = drm;
-	chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+	chan->vmm = nouveau_cli_vmm(cli);
atomic_set(>killed, 0);
 
/* allocate memory for dma push buffer */
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b5de312a523f..81350e685b50 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -112,6 +112,15 @@ struct nouveau_cli_work {
struct dma_fence_cb cb;
 };
 
+static inline struct nouveau_vmm *
+nouveau_cli_vmm(struct nouveau_cli *cli)
+{
+	if (cli->svm.cli)
+		return &cli->svm;
+
+	return &cli->vmm;
+}
+
 void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *,
struct nouveau_cli_work *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index f77e44958037..08689ced4f6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -103,7 +103,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -180,7 +180,7 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -269,7 +269,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct 
drm_gem_object *gem,
 {
struct nouveau_cli *cli = nouveau_cli(file_priv);
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
 
if (is_power_of_2(nvbo->valid_domains))
-- 
2.39.2



[PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure to dump a DRM GPU VA space

2023-04-03 Thread Danilo Krummrich
This commit adds a function to dump a DRM GPU VA space and a macro for
drivers to register the struct drm_info_list 'gpuvas' entry.

Most likely, most drivers might maintain one DRM GPU VA space per struct
drm_file, but there might also be drivers not having a fixed relation
between DRM GPU VA spaces and a DRM core infrastructure, hence we need the
indirection via the driver iterating it's maintained DRM GPU VA spaces.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/drm_debugfs.c | 41 +++
 include/drm/drm_debugfs.h | 25 +
 2 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 4855230ba2c6..82180fb1c200 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -175,6 +176,46 @@ static const struct file_operations drm_debugfs_fops = {
.release = single_release,
 };
 
+/**
+ * drm_debugfs_gpuva_info - dump the given DRM GPU VA space
+ * @m: pointer to the &seq_file to write
+ * @mgr: the &drm_gpuva_manager representing the GPU VA space
+ *
+ * Dumps the GPU VA mappings of a given DRM GPU VA manager.
+ *
+ * For each DRM GPU VA space drivers should call this function from their
+ * &drm_info_list's show callback.
+ *
+ * Returns: 0 on success, -ENODEV if the &drm_gpuva_manager is not initialized
+ */
+int drm_debugfs_gpuva_info(struct seq_file *m,
+  struct drm_gpuva_manager *mgr)
+{
+   DRM_GPUVA_ITER(it, mgr, 0);
+	struct drm_gpuva *va, *kva = &mgr->kernel_alloc_node;
+
+   if (!mgr->name)
+   return -ENODEV;
+
+   seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n",
+  mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range);
+   seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n",
+  kva->va.addr, kva->va.addr + kva->va.range);
+   seq_puts(m, "\n");
+   seq_puts(m, " VAs | start  | range  | end   
 | object | object offset\n");
+   seq_puts(m, 
"-\n");
+   drm_gpuva_iter_for_each(va, it) {
+		if (unlikely(va == &mgr->kernel_alloc_node))
+   continue;
+
+   seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 
0x%016llx | 0x%016llx\n",
+  va->va.addr, va->va.range, va->va.addr + 
va->va.range,
+  (u64)va->gem.obj, va->gem.offset);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_debugfs_gpuva_info);
 
 /**
  * drm_debugfs_create_files - Initialize a given set of debugfs files for DRM
diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h
index 7616f457ce70..cb2c1956a214 100644
--- a/include/drm/drm_debugfs.h
+++ b/include/drm/drm_debugfs.h
@@ -34,6 +34,22 @@
 
 #include 
 #include 
+
+#include 
+
+/**
+ * DRM_DEBUGFS_GPUVA_INFO - _info_list entry to dump a GPU VA space
+ * @show: the _info_list's show callback
+ * @data: driver private data
+ *
+ * Drivers should use this macro to define a _info_list entry to provide a
+ * debugfs file for dumping the GPU VA space regions and mappings.
+ *
+ * For each DRM GPU VA space drivers should call drm_debugfs_gpuva_info() from
+ * their @show callback.
+ */
+#define DRM_DEBUGFS_GPUVA_INFO(show, data) {"gpuvas", show, DRIVER_GEM_GPUVA, 
data}
+
 /**
  * struct drm_info_list - debugfs info list entry
  *
@@ -134,6 +150,9 @@ void drm_debugfs_add_file(struct drm_device *dev, const 
char *name,
 
 void drm_debugfs_add_files(struct drm_device *dev,
   const struct drm_debugfs_info *files, int count);
+
+int drm_debugfs_gpuva_info(struct seq_file *m,
+  struct drm_gpuva_manager *mgr);
 #else
 static inline void drm_debugfs_create_files(const struct drm_info_list *files,
int count, struct dentry *root,
@@ -155,6 +174,12 @@ static inline void drm_debugfs_add_files(struct drm_device 
*dev,
 const struct drm_debugfs_info *files,
 int count)
 {}
+
+static inline int drm_debugfs_gpuva_info(struct seq_file *m,
+struct drm_gpuva_manager *mgr)
+{
+   return 0;
+}
 #endif
 
 #endif /* _DRM_DEBUGFS_H_ */
-- 
2.39.2



[PATCH drm-next v3 06/15] drm/nouveau: new VM_BIND uapi interfaces

2023-04-03 Thread Danilo Krummrich
This commit provides the interfaces for the new UAPI motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved
   VA area.

2) Bind and unbind GPU VA space mappings via the new
   DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
asynchronous processing with DRM syncobjs as synchronization mechanism.

The default DRM_IOCTL_NOUVEAU_VM_BIND is synchronous processing,
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

Co-authored-by: Dave Airlie 
Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/driver-uapi.rst |   8 ++
 include/uapi/drm/nouveau_drm.h| 209 ++
 2 files changed, 217 insertions(+)

diff --git a/Documentation/gpu/driver-uapi.rst 
b/Documentation/gpu/driver-uapi.rst
index 4411e6919a3d..9c7ca6e33a68 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -6,3 +6,11 @@ drm/i915 uAPI
 =
 
 .. kernel-doc:: include/uapi/drm/i915_drm.h
+
+drm/nouveau uAPI
+
+
+VM_BIND / EXEC uAPI
+---
+
+.. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 853a327433d3..4d3a70529637 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -126,6 +126,209 @@ struct drm_nouveau_gem_cpu_fini {
__u32 handle;
 };
 
+/**
+ * struct drm_nouveau_sync - sync object
+ *
+ * This structure serves as synchronization mechanism for (potentially)
+ * asynchronous operations such as EXEC or VM_BIND.
+ */
+struct drm_nouveau_sync {
+   /**
+* @flags: the flags for a sync object
+*
+* The first 8 bits are used to determine the type of the sync object.
+*/
+   __u32 flags;
+#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
+#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
+#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
+   /**
+* @handle: the handle of the sync object
+*/
+   __u32 handle;
+   /**
+* @timeline_value:
+*
+* The timeline point of the sync object in case the syncobj is of
+* type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
+*/
+   __u64 timeline_value;
+};
+
+/**
+ * struct drm_nouveau_vm_init - GPU VA space init structure
+ *
+ * Used to initialize the GPU's VA space for a user client, telling the kernel
+ * which portion of the VA space is managed by the UMD and kernel respectively.
+ */
+struct drm_nouveau_vm_init {
+   /**
+* @unmanaged_addr: start address of the kernel managed VA space region
+*/
+   __u64 unmanaged_addr;
+   /**
+* @unmanaged_size: size of the kernel managed VA space region in bytes
+*/
+   __u64 unmanaged_size;
+};
+
+/**
+ * struct drm_nouveau_vm_bind_op - VM_BIND operation
+ *
+ * This structure represents a single VM_BIND operation. UMDs should pass
+ * an array of this structure via struct drm_nouveau_vm_bind's _ptr field.
+ */
+struct drm_nouveau_vm_bind_op {
+   /**
+* @op: the operation type
+*/
+   __u32 op;
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_MAP:
+ *
+ * Map a GEM object to the GPU's VA space. Optionally, the
+ * _NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the kernel to
+ * create sparse mappings for the given range.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
+ *
+ * Unmap an existing mapping in the GPU's VA space. If the region the mapping
+ * is located in is a sparse region, new sparse mappings are created where the
+ * unmapped (memory backed) mapping was mapped previously. To remove a sparse
+ * region the _NOUVEAU_VM_BIND_SPARSE must be set.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1
+   /**
+* @flags: the flags for a _nouveau_vm_bind_op
+*/
+   __u32 flags;
+/**
+ * @DRM_NOUVEAU_VM_BIND_SPARSE:
+ *
+ * Indicates that an allocated VA space region should be sparse.
+ */
+#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
+   /**
+* @handle: the handle of the DRM GEM object to map
+*/
+   __u32 handle;
+   /**
+* @pad: 32 bit padding, should be 0
+*/
+   __u32 pad;
+   /**
+* @addr:
+*
+* the address the VA space region or (memory backed) mapping should be 
mapped to
+*/
+   __u64 addr;
+   /**
+* @bo_offset: the offset within the BO backing the mapping
+*/
+   __u64 bo_offset;
+   /**
+* @range: the size of the requested mapping in bytes
+*/
+   __u64 range;
+};
+
+/**
+ * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND
+ */
+struct drm_nouveau_vm_bind {
+   /**
+* @op_count: the number of _nouveau_vm_bind_op
+  

[PATCH drm-next v3 03/15] maple_tree: split up MA_STATE() macro

2023-04-03 Thread Danilo Krummrich
Split up the MA_STATE() macro such that components using the maple tree
can easily inherit from struct ma_state and build custom tree walk
macros to hide their internals from users.

Example:

struct sample_iterator {
struct ma_state mas;
struct sample_mgr *mgr;
};

\#define SAMPLE_ITERATOR(name, __mgr, start)\
struct sample_iterator name = { \
.mas = MA_STATE_INIT(&(__mgr)->mt, start, 0),   \
.mgr = __mgr,   \
}

\#define sample_iter_for_each_range(it__, entry__, end__) \
mas_for_each(&(it__).mas, entry__, end__)

--

struct sample *sample;
SAMPLE_ITERATOR(si, min);

sample_iter_for_each_range(&si, sample, max) {
frob(mgr, sample);
}

Signed-off-by: Danilo Krummrich 
---
 include/linux/maple_tree.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 1fadb5f5978b..87d55334f1c2 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -423,8 +423,8 @@ struct ma_wr_state {
 #define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
 
-#define MA_STATE(name, mt, first, end) \
-   struct ma_state name = {\
+#define MA_STATE_INIT(mt, first, end)  \
+   {   \
.tree = mt, \
.index = first, \
.last = end,\
@@ -435,6 +435,9 @@ struct ma_wr_state {
.mas_flags = 0, \
}
 
+#define MA_STATE(name, mt, first, end) \
+   struct ma_state name = MA_STATE_INIT(mt, first, end)
+
 #define MA_WR_STATE(name, ma_state, wr_entry)  \
struct ma_wr_state name = { \
.mas = ma_state,\
-- 
2.39.2



Re: [PATCH v2 2/2] drm/bridge: fsl-ldb: Add i.MX6SX support

2023-04-03 Thread Marek Vasut

On 3/30/23 12:42, Fabio Estevam wrote:

From: Fabio Estevam 

i.MX6SX has a single LVDS port and share a similar LDB_CTRL register layout
with i.MX8MP and i.MX93.

There is no LVDS CTRL register on the i.MX6SX, so only write to
this register on the appropriate SoCs.

Add support for the i.MX6SX LDB.

Tested on a imx6sx-sdb board with a Hannstar HSD100PXN1 LVDS panel
and also on a custom i.MX6SX-based board.

Signed-off-by: Fabio Estevam 
---
Changes since v1:
- None

  drivers/gpu/drm/bridge/fsl-ldb.c | 14 +-
  1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c
index 450b352914f4..f8e5d8ab98e3 100644
--- a/drivers/gpu/drm/bridge/fsl-ldb.c
+++ b/drivers/gpu/drm/bridge/fsl-ldb.c
@@ -56,6 +56,7 @@
  #define LVDS_CTRL_VBG_ADJ_MASKGENMASK(19, 17)
  
  enum fsl_ldb_devtype {

+   IMX6SX_LDB,
IMX8MP_LDB,
IMX93_LDB,
  };
@@ -64,9 +65,14 @@ struct fsl_ldb_devdata {
u32 ldb_ctrl;
u32 lvds_ctrl;
bool lvds_en_bit;
+   bool not_lvds_ctrl;


You might want to rename this one to something like 
"composite_control_reg" since the MX6SX only has one LDB control 
register instead of two like the newer SoCs. But that's optional change.


Reviewed-by: Marek Vasut 


[PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA mappings

2023-04-03 Thread Danilo Krummrich
Add infrastructure to keep track of GPU virtual address (VA) mappings
with a decicated VA space manager implementation.

New UAPIs, motivated by Vulkan sparse memory bindings graphics drivers
start implementing, allow userspace applications to request multiple and
arbitrary GPU VA mappings of buffer objects. The DRM GPU VA manager is
intended to serve the following purposes in this context.

1) Provide infrastructure to track GPU VA allocations and mappings,
   making use of the maple_tree.

2) Generically connect GPU VA mappings to their backing buffers, in
   particular DRM GEM objects.

3) Provide a common implementation to perform more complex mapping
   operations on the GPU VA space. In particular splitting and merging
   of GPU VA mappings, e.g. for intersecting mapping requests or partial
   unmap requests.

Suggested-by: Dave Airlie 
Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/drm-mm.rst|   31 +
 drivers/gpu/drm/Makefile|1 +
 drivers/gpu/drm/drm_gem.c   |3 +
 drivers/gpu/drm/drm_gpuva_mgr.c | 1686 +++
 include/drm/drm_drv.h   |6 +
 include/drm/drm_gem.h   |   75 ++
 include/drm/drm_gpuva_mgr.h |  681 +
 7 files changed, 2483 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_gpuva_mgr.c
 create mode 100644 include/drm/drm_gpuva_mgr.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a52e6f4117d6..c9f120cfe730 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -466,6 +466,37 @@ DRM MM Range Allocator Function References
 .. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
 
+DRM GPU VA Manager
+==
+
+Overview
+
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Overview
+
+Split and Merge
+---
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Split and Merge
+
+Locking
+---
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Locking
+
+
+DRM GPU VA Manager Function References
+--
+
+.. kernel-doc:: include/drm/drm_gpuva_mgr.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :export:
+
 DRM Buddy Allocator
 ===
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 9c6446eb3c83..8eeed446a078 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -45,6 +45,7 @@ drm-y := \
drm_vblank.o \
drm_vblank_work.o \
drm_vma_manager.o \
+   drm_gpuva_mgr.o \
drm_writeback.o
 drm-$(CONFIG_DRM_LEGACY) += \
drm_agpsupport.o \
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index ee3e11e7177d..dd50c46f21b7 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -164,6 +164,9 @@ void drm_gem_private_object_init(struct drm_device *dev,
if (!obj->resv)
		obj->resv = &obj->_resv;
 
+   if (drm_core_check_feature(dev, DRIVER_GEM_GPUVA))
+   drm_gem_gpuva_init(obj);
+
	drm_vma_node_reset(&obj->vma_node);
	INIT_LIST_HEAD(&obj->lru_node);
 }
diff --git a/drivers/gpu/drm/drm_gpuva_mgr.c b/drivers/gpu/drm/drm_gpuva_mgr.c
new file mode 100644
index ..bd7d27ee44bb
--- /dev/null
+++ b/drivers/gpu/drm/drm_gpuva_mgr.c
@@ -0,0 +1,1686 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Danilo Krummrich 
+ *
+ */
+
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * The DRM GPU VA Manager, represented by struct drm_gpuva_manager keeps track
+ * of a GPU's virtual address (VA) space and manages the corresponding virtual
+ * mappings represented by _gpuva objects. It also keeps track of the
+ * mapping's backing _gem_object buffers.
+ *
+ * _gem_object buffers maintain a list (and a corresponding list lock) of
+ * _gpuva objects representing 

[PATCH drm-next v3 02/15] drm_exec: fix double dma_resv unlock

2023-04-03 Thread Danilo Krummrich
Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/drm_exec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
index df546cc5a227..f645d22a0863 100644
--- a/drivers/gpu/drm/drm_exec.c
+++ b/drivers/gpu/drm/drm_exec.c
@@ -62,7 +62,6 @@ static void drm_exec_unlock_all(struct drm_exec *exec)
}
 
if (exec->prelocked) {
-   dma_resv_unlock(exec->prelocked->resv);
drm_gem_object_put(exec->prelocked);
exec->prelocked = NULL;
}
-- 
2.39.2



[PATCH drm-next v3 01/15] drm: execution context for GEM buffers v3

2023-04-03 Thread Danilo Krummrich
From: Christian König 

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existinc TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstracts the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
overhead is unecessary and measureable.
v3: drop duplicate tracking, radeon is really the only one needing that.

Signed-off-by: Christian König 
---
 Documentation/gpu/drm-mm.rst |  12 ++
 drivers/gpu/drm/Kconfig  |   6 +
 drivers/gpu/drm/Makefile |   2 +
 drivers/gpu/drm/drm_exec.c   | 249 +++
 include/drm/drm_exec.h   | 115 
 5 files changed, 384 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_exec.c
 create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
 .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
 
+DRM Execution context
+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
 GPU Scheduler
 =
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index ba3fb04bb691..2dc81eb062eb 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -201,6 +201,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
 
+config DRM_EXEC
+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a33257d2bc7f..9c6446eb3c83 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
 #
 # Memory-management helpers
 #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..df546cc5a227
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fences slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(, true);
+ * drm_exec_while_not_all_locked() {
+ * ret = drm_exec_prepare_obj(, boA, 1);
+ * drm_exec_continue_on_contention();
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_lock(, boB, 1);
+ * drm_exec_continue_on_contention();
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini();
+ *
+ * See struct dma_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Unlock all objects and drop references */
+static void drm_exec_unlock_all(struct drm_exec *exec)
+{
+   struct drm_gem_object *obj;
+   unsigned long index;
+
+   drm_exec_for_each_locked_object(exec, index, obj) {
+   dma_resv_unlock(obj->resv);
+   drm_gem_object_put(obj);
+   }
+
+   if (exec->prelocked) {
+   dma_resv_unlock(exec->prelocked->resv);
+   drm_gem_object_put(exec->prelocked);
+   exec->prelocked = NULL;
+   }
+}
+
+/**
+ * drm_exec_init - initialize a drm_exec object
+ * @exec: the drm_exec object to initialize
+ * @interruptible: if locks should be acquired interruptible
+ *
+ * Initialize the object and make sure that we can track locked and duplicate
+ * objects.
+ */
+void drm_exec_init(struct drm_exec *exec, bool interruptible)
+{
+   exec->interruptible = interruptible;
+   exec->objects = 

[PATCH drm-next v3 00/15] [RFC] DRM GPUVA Manager & Nouveau VM_BIND UAPI

2023-04-03 Thread Danilo Krummrich
This patch series provides a new UAPI for the Nouveau driver in order to
support Vulkan features, such as sparse bindings and sparse residency.

Furthermore, with the DRM GPUVA manager it provides a new DRM core feature to
keep track of GPU virtual address (VA) mappings in a more generic way.

The DRM GPUVA manager is intended to help drivers implement userspace-manageable
GPU VA spaces in reference to the Vulkan API. In order to achieve this goal it
serves the following purposes in this context.

1) Provide infrastructure to track GPU VA allocations and mappings,
   making use of the maple_tree.

2) Generically connect GPU VA mappings to their backing buffers, in
   particular DRM GEM objects.

3) Provide a common implementation to perform more complex mapping
   operations on the GPU VA space. In particular splitting and merging
   of GPU VA mappings, e.g. for intersecting mapping requests or partial
   unmap requests.

The new VM_BIND Nouveau UAPI build on top of the DRM GPUVA manager, itself
providing the following new interfaces.

1) Initialize a GPU VA space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl
   for UMDs to specify the portion of VA space managed by the kernel and
   userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, make use of the DRM
scheduler to queue jobs and support asynchronous processing with DRM syncobjs
as synchronization mechanism.

By default DRM_IOCTL_NOUVEAU_VM_BIND does synchronous processing,
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

The new VM_BIND UAPI for Nouveau makes also use of drm_exec (execution context
for GEM buffers) by Christian König. Since the patch implementing drm_exec was
not yet merged into drm-next it is part of this series, as well as a small fix
for this patch, which was found while testing this series.

This patch series is also available at [1].

There is a Mesa NVK merge request by Dave Airlie [2] implementing the
corresponding userspace parts for this series.

The Vulkan CTS test suite passes the sparse binding and sparse residency test
cases for the new UAPI together with Dave's Mesa work.

There are also some test cases in the igt-gpu-tools project [3] for the new UAPI
and hence the DRM GPU VA manager. However, most of them are testing the DRM GPU
VA manager's logic through Nouveau's new UAPI and should be considered just as
helper for implementation.

However, I absolutely intend to change those test cases to proper kunit test
cases for the DRM GPUVA manager, once and if we agree on it's usefulness and
design.

[1] https://gitlab.freedesktop.org/nouvelles/kernel/-/tree/new-uapi-drm-next /
https://gitlab.freedesktop.org/nouvelles/kernel/-/merge_requests/1
[2] https://gitlab.freedesktop.org/nouveau/mesa/-/merge_requests/150/
[3] https://gitlab.freedesktop.org/dakr/igt-gpu-tools/-/tree/wip_nouveau_vm_bind

Changes in V2:
==
  Nouveau:
- Reworked the Nouveau VM_BIND UAPI to avoid memory allocations in fence
  signalling critical sections. Updates to the VA space are split up in 
three
  separate stages, where only the 2. stage executes in a fence signalling
  critical section:

1. update the VA space, allocate new structures and page tables
2. (un-)map the requested memory bindings
3. free structures and page tables

- Separated generic job scheduler code from specific job implementations.
- Separated the EXEC and VM_BIND implementation of the UAPI.
- Reworked the locking parts of the nvkm/vmm RAW interface, such that
  (un-)map operations can be executed in fence signalling critical sections.

  GPUVA Manager:
- made drm_gpuva_regions optional for users of the GPUVA manager
- allow NULL GEMs for drm_gpuva entries
- switched from drm_mm to maple_tree for tracking drm_gpuva / drm_gpuva_region
  entries
- provide callbacks for users to allocate custom drm_gpuva_op structures to
  allow inheritance
- added user bits to drm_gpuva_flags
- added a prefetch operation type in order to support generating prefetch
  operations in the same way other operations generated
- hand the responsibility for mutual exclusion for a GEM's
  drm_gpuva list to the user; simplified corresponding (un-)link functions

  Maple Tree:
- I added two maple tree patches to the series, one to support custom tree
  walk macros and one to hand the locking responsibility to the user of the
  GPUVA manager without pre-defined lockdep checks.

Changes in V3:
==
  Nouveau:
- Reworked the Nouveau VM_BIND UAPI to do the job cleanup (including page
  table cleanup) within a workqueue rather than the job_free() callback of
  the 

Re: linux-next: build failure after merge of the drm-misc tree

2023-04-03 Thread Stephen Rothwell
Hi Qiang,

On Mon, 3 Apr 2023 16:51:27 +0800 Qiang Yu  wrote:
>
> I think you can just revert the following three lima commits when merge:
> * 4a66f3da99dc ("drm/lima: add show_fdinfo for drm usage stats")
> * 87767de835ed ("drm/lima: allocate unique id per drm_file")
> * bccafec957a5 ("drm/lima: add usage counting method to ctx_mgr")

OK, I have done that from today.

-- 
Cheers,
Stephen Rothwell


pgpmw4XMs2RWB.pgp
Description: OpenPGP digital signature


Re: [RFC PATCH 00/10] Xe DRM scheduler and long running workload plans

2023-04-03 Thread Asahi Lina

Hi, thanks for the Cc!

On 04/04/2023 09.22, Matthew Brost wrote:

Hello,

As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we
have been asked to merge our common DRM scheduler patches first as well
as develop a common solution for long running workloads with the DRM
scheduler. This RFC series is our first attempt at doing this. We
welcome any and all feedback.

This can be thought of as 4 parts, detailed below.

- DRM scheduler changes for 1 to 1 relationship between scheduler and
entity (patches 1-3)

In Xe all of the scheduling of jobs is done by a firmware scheduler (the
GuC) which is a new paradigm WRT to the DRM scheduler and presents
severals problems as the DRM was originally designed to schedule jobs on
hardware queues. The main problem being that DRM scheduler expects the
submission order of jobs to be the completion order of jobs even across
multiple entities. This assumption falls apart with a firmware scheduler
as a firmware scheduler has no concept of jobs and jobs can complete out
of order. A novel solution was originally thought of by Faith during
the initial prototype of Xe, create a 1 to 1 relationship between scheduler
and entity. I believe the AGX driver [3] is using this approach and
Boris may use this approach as well for the Mali driver [4].

To support a 1 to 1 relationship we move the main execution function
from a kthread to a work queue and add a new scheduling mode which
bypasses code in the DRM which isn't needed in a 1 to 1 relationship.
The new scheduling mode should unify all drivers usage with a 1 to 1
relationship and can be thought of as using scheduler as a dependency /
inflight job tracker rather than a true scheduler.


Yup, we're in the exact same situation with drm/asahi, so this is very 
welcome! We've been using the existing scheduler as-is, but this should 
help remove some unneeded complexity in this use case.


Do you want me to pull in this series into our tree and make sure this 
all works out for us?


I also have a couple bugfixes for drm/sched I need to send out, but I 
think the rebase/merge with this series should be trivial. I'll send 
that out this week.



- Generic messaging interface for DRM scheduler

Idea is to be able to communicate to the submission backend with in band
(relative to main execution function) messages. Messages are backend
defined and flexible enough for any use case. In Xe we use these
messages to clean up entities, set properties for entities, and suspend /
resume execution of an entity [5]. I suspect other drivers can leverage
this messaging concept too as it is a convenient way to avoid races in the
backend.


We haven't needed this so far (mostly by using fine-grained locking and 
refcounting all over the place) but I can see it being useful to 
simplify some of those constructs and maybe avoid potential deadlocks in 
some places. I'm not sure yet whether we can fully get rid of the main 
queue refcounting/locking (our completion/error signaling path doesn't 
map well to DMA fences directly so we still need something there to get 
from the global GPU completion signaling thread to individual queues) 
but it might be a step in the right direction at least!


~~ Lina



Re: [PATCH] drm/i915/guc: Don't capture Gen8 regs on Gen12 devices

2023-04-03 Thread Matt Roper
On Mon, Apr 03, 2023 at 02:33:34PM -0700, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> A pair of pre-Gen12 registers were being included in the Gen12 capture
> list. GuC was rejecting those as being invalid and logging errors
> about them. So, stop doing it.

Looks like these registers existed from gen8-gen11.  With this change,
it looks like they also won't be included in the GuC error capture for
gen11 (ICL and EHL/JSL) since those platforms return xe_lpd_lists [1]
rather than default_lists; do we care about that?  I assume not (since
those platforms don't use GuC submission unless you force it with the
enable_guc modparam and taint your kernel), but I figured I should point
it out.

Reviewed-by: Matt Roper 


[1] Why is the main list we use called xe_lpd (i.e., the name of ADL-P's
display IP)?  It doesn't seem like we're doing anything with display
registers here so using display IP naming seems really confusing.


Matt

> 
> Signed-off-by: John Harrison 
> Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state 
> capture.")
> Cc: Alan Previn 
> Cc: Umesh Nerlige Ramappa 
> Cc: Lucas De Marchi 
> Cc: John Harrison 
> Cc: Jani Nikula 
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc: Daniele Ceraolo Spurio 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> index cf49188db6a6e..e0e793167d61b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
> @@ -31,12 +31,14 @@
>   { FORCEWAKE_MT, 0,  0, "FORCEWAKE" }
>  
>  #define COMMON_GEN9BASE_GLOBAL \
> - { GEN8_FAULT_TLB_DATA0, 0,  0, "GEN8_FAULT_TLB_DATA0" }, \
> - { GEN8_FAULT_TLB_DATA1, 0,  0, "GEN8_FAULT_TLB_DATA1" }, \
>   { ERROR_GEN6,   0,  0, "ERROR_GEN6" }, \
>   { DONE_REG, 0,  0, "DONE_REG" }, \
>   { HSW_GTT_CACHE_EN, 0,  0, "HSW_GTT_CACHE_EN" }
>  
> +#define GEN9_GLOBAL \
> + { GEN8_FAULT_TLB_DATA0, 0,  0, "GEN8_FAULT_TLB_DATA0" }, \
> + { GEN8_FAULT_TLB_DATA1, 0,  0, "GEN8_FAULT_TLB_DATA1" }
> +
>  #define COMMON_GEN12BASE_GLOBAL \
>   { GEN12_FAULT_TLB_DATA0,0,  0, "GEN12_FAULT_TLB_DATA0" }, \
>   { GEN12_FAULT_TLB_DATA1,0,  0, "GEN12_FAULT_TLB_DATA1" }, \
> @@ -142,6 +144,7 @@ static const struct __guc_mmio_reg_descr 
> xe_lpd_gsc_inst_regs[] = {
>  static const struct __guc_mmio_reg_descr default_global_regs[] = {
>   COMMON_BASE_GLOBAL,
>   COMMON_GEN9BASE_GLOBAL,
> + GEN9_GLOBAL,
>  };
>  
>  static const struct __guc_mmio_reg_descr default_rc_class_regs[] = {
> -- 
> 2.39.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH RFC v2 2/6] drm/msm: Add MSM-specific DSC helper methods

2023-04-03 Thread Dmitry Baryshkov

On 04/04/2023 00:38, Jessica Zhang wrote:



On 4/2/2023 4:21 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Introduce MSM-specific DSC helper methods, as some calculations are
common between DP and DSC.

Changes in v2:
- Moved files up to msm/ directory
- Dropped get_comp_ratio() helper
- Used drm_int2fixp() to convert to integers to fp
- Style changes to improve readability
- Dropped unused bpp variable in msm_dsc_get_dce_bytes_per_line()
- Changed msm_dsc_get_slice_per_intf() to a static inline method
- Dropped last division step of msm_dsc_get_pclk_per_line() and changed
   method name accordingly
- Changed DSC_BPP macro to drm_dsc_get_bpp_int() helper method
- Fixed some math issues caused by passing in incorrect types to
   drm_fixed methods in get_bytes_per_soft_slice()

Signed-off-by: Jessica Zhang 
---
  drivers/gpu/drm/msm/Makefile |  1 +
  drivers/gpu/drm/msm/msm_dsc_helper.c | 53 


  drivers/gpu/drm/msm/msm_dsc_helper.h | 42 
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 7274c41228ed..b814fc80e2d5 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -94,6 +94,7 @@ msm-y += \
  msm_atomic_tracepoints.o \
  msm_debugfs.o \
  msm_drv.o \
+    msm_dsc_helper.o \
  msm_fb.o \
  msm_fence.o \
  msm_gem.o \
diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.c 
b/drivers/gpu/drm/msm/msm_dsc_helper.c

new file mode 100644
index ..60b73e17e6eb
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_dsc_helper.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights 
reserved

+ */
+
+#include 
+#include 
+#include 
+
+#include "msm_drv.h"
+#include "msm_dsc_helper.h"
+
+static s64 get_bytes_per_soft_slice(struct drm_dsc_config *dsc, int 
intf_width, u32 src_bpp)


intf_width is unused


Hi Dmitry,

Acked.




+{
+    int bpp = msm_dsc_get_bpp_int(dsc);
+    s64 numerator_fp, denominator_fp;
+    s64 comp_ratio_fp = drm_fixp_from_fraction(src_bpp, bpp);
+
+    numerator_fp = drm_int2fixp(dsc->slice_width * 3);


You have lost dsc->bits_per_component here.


This was moved to the denominator calculation, but I'll move it back to 
this line to avoid confusion.


Maybe you occasionally mixed bpp and bpc, because there is no 
bits_per_component usage in denominator. Could you please recheck the 
calculations.






+    denominator_fp = drm_fixp_from_fraction(comp_ratio_fp * 8, 
drm_int2fixp(bpp));


denominator_fp = drm_fixp_from_fraction(src_bpp * 8, bpp);


Acked.




+
+    return drm_fixp_div(numerator_fp, denominator_fp);
+}
+
+u32 msm_dsc_get_eol_byte_num(struct drm_dsc_config *dsc, int 
intf_width, u32 src_bpp)

+{
+    u32 bytes_per_soft_slice, extra_eol_bytes, bytes_per_intf;
+    s64 bytes_per_soft_slice_fp;
+    int slice_per_intf = msm_dsc_get_slice_per_intf(dsc, intf_width);
+
+    bytes_per_soft_slice_fp = get_bytes_per_soft_slice(dsc, 
intf_width, src_bpp);

+    bytes_per_soft_slice = drm_fixp2int_ceil(bytes_per_soft_slice_fp);
+
+    bytes_per_intf = bytes_per_soft_slice * slice_per_intf;
+    extra_eol_bytes = bytes_per_intf % 3;
+    if (extra_eol_bytes != 0)
+    extra_eol_bytes = 3 - extra_eol_bytes;


I become confused here when I checked eol_bytes in the display techpack.

I see that for DP the dp_panel_dsc_pclk_param_calc() calculates 
dsc->eol_bytes_num in this way, the size to pad dsc_byte_count * 
slice_per_intf to 3 bytes.


However, for DSI this is a simple as total_bytes_per_intf % 3 , so it 
is not a padding, but a length of the last chunk.


Could you please clarify? If the techpack code is correct, I'd prefer 
if we return last chunk size here and calculate the padding length in 
the DP driver.


I've double checked the calculations between DP and DSI, and I think 
you're right. Will move the `if (extra_eol_bytes != 0)` block out to DP 
code.


Ack. Could you please check with HW team that our understanding is correct?






+
+    return extra_eol_bytes;
+}
+
+int msm_dsc_get_uncompressed_pclk_per_line(struct drm_dsc_config 
*dsc, int intf_width, u32 src_bpp)


Basing on Abhinav's description ("pclk_per_line can be only per 
interface") would it better be named as 
msm_dsc_get_uncompressed_pclk_per_intf() ? or 
msm_dsc_get_uncompressed_pclk_for_intf() ?


BTW: if get_bytes_per_soft_slice() doesn't use intf_width, we can 
probably drop it here too.



+{
+    s64 data_width;
+
+    if (!dsc->slice_width || (intf_width < dsc->slice_width))
+    return -EINVAL;


Error code is not validated at dsi_timing_setup. I'd suggest moving 
error checks there and dropping the error handling here. If 
dsc->slice_width is not set, we should stop much earlier than 
drm_bridge's pre_enable() callback.


Acked.

Thanks,

Jessica Zhang




+
+    data_width = drm_fixp_mul(dsc->slice_count,
+

[RFC PATCH 10/10] drm/syncobj: Warn on long running dma-fences

2023-04-03 Thread Matthew Brost
Long running dma-fences are not allowed to be exported, a drm_syncobj is
designed to be exported to the user, so add a warn if drm_syncobj
installs long running dma-fences, as this is not allowed.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/drm_syncobj.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 0c2be8360525..7c304cd7d037 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -291,6 +291,7 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
struct syncobj_wait_entry *cur, *tmp;
struct dma_fence *prev;
 
+   WARN_ON_ONCE(dma_fence_is_lr(fence));
dma_fence_get(fence);
 
spin_lock(>lock);
@@ -325,8 +326,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
struct dma_fence *old_fence;
struct syncobj_wait_entry *cur, *tmp;
 
-   if (fence)
+   if (fence) {
+   WARN_ON_ONCE(dma_fence_is_lr(fence));
dma_fence_get(fence);
+   }
 
spin_lock(>lock);
 
-- 
2.34.1



[RFC PATCH 05/10] drm/sched: Start run wq before TDR in drm_sched_start

2023-04-03 Thread Matthew Brost
If the TDR is set to a very small value it can fire before the run wq is
started in the function drm_sched_start. The run wq is expected to
running when the TDR fires, fix this ordering so this expectation is
always met.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 9dc3378e9c5e..6ae710017024 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -611,13 +611,13 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, 
bool full_recovery)
drm_sched_job_done(s_job);
}
 
+   drm_sched_run_wq_start(sched);
+
if (full_recovery) {
spin_lock(>job_list_lock);
drm_sched_start_timeout(sched);
spin_unlock(>job_list_lock);
}
-
-   drm_sched_run_wq_start(sched);
 }
 EXPORT_SYMBOL(drm_sched_start);
 
-- 
2.34.1



[RFC PATCH 09/10] drm/sched: Support long-running sched entities

2023-04-03 Thread Matthew Brost
From: Thomas Hellström 

Make the drm scheduler aware of long-running dma fences by

* Enable marking a sched entity as producing long-running fences.
* Disallowing long-running fences as dependencies for non-long-running
  sched entities, while long-running sched entities allow those.

Signed-off-by: Matthew Brost 
Signed-off-by: Thomas Hellström 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 44 +++-
 drivers/gpu/drm/scheduler/sched_fence.c  |  4 +++
 drivers/gpu/drm/scheduler/sched_main.c   |  9 ++---
 include/drm/gpu_scheduler.h  | 36 +++
 include/linux/dma-fence.h|  5 +++
 5 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index ccea4d079d0f..0640fc9d4491 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -174,6 +174,32 @@ static void drm_sched_entity_kill_jobs_work(struct 
work_struct *wrk)
job->sched->ops->free_job(job);
 }
 
+/**
+ * drm_sched_entity_add_fence_cb() - Helper to add a fence callback
+ * @entity: The sched entity
+ * @f: The possbily long-running dma-fence on which to add a callback
+ * @cb: The struct dma_fence_cb to use for the callback
+ * @func: The callback function.
+ *
+ * This function calls the proper dma_fence add callback function
+ * depending on whether @entity is marked as long-running or not. If it
+ * is not, this will make sure we get a warning if trying to add a
+ * callback on a long-running dma-fence.
+ *
+ * Return: Zero on success, -ENOENT if already signaled and -EINVAL in case
+ * of error.
+ */
+int drm_sched_entity_add_fence_cb(struct drm_sched_entity *entity,
+ struct dma_fence *f,
+ struct dma_fence_cb *cb,
+ dma_fence_func_t func)
+{
+   if (drm_sched_entity_is_lr(entity))
+   return dma_fence_lr_add_callback(f, cb, func);
+
+   return dma_fence_add_callback(f, cb, func);
+}
+
 /* Signal the scheduler finished fence when the entity in question is killed. 
*/
 static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
  struct dma_fence_cb *cb)
@@ -187,8 +213,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence 
*f,
/* Wait for all dependencies to avoid data corruptions */
while (!xa_empty(>dependencies)) {
f = xa_erase(>dependencies, job->last_dependency++);
-   r = dma_fence_add_callback(f, >finish_cb,
-  drm_sched_entity_kill_jobs_cb);
+   r = drm_sched_entity_add_fence_cb(job->entity, f, 
>finish_cb,
+ 
drm_sched_entity_kill_jobs_cb);
if (!r)
return;
 
@@ -226,8 +252,9 @@ static void drm_sched_entity_kill(struct drm_sched_entity 
*entity)
dma_fence_set_error(_fence->finished, -ESRCH);
 
dma_fence_get(_fence->finished);
-   if (!prev || dma_fence_add_callback(prev, >finish_cb,
-  drm_sched_entity_kill_jobs_cb))
+   if (!prev || drm_sched_entity_add_fence_cb(job->entity, prev,
+  >finish_cb,
+  
drm_sched_entity_kill_jobs_cb))
drm_sched_entity_kill_jobs_cb(NULL, >finish_cb);
 
prev = _fence->finished;
@@ -420,8 +447,8 @@ static bool drm_sched_entity_add_dependency_cb(struct 
drm_sched_entity *entity)
fence = dma_fence_get(_fence->scheduled);
dma_fence_put(entity->dependency);
entity->dependency = fence;
-   if (!dma_fence_add_callback(fence, >cb,
-   drm_sched_entity_clear_dep))
+   if (!drm_sched_entity_add_fence_cb(entity, fence, >cb,
+  drm_sched_entity_clear_dep))
return true;
 
/* Ignore it when it is already scheduled */
@@ -429,8 +456,9 @@ static bool drm_sched_entity_add_dependency_cb(struct 
drm_sched_entity *entity)
return false;
}
 
-   if (!dma_fence_add_callback(entity->dependency, >cb,
-   drm_sched_entity_wakeup))
+   if (!drm_sched_entity_add_fence_cb(entity, entity->dependency,
+  >cb,
+  drm_sched_entity_wakeup))
return true;
 
dma_fence_put(entity->dependency);
diff --git a/drivers/gpu/drm/scheduler/sched_fence.c 
b/drivers/gpu/drm/scheduler/sched_fence.c
index d7cfc0441885..a566723ecc2c 100644
--- a/drivers/gpu/drm/scheduler/sched_fence.c
+++ 

[RFC PATCH 02/10] drm/sched: Move schedule policy to scheduler / entity

2023-04-03 Thread Matthew Brost
Rather than a global modparam for scheduling policy, move the scheduling
policy to scheduler / entity so user can control each scheduler / entity
policy.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_sched.c|  3 ++-
 drivers/gpu/drm/lima/lima_sched.c  |  3 ++-
 drivers/gpu/drm/msm/msm_ringbuffer.c   |  3 ++-
 drivers/gpu/drm/panfrost/panfrost_job.c|  3 ++-
 drivers/gpu/drm/scheduler/sched_entity.c   | 25 ++
 drivers/gpu/drm/scheduler/sched_main.c | 21 +-
 drivers/gpu/drm/v3d/v3d_sched.c| 15 -
 include/drm/gpu_scheduler.h| 23 ++--
 9 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 00c9c03c8f94..4df0fca5a74c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2368,6 +2368,7 @@ static int amdgpu_device_init_schedulers(struct 
amdgpu_device *adev)
   ring->num_hw_submission, 
amdgpu_job_hang_limit,
   timeout, adev->reset_domain->wq,
   ring->sched_score, ring->name,
+  DRM_SCHED_POLICY_DEFAULT,
   adev->dev);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c 
b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 8486a2923f1b..61204a3f8b0b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -136,7 +136,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
ret = drm_sched_init(>sched, _sched_ops, NULL,
 etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
 msecs_to_jiffies(500), NULL, NULL,
-dev_name(gpu->dev), gpu->dev);
+dev_name(gpu->dev), DRM_SCHED_POLICY_DEFAULT,
+gpu->dev);
if (ret)
return ret;
 
diff --git a/drivers/gpu/drm/lima/lima_sched.c 
b/drivers/gpu/drm/lima/lima_sched.c
index 54f53bece27c..33042ba6ae93 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -491,7 +491,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, 
const char *name)
return drm_sched_init(>base, _sched_ops, NULL, 1,
  lima_job_hang_limit,
  msecs_to_jiffies(timeout), NULL,
- NULL, name, pipe->ldev->dev);
+ NULL, name, DRM_SCHED_POLICY_DEFAULT,
+ pipe->ldev->dev);
 }
 
 void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c 
b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 5879fc262047..f408a9097315 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -97,7 +97,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu 
*gpu, int id,
 
ret = drm_sched_init(>sched, _sched_ops, NULL,
num_hw_submissions, 0, sched_timeout,
-   NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);
+   NULL, NULL, to_msm_bo(ring->bo)->name,
+   DRM_SCHED_POLICY_DEFAULT, gpu->dev->dev);
if (ret) {
goto fail;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index f48b07056a16..effa48b33dce 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -819,7 +819,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
 nentries, 0,
 msecs_to_jiffies(JOB_TIMEOUT_MS),
 pfdev->reset.wq,
-NULL, "pan_js", pfdev->dev);
+NULL, "pan_js", DRM_SCHED_POLICY_DEFAULT,
+pfdev->dev);
if (ret) {
dev_err(pfdev->dev, "Failed to create scheduler: %d.", 
ret);
goto err_sched;
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index 15d04a0ec623..f1299e51860b 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -33,6 +33,20 @@
 #define to_drm_sched_job(sched_job)\
container_of((sched_job), struct drm_sched_job, queue_node)
 
+static bool bad_policies(struct drm_gpu_scheduler **sched_list,
+unsigned int num_sched_list)
+{
+   enum 

[RFC PATCH 04/10] drm/sched: Add generic scheduler message interface

2023-04-03 Thread Matthew Brost
Add generic schedule message interface which sends messages to backend
from the drm_gpu_scheduler main submission thread. The idea is some of
these messages modify some state in drm_sched_entity which is also
modified during submission. By scheduling these messages and submission
in the same thread there is no race when changing states in
drm_sched_entity.

This interface will be used in Xe, the new Intel GPU driver, to clean up,
suspend, resume, and change scheduling properties of a drm_sched_entity.

The interface is designed to be generic and extendable with only the
backend understanding the messages.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_main.c | 58 +-
 include/drm/gpu_scheduler.h| 29 -
 2 files changed, 84 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 2795021efe7b..9dc3378e9c5e 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1055,6 +1055,54 @@ drm_sched_pick_best(struct drm_gpu_scheduler 
**sched_list,
 }
 EXPORT_SYMBOL(drm_sched_pick_best);
 
+/**
+ * drm_sched_add_msg - add scheduler message
+ *
+ * @sched: scheduler instance
+ * @msg: message to be added
+ *
+ * Can and will pass an jobs waiting on dependencies or in a runnable queue.
+ * Messages processing will stop if schedule run wq is stopped and resume when
+ * run wq is started.
+ */
+void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
+  struct drm_sched_msg *msg)
+{
+   spin_lock(>job_list_lock);
+   list_add_tail(>link, >msgs);
+   spin_unlock(>job_list_lock);
+
+   /*
+* Same as above in drm_sched_run_wq_queue, try to kick worker if
+* paused, harmless if this races
+*/
+   if (!sched->pause_run_wq)
+   queue_work(sched->run_wq, >work_run);
+}
+EXPORT_SYMBOL(drm_sched_add_msg);
+
+/**
+ * drm_sched_get_msg - get scheduler message
+ *
+ * @sched: scheduler instance
+ *
+ * Returns NULL or message
+ */
+static struct drm_sched_msg *
+drm_sched_get_msg(struct drm_gpu_scheduler *sched)
+{
+   struct drm_sched_msg *msg;
+
+   spin_lock(>job_list_lock);
+   msg = list_first_entry_or_null(>msgs,
+  struct drm_sched_msg, link);
+   if (msg)
+   list_del(>link);
+   spin_unlock(>job_list_lock);
+
+   return msg;
+}
+
 /**
  * drm_sched_main - main scheduler thread
  *
@@ -1068,6 +1116,7 @@ static void drm_sched_main(struct work_struct *w)
 
while (!READ_ONCE(sched->pause_run_wq)) {
struct drm_sched_entity *entity;
+   struct drm_sched_msg *msg;
struct drm_sched_fence *s_fence;
struct drm_sched_job *sched_job;
struct dma_fence *fence;
@@ -1075,12 +1124,16 @@ static void drm_sched_main(struct work_struct *w)
 
cleanup_job = drm_sched_get_cleanup_job(sched);
entity = drm_sched_select_entity(sched);
+   msg = drm_sched_get_msg(sched);
 
if (cleanup_job)
sched->ops->free_job(cleanup_job);
 
+   if (msg)
+   sched->ops->process_msg(msg);
+
if (!entity) {
-   if (!cleanup_job)
+   if (!cleanup_job && !msg)
break;
continue;
}
@@ -1089,7 +1142,7 @@ static void drm_sched_main(struct work_struct *w)
 
if (!sched_job) {
complete_all(>entity_idle);
-   if (!cleanup_job)
+   if (!cleanup_job && !msg)
break;
continue;
}
@@ -1181,6 +1234,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 
init_waitqueue_head(>job_scheduled);
INIT_LIST_HEAD(>pending_list);
+   INIT_LIST_HEAD(>msgs);
spin_lock_init(>job_list_lock);
atomic_set(>hw_rq_count, 0);
INIT_DELAYED_WORK(>work_tdr, drm_sched_job_timedout);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 3e421f5a710c..18172ae63ab7 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -398,6 +398,23 @@ enum drm_gpu_sched_stat {
DRM_GPU_SCHED_STAT_ENODEV,
 };
 
+/**
+ * struct drm_sched_msg - an in-band (relative to GPU scheduler run queue)
+ * message
+ *
+ * Generic enough for backend defined messages, backend can expand if needed.
+ */
+struct drm_sched_msg {
+   /** @link: list link into the gpu scheduler list of messages */
+   struct list_headlink;
+   /**
+* @private_data: opaque pointer to message private data (backend 
defined)
+*/
+   void*private_data;
+   /** @opcode: opcode of message (backend 

[RFC PATCH 00/10] Xe DRM scheduler and long running workload plans

2023-04-03 Thread Matthew Brost
Hello,

As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we
have been asked to merge our common DRM scheduler patches first as well
as develop a common solution for long running workloads with the DRM
scheduler. This RFC series is our first attempt at doing this. We
welcome any and all feedback.

This can be thought of as 4 parts, detailed below.

- DRM scheduler changes for 1 to 1 relationship between scheduler and
entity (patches 1-3)

In Xe all of the scheduling of jobs is done by a firmware scheduler (the
GuC) which is a new paradigm WRT to the DRM scheduler and presents
severals problems as the DRM was originally designed to schedule jobs on
hardware queues. The main problem being that DRM scheduler expects the
submission order of jobs to be the completion order of jobs even across
multiple entities. This assumption falls apart with a firmware scheduler
as a firmware scheduler has no concept of jobs and jobs can complete out
of order. A novel solution was originally thought of by Faith during
the initial prototype of Xe: create a 1 to 1 relationship between scheduler
and entity. I believe the AGX driver [3] is using this approach and
Boris may use this approach as well for the Mali driver [4].

To support a 1 to 1 relationship we move the main execution function
from a kthread to a work queue and add a new scheduling mode which
bypasses code in the DRM which isn't needed in a 1 to 1 relationship.
The new scheduling mode should unify all drivers usage with a 1 to 1
relationship and can be thought of as using scheduler as a dependency /
inflight job tracker rather than a true scheduler.

- Generic messaging interface for DRM scheduler

Idea is to be able to communicate to the submission backend with in band
(relative to main execution function) messages. Messages are backend
defined and flexible enough for any use case. In Xe we use these
messages to clean up entities, set properties for entities, and suspend /
resume execution of an entity [5]. I suspect other drivers can leverage
this messaging concept too as it is a convenient way to avoid races in the
backend.

- Support for using TDR for all error paths of a scheduler / entity

Fix a few races / bugs, add function to dynamically set the TDR timeout.

- Annotate dma-fences for long running workloads.

The idea here is to use dma-fences only as sync points within the
scheduler and never export them for long running workloads. By
annotating these fences as long running we ensure that these dma-fences
are never used in a way that breaks the dma-fence rules. A benefit of
this approach is that the scheduler can still safely flow control the
execution ring buffer via the job limit without breaking the dma-fence
rules.

Again this a first draft and looking forward to feedback.

Enjoy - Matt

[1] https://gitlab.freedesktop.org/drm/xe/kernel
[2] https://patchwork.freedesktop.org/series/112188/ 
[3] https://patchwork.freedesktop.org/series/114772/
[4] https://patchwork.freedesktop.org/patch/515854/?series=112188=1
[5] 
https://gitlab.freedesktop.org/drm/xe/kernel/-/blob/drm-xe-next/drivers/gpu/drm/xe/xe_guc_submit.c#L1031

Matthew Brost (8):
  drm/sched: Convert drm scheduler to use a work queue rather than
kthread
  drm/sched: Move schedule policy to scheduler / entity
  drm/sched: Add DRM_SCHED_POLICY_SINGLE_ENTITY scheduling policy
  drm/sched: Add generic scheduler message interface
  drm/sched: Start run wq before TDR in drm_sched_start
  drm/sched: Submit job before starting TDR
  drm/sched: Add helper to set TDR timeout
  drm/syncobj: Warn on long running dma-fences

Thomas Hellström (2):
  dma-buf/dma-fence: Introduce long-running completion fences
  drm/sched: Support long-running sched entities

 drivers/dma-buf/dma-fence.c | 142 +++---
 drivers/dma-buf/dma-resv.c  |   5 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  15 +-
 drivers/gpu/drm/drm_syncobj.c   |   5 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c |   5 +-
 drivers/gpu/drm/lima/lima_sched.c   |   5 +-
 drivers/gpu/drm/msm/adreno/adreno_device.c  |   6 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c|   5 +-
 drivers/gpu/drm/panfrost/panfrost_job.c |   5 +-
 drivers/gpu/drm/scheduler/sched_entity.c| 127 +++--
 drivers/gpu/drm/scheduler/sched_fence.c |   6 +-
 drivers/gpu/drm/scheduler/sched_main.c  | 278 +++-
 drivers/gpu/drm/v3d/v3d_sched.c |  25 +-
 include/drm/gpu_scheduler.h | 130 +++--
 include/linux/dma-fence.h   |  60 -
 16 files changed, 649 insertions(+), 184 deletions(-)

-- 
2.34.1



[RFC PATCH 07/10] drm/sched: Add helper to set TDR timeout

2023-04-03 Thread Matthew Brost
Add helper to set TDR timeout and restart the TDR with new timeout
value. This will be used in XE, new Intel GPU driver, to trigger the TDR
to clean up drm_sched_entity instances that encounter errors.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_main.c | 18 ++
 include/drm/gpu_scheduler.h|  1 +
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 4eac02d212c1..d61880315d8d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -370,6 +370,24 @@ static void drm_sched_start_timeout(struct 
drm_gpu_scheduler *sched)
queue_delayed_work(sched->timeout_wq, >work_tdr, 
sched->timeout);
 }
 
+/**
+ * drm_sched_set_timeout - set timeout for reset worker
+ *
+ * @sched: scheduler instance to set and (re)-start the worker for
+ * @timeout: timeout period
+ *
+ * Set and (re)-start the timeout for the given scheduler.
+ */
+void drm_sched_set_timeout(struct drm_gpu_scheduler *sched, long timeout)
+{
+   spin_lock(>job_list_lock);
+   sched->timeout = timeout;
+   cancel_delayed_work(>work_tdr);
+   drm_sched_start_timeout(sched);
+   spin_unlock(>job_list_lock);
+}
+EXPORT_SYMBOL(drm_sched_set_timeout);
+
 /**
  * drm_sched_fault - immediately start timeout handler
  *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 18172ae63ab7..6258e324bd7c 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -593,6 +593,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity 
*entity,
struct drm_gpu_scheduler **sched_list,
unsigned int num_sched_list);
 
+void drm_sched_set_timeout(struct drm_gpu_scheduler *sched, long timeout);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
 void drm_sched_add_msg(struct drm_gpu_scheduler *sched,
-- 
2.34.1



[RFC PATCH 03/10] drm/sched: Add DRM_SCHED_POLICY_SINGLE_ENTITY scheduling policy

2023-04-03 Thread Matthew Brost
DRM_SCHED_POLICY_SINGLE_ENTITY creates a 1 to 1 relationship between
scheduler and entity. No priorities or run queue used in this mode.
Intended for devices with firmware schedulers.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_entity.c | 58 +
 drivers/gpu/drm/scheduler/sched_fence.c  |  2 +-
 drivers/gpu/drm/scheduler/sched_main.c   | 64 +---
 include/drm/gpu_scheduler.h  | 29 +++
 4 files changed, 123 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
b/drivers/gpu/drm/scheduler/sched_entity.c
index f1299e51860b..ccea4d079d0f 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -91,8 +91,15 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
RB_CLEAR_NODE(>rb_tree_node);
 
if(num_sched_list) {
-   entity->rq = _list[0]->sched_rq[entity->priority];
entity->sched_policy = sched_list[0]->sched_policy;
+   if (entity->sched_policy != DRM_SCHED_POLICY_SINGLE_ENTITY) {
+   entity->rq = _list[0]->sched_rq[entity->priority];
+   } else {
+   if (num_sched_list != 1 || sched_list[0]->single_entity)
+   return -EINVAL;
+   sched_list[0]->single_entity = entity;
+   entity->single_sched = sched_list[0];
+   }
}
 
init_completion(>entity_idle);
@@ -126,7 +133,8 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity 
*entity,
struct drm_gpu_scheduler **sched_list,
unsigned int num_sched_list)
 {
-   WARN_ON(!num_sched_list || !sched_list);
+   WARN_ON(!num_sched_list || !sched_list ||
+   entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY);
 
entity->sched_list = sched_list;
entity->num_sched_list = num_sched_list;
@@ -196,13 +204,16 @@ static void drm_sched_entity_kill(struct drm_sched_entity 
*entity)
 {
struct drm_sched_job *job;
struct dma_fence *prev;
+   bool single_entity =
+   entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY;
 
-   if (!entity->rq)
+   if (!entity->rq && !single_entity)
return;
 
spin_lock(>rq_lock);
entity->stopped = true;
-   drm_sched_rq_remove_entity(entity->rq, entity);
+   if (!single_entity)
+   drm_sched_rq_remove_entity(entity->rq, entity);
spin_unlock(>rq_lock);
 
/* Make sure this entity is not used by the scheduler at the moment */
@@ -224,6 +235,21 @@ static void drm_sched_entity_kill(struct drm_sched_entity 
*entity)
dma_fence_put(prev);
 }
 
+/**
+ * drm_sched_entity_to_scheduler - Schedule entity to GPU scheduler
+ * @entity: scheduler entity
+ *
+ * Returns GPU scheduler for the entity
+ */
+struct drm_gpu_scheduler *
+drm_sched_entity_to_scheduler(struct drm_sched_entity *entity)
+{
+   bool single_entity =
+   entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY;
+
+   return single_entity ? entity->single_sched : entity->rq->sched;
+}
+
 /**
  * drm_sched_entity_flush - Flush a context entity
  *
@@ -241,11 +267,13 @@ long drm_sched_entity_flush(struct drm_sched_entity 
*entity, long timeout)
struct drm_gpu_scheduler *sched;
struct task_struct *last_user;
long ret = timeout;
+   bool single_entity =
+   entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY;
 
-   if (!entity->rq)
+   if (!entity->rq && !single_entity)
return 0;
 
-   sched = entity->rq->sched;
+   sched = drm_sched_entity_to_scheduler(entity);
/**
 * The client will not queue more IBs during this fini, consume existing
 * queued IBs or discard them on SIGKILL
@@ -338,7 +366,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
container_of(cb, struct drm_sched_entity, cb);
 
drm_sched_entity_clear_dep(f, cb);
-   drm_sched_wakeup(entity->rq->sched);
+   drm_sched_wakeup(drm_sched_entity_to_scheduler(entity));
 }
 
 /**
@@ -352,6 +380,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
 void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
   enum drm_sched_priority priority)
 {
+   WARN_ON(entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY);
+
spin_lock(>rq_lock);
entity->priority = priority;
spin_unlock(>rq_lock);
@@ -364,7 +394,7 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority);
  */
 static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 {
-   struct drm_gpu_scheduler *sched = entity->rq->sched;
+   struct drm_gpu_scheduler *sched = drm_sched_entity_to_scheduler(entity);
struct dma_fence *fence = 

[RFC PATCH 08/10] dma-buf/dma-fence: Introduce long-running completion fences

2023-04-03 Thread Matthew Brost
From: Thomas Hellström 

For long-running workloads, drivers either need to open-code completion
waits, invent their own synchronization primitives or internally use
dma-fences that do not obey the cross-driver dma-fence protocol, but
without any lockdep annotation all these approaches are error prone.

So since for example the drm scheduler uses dma-fences it is desirable for
a driver to be able to use it for throttling and error handling also with
internal dma-fences that do not obey the cross-driver dma-fence protocol.

Introduce long-running completion fences in form of dma-fences, and add
lockdep annotation for them. In particular:

* Do not allow waiting under any memory management locks.
* Do not allow to attach them to a dma-resv object.
* Introduce a new interface for adding callbacks making the helper adding
  a callback sign off on that it is aware that the dma-fence may not
  complete anytime soon. Typically this will be the scheduler chaining
  a new long-running fence on another one.

Signed-off-by: Matthew Brost 
Signed-off-by: Thomas Hellström 
---
 drivers/dma-buf/dma-fence.c | 142 ++--
 drivers/dma-buf/dma-resv.c  |   5 ++
 include/linux/dma-fence.h   |  55 +-
 3 files changed, 160 insertions(+), 42 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index f177c56269bb..9726b2a3c67d 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -111,6 +111,20 @@ static atomic64_t dma_fence_context_counter = 
ATOMIC64_INIT(1);
  * drivers/gpu should ever call dma_fence_wait() in such contexts.
  */
 
+/**
+ * DOC: Long-Running (lr) dma-fences.
+ *
+ * * Long-running dma-fences are NOT required to complete in reasonable time.
+ *   Typically they signal completion of user-space controlled workloads and
+ *   as such, need to never be part of a cross-driver contract, never waited
+ *   for inside a kernel lock, nor attached to a dma-resv. There are helpers
+ *   and warnings in place to help facilitate that that never happens.
+ *
+ * * The motivation for their existense is that helpers that are intended to
+ *   be used by drivers may use dma-fences that, given the workloads mentioned
+ *   above, become long-running.
+ */
+
 static const char *dma_fence_stub_get_name(struct dma_fence *fence)
 {
 return "stub";
@@ -284,6 +298,34 @@ static struct lockdep_map dma_fence_lockdep_map = {
.name = "dma_fence_map"
 };
 
+static struct lockdep_map dma_fence_lr_lockdep_map = {
+   .name = "dma_fence_lr_map"
+};
+
+static bool __dma_fence_begin_signalling(struct lockdep_map *map)
+{
+   /* explicitly nesting ... */
+   if (lock_is_held_type(map, 1))
+   return true;
+
+   /* rely on might_sleep check for soft/hardirq locks */
+   if (in_atomic())
+   return true;
+
+   /* ... and non-recursive readlock */
+   lock_acquire(map, 0, 0, 1, 1, NULL, _RET_IP_);
+
+   return false;
+}
+
+static void __dma_fence_end_signalling(bool cookie, struct lockdep_map *map)
+{
+   if (cookie)
+   return;
+
+   lock_release(map, _RET_IP_);
+}
+
 /**
  * dma_fence_begin_signalling - begin a critical DMA fence signalling section
  *
@@ -300,18 +342,7 @@ static struct lockdep_map dma_fence_lockdep_map = {
  */
 bool dma_fence_begin_signalling(void)
 {
-   /* explicitly nesting ... */
-   if (lock_is_held_type(_fence_lockdep_map, 1))
-   return true;
-
-   /* rely on might_sleep check for soft/hardirq locks */
-   if (in_atomic())
-   return true;
-
-   /* ... and non-recursive readlock */
-   lock_acquire(_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_);
-
-   return false;
+   return __dma_fence_begin_signalling(_fence_lockdep_map);
 }
 EXPORT_SYMBOL(dma_fence_begin_signalling);
 
@@ -323,25 +354,61 @@ EXPORT_SYMBOL(dma_fence_begin_signalling);
  */
 void dma_fence_end_signalling(bool cookie)
 {
-   if (cookie)
-   return;
-
-   lock_release(_fence_lockdep_map, _RET_IP_);
+   __dma_fence_end_signalling(cookie, _fence_lockdep_map);
 }
 EXPORT_SYMBOL(dma_fence_end_signalling);
 
-void __dma_fence_might_wait(void)
+/**
+ * dma_fence_lr begin_signalling - begin a critical long-running DMA fence
+ * signalling section
+ *
+ * Drivers should use this to annotate the beginning of any code section
+ * required to eventually complete _fence by calling dma_fence_signal().
+ *
+ * The end of these critical sections are annotated with
+ * dma_fence_lr_end_signalling(). Ideally the section should encompass all
+ * locks that are ever required to signal a long-running dma-fence.
+ *
+ * Return: An opaque cookie needed by the implementation, which needs to be
+ * passed to dma_fence_lr end_signalling().
+ */
+bool dma_fence_lr_begin_signalling(void)
+{
+   return __dma_fence_begin_signalling(_fence_lr_lockdep_map);
+}
+EXPORT_SYMBOL(dma_fence_lr_begin_signalling);
+
+/**
+ * 

[RFC PATCH 06/10] drm/sched: Submit job before starting TDR

2023-04-03 Thread Matthew Brost
If the TDR is set to a value, it can fire before a job is submitted in
drm_sched_main. The job should always be submitted before the TDR
fires, fix this ordering.

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/scheduler/sched_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 6ae710017024..4eac02d212c1 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -1150,10 +1150,10 @@ static void drm_sched_main(struct work_struct *w)
s_fence = sched_job->s_fence;
 
atomic_inc(>hw_rq_count);
-   drm_sched_job_begin(sched_job);
 
trace_drm_run_job(sched_job, entity);
fence = sched->ops->run_job(sched_job);
+   drm_sched_job_begin(sched_job);
complete_all(>entity_idle);
drm_sched_fence_scheduled(s_fence);
 
-- 
2.34.1



[RFC PATCH 01/10] drm/sched: Convert drm scheduler to use a work queue rather than kthread

2023-04-03 Thread Matthew Brost
In XE, the new Intel GPU driver, a choice has been made to have a 1 to 1
mapping between a drm_gpu_scheduler and drm_sched_entity. At first this
seems a bit odd but let us explain the reasoning below.

1. In XE the submission order from multiple drm_sched_entity is not
guaranteed to be the same completion even if targeting the same hardware
engine. This is because in XE we have a firmware scheduler, the GuC,
which is allowed to reorder, timeslice, and preempt submissions. If using a
shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls
apart as the TDR expects submission order == completion order. Using a
dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.

2. In XE submissions are done via programming a ring buffer (circular
buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the
limit of number jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow
control on the ring for free.

A problem with this design is currently a drm_gpu_scheduler uses a
kthread for submission / job cleanup. This doesn't scale if a large
number of drm_gpu_scheduler are used. To work around the scaling issue,
use a worker rather than kthread for submission / job cleanup.

v2:
  - (Rob Clark) Fix msm build
  - Pass in run work queue

Signed-off-by: Matthew Brost 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c |  14 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  |  14 +--
 drivers/gpu/drm/etnaviv/etnaviv_sched.c |   2 +-
 drivers/gpu/drm/lima/lima_sched.c   |   2 +-
 drivers/gpu/drm/msm/adreno/adreno_device.c  |   6 +-
 drivers/gpu/drm/msm/msm_ringbuffer.c|   2 +-
 drivers/gpu/drm/panfrost/panfrost_job.c |   2 +-
 drivers/gpu/drm/scheduler/sched_main.c  | 126 
 drivers/gpu/drm/v3d/v3d_sched.c |  10 +-
 include/drm/gpu_scheduler.h |  14 ++-
 10 files changed, 110 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index f60753f97ac5..9c2a10aeb0b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1489,9 +1489,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file 
*m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
 
-   if (!ring || !ring->sched.thread)
+   if (!ring || !ring->sched.ready)
continue;
-   kthread_park(ring->sched.thread);
+   drm_sched_run_wq_stop(>sched);
}
 
seq_printf(m, "run ib test:\n");
@@ -1505,9 +1505,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file 
*m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
 
-   if (!ring || !ring->sched.thread)
+   if (!ring || !ring->sched.ready)
continue;
-   kthread_unpark(ring->sched.thread);
+   drm_sched_run_wq_start(>sched);
}
 
up_write(>reset_domain->sem);
@@ -1727,7 +1727,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
ring = adev->rings[val];
 
-   if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
+   if (!ring || !ring->funcs->preempt_ib || !ring->sched.ready)
return -EINVAL;
 
/* the last preemption failed */
@@ -1745,7 +1745,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
goto pro_end;
 
/* stop the scheduler */
-   kthread_park(ring->sched.thread);
+   drm_sched_run_wq_stop(>sched);
 
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
@@ -1779,7 +1779,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
 failure:
/* restart the scheduler */
-   kthread_unpark(ring->sched.thread);
+   drm_sched_run_wq_start(>sched);
 
up_read(>reset_domain->sem);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fac9312b1695..00c9c03c8f94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2364,7 +2364,7 @@ static int amdgpu_device_init_schedulers(struct 
amdgpu_device *adev)
break;
}
 
-   r = drm_sched_init(>sched, _sched_ops,
+   r = drm_sched_init(>sched, _sched_ops, NULL,
   ring->num_hw_submission, 
amdgpu_job_hang_limit,
   timeout, adev->reset_domain->wq,
   ring->sched_score, ring->name,
@@ -4627,7 +4627,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device 
*adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
 
-   if (!ring || !ring->sched.thread)
+

Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode

2023-04-03 Thread Marek Vasut

On 4/3/23 23:15, Francesco Dolcini wrote:

On Mon, Apr 03, 2023 at 04:06:22PM -0500, Rob Herring wrote:

On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote:

From: Francesco Dolcini 

SN65DSI8[34] device supports burst video mode and non-burst video mode
with sync events or with sync pulses packet transmission as described in
the DSI specification.

Add property to select the expected mode, this allows for example to
select a mode that is compatible with the DSI host interface.


Why does this need to be in DT?



The source and sink drivers should know what their capabilities are
and pick the best common one.


Is there a best mode?


I think yes:
Burst (is better than) Sync Events (is better than) Sync Pulses

Burst is most energy efficient, Sync-Pulses is the simplest and least 
energy efficient and with most constraints.



Isn't this a decision how do we want the 2 peers
to communicate?


I don't think so, I believe the Host and nearest bridge should be able 
to negotiate their capabilities (mode, link rate, etc.) within the DRM 
subsystem.



For the MIPI-DSI Linux/DRM experts: am I missing something? Is there
another way to have a DSI video sink to ask for a specific mode?


I'm afraid this is not implemented yet, so ... plumbing needed.

[...]


[PATCH v2] drm/scdc-helper: Pimp SCDC debugs

2023-04-03 Thread Ville Syrjala
From: Ville Syrjälä 

Include the device and connector information in the SCDC
debugs. Makes it easier to figure out who did what.

v2: Rely on connector->ddc (Maxime)

Cc: Andrzej Hajda 
Cc: Neil Armstrong 
Cc: Robert Foss 
Cc: Laurent Pinchart 
Cc: Jonas Karlman 
Cc: Jernej Skrabec 
Cc: Thierry Reding 
Cc: Emma Anholt 
Cc: Maxime Ripard 
Cc: intel-...@lists.freedesktop.org
Cc: linux-te...@vger.kernel.org
Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/bridge/synopsys/dw-hdmi.c |  8 ++--
 drivers/gpu/drm/display/drm_scdc_helper.c | 46 +++
 drivers/gpu/drm/i915/display/intel_ddi.c  |  4 +-
 drivers/gpu/drm/i915/display/intel_hdmi.c |  8 +---
 drivers/gpu/drm/tegra/sor.c   | 15 +++-
 drivers/gpu/drm/vc4/vc4_hdmi.c| 21 ++-
 include/drm/display/drm_scdc_helper.h |  7 ++--
 7 files changed, 59 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c 
b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index aa51c61a78c7..603bb3c51027 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi 
*hdmi,
/* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
if (dw_hdmi_support_scdc(hdmi, display)) {
if (mtmdsclock > HDMI14_MAX_TMDSCLK)
-   drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1);
+   drm_scdc_set_high_tmds_clock_ratio(>connector, 1);
else
-   drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0);
+   drm_scdc_set_high_tmds_clock_ratio(>connector, 0);
}
 }
 EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
@@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
 
/* Enabled Scrambling in the Sink */
-   drm_scdc_set_scrambling(hdmi->ddc, 1);
+   drm_scdc_set_scrambling(>connector, 1);
 
/*
 * To activate the scrambler feature, you must ensure
@@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
HDMI_MC_SWRSTZ);
-   drm_scdc_set_scrambling(hdmi->ddc, 0);
+   drm_scdc_set_scrambling(>connector, 0);
}
}
 
diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c 
b/drivers/gpu/drm/display/drm_scdc_helper.c
index c3ad4ab2b456..6d2f244e5830 100644
--- a/drivers/gpu/drm/display/drm_scdc_helper.c
+++ b/drivers/gpu/drm/display/drm_scdc_helper.c
@@ -26,6 +26,8 @@
 #include 
 
 #include 
+#include 
+#include 
 #include 
 
 /**
@@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write);
 
 /**
  * drm_scdc_get_scrambling_status - what is status of scrambling?
- * @adapter: I2C adapter for DDC channel
+ * @connector: connector
  *
  * Reads the scrambler status over SCDC, and checks the
  * scrambling status.
@@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write);
  * Returns:
  * True if the scrambling is enabled, false otherwise.
  */
-bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter)
+bool drm_scdc_get_scrambling_status(struct drm_connector *connector)
 {
u8 status;
int ret;
 
-   ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, );
+   ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, );
if (ret < 0) {
-   DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret);
+   drm_dbg_kms(connector->dev,
+   "[CONNECTOR:%d:%s] Failed to read scrambling 
status: %d\n",
+   connector->base.id, connector->name, ret);
return false;
}
 
@@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
 
 /**
  * drm_scdc_set_scrambling - enable scrambling
- * @adapter: I2C adapter for DDC channel
+ * @connector: connector
  * @enable: bool to indicate if scrambling is to be enabled/disabled
  *
  * Writes the TMDS config register over SCDC channel, and:
@@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
  * Returns:
  * True if scrambling is set/reset successfully, false otherwise.
  */
-bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable)
+bool drm_scdc_set_scrambling(struct drm_connector *connector,
+bool enable)
 {
u8 config;
int ret;
 
-   ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, );
+   ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, );
if (ret < 0) {
-   DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret);
+   drm_dbg_kms(connector->dev,
+

Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog

2023-04-03 Thread Dmitry Baryshkov

On 03/04/2023 22:07, Abhinav Kumar wrote:



On 4/3/2023 11:48 AM, Dmitry Baryshkov wrote:

On 03/04/2023 21:06, Abhinav Kumar wrote:



On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote:

This huge series attempts to restructure the DPU HW catalog into a
manageable and reviewable data set. In order to ease review and testing
I merged all the necessary fixes into this series. Also I cherry-picked
& slightly fixed Konrad's patch adding size to the SSPP and INTF 
macros.




I had to first dig up some history about why dpu catalog grew so much 
in the first place before starting this review. When the DPU driver 
first landed (which pre-dates my work in upstream), it looks like it 
followed mdp5 model from mdp5_cfg.c. But looks like as the number of 
chipsets which use DPU kept growing, this is becoming a burden.


As everyone knows, downstream follows a devicetree model for the dpu 
hardware and that should have always been the case. Perhaps in the 
last 2-3 years more time could have been spent on standardizing the 
bindings used for hw blocks in order to maintain a less hard-coded 
catalog file and more in the device tree.


Unfortunately, this is not how the upstream DT works. If something is 
a constant hardware property, it should not go into the DT. So pushing 
catalog to dt would have been immediately frowned upon by Rob Herring 
or Krzysztof.




Yes certainly we cannot put hardware specific properties. But in 
general, modelling the hardware like the number of sspps, number of 
interfaces and number of dspps etc can be a bit abstracted? like 
blk-type and blk-offset? blk-type can be a custom string because each 
block is named differently for different vendors?


No.



The number of blk_offsets decides number of blocks. Its not constant 
right. We are seeing it varying with chipsets.


Then the catalog would have just been a place to parse the device 
tree, set the feature capability based on chipset (refer 
_sde_hardware_pre_caps). That way offsets , number of blocks and the 
blocks themselves still come from the device tree but perhaps some 
specific features are at SOC level for which the catalog still stays.


That being said, I thought of different strategies even before the 
review but two issues prevented me from suggesting those ideas (one 
of which I am seeing even here , which I am going to suggest below 
and also suggest why it wont work).


1) For the same DPU major/minor version, some features might get 
dropped or even get added with different SOCs as overall the system 
capabilities might differ like number of SSPPs or memory footprint of 
the SOC etc.


So there is no good way right now to generalize any dpu catalog or to 
tie it with a DPU major/minor version. We will have to stick with a 
per-SOC model.


Up to now, the SoC was equal to major+minor. Could you please be more 
specific here, if there are any actual differences within major+minor 
families?




So lets say, the same DPU major/minor version is used but we have only 
one DSI on one chipset Vs two DSIs on the other, some of the features 
which come into play only for dual DSI cannot be used. Like broadcasting 
a DCS command across two DSIs etc. This is a very basic example, but 
there are many examples.


I'm asking for the exact details, because up to now the driver was using 
major:minor to find the catalog entry. It was modelled this way in 
sdm845/sc7180, then it was natural for us to continue down this path.


I will put reworking catalog to be bound to the binding data





This is what led me to not pursue that route.

2) For the same DPU major/minor version, even if core-DPU is same (in 
terms of SSPP, DSPP etc), the number of interfaces can change. So 
again no room to generalize same DPU hw version.


Again, I might be just scratching the surface, but I have not observed 
this.




This typically happens based on what products that chipset is catered 
towards. Thats pretty much what I can share. But more number of 
interfaces for more number of displays / use-cases.


Ack, I will not that we should be more careful about this items.





3) For the same reason as (1) and (2), I think the de-duplication 
strategy used in this series is not correct. The idea of 
dpu_hw_version_num_layer_mixer is just not scalable as I dont know 
how many variants that will lead to. So it seems like just an attempt 
to de-duplicate which perhaps works today for existing dpu chipsets 
in upstream but by no means scalable. Lets go ahead with per-SOC 
catalog file but lets live with some amount of duplication between 
them if we really have to split it across header files.


Indeed, this leads to minor differences on top of major+lm. However, I 
think, the overall complexity is lowered.


Nevertheless, let's land the major set of patches and leave 
generalization for the later time. I think, with the addition of the 
next several platforms we will see the drill.




Yes, I would say lets handle generalization/de-duplication later when 

Re: [Freedreno] [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation

2023-04-03 Thread Jessica Zhang




On 4/3/2023 2:51 PM, Dmitry Baryshkov wrote:

On 04/04/2023 00:45, Jessica Zhang wrote:



On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Correct the math for slice_last_group_size so that it matches the
calculations downstream.

Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC")
Signed-off-by: Jessica Zhang 
Reviewed-by: Dmitry Baryshkov 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c

index b952f7d2b7f5..9312a8d7fbd9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc 
*hw_dsc,

  if (is_cmd_mode)
  initial_lines += 1;
-    slice_last_group_size = 3 - (dsc->slice_width % 3);
+    slice_last_group_size = dsc->slice_width % 3;
+
+    if (slice_last_group_size == 0)
+    slice_last_group_size = 3;


Hmm. As I went on checking this against techpack:

mod = dsc->slice_width % 3

mod | techpack | old | your_patch
0   | 2    | 3   | 3
1   | 0    | 2   | 1
2   | 1    | 1   | 2

So, obviously neither old nor new code match the calculations of the 
techpack. If we assume that sde_dsc_helper code is correct (which I 
have no reasons to doubt), then the proper code should be:


slice_last_group_size = (dsc->slice_width + 2) % 3;

Could you please doublecheck and adjust.


Hi Dmitry,

The calculation should match the techpack calculation (I kept the 
`data |= ((slice_last_group_size - 1) << 18);` a few lines down).


And the techpack doesn't have -1.

I think the following code piece would be more convenient as it is simpler:

slice_last_group_size = (dsc->slice_width + 2) % 3;
[...]
data |= slice_last_group_size << 18;

If you agree, could you please switch to it?


Sure.

Thanks,

Jessica Zhang





Thanks,

Jessica Zhang




+
  data = (initial_lines << 20);
  data |= ((slice_last_group_size - 1) << 18);
  /* bpp is 6.4 format, 4 LSBs bits are for fractional part */



--
With best wishes
Dmitry



--
With best wishes
Dmitry



[PATCH 2/2] drm/bridge: lt9611: Do not generate HFP/HBP/HSA and EOT packet

2023-04-03 Thread Marek Vasut
Do not generate the HS front and back porch gaps, the HSA gap and
EOT packet, as these packets are not required. This makes the bridge
work with Samsung DSIM on i.MX8MM and i.MX8MP.

Signed-off-by: Marek Vasut 
---
Cc: Andrzej Hajda 
Cc: Daniel Vetter 
Cc: David Airlie 
Cc: Jagan Teki 
Cc: Jernej Skrabec 
Cc: Jonas Karlman 
Cc: Laurent Pinchart 
Cc: Michael Walle 
Cc: Neil Armstrong 
Cc: Robert Foss 
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/bridge/lontium-lt9611.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c 
b/drivers/gpu/drm/bridge/lontium-lt9611.c
index a25d21a7d5c19..151efe92711c4 100644
--- a/drivers/gpu/drm/bridge/lontium-lt9611.c
+++ b/drivers/gpu/drm/bridge/lontium-lt9611.c
@@ -774,7 +774,9 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct 
lt9611 *lt9611,
dsi->lanes = 4;
dsi->format = MIPI_DSI_FMT_RGB888;
dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
- MIPI_DSI_MODE_VIDEO_HSE;
+ MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
+ MIPI_DSI_MODE_VIDEO_NO_HFP | 
MIPI_DSI_MODE_VIDEO_NO_HBP |
+ MIPI_DSI_MODE_NO_EOT_PACKET;
 
ret = devm_mipi_dsi_attach(dev, dsi);
if (ret < 0) {
-- 
2.39.2



[PATCH 1/2] drm/bridge: lt9211: Do not generate HFP/HBP/HSA and EOT packet

2023-04-03 Thread Marek Vasut
Do not generate the HS front and back porch gaps, the HSA gap and
EOT packet, as these packets are not required. This makes the bridge
work with Samsung DSIM on i.MX8MM and i.MX8MP.

Signed-off-by: Marek Vasut 
---
Cc: Andrzej Hajda 
Cc: Daniel Vetter 
Cc: David Airlie 
Cc: Jagan Teki 
Cc: Jernej Skrabec 
Cc: Jonas Karlman 
Cc: Laurent Pinchart 
Cc: Michael Walle 
Cc: Neil Armstrong 
Cc: Robert Foss 
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/bridge/lontium-lt9211.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c 
b/drivers/gpu/drm/bridge/lontium-lt9211.c
index 3e19fff6547a2..00db681512385 100644
--- a/drivers/gpu/drm/bridge/lontium-lt9211.c
+++ b/drivers/gpu/drm/bridge/lontium-lt9211.c
@@ -709,7 +709,9 @@ static int lt9211_host_attach(struct lt9211 *ctx)
dsi->lanes = dsi_lanes;
dsi->format = MIPI_DSI_FMT_RGB888;
dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
- MIPI_DSI_MODE_VIDEO_HSE;
+ MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
+ MIPI_DSI_MODE_VIDEO_NO_HFP | 
MIPI_DSI_MODE_VIDEO_NO_HBP |
+ MIPI_DSI_MODE_NO_EOT_PACKET;
 
ret = devm_mipi_dsi_attach(dev, dsi);
if (ret < 0) {
-- 
2.39.2



Re: [Freedreno] [PATCH v3 01/38] drm/msm/dpu: Allow variable SSPP/INTF_BLK size

2023-04-03 Thread Abhinav Kumar




On 4/1/2023 2:37 AM, Dmitry Baryshkov wrote:

On 01/04/2023 03:57, Abhinav Kumar wrote:



On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote:

From: Konrad Dybcio 

These blocks are of variable length on different SoCs. Set the
correct values where I was able to retrieve it from downstream
DTs and leave the old defaults (0x1c8 for sspp and 0x280 for
intf) otherwise.

Signed-off-by: Konrad Dybcio 
[DB: fixed some of lengths]
Signed-off-by: Dmitry Baryshkov 


Can you please split this to two changes one for SSPP and one for INTF 
block?


Ack, of course. Thanks a lot for the comments to this patch. This is, I 
think,  what we missed from the vendor dtsi files.





---
  .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c    | 243 +-
  1 file changed, 122 insertions(+), 121 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c

index 6840b22a4159..e44e7455a56e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -1172,11 +1172,11 @@ static const struct dpu_sspp_sub_blks 
sdm845_dma_sblk_1 = _DMA_SBLK("9", 2);
  static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = 
_DMA_SBLK("10", 3);
  static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = 
_DMA_SBLK("11", 4);

-#define SSPP_BLK(_name, _id, _base, _features, \
+#define SSPP_BLK(_name, _id, _base, _len, _features, \
  _sblk, _xinid, _type, _clkctrl) \
  { \
  .name = _name, .id = _id, \
-    .base = _base, .len = 0x1c8, \
+    .base = _base, .len = _len, \
  .features = _features, \
  .sblk = &_sblk, \
  .xin_id = _xinid, \
@@ -1185,40 +1185,40 @@ static const struct dpu_sspp_sub_blks 
sdm845_dma_sblk_3 = _DMA_SBLK("11", 4);

  }
  static const struct dpu_sspp_cfg msm8998_sspp[] = {
-    SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_MSM8998_MASK,
+    SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, 0x1ac, VIG_MSM8998_MASK,
  msm8998_vig_sblk_0, 0,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
-    SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_MSM8998_MASK,
+    SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, 0x1ac, VIG_MSM8998_MASK,
  msm8998_vig_sblk_1, 4,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1),
-    SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_MSM8998_MASK,
+    SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, 0x1ac, VIG_MSM8998_MASK,
  msm8998_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2),
-    SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_MSM8998_MASK,
+    SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, 0x1ac, VIG_MSM8998_MASK,
  msm8998_vig_sblk_3, 12,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3),
-    SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000,  DMA_MSM8998_MASK,
+    SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, 0x1ac, DMA_MSM8998_MASK,
  sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0),
-    SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000,  DMA_MSM8998_MASK,
+    SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, 0x1ac, DMA_MSM8998_MASK,
  sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1),
-    SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000,  DMA_CURSOR_MSM8998_MASK,
+    SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, 0x1ac, 
DMA_CURSOR_MSM8998_MASK,

  sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA2),
-    SSPP_BLK("sspp_11", SSPP_DMA3, 0x2a000,  DMA_CURSOR_MSM8998_MASK,
+    SSPP_BLK("sspp_11", SSPP_DMA3, 0x2a000, 0x1ac, 
DMA_CURSOR_MSM8998_MASK,

  sdm845_dma_sblk_3, 13, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA3),
  };
  static const struct dpu_sspp_cfg sdm845_sspp[] = {
-    SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, 0x1c8, VIG_SDM845_MASK_SDMA,
  sdm845_vig_sblk_0, 0,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
-    SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, 0x1c8, VIG_SDM845_MASK_SDMA,
  sdm845_vig_sblk_1, 4,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1),
-    SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, 0x1c8, VIG_SDM845_MASK_SDMA,
  sdm845_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2),
-    SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, 0x1c8, VIG_SDM845_MASK_SDMA,
  sdm845_vig_sblk_3, 12,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3),
-    SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000,  DMA_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, 0x1c8, DMA_SDM845_MASK_SDMA,
  sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0),
-    SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000,  DMA_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, 0x1c8, DMA_SDM845_MASK_SDMA,
  sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1),
-    SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, 
DMA_CURSOR_SDM845_MASK_SDMA,
+    SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, 0x1c8, 
DMA_CURSOR_SDM845_MASK_SDMA,

  sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA2),
-    

Re: [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation

2023-04-03 Thread Dmitry Baryshkov

On 04/04/2023 00:45, Jessica Zhang wrote:



On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Correct the math for slice_last_group_size so that it matches the
calculations downstream.

Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC")
Signed-off-by: Jessica Zhang 
Reviewed-by: Dmitry Baryshkov 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c

index b952f7d2b7f5..9312a8d7fbd9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc 
*hw_dsc,

  if (is_cmd_mode)
  initial_lines += 1;
-    slice_last_group_size = 3 - (dsc->slice_width % 3);
+    slice_last_group_size = dsc->slice_width % 3;
+
+    if (slice_last_group_size == 0)
+    slice_last_group_size = 3;


Hmm. As I went on checking this against techpack:

mod = dsc->slice_width % 3

mod | techpack | old | your_patch
0   | 2    | 3   | 3
1   | 0    | 2   | 1
2   | 1    | 1   | 2

So, obviously neither old nor new code match the calculations of the 
techpack. If we assume that sde_dsc_helper code is correct (which I 
have no reasons to doubt), then the proper code should be:


slice_last_group_size = (dsc->slice_width + 2) % 3;

Could you please doublecheck and adjust.


Hi Dmitry,

The calculation should match the techpack calculation (I kept the `data 
|= ((slice_last_group_size - 1) << 18);` a few lines down).


And the techpack doesn't have -1.

I think the following code piece would be more convenient as it is simpler:

slice_last_group_size = (dsc->slice_width + 2) % 3;
[...]
data |= slice_last_group_size << 18;

If you agree, could you please switch to it?



Thanks,

Jessica Zhang




+
  data = (initial_lines << 20);
  data |= ((slice_last_group_size - 1) << 18);
  /* bpp is 6.4 format, 4 LSBs bits are for fractional part */



--
With best wishes
Dmitry



--
With best wishes
Dmitry



Re: [PATCH RFC v2 5/6] drm/msm/dsi: Use MSM and DRM DSC helper methods

2023-04-03 Thread Jessica Zhang




On 4/2/2023 4:29 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Use MSM and DRM DSC helper methods to configure DSC for DSI.

Changes in V2:
- *_calculate_initial_scale_value --> *_set_initial_scale_value
- Split pkt_per_line and eol_byte_num changes to a separate patch
- Moved pclk_per_line calculation to hdisplay adjustment in `if (dsc)`
   block of dsi_update_dsc_timing()

Signed-off-by: Jessica Zhang 
---
  drivers/gpu/drm/msm/dsi/dsi_host.c | 11 ---
  1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c 
b/drivers/gpu/drm/msm/dsi/dsi_host.c

index 74d38f90398a..b7ab81737473 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -28,6 +28,7 @@
  #include "dsi.xml.h"
  #include "sfpb.xml.h"
  #include "dsi_cfg.h"
+#include "msm_dsc_helper.h"
  #include "msm_kms.h"
  #include "msm_gem.h"
  #include "phy/dsi_phy.h"
@@ -848,7 +849,7 @@ static void dsi_update_dsc_timing(struct 
msm_dsi_host *msm_host, bool is_cmd_mod

  /* first calculate dsc parameters and then program
   * compress mode registers
   */
-    slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width);
+    slice_per_intf = msm_dsc_get_slice_per_intf(dsc, hdisplay);
  /*
   * If slice_count is greater than slice_per_intf
@@ -951,7 +952,11 @@ static void dsi_timing_setup(struct msm_dsi_host 
*msm_host, bool is_bonded_dsi)

   * pulse width same
   */
  h_total -= hdisplay;
-    hdisplay /= 3;
+    if (msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)
+    hdisplay = msm_dsc_get_uncompressed_pclk_per_line(dsc, 
hdisplay,

+    dsi_get_bpp(msm_host->format)) / 3;
+    else
+    hdisplay /= 3;
  h_total += hdisplay;
  ha_end = ha_start + hdisplay;


This chunk changes the calculated value (two other are mere updates to 
use new functions). Please move it to a separate patch, add proper 
description/justification and possibly a Fixes tag, if the original code 
was incorrect.


Hi Dmitry,

Acked.

Thanks,

Jessica Zhang




  }
@@ -1759,7 +1764,7 @@ static int dsi_populate_dsc_params(struct 
msm_dsi_host *msm_host, struct drm_dsc

  return ret;
  }
-    dsc->initial_scale_value = 32;
+    drm_dsc_set_initial_scale_value(dsc);
  dsc->line_buf_depth = dsc->bits_per_component + 1;
  return drm_dsc_compute_rc_parameters(dsc);



--
With best wishes
Dmitry



Re: [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation

2023-04-03 Thread Jessica Zhang




On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Correct the math for slice_last_group_size so that it matches the
calculations downstream.

Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC")
Signed-off-by: Jessica Zhang 
Reviewed-by: Dmitry Baryshkov 
---
  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c 
b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c

index b952f7d2b7f5..9312a8d7fbd9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc 
*hw_dsc,

  if (is_cmd_mode)
  initial_lines += 1;
-    slice_last_group_size = 3 - (dsc->slice_width % 3);
+    slice_last_group_size = dsc->slice_width % 3;
+
+    if (slice_last_group_size == 0)
+    slice_last_group_size = 3;


Hmm. As I went on checking this against techpack:

mod = dsc->slice_width % 3

mod | techpack | old | your_patch
0   | 2    | 3   | 3
1   | 0    | 2   | 1
2   | 1    | 1   | 2

So, obviously neither old nor new code match the calculations of the 
techpack. If we assume that sde_dsc_helper code is correct (which I have 
no reasons to doubt), then the proper code should be:


slice_last_group_size = (dsc->slice_width + 2) % 3;

Could you please doublecheck and adjust.


Hi Dmitry,

The calculation should match the techpack calculation (I kept the `data 
|= ((slice_last_group_size - 1) << 18);` a few lines down).


Thanks,

Jessica Zhang




+
  data = (initial_lines << 20);
  data |= ((slice_last_group_size - 1) << 18);
  /* bpp is 6.4 format, 4 LSBs bits are for fractional part */



--
With best wishes
Dmitry



Re: [PATCH RFC v2 2/6] drm/msm: Add MSM-specific DSC helper methods

2023-04-03 Thread Jessica Zhang




On 4/2/2023 4:21 AM, Dmitry Baryshkov wrote:

On 31/03/2023 21:49, Jessica Zhang wrote:

Introduce MSM-specific DSC helper methods, as some calculations are
common between DP and DSC.

Changes in v2:
- Moved files up to msm/ directory
- Dropped get_comp_ratio() helper
- Used drm_int2fixp() to convert to integers to fp
- Style changes to improve readability
- Dropped unused bpp variable in msm_dsc_get_dce_bytes_per_line()
- Changed msm_dsc_get_slice_per_intf() to a static inline method
- Dropped last division step of msm_dsc_get_pclk_per_line() and changed
   method name accordingly
- Changed DSC_BPP macro to drm_dsc_get_bpp_int() helper method
- Fixed some math issues caused by passing in incorrect types to
   drm_fixed methods in get_bytes_per_soft_slice()

Signed-off-by: Jessica Zhang 
---
  drivers/gpu/drm/msm/Makefile |  1 +
  drivers/gpu/drm/msm/msm_dsc_helper.c | 53 


  drivers/gpu/drm/msm/msm_dsc_helper.h | 42 
  3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 7274c41228ed..b814fc80e2d5 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -94,6 +94,7 @@ msm-y += \
  msm_atomic_tracepoints.o \
  msm_debugfs.o \
  msm_drv.o \
+    msm_dsc_helper.o \
  msm_fb.o \
  msm_fence.o \
  msm_gem.o \
diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.c 
b/drivers/gpu/drm/msm/msm_dsc_helper.c

new file mode 100644
index ..60b73e17e6eb
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_dsc_helper.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights 
reserved

+ */
+
+#include 
+#include 
+#include 
+
+#include "msm_drv.h"
+#include "msm_dsc_helper.h"
+
+static s64 get_bytes_per_soft_slice(struct drm_dsc_config *dsc, int 
intf_width, u32 src_bpp)


intf_width is unused


Hi Dmitry,

Acked.




+{
+    int bpp = msm_dsc_get_bpp_int(dsc);
+    s64 numerator_fp, denominator_fp;
+    s64 comp_ratio_fp = drm_fixp_from_fraction(src_bpp, bpp);
+
+    numerator_fp = drm_int2fixp(dsc->slice_width * 3);


You have lost dsc->bits_per_component here.


This was moved to the denominator calculation, but I'll move it back to 
this line to avoid confusion.




+    denominator_fp = drm_fixp_from_fraction(comp_ratio_fp * 8, 
drm_int2fixp(bpp));


denominator_fp = drm_fixp_from_fraction(src_bpp * 8, bpp);


Acked.




+
+    return drm_fixp_div(numerator_fp, denominator_fp);
+}
+
+u32 msm_dsc_get_eol_byte_num(struct drm_dsc_config *dsc, int 
intf_width, u32 src_bpp)

+{
+    u32 bytes_per_soft_slice, extra_eol_bytes, bytes_per_intf;
+    s64 bytes_per_soft_slice_fp;
+    int slice_per_intf = msm_dsc_get_slice_per_intf(dsc, intf_width);
+
+    bytes_per_soft_slice_fp = get_bytes_per_soft_slice(dsc, 
intf_width, src_bpp);

+    bytes_per_soft_slice = drm_fixp2int_ceil(bytes_per_soft_slice_fp);
+
+    bytes_per_intf = bytes_per_soft_slice * slice_per_intf;
+    extra_eol_bytes = bytes_per_intf % 3;
+    if (extra_eol_bytes != 0)
+    extra_eol_bytes = 3 - extra_eol_bytes;


I become confused here when I checked eol_bytes in the display techpack.

I see that for DP the dp_panel_dsc_pclk_param_calc() calculates 
dsc->eol_bytes_num in this way, the size to pad dsc_byte_count * 
slice_per_intf to 3 bytes.


However, for DSI this is a simple as total_bytes_per_intf % 3 , so it is 
not a padding, but a length of the last chunk.


Could you please clarify? If the techpack code is correct, I'd prefer if 
we return last chunk size here and calculate the padding length in the 
DP driver.


I've double checked the calculations between DP and DSI, and I think 
you're right. Will move the `if (extra_eol_bytes != 0)` block out to DP 
code.





+
+    return extra_eol_bytes;
+}
+
+int msm_dsc_get_uncompressed_pclk_per_line(struct drm_dsc_config 
*dsc, int intf_width, u32 src_bpp)


Basing on Abhinav's description ("pclk_per_line can be only per 
interface") would it better be named as 
msm_dsc_get_uncompressed_pclk_per_intf() ? or 
msm_dsc_get_uncompressed_pclk_for_intf() ?


BTW: if get_bytes_per_soft_slice() doesn't use intf_width, we can 
probably drop it here too.



+{
+    s64 data_width;
+
+    if (!dsc->slice_width || (intf_width < dsc->slice_width))
+    return -EINVAL;


Error code is not validated at dsi_timing_setup. I'd suggest moving 
error checks there and dropping the error handling here. If 
dsc->slice_width is not set, we should stop much earlier than 
drm_bridge's pre_enable() callback.


Acked.

Thanks,

Jessica Zhang




+
+    data_width = drm_fixp_mul(dsc->slice_count,
+    get_bytes_per_soft_slice(dsc, intf_width, src_bpp));
+
+    return drm_fixp2int_ceil(data_width);
+}
diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.h 
b/drivers/gpu/drm/msm/msm_dsc_helper.h

new file mode 100644
index ..743cd324b7d9
--- /dev/null
+++ 

[PATCH] drm/i915/guc: Don't capture Gen8 regs on Gen12 devices

2023-04-03 Thread John . C . Harrison
From: John Harrison 

A pair of pre-Gen12 registers were being included in the Gen12 capture
list. GuC was rejecting those as being invalid and logging errors
about them. So, stop doing it.

Signed-off-by: John Harrison 
Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state 
capture.")
Cc: Alan Previn 
Cc: Umesh Nerlige Ramappa 
Cc: Lucas De Marchi 
Cc: John Harrison 
Cc: Jani Nikula 
Cc: Matt Roper 
Cc: Balasubramani Vivekanandan 
Cc: Daniele Ceraolo Spurio 
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index cf49188db6a6e..e0e793167d61b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -31,12 +31,14 @@
{ FORCEWAKE_MT, 0,  0, "FORCEWAKE" }
 
 #define COMMON_GEN9BASE_GLOBAL \
-   { GEN8_FAULT_TLB_DATA0, 0,  0, "GEN8_FAULT_TLB_DATA0" }, \
-   { GEN8_FAULT_TLB_DATA1, 0,  0, "GEN8_FAULT_TLB_DATA1" }, \
{ ERROR_GEN6,   0,  0, "ERROR_GEN6" }, \
{ DONE_REG, 0,  0, "DONE_REG" }, \
{ HSW_GTT_CACHE_EN, 0,  0, "HSW_GTT_CACHE_EN" }
 
+#define GEN9_GLOBAL \
+   { GEN8_FAULT_TLB_DATA0, 0,  0, "GEN8_FAULT_TLB_DATA0" }, \
+   { GEN8_FAULT_TLB_DATA1, 0,  0, "GEN8_FAULT_TLB_DATA1" }
+
 #define COMMON_GEN12BASE_GLOBAL \
{ GEN12_FAULT_TLB_DATA0,0,  0, "GEN12_FAULT_TLB_DATA0" }, \
{ GEN12_FAULT_TLB_DATA1,0,  0, "GEN12_FAULT_TLB_DATA1" }, \
@@ -142,6 +144,7 @@ static const struct __guc_mmio_reg_descr 
xe_lpd_gsc_inst_regs[] = {
 static const struct __guc_mmio_reg_descr default_global_regs[] = {
COMMON_BASE_GLOBAL,
COMMON_GEN9BASE_GLOBAL,
+   GEN9_GLOBAL,
 };
 
 static const struct __guc_mmio_reg_descr default_rc_class_regs[] = {
-- 
2.39.1



Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode

2023-04-03 Thread Francesco Dolcini
On Mon, Apr 03, 2023 at 04:01:17PM -0500, Rob Herring wrote:
> On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote:
> > On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote:
> > > On 30/03/2023 11:59, Francesco Dolcini wrote:
> > > > From: Francesco Dolcini 
> > > > 
> > > > Add new toshiba,input-rgb-mode property to describe the actual signal
> > > > connection on the parallel RGB input interface.
> > > > 
> > > > Signed-off-by: Francesco Dolcini 
> > > > ---
> > > >  .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++
> > > >  1 file changed, 15 insertions(+)
> > > > 
> > > > diff --git 
> > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > >  
> > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > index 8f22093b61ae..2638121a2223 100644
> > > > --- 
> > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > +++ 
> > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > @@ -42,6 +42,21 @@ properties:
> > > >clock-names:
> > > >  const: refclk
> > > >  
> > > > +  toshiba,input-rgb-mode:
> > > > +description: |
> > > > +  Parallel Input (RGB) Mode.
> > > > +
> > > > +  RGB inputs (PD[23:0]) color arrangement as documented in the 
> > > > datasheet
> > > > +  and in the table below.
> > > > +
> > > > +  0 = R[7:0], G[7:0], B[7:0]
> > > 
> > > RGB888?
> > 
> > Or anything else - like a RGB666 - just connecting to GND the unused
> > pins.
> 
> If the bridge is configured for RGB666, then that's fine. If not, the 
> unused pins should be driven with either the MSB of each component. 
> Otherwise, you can't fully saturate the colors.

maybe a detail and maybe not really relevant, but this specific bridge
has no know-how on the actual RGB inputs width. While I understand what
you are saying here, in the end this is about the actual hardware design
that can be in any way, including having pins to gnd and have the issue
you just described.

Francesco



Re: [PATCH 1/3] drm/lima: add usage counting method to ctx_mgr

2023-04-03 Thread Ville Syrjälä
On Mon, Mar 13, 2023 at 12:30:50AM +0100, Erico Nunes wrote:
> lima maintains a context manager per drm_file, similar to amdgpu.
> In order to account for the complete usage per drm_file, all of the
> associated contexts need to be considered.
> Previously released contexts also need to be accounted for but their
> drm_sched_entity info is gone once they get released, so account for it
> in the ctx_mgr.
> 
> Signed-off-by: Erico Nunes 
> ---
>  drivers/gpu/drm/lima/lima_ctx.c | 30 +-
>  drivers/gpu/drm/lima/lima_ctx.h |  3 +++
>  2 files changed, 32 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c
> index 891d5cd5019a..e008e586fad0 100644
> --- a/drivers/gpu/drm/lima/lima_ctx.c
> +++ b/drivers/gpu/drm/lima/lima_ctx.c
> @@ -15,6 +15,7 @@ int lima_ctx_create(struct lima_device *dev, struct 
> lima_ctx_mgr *mgr, u32 *id)
>   if (!ctx)
>   return -ENOMEM;
>   ctx->dev = dev;
> + ctx->mgr = mgr;
>   kref_init(>refcnt);
>  
>   for (i = 0; i < lima_pipe_num; i++) {
> @@ -42,10 +43,17 @@ int lima_ctx_create(struct lima_device *dev, struct 
> lima_ctx_mgr *mgr, u32 *id)
>  static void lima_ctx_do_release(struct kref *ref)
>  {
>   struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt);
> + struct lima_ctx_mgr *mgr = ctx->mgr;
>   int i;
>  
> - for (i = 0; i < lima_pipe_num; i++)
> + for (i = 0; i < lima_pipe_num; i++) {
> + struct lima_sched_context *context = >context[i];
> + struct drm_sched_entity *entity = >base;
> +
> + mgr->elapsed_ns[i] += entity->elapsed_ns;

drm-tip build is now broken because of this vs. 
commit baad10973fdb ("Revert "drm/scheduler: track GPU active time per entity"")

../drivers/gpu/drm/lima/lima_ctx.c: In function ‘lima_ctx_do_release’:
../drivers/gpu/drm/lima/lima_ctx.c:53:45: error: ‘struct
drm_sched_entity’ has no member named ‘elapsed_ns’
   53 | mgr->elapsed_ns[i] += entity->elapsed_ns;

-- 
Ville Syrjälä
Intel


Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode

2023-04-03 Thread Francesco Dolcini
On Mon, Apr 03, 2023 at 04:06:22PM -0500, Rob Herring wrote:
> On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote:
> > From: Francesco Dolcini 
> > 
> > SN65DSI8[34] device supports burst video mode and non-burst video mode
> > with sync events or with sync pulses packet transmission as described in
> > the DSI specification.
> > 
> > Add property to select the expected mode, this allows for example to
> > select a mode that is compatible with the DSI host interface.
> 
> Why does this need to be in DT?

> The source and sink drivers should know what their capabilities are
> and pick the best common one.

Is there a best mode? Isn't this a decision how do we want the 2 peers
to communicate?

For the MIPI-DSI Linux/DRM experts: am I missing something? Is there
another way to have a DSI video sink to ask for a specific mode?

(I copied this from an existing DSI panel binding).

Francesco



Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode

2023-04-03 Thread Francesco Dolcini
On Mon, Apr 03, 2023 at 04:01:17PM -0500, Rob Herring wrote:
> On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote:
> > On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote:
> > > On 30/03/2023 11:59, Francesco Dolcini wrote:
> > > > From: Francesco Dolcini 
> > > > 
> > > > Add new toshiba,input-rgb-mode property to describe the actual signal
> > > > connection on the parallel RGB input interface.
> > > > 
> > > > Signed-off-by: Francesco Dolcini 
> > > > ---
> > > >  .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++
> > > >  1 file changed, 15 insertions(+)
> > > > 
> > > > diff --git 
> > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > >  
> > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > index 8f22093b61ae..2638121a2223 100644
> > > > --- 
> > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > +++ 
> > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > > @@ -42,6 +42,21 @@ properties:
> > > >clock-names:
> > > >  const: refclk
> > > >  
> > > > +  toshiba,input-rgb-mode:
> > > > +description: |
> > > > +  Parallel Input (RGB) Mode.
> > > > +
> > > > +  RGB inputs (PD[23:0]) color arrangement as documented in the 
> > > > datasheet
> > > > +  and in the table below.
> > > > +
> > > > +  0 = R[7:0], G[7:0], B[7:0]
> > > 
> > > RGB888?
> > 
> > Or anything else - like a RGB666 - just connecting to GND the unused
> > pins.
> 
> If the bridge is configured for RGB666, then that's fine. If not, the 
> unused pins should be driven with either the MSB of each component. 
> Otherwise, you can't fully saturate the colors.

> > > > +  1 = R[1:0], G[1:0], B[1:0], R[7:2], G[7:2], B[7:2]
> > > > +  2 = 8'b0, R[4:0], G[5:0], B[4:0]
> > > 
> > > Isn't this RGB565?
> > > 
> > > Don't we have already properties like this? e.g. colorspace?
> > 
> > It's not really the colorspace this property.
> > 
> > tc358768 is a parallel RGB to DSI bridge, it has 24 bit parallel input
> > line.
> > 
> > The way this lines are connected is configurable with this parameter, if you
> > look at mode 0 and 1 they all allow to have a RGB888 or a RGB666 or a
> > RGB565 mapping. This just configure some internal mux, it's not strictly
> > about the RGB mode.
> 
> This is the same as other cases. There's a need for describing the 
> interface. It keeps coming up and I keep saying to go create something 
> common.

I am not aware of other discussion on the topic, do you have any pointer
I can look at?

What I'd like to re-iterate here once more is that this configuration is
about how the external 24-bit parallel RGB lines are mapped withing this
bridge.

It's not mapping the linux media bus format (e.g. not
MEDIA_BUS_FMT_RBG888_1X24 or alike).

This bridge allow for a limited set of combination (3) as described in
this binding.

Francesco



Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode

2023-04-03 Thread Rob Herring
On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote:
> From: Francesco Dolcini 
> 
> SN65DSI8[34] device supports burst video mode and non-burst video mode
> with sync events or with sync pulses packet transmission as described in
> the DSI specification.
> 
> Add property to select the expected mode, this allows for example to
> select a mode that is compatible with the DSI host interface.

Why does this need to be in DT? The source and sink drivers should know 
what their capabilities are and pick the best common one.

Rob


Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode

2023-04-03 Thread Rob Herring
On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote:
> On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote:
> > On 30/03/2023 11:59, Francesco Dolcini wrote:
> > > From: Francesco Dolcini 
> > > 
> > > Add new toshiba,input-rgb-mode property to describe the actual signal
> > > connection on the parallel RGB input interface.
> > > 
> > > Signed-off-by: Francesco Dolcini 
> > > ---
> > >  .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++
> > >  1 file changed, 15 insertions(+)
> > > 
> > > diff --git 
> > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml 
> > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > index 8f22093b61ae..2638121a2223 100644
> > > --- 
> > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > +++ 
> > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml
> > > @@ -42,6 +42,21 @@ properties:
> > >clock-names:
> > >  const: refclk
> > >  
> > > +  toshiba,input-rgb-mode:
> > > +description: |
> > > +  Parallel Input (RGB) Mode.
> > > +
> > > +  RGB inputs (PD[23:0]) color arrangement as documented in the 
> > > datasheet
> > > +  and in the table below.
> > > +
> > > +  0 = R[7:0], G[7:0], B[7:0]
> > 
> > RGB888?
> 
> Or anything else - like a RGB666 - just connecting to GND the unused
> pins.

If the bridge is configured for RGB666, then that's fine. If not, the 
unused pins should be driven with either the MSB of each component. 
Otherwise, you can't fully saturate the colors.

> > > +  1 = R[1:0], G[1:0], B[1:0], R[7:2], G[7:2], B[7:2]
> > > +  2 = 8’b0, R[4:0], G[5:0], B[4:0]
> > 
> > Isn't this RGB565?
> > 
> > Don't we have already properties like this? e.g. colorspace?
> 
> This property is not really about the colorspace.
> 
> tc358768 is a parallel RGB to DSI bridge, it has 24 bit parallel input
> line.
> 
> The way these lines are connected is configurable with this parameter, if you
> look at mode 0 and 1 they all allow to have a RGB888 or a RGB666 or a
> RGB565 mapping. This just configure some internal mux, it's not strictly
> about the RGB mode.

This is the same as other cases. There's a need for describing the 
interface. It keeps coming up and I keep saying to go create something 
common.

Rob


Re: [RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO

2023-04-03 Thread Joshua Ashton




On 4/3/23 20:54, Christian König wrote:

Am 03.04.23 um 21:40 schrieb Joshua Ashton:

Hello all!

I would like to propose a new API for allowing processes to control
the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO.

The main reason for this is for compositors such as Gamescope and
SteamVR vrcompositor to be able to create realtime async compute
queues on AMD without the need of CAP_SYS_NICE.

The current situation is bad for a few reasons, one being that in order
to setcap the executable, typically one must run as root which involves
a pretty high privilege escalation in order to achieve one
small feat, a realtime async compute queue for VR or a compositor.
The executable cannot be setcap'ed inside a
container nor can the setcap'ed executable be run in a container with
NO_NEW_PRIVS.

I go into more detail in the description in
`uapi: Add RLIMIT_GPUPRIO`.

My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`,
which seems to make most initial sense to me to solve the problem.

I am definitely not set that this is the best formulation however
or if this should be linked to DRM (in terms of its scheduler
priority enum/definitions) in any way and would really like other
people's opinions across the stack on this.

One initial concern is that potentially this RLIMIT could out-live
the lifespan of DRM. It sounds crazy saying it right now, something
that definitely popped into my mind when touching `resource.h`. :-)

Anyway, please let me know what you think!
Definitely open to any feedback and advice you may have. :D


Well the basic problem is that higher priority queues can be used to 
starve low priority queues.


This starvation in turn is very very bad for memory management since the 
dma_fence's the GPU scheduler deals with have very strong restrictions.


Even exposing this under CAP_SYS_NICE is questionable, so we will most 
likely have to NAK this.


This is already exposed with CAP_SYS_NICE and is relied on by SteamVR 
for async reprojection and Gamescope's composite path on Steam Deck.


Having a high priority async compute queue is really really important 
and advantageous for these tasks.


The majority of usecases for something like this is going to be a 
compositor which does some really tiny amount of work per-frame but is 
incredibly latency dependent (as it depends on latching onto buffers 
just before vblank to do its work)


Starving and surpassing work on other queues is kind of the entire 
point. Gamescope and SteamVR do it on ACE as well so GFX work can run 
alongside it.


- Joshie ✨



Regards,
Christian.



Thanks!
  - Joshie

Joshua Ashton (4):
   drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH
   drm/scheduler: Split out drm_sched_priority to own file
   uapi: Add RLIMIT_GPUPRIO
   drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions

  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++--
  drivers/gpu/drm/msm/msm_gpu.h   |  2 +-
  fs/proc/base.c  |  1 +
  include/asm-generic/resource.h  |  3 +-
  include/drm/drm_sched_priority.h    | 41 +
  include/drm/gpu_scheduler.h | 14 +
  include/uapi/asm-generic/resource.h |  3 +-
  7 files changed, 58 insertions(+), 19 deletions(-)
  create mode 100644 include/drm/drm_sched_priority.h







Re: [PATCH v4 1/2] drm/virtio: Refactor job submission code path

2023-04-03 Thread Dmitry Osipenko
On 3/30/23 18:32, Emil Velikov wrote:
>> +static int virtio_gpu_dma_fence_wait(struct virtio_gpu_submit *submit,
>> + struct dma_fence *fence)
>> +{
>> +struct dma_fence_unwrap itr;
>> +struct dma_fence *f;
>> +int err;
>> +
>> +dma_fence_unwrap_for_each(f, &itr, fence) {
> The dma_fence_unwrap_for_each() change should be a separate patch,
> highlighting why we want it.

Good point, it actually should be a potential optimization for the
in-fence waiting.

>> +ret = virtio_gpu_init_submit(, exbuf, dev, file,
>> + fence_ctx, ring_idx);
>> +if (ret)
>> +goto cleanup;
>> +
>> +ret = virtio_gpu_wait_in_fence();
>> +if (ret)
>> +goto cleanup;
>> +
> We have reshuffled the order around in_fence waiting, out_fence install,
> handles, cmdbuf, drm events, etc. Can we get that split up a bit, with
> some comments.
> 
> If it were me, I would keep the wait_in_fence early and inline
> virtio_gpu_init_submit (the nesting/abstraction seems a bit much). This
> means one can omit the virtio_gpu_submit::exbuf all together.

I tried to inline and this variant makes code much less readable to me.

The point of having wait_in_fence after submit_init is that it makes
submit code path shorter. If we have to wait for in-fence, then once
fence signals, there is no need to init and instead move directly to a
further submission step.

Perhaps won't hurt to also factor out the wait_fence from parse_deps in
the second patch and do all the waits right before locking the buflist.

-- 
Best regards,
Dmitry



Re: [RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO

2023-04-03 Thread Christian König

Am 03.04.23 um 21:40 schrieb Joshua Ashton:

Hello all!

I would like to propose a new API for allowing processes to control
the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO.

The main reason for this is for compositors such as Gamescope and
SteamVR vrcompositor to be able to create realtime async compute
queues on AMD without the need of CAP_SYS_NICE.

The current situation is bad for a few reasons, one being that in order
to setcap the executable, typically one must run as root which involves
a pretty high privilege escalation in order to achieve one
small feat, a realtime async compute queue for VR or a compositor.
The executable cannot be setcap'ed inside a
container nor can the setcap'ed executable be run in a container with
NO_NEW_PRIVS.

I go into more detail in the description in
`uapi: Add RLIMIT_GPUPRIO`.

My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`,
which seems to make most initial sense to me to solve the problem.

I am definitely not set that this is the best formulation however
or if this should be linked to DRM (in terms of its scheduler
priority enum/definitions) in any way and would really like other
people's opinions across the stack on this.

One initial concern is that potentially this RLIMIT could out-live
the lifespan of DRM. It sounds crazy saying it right now, something
that definitely popped into my mind when touching `resource.h`. :-)

Anyway, please let me know what you think!
Definitely open to any feedback and advice you may have. :D


Well the basic problem is that higher priority queues can be used to 
starve low priority queues.


This starvation in turn is very very bad for memory management since the 
dma_fence's the GPU scheduler deals with have very strong restrictions.


Even exposing this under CAP_SYS_NICE is questionable, so we will most 
likely have to NAK this.


Regards,
Christian.



Thanks!
  - Joshie

Joshua Ashton (4):
   drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH
   drm/scheduler: Split out drm_sched_priority to own file
   uapi: Add RLIMIT_GPUPRIO
   drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions

  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++--
  drivers/gpu/drm/msm/msm_gpu.h   |  2 +-
  fs/proc/base.c  |  1 +
  include/asm-generic/resource.h  |  3 +-
  include/drm/drm_sched_priority.h| 41 +
  include/drm/gpu_scheduler.h | 14 +
  include/uapi/asm-generic/resource.h |  3 +-
  7 files changed, 58 insertions(+), 19 deletions(-)
  create mode 100644 include/drm/drm_sched_priority.h





Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level

2023-04-03 Thread Ville Syrjälä
On Mon, Apr 03, 2023 at 07:39:37PM +, Yang, Fei wrote:
> >Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
> >
> >On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote:
> >>> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of
> >>> cache_level
> >>>
> >>> On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote:
>  From: Fei Yang 
> 
>  Currently the KMD is using enum i915_cache_level to set caching
>  policy for buffer objects. This is flaky because the PAT index
>  which really controls the caching behavior in PTE has far more
>  levels than what's defined in the enum.
> >>>
> >>> Then just add more enum values.
> >>
> >> That would be really messy because PAT index is platform dependent,
> >> you would have to maintain many tables for the translation.
> >>
> >>> 'pat_index' is absolutely meaningless to the reader, it's just an
> >>> arbitrary number. Whereas 'cache_level' conveys how the thing is
> >>> actually going to get used and thus how the caches should behave.
> >>
> >> By design UMD's understand PAT index. Both UMD and KMD should stand on
> >> the same ground, the Bspec, to avoid any potential ambiguity.
> >>
>  In addition, the PAT index is platform dependent, having to
>  translate between i915_cache_level and PAT index is not reliable,
> >>>
> >>> If it's not reliable then the code is clearly broken.
> >>
> >> Perhaps the word "reliable" is a bit confusing here. What I really
> >> meant to say is 'difficult to maintain', or 'error-prone'.
> >>
>  and makes the code more complicated.
> >>>
> >>> You have to translate somewhere anyway. Looks like you're now adding
> >>> translations the other way (pat_index->cache_level). How is that better?
> >>
> >> No, there is no pat_index->cache_level translation.
> >
> > i915_gem_object_has_cache_level() is exactly that. And that one does look
> > actually fragile since it assumes only one PAT index maps to each cache
> > level. So if the user picks any other pat_index anything using
> > i915_gem_object_has_cache_level() is likely to do the wrong thing.
> 
> That is still a one-way translation, from cache_level to pat_index.

Not really. The actual input to the thing is obj->pat_index.
And as stated, the whole thing is simply broken whenever
obj->pat_index isn't one of the magic numbers that you get
back from i915_gem_get_pat_index().

-- 
Ville Syrjälä
Intel


Re: [RFC PATCH 1/4] drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH

2023-04-03 Thread Christian König

Am 03.04.23 um 21:40 schrieb Joshua Ashton:

This allows AMDGPU scheduler priority above normal to be expressed
using the DRM_SCHED_PRIORITY enum.


That was rejected before, I just don't remember why exactly. Need to dig 
that up again.


Christian.



Signed-off-by: Joshua Ashton 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
  drivers/gpu/drm/msm/msm_gpu.h   | 2 +-
  include/drm/gpu_scheduler.h | 1 +
  3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2139ac12159..8ec255091c4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -79,7 +79,7 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
return DRM_SCHED_PRIORITY_HIGH;
  
  	case AMDGPU_CTX_PRIORITY_VERY_HIGH:

-   return DRM_SCHED_PRIORITY_HIGH;
+   return DRM_SCHED_PRIORITY_VERY_HIGH;
  
  	/* This should not happen as we sanitized userspace provided priority

 * already, WARN if this happens.
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index fc1c0d8611a8..e3495712b236 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -336,7 +336,7 @@ struct msm_gpu_perfcntr {
   * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
   * cases, so we don't use it (no need for kernel generated jobs).
   */
-#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - 
DRM_SCHED_PRIORITY_MIN)
+#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_VERY_HIGH - 
DRM_SCHED_PRIORITY_MIN)
  
  /**

   * struct msm_file_private - per-drm_file context
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9935d1e2ff69..a62071660602 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -55,6 +55,7 @@ enum drm_sched_priority {
DRM_SCHED_PRIORITY_MIN,
DRM_SCHED_PRIORITY_NORMAL,
DRM_SCHED_PRIORITY_HIGH,
+   DRM_SCHED_PRIORITY_VERY_HIGH,
DRM_SCHED_PRIORITY_KERNEL,
  
  	DRM_SCHED_PRIORITY_COUNT,




[RFC PATCH 4/4] drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions

2023-04-03 Thread Joshua Ashton
Add support for the new RLIMIT_GPUPRIO when doing the priority
checks creating an amdgpu_ctx.

Signed-off-by: Joshua Ashton 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 8ec255091c4a..4ac645455bc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -28,6 +28,8 @@
 #include "amdgpu_sched.h"
 #include "amdgpu_ras.h"
 #include 
+#include 
+#include 
 
 #define to_amdgpu_ctx_entity(e)\
container_of((e), struct amdgpu_ctx_entity, entity)
@@ -94,11 +96,16 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
  int32_t priority)
 {
+   enum drm_sched_priority in_drm_priority, rlim_drm_priority;
+
if (!amdgpu_ctx_priority_is_valid(priority))
return -EINVAL;
 
-   /* NORMAL and below are accessible by everyone */
-   if (priority <= AMDGPU_CTX_PRIORITY_NORMAL)
+   /* Check priority against RLIMIT to see what is allowed. */
+   in_drm_priority = amdgpu_ctx_to_drm_sched_prio(priority);
+   rlim_drm_priority = (enum drm_sched_priority)rlimit(RLIMIT_GPUPRIO);
+
+   if (in_drm_priority <= rlim_drm_priority)
return 0;
 
if (capable(CAP_SYS_NICE))
-- 
2.40.0



[RFC PATCH 2/4] drm/scheduler: Split out drm_sched_priority to own file

2023-04-03 Thread Joshua Ashton
This allows it to be used by other parts of the codebase without fear
of a circular include dependency being introduced.

Signed-off-by: Joshua Ashton 
---
 include/drm/drm_sched_priority.h | 41 
 include/drm/gpu_scheduler.h  | 15 +---
 2 files changed, 42 insertions(+), 14 deletions(-)
 create mode 100644 include/drm/drm_sched_priority.h

diff --git a/include/drm/drm_sched_priority.h b/include/drm/drm_sched_priority.h
new file mode 100644
index ..85a7bb011e27
--- /dev/null
+++ b/include/drm/drm_sched_priority.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _DRM_SCHED_PRIORITY_H_
+#define _DRM_SCHED_PRIORITY_H_
+
+/* These are often used as an (initial) index
+ * to an array, and as such should start at 0.
+ */
+enum drm_sched_priority {
+   DRM_SCHED_PRIORITY_MIN,
+   DRM_SCHED_PRIORITY_NORMAL,
+   DRM_SCHED_PRIORITY_HIGH,
+   DRM_SCHED_PRIORITY_VERY_HIGH,
+   DRM_SCHED_PRIORITY_KERNEL,
+
+   DRM_SCHED_PRIORITY_COUNT,
+   DRM_SCHED_PRIORITY_UNSET = -2
+};
+
+#endif
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index a62071660602..9228ff0d515e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -48,20 +49,6 @@ struct drm_gem_object;
 struct drm_gpu_scheduler;
 struct drm_sched_rq;
 
-/* These are often used as an (initial) index
- * to an array, and as such should start at 0.
- */
-enum drm_sched_priority {
-   DRM_SCHED_PRIORITY_MIN,
-   DRM_SCHED_PRIORITY_NORMAL,
-   DRM_SCHED_PRIORITY_HIGH,
-   DRM_SCHED_PRIORITY_VERY_HIGH,
-   DRM_SCHED_PRIORITY_KERNEL,
-
-   DRM_SCHED_PRIORITY_COUNT,
-   DRM_SCHED_PRIORITY_UNSET = -2
-};
-
 /* Used to chose between FIFO and RR jobs scheduling */
 extern int drm_sched_policy;
 
-- 
2.40.0



[RFC PATCH 3/4] uapi: Add RLIMIT_GPUPRIO

2023-04-03 Thread Joshua Ashton
Introduce a new RLIMIT that allows the user to set a runtime limit on
the GPU scheduler priority for tasks.

This avoids the need for leased compositors such as SteamVR's
vrcompositor to be launched via a setcap'ed executable with
CAP_SYS_NICE.

This is required for SteamVR as it doesn't run as a DRM master, but
rather on a DRM lease using the HMD's connector.

The current situation is bad for a few reasons, one being that in order
to setcap the executable, typically one must run as root which involves
a pretty high privilege escalation in order to achieve one
small feat, a realtime async compute queue for VR or a compositor.
The executable cannot be setcap'ed inside a
container nor can the setcap'ed executable be run in a container with
NO_NEW_PRIVS.

Even in cases where one may think the DRM master check to be useful,
such as Gamescope where it is the DRM master, the part of the compositor
that runs as the DRM master is entirely separate to the Vulkan device
with its own DRM device fd doing the GPU work that demands the
realtime priority queue. Additionally, Gamescope can also run nested
in a traditional compositor where there is no DRM master, but having a
realtime queue is still advantageous.

With adding RLIMIT_GPUPRIO, a process outside of a container or
eg. rtkit could call `prlimit` on the process inside to allow it to make
a realtime queue and solve these problems.

Signed-off-by: Joshua Ashton 
---
 fs/proc/base.c  | 1 +
 include/asm-generic/resource.h  | 3 ++-
 include/uapi/asm-generic/resource.h | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5e0e0ccd47aa..a5c9a9f23f08 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -589,6 +589,7 @@ static const struct limit_names lnames[RLIM_NLIMITS] = {
[RLIMIT_NICE] = {"Max nice priority", NULL},
[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
+   [RLIMIT_GPUPRIO] = {"Max DRM GPU priority", NULL},
 };
 
 /* Display limits for a process */
diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index 8874f681b056..cefee1a8d9db 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -3,7 +3,7 @@
 #define _ASM_GENERIC_RESOURCE_H
 
 #include 
-
+#include 
 
 /*
  * boot-time rlimit defaults for the init task:
@@ -26,6 +26,7 @@
[RLIMIT_NICE]   = { 0, 0 }, \
[RLIMIT_RTPRIO] = { 0, 0 }, \
[RLIMIT_RTTIME] = {  RLIM_INFINITY,  RLIM_INFINITY },   \
+   [RLIMIT_GPUPRIO]= { DRM_SCHED_PRIORITY_NORMAL, 
DRM_SCHED_PRIORITY_NORMAL }, \
 }
 
 #endif
diff --git a/include/uapi/asm-generic/resource.h 
b/include/uapi/asm-generic/resource.h
index f12db7a0da64..85027b07a420 100644
--- a/include/uapi/asm-generic/resource.h
+++ b/include/uapi/asm-generic/resource.h
@@ -46,7 +46,8 @@
   0-39 for nice level 19 .. -20 */
 #define RLIMIT_RTPRIO  14  /* maximum realtime priority */
 #define RLIMIT_RTTIME  15  /* timeout for RT tasks in us */
-#define RLIM_NLIMITS   16
+#define RLIMIT_GPUPRIO 16  /* maximum GPU priority */
+#define RLIM_NLIMITS   17
 
 /*
  * SuS says limits have to be unsigned.
-- 
2.40.0



[RFC PATCH 1/4] drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH

2023-04-03 Thread Joshua Ashton
This allows AMDGPU scheduler priority above normal to be expressed
using the DRM_SCHED_PRIORITY enum.

Signed-off-by: Joshua Ashton 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
 drivers/gpu/drm/msm/msm_gpu.h   | 2 +-
 include/drm/gpu_scheduler.h | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index d2139ac12159..8ec255091c4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -79,7 +79,7 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio)
return DRM_SCHED_PRIORITY_HIGH;
 
case AMDGPU_CTX_PRIORITY_VERY_HIGH:
-   return DRM_SCHED_PRIORITY_HIGH;
+   return DRM_SCHED_PRIORITY_VERY_HIGH;
 
/* This should not happen as we sanitized userspace provided priority
 * already, WARN if this happens.
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index fc1c0d8611a8..e3495712b236 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -336,7 +336,7 @@ struct msm_gpu_perfcntr {
  * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some
  * cases, so we don't use it (no need for kernel generated jobs).
  */
-#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - 
DRM_SCHED_PRIORITY_MIN)
+#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_VERY_HIGH - 
DRM_SCHED_PRIORITY_MIN)
 
 /**
  * struct msm_file_private - per-drm_file context
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9935d1e2ff69..a62071660602 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -55,6 +55,7 @@ enum drm_sched_priority {
DRM_SCHED_PRIORITY_MIN,
DRM_SCHED_PRIORITY_NORMAL,
DRM_SCHED_PRIORITY_HIGH,
+   DRM_SCHED_PRIORITY_VERY_HIGH,
DRM_SCHED_PRIORITY_KERNEL,
 
DRM_SCHED_PRIORITY_COUNT,
-- 
2.40.0



[RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO

2023-04-03 Thread Joshua Ashton
Hello all!

I would like to propose a new API for allowing processes to control
the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO.

The main reason for this is for compositors such as Gamescope and
SteamVR vrcompositor to be able to create realtime async compute
queues on AMD without the need of CAP_SYS_NICE.

The current situation is bad for a few reasons, one being that in order
to setcap the executable, typically one must run as root which involves
a pretty high privilege escalation in order to achieve one
small feat, a realtime async compute queue for VR or a compositor.
The executable cannot be setcap'ed inside a
container nor can the setcap'ed executable be run in a container with
NO_NEW_PRIVS.

I go into more detail in the description in
`uapi: Add RLIMIT_GPUPRIO`.

My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`,
which seems to make most initial sense to me to solve the problem.

I am definitely not set that this is the best formulation however
or if this should be linked to DRM (in terms of its scheduler
priority enum/definitions) in any way and would really like other
people's opinions across the stack on this.

One initial concern is that potentially this RLIMIT could out-live
the lifespan of DRM. It sounds crazy saying it right now, something
that definitely popped into my mind when touching `resource.h`. :-)

Anyway, please let me know what you think!
Definitely open to any feedback and advice you may have. :D

Thanks!
 - Joshie

Joshua Ashton (4):
  drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH
  drm/scheduler: Split out drm_sched_priority to own file
  uapi: Add RLIMIT_GPUPRIO
  drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions

 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++--
 drivers/gpu/drm/msm/msm_gpu.h   |  2 +-
 fs/proc/base.c  |  1 +
 include/asm-generic/resource.h  |  3 +-
 include/drm/drm_sched_priority.h| 41 +
 include/drm/gpu_scheduler.h | 14 +
 include/uapi/asm-generic/resource.h |  3 +-
 7 files changed, 58 insertions(+), 19 deletions(-)
 create mode 100644 include/drm/drm_sched_priority.h

-- 
2.40.0



RE: [PATCH 5/7] drm/i915: use pat_index instead of cache_level

2023-04-03 Thread Yang, Fei
>Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
>
>On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote:
>>> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of
>>> cache_level
>>>
>>> On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote:
 From: Fei Yang 

 Currently the KMD is using enum i915_cache_level to set caching
 policy for buffer objects. This is flaky because the PAT index
 which really controls the caching behavior in PTE has far more
 levels than what's defined in the enum.
>>>
>>> Then just add more enum values.
>>
>> That would be really messy because PAT index is platform dependent,
>> you would have to maintain many tables for the translation.
>>
>>> 'pat_index' is absolutely meaningless to the reader, it's just an
>>> arbitrary number. Whereas 'cache_level' conveys how the thing is
>>> actually going to get used and thus how the caches should behave.
>>
>> By design UMD's understand PAT index. Both UMD and KMD should stand on
>> the same ground, the Bspec, to avoid any potential ambiguity.
>>
 In addition, the PAT index is platform dependent, having to
 translate between i915_cache_level and PAT index is not reliable,
>>>
>>> If it's not reliable then the code is clearly broken.
>>
>> Perhaps the word "reliable" is a bit confusing here. What I really
>> meant to say is 'difficult to maintain', or 'error-prone'.
>>
 and makes the code more complicated.
>>>
>>> You have to translate somewhere anyway. Looks like you're now adding
>>> translations the other way (pat_index->cache_level). How is that better?
>>
>> No, there is no pat_index->cache_level translation.
>
> i915_gem_object_has_cache_level() is exactly that. And that one does look
> actually fragile since it assumes only one PAT index maps to each cache
> level. So if the user picks any other pat_index anything using
> i915_gem_object_has_cache_level() is likely to do the wrong thing.

That is still a one-way translation, from cache_level to pat_index.
The cache_level is only a KMD concept now. And inside the KMD, we have one
table to translate between cache_level to pat_index. Only KMD would be able
to trigger a comparison on pat_index for a KMD allocated BO.
User is not allowed to set pat_index dynamically any more. By design the cache
setting for user space BO's should be immutable. That's why even the set caching
ioctl has been killed (from MTL onward).

> If we do switch to pat_index then I think cache_level should be made a
> purely uapi concept,

UMD's directly use pat_index because they are supposed to follow the b-spec.
The abstracted cache_level is no longer exposed to user space.

-Fei

> and all the internal code should instead be made to
> query various aspects of the caching behaviour of the current pat_index
> (eg. is LLC caching enabled, and thus do I need to clflush?).
>
> --
> Ville Syrjälä
> Intel


Re: [PATCH] misc: sram: Add dma-heap-export reserved SRAM area type

2023-04-03 Thread Andrew Davis

On 4/1/23 3:35 AM, Christian Gmeiner wrote:

Hi Andrew




Okay, will split for v2.




Was there a follow-up v2 of this patchset? AFAICT this series did not
make it into the mainline kernel.
Do you have any plans to work on it? If not I would like to help out
as we have a use case where we want to
use a dma-buf sram exporter.




Sure, I've been keeping it alive in our evil vendor tree, but if
there is interest upstream now I'll post a v2 and CC you.

Thanks,
Andrew


[PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas

2023-04-03 Thread Andrew Davis
This new export type exposes to userspace the SRAM area as a DMA-BUF Heap,
this allows for allocations of DMA-BUFs that can be consumed by various
DMA-BUF supporting devices.

Signed-off-by: Andrew Davis 
---

Changes from v1:
 - Use existing DT flags, if both pool(device usable) and export(userspace
   usable) properties are in the SRAM node then export as a DMA-BUF Heap
 - Rebase on 6.3-rc5

 drivers/misc/Kconfig |   7 +
 drivers/misc/Makefile|   1 +
 drivers/misc/sram-dma-heap.c | 245 +++
 drivers/misc/sram.c  |   6 +
 drivers/misc/sram.h  |  16 +++
 5 files changed, 275 insertions(+)
 create mode 100644 drivers/misc/sram-dma-heap.c

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 433aa41977852..8b4c111a6493b 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -448,6 +448,13 @@ config SRAM
 config SRAM_EXEC
bool
 
+config SRAM_DMA_HEAP
+   bool "Export on-chip SRAM pools using DMA-Heaps"
+   depends on DMABUF_HEAPS && SRAM
+   help
+ This driver allows the export of on-chip SRAM marked as both pool
+ and exportable to userspace using the DMA-Heaps interface.
+
 config DW_XDATA_PCIE
depends on PCI
tristate "Synopsys DesignWare xData PCIe driver"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 56de43943cd51..bbdc64aa8af1a 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/
 obj-$(CONFIG_LATTICE_ECP3_CONFIG)  += lattice-ecp3-config.o
 obj-$(CONFIG_SRAM) += sram.o
 obj-$(CONFIG_SRAM_EXEC)+= sram-exec.o
+obj-$(CONFIG_SRAM_DMA_HEAP)+= sram-dma-heap.o
 obj-$(CONFIG_GENWQE)   += genwqe/
 obj-$(CONFIG_ECHO) += echo/
 obj-$(CONFIG_CXL_BASE) += cxl/
diff --git a/drivers/misc/sram-dma-heap.c b/drivers/misc/sram-dma-heap.c
new file mode 100644
index 0..c511f4ac1280e
--- /dev/null
+++ b/drivers/misc/sram-dma-heap.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SRAM DMA-Heap userspace exporter
+ *
+ * Copyright (C) 2019-2022 Texas Instruments Incorporated - https://www.ti.com/
+ * Andrew Davis 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "sram.h"
+
+struct sram_dma_heap {
+   struct dma_heap *heap;
+   struct gen_pool *pool;
+};
+
+struct sram_dma_heap_buffer {
+   struct gen_pool *pool;
+   struct list_head attachments;
+   struct mutex attachments_lock;
+   unsigned long len;
+   void *vaddr;
+   phys_addr_t paddr;
+};
+
+struct dma_heap_attachment {
+   struct device *dev;
+   struct sg_table *table;
+   struct list_head list;
+};
+
+static int dma_heap_attach(struct dma_buf *dmabuf,
+  struct dma_buf_attachment *attachment)
+{
+   struct sram_dma_heap_buffer *buffer = dmabuf->priv;
+   struct dma_heap_attachment *a;
+   struct sg_table *table;
+
+   a = kzalloc(sizeof(*a), GFP_KERNEL);
+   if (!a)
+   return -ENOMEM;
+
+   table = kmalloc(sizeof(*table), GFP_KERNEL);
+   if (!table) {
+   kfree(a);
+   return -ENOMEM;
+   }
+   if (sg_alloc_table(table, 1, GFP_KERNEL)) {
+   kfree(table);
+   kfree(a);
+   return -ENOMEM;
+   }
+   sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(buffer->paddr)), 
buffer->len, 0);
+
+   a->table = table;
+   a->dev = attachment->dev;
+   INIT_LIST_HEAD(&a->list);
+
+   attachment->priv = a;
+
+   mutex_lock(&buffer->attachments_lock);
+   list_add(&a->list, &buffer->attachments);
+   mutex_unlock(&buffer->attachments_lock);
+
+   return 0;
+}
+
+static void dma_heap_detatch(struct dma_buf *dmabuf,
+struct dma_buf_attachment *attachment)
+{
+   struct sram_dma_heap_buffer *buffer = dmabuf->priv;
+   struct dma_heap_attachment *a = attachment->priv;
+
+   mutex_lock(&buffer->attachments_lock);
+   list_del(&a->list);
+   mutex_unlock(&buffer->attachments_lock);
+
+   sg_free_table(a->table);
+   kfree(a->table);
+   kfree(a);
+}
+
+static struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment 
*attachment,
+enum dma_data_direction direction)
+{
+   struct dma_heap_attachment *a = attachment->priv;
+   struct sg_table *table = a->table;
+
+   /*
+* As this heap is backed by uncached SRAM memory we do not need to
+* perform any sync operations on the buffer before allowing device
+* domain access. For this reason we use SKIP_CPU_SYNC and also do
+* not use or provide begin/end_cpu_access() dma-buf functions.
+*/
+   if (!dma_map_sg_attrs(attachment->dev, table->sgl, table->nents,
+ direction, DMA_ATTR_SKIP_CPU_SYNC))
+   

Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog

2023-04-03 Thread Abhinav Kumar




On 4/3/2023 11:48 AM, Dmitry Baryshkov wrote:

On 03/04/2023 21:06, Abhinav Kumar wrote:



On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote:

This huge series attempts to restructure the DPU HW catalog into a
manageable and reviewable data set. In order to ease review and testing
I merged all the necessary fixes into this series. Also I cherry-picked
& slightly fixed Konrad's patch adding size to the SSPP and INTF macros.



I had to first dig up some history about why dpu catalog grew so much 
in the first place before starting this review. When the DPU driver 
first landed (which pre-dates my work in upstream), it looks like it 
followed mdp5 model from mdp5_cfg.c. But looks like as the number of 
chipsets which use DPU kept growing, this is becoming a burden.


As everyone knows, downstream follows a devicetree model for the dpu 
hardware and that should have always been the case. Perhaps in the 
last 2-3 years more time could have been spent on standardizing the 
bindings used for hw blocks in order to maintain a less hard-coded 
catalog file and more in the device tree.


Unfortunately, this is not how the upstream DT works. If something is a 
constant hardware property, it should not go into the DT. So pushing 
catalog to dt would have been immediately frowned upon by Rob Herring or 
Krzysztof.




Yes certainly we cannot put hardware specific properties. But in 
general, modelling the hardware like the number of sspps, number of 
interfaces and number of dspps etc can be a bit abstracted? like 
blk-type and blk-offset? blk-type can be a custom string because each 
block is named differently for different vendors?


The number of blk_offsets decides number of blocks. Its not constant 
right. We are seeing it varying with chipsets.


Then the catalog would have just been a place to parse the device 
tree, set the feature capability based on chipset (refer 
_sde_hardware_pre_caps). That way offsets , number of blocks and the 
blocks themselves still come from the device tree but perhaps some 
specific features are at SOC level for which the catalog still stays.


That being said, I thought of different strategies even before the 
review but two issues prevented me from suggesting those ideas (one of 
which I am seeing even here , which I am going to suggest below and 
also suggest why it wont work).


1) For the same DPU major/minor version, some features might get 
dropped or even get added with different SOCs as overall the system 
capabilities might differ like number of SSPPs or memory footprint of 
the SOC etc.


So there is no good way right now to generalize any dpu catalog or to 
tie it with a DPU major/minor version. We will have to stick with a 
per-SOC model.


Up to now, the SoC was equal to major+minor. Could you please be more 
specific here, if there are any actual differences within major+minor 
families?




So lets say, the same DPU major/minor version is used but we have only 
one DSI on one chipset Vs two DSIs on the other, some of the features 
which come into play only for dual DSI cannot be used. Like broadcasting 
a DCS command across two DSIs etc. This is a very basic example, but 
there are many examples.




This is what led me to not pursue that route.

2) For the same DPU major/minor version, even if core-DPU is same (in 
terms of SSPP, DSPP etc), the number of interfaces can change. So 
again no room to generalize same DPU hw version.


Again, I might be just scratching the surface, but I have not observed 
this.




This typically happens based on what products that chipset is catered 
towards. Thats pretty much what I can share. But more number of 
interfaces for more number of displays / use-cases.




3) For the same reason as (1) and (2), I think the de-duplication 
strategy used in this series is not correct. The idea of 
dpu_hw_version_num_layer_mixer is just not scalable as I dont know how 
many variants that will lead to. So it seems like just an attempt to 
de-duplicate which perhaps works today for existing dpu chipsets in 
upstream but by no means scalable. Lets go ahead with per-SOC catalog 
file but lets live with some amount of duplication between them if we 
really have to split it across header files.


Indeed, this leads to minor differences on top of major+lm. However, I 
think, the overall complexity is lowered.


Nevertheless, let's land the major set of patches and leave 
generalization for the later time. I think, with the addition of the 
next several platforms we will see the drill.




Yes, I would say lets handle generalization/de-duplication later when we 
see more patterns.


Lets land the main pieces first.

Going with dpu version and number of lms is not the way to generalize it 
from what we think.


I also thought of similar strategies to generalize like based on 
sub-blocks similar to what you have done but all of these were NAKed 
internally by folks who work on more chipsets / have more visibility 
into the spread of features across 

[PATCH] drm/bridge: ti-sn65dsi83: Do not generate HFP/HBP/HSA and EOT packet

2023-04-03 Thread Marek Vasut
Do not generate the HS front and back porch gaps, the HSA gap and
EOT packet, as per "SN65DSI83 datasheet SLLSEC1I - SEPTEMBER 2012
- REVISED OCTOBER 2020", page 22, these packets are not required.
This makes the TI SN65DSI83 bridge work with Samsung DSIM on i.MX8MN.

Signed-off-by: Marek Vasut 
---
Cc: Andrzej Hajda 
Cc: Daniel Vetter 
Cc: David Airlie 
Cc: Jagan Teki 
Cc: Jernej Skrabec 
Cc: Jonas Karlman 
Cc: Laurent Pinchart 
Cc: Michael Walle 
Cc: Neil Armstrong 
Cc: Robert Foss 
Cc: dri-devel@lists.freedesktop.org
---
 drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c 
b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
index 91ecfbe45bf90..b60ae1dc1191d 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
 
dsi->lanes = dsi_lanes;
dsi->format = MIPI_DSI_FMT_RGB888;
-   dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST;
+   dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_VIDEO_NO_HFP | 
MIPI_DSI_MODE_VIDEO_NO_HBP |
+ MIPI_DSI_MODE_VIDEO_NO_HSA | 
MIPI_DSI_MODE_NO_EOT_PACKET;
 
ret = devm_mipi_dsi_attach(dev, dsi);
if (ret < 0) {
-- 
2.39.2



Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog

2023-04-03 Thread Dmitry Baryshkov

On 03/04/2023 21:06, Abhinav Kumar wrote:



On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote:

This huge series attempts to restructure the DPU HW catalog into a
manageable and reviewable data set. In order to ease review and testing
I merged all the necessary fixes into this series. Also I cherry-picked
& slightly fixed Konrad's patch adding size to the SSPP and INTF macros.



I had to first dig up some history about why dpu catalog grew so much in 
the first place before starting this review. When the DPU driver first 
landed (which pre-dates my work in upstream), it looks like it followed 
mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets 
which use DPU kept growing, this is becoming a burden.


As everyone knows, downstream follows a devicetree model for the dpu 
hardware and that should have always been the case. Perhaps in the last 
2-3 years more time could have been spent on standardizing the bindings 
used for hw blocks in order to maintain a less hard-coded catalog file 
and more in the device tree.


Unfortunately, this is not how the upstream DT works. If something is a 
constant hardware property, it should not go into the DT. So pushing 
catalog to dt would have been immediately frowned upon by Rob Herring or 
Krzysztof.


Then the catalog would have just been a place to parse the device tree, 
set the feature capability based on chipset (refer 
_sde_hardware_pre_caps). That way offsets , number of blocks and the 
blocks themselves still come from the device tree but perhaps some 
specific features are at SOC level for which the catalog still stays.


That being said, I thought of different strategies even before the 
review but two issues prevented me from suggesting those ideas (one of 
which I am seeing even here , which I am going to suggest below and also 
suggest why it wont work).


1) For the same DPU major/minor version, some features might get dropped 
or even get added with different SOCs as overall the system capabilities 
might differ like number of SSPPs or memory footprint of the SOC etc.


So there is no good way right now to generalize any dpu catalog or to 
tie it with a DPU major/minor version. We will have to stick with a 
per-SOC model.


Up to now, the SoC was equal to major+minor. Could you please be more 
specific here, if there are any actual differences within major+minor 
families?




This is what led me to not pursue that route.

2) For the same DPU major/minor version, even if core-DPU is same (in 
terms of SSPP, DSPP etc), the number of interfaces can change. So again 
no room to generalize same DPU hw version.


Again, I might be just scratching the surface, but I have not observed this.



3) For the same reason as (1) and (2), I think the de-duplication 
strategy used in this series is not correct. The idea of 
dpu_hw_version_num_layer_mixer is just not scalable as I dont know how 
many variants that will lead to. So it seems like just an attempt to 
de-duplicate which perhaps works today for existing dpu chipsets in 
upstream but by no means scalable. Lets go ahead with per-SOC catalog 
file but lets live with some amount of duplication between them if we 
really have to split it across header files.


Indeed, this leads to minor differences on top of major+lm. However, I 
think, the overall complexity is lowered.


Nevertheless, let's land the major set of patches and leave 
generalization for the later time. I think, with the addition of the 
next several platforms we will see the drill.


I also thought of similar strategies to generalize like based on 
sub-blocks similar to what you have done but all of these were NAKed 
internally by folks who work on more chipsets / have more visibility 
into the spread of features across chipsets.



First 4 patches clean up the catalog a bit in order to make it more
suitable for refactoring.



These are okay. I will address your follow-up questions about patch (1) 
and lets land these.



Then the next batch of 13 + 5 patches split the hw catalog entries into
per-SoC files.



This part is also fine. But perhaps dont have dpu hw version in the 
file. So just dpu_hw_sm8250.h or dpu_hw_sm8350.h etc.


Having a version makes it easier to compare chipsets (and also to verify 
that feature masks are correct), so I'd like to retain it.





Next 9 patches rework catalog entries, mostly targeting deduplication of
data used by several platforms. At this moment only three pairs (out of
13 devices supported by DPU) are merged. However this part lays out the
ground to ease adding support for new platforms, some of which use the
same configuration as the existing platforms



This is the part I suggest we drop.


Last batch of 7 patches renames existing macros to ease using them while
adding support for new devices.



I have to check this part but perhaps after re-basing based on my 
earlier comment.


Ack, I'll see what I can drop and what is going to be there.

Up to now there were some natural shares, 

Re: [PATCH v3 11/11] Documentation: iio: Document high-speed DMABUF based API

2023-04-03 Thread Paul Cercueil
Hi Jonathan,

Le lundi 03 avril 2023 à 10:05 -0600, Jonathan Corbet a écrit :
> Paul Cercueil  writes:
> 
> One nit:
> 
> > Document the new DMABUF based API.
> > 
> > Signed-off-by: Paul Cercueil 
> > Cc: Jonathan Corbet 
> > Cc: linux-...@vger.kernel.org
> > 
> > ---
> > v2: - Explicitly state that the new interface is optional and is
> >   not implemented by all drivers.
> >     - The IOCTLs can now only be called on the buffer FD returned
> > by
> >   IIO_BUFFER_GET_FD_IOCTL.
> >     - Move the page up a bit in the index since it is core stuff
> > and not
> >   driver-specific.
> > v3: Update the documentation to reflect the new API.
> > ---
> >  Documentation/iio/dmabuf_api.rst | 59
> > 
> >  Documentation/iio/index.rst  |  2 ++
> >  2 files changed, 61 insertions(+)
> >  create mode 100644 Documentation/iio/dmabuf_api.rst
> > 
> > diff --git a/Documentation/iio/dmabuf_api.rst
> > b/Documentation/iio/dmabuf_api.rst
> > new file mode 100644
> > index ..4d70372c7ebd
> > --- /dev/null
> > +++ b/Documentation/iio/dmabuf_api.rst
> > @@ -0,0 +1,59 @@
> > +.. SPDX-License-Identifier: GPL-2.0
> > +
> > +===
> > +High-speed DMABUF interface for IIO
> > +===
> > +
> > +1. Overview
> > +===
> > +
> > +The Industrial I/O subsystem supports access to buffers through a
> > +file-based interface, with read() and write() access calls through
> > the
> > +IIO device's dev node.
> > +
> > +It additionally supports a DMABUF based interface, where the
> > userspace
> > +can attach DMABUF objects (externally created) to a IIO buffer,
> > and
> > +subsequently use them for data transfers.
> > +
> > +A userspace application can then use this interface to share
> > DMABUF
> > +objects between several interfaces, allowing it to transfer data
> > in a
> > +zero-copy fashion, for instance between IIO and the USB stack.
> > +
> > +The userspace application can also memory-map the DMABUF objects,
> > and
> > +access the sample data directly. The advantage of doing this vs.
> > the
> > +read() interface is that it avoids an extra copy of the data
> > between the
> > +kernel and userspace. This is particularly useful for high-speed
> > devices
> > +which produce several megabytes or even gigabytes of data per
> > second.
> > +It does however increase the userspace-kernelspace synchronization
> > +overhead, as the DMA_BUF_SYNC_START and DMA_BUF_SYNC_END IOCTLs
> > have to
> > +be used for data integrity.
> > +
> > +2. User API
> > +===
> > +
> > +As part of this interface, three new IOCTLs have been added. These
> > three
> > +IOCTLs have to be performed on the IIO buffer's file descriptor,
> > +obtained using the IIO_BUFFER_GET_FD_IOCTL() ioctl.
> > +
> > +``IIO_BUFFER_DMABUF_ATTACH_IOCTL(int)``
> > +
> > +
> > +Attach the DMABUF object, identified by its file descriptor, to
> > the IIO
> > +buffer. Returns zero on success, and a negative errno value on
> > error.
> 
> Rather than abusing subsections, this would be better done as a
> description list:
> 
>   IIO_BUFFER_DMABUF_ATTACH_IOCTL(int)
>   Attach the DMABUF object, identified by its file descriptor, to
>   the IIO buffer. Returns zero on success, and a negative errno
>   value on error.

Noted, thanks.

Cheers,
-Paul


[PATCH] radeon: avoid double free in ci_dpm_init()

2023-04-03 Thread Nikita Zhandarovich
There are several calls to ci_dpm_fini() in ci_dpm_init() when there
occur errors in functions like r600_parse_extended_power_table().
This is harmful as it can lead to double free situations: for
instance, r600_parse_extended_power_table() will call for
r600_free_extended_power_table() as will ci_dpm_fini(), both
of which will try to free resources.
Other drivers do not call *_dpm_fini functions from their
respective *_dpm_init calls - neither should ci_dpm_init().

Fix this by removing extra calls to ci_dpm_fini().

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)")
Cc: sta...@vger.kernel.org
Co-developed-by: Natalia Petrova 
Signed-off-by: Nikita Zhandarovich 

---
 drivers/gpu/drm/radeon/ci_dpm.c | 20 +---
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 8ef25ab305ae..7b77d4c93f1d 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -5677,28 +5677,20 @@ int ci_dpm_init(struct radeon_device *rdev)
pi->pcie_lane_powersaving.min = 16;
 
	ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state);
-   if (ret) {
-   ci_dpm_fini(rdev);
+   if (ret)
return ret;
-   }
 
ret = r600_get_platform_caps(rdev);
-   if (ret) {
-   ci_dpm_fini(rdev);
+   if (ret)
return ret;
-   }
 
ret = r600_parse_extended_power_table(rdev);
-   if (ret) {
-   ci_dpm_fini(rdev);
+   if (ret)
return ret;
-   }
 
ret = ci_parse_power_table(rdev);
-   if (ret) {
-   ci_dpm_fini(rdev);
+   if (ret)
return ret;
-   }
 
pi->dll_default_on = false;
pi->sram_end = SMC_RAM_END;
@@ -5749,10 +5741,8 @@ int ci_dpm_init(struct radeon_device *rdev)
kcalloc(4,
sizeof(struct radeon_clock_voltage_dependency_entry),
GFP_KERNEL);
-   if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
-   ci_dpm_fini(rdev);
+   if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries)
return -ENOMEM;
-   }
rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;


RE: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic

2023-04-03 Thread Srivatsa, Anusha



> -Original Message-
> From: De Marchi, Lucas 
> Sent: Wednesday, March 29, 2023 8:46 PM
> To: Srivatsa, Anusha 
> Cc: intel...@lists.freedesktop.org; Harrison, John C
> ; Ceraolo Spurio, Daniele
> ; dri-devel@lists.freedesktop.org; Daniel
> Vetter ; Dave Airlie 
> Subject: Re: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic
> 
> On Tue, Mar 28, 2023 at 04:31:13PM -0700, Anusha Srivatsa wrote:
> >
> >
> >> -Original Message-
> >> From: De Marchi, Lucas 
> >> Sent: Thursday, March 23, 2023 10:18 PM
> >> To: intel...@lists.freedesktop.org
> >> Cc: Srivatsa, Anusha ; Harrison, John C
> >> ; Ceraolo Spurio, Daniele
> >> ; dri-devel@lists.freedesktop.org;
> >> Daniel Vetter ; Dave Airlie
> >> ; De Marchi, Lucas 
> >> Subject: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic
> >>
> >> Update the logic to autoselect GuC/HuC for the platforms with the
> >> following
> >> improvements:
> >>
> >> - Document what is the firmware file that is expected to be
> >>   loaded and what is checked from blob headers
> >> - When the platform is under force-probe it's desired to enforce
> >>   the full-version requirement so the correct firmware is used
> >>   before widespread adoption and backward-compatibility
> >>
> >Extra line ^
> >
> >>   commitments
> >> - Directory from which we expect firmware blobs to be available in
> >>   upstream linux-firmware repository depends on the platform: for
> >>   the ones supported by i915 it uses the i915/ directory, but the ones
> >>   expected to be supported by xe, it's on the xe/ directory. This
> >>   means that for platforms in the intersection, the firmware is
> >>   loaded from a different directory, but that is not much important
> >>   in the firmware repo and it avoids firmware duplication.
> >>
> >> - Make the table with the firmware definitions clearly state the
> >>   versions being expected. Now with macros to select the version it's
> >>   possible to choose between full-version/major-version for GuC and
> >>   full-version/no-version for HuC. These are similar to the macros used
> >>   in i915, but implemented in a slightly different way to avoid
> >>   duplicating the macros for each firmware/type and functionality,
> >>   besides adding the support for different directories.
> >>
> >> - There is no check added regarding force-probe since xe should
> >>   reuse the same firmware files published for i915 for past
> >>   platforms. This can be improved later with additional
> >>   kunit checking against a hardcoded list of platforms that
> >Extra line here.
> >
> >>   falls in this category.
> >> - As mentioned in the TODO, the major version fallback was not
> >>   implemented before as currently each platform only supports one
> >>   major. That can be easily added later.
> >>
> >> - GuC version for MTL and PVC were updated to 70.6.4, using the exact
> >>   full version, while the
> >>
> >> After this the GuC firmware used by PVC changes to pvc_guc_70.5.2.bin
> >> since it's using a file not published yet.
> >>
> >> Signed-off-by: Lucas De Marchi 
> >> ---
> >>  drivers/gpu/drm/xe/xe_uc_fw.c   | 315 +---
> >>  drivers/gpu/drm/xe/xe_uc_fw.h   |   2 +-
> >>  drivers/gpu/drm/xe/xe_uc_fw_types.h |   7 +
> >>  3 files changed, 204 insertions(+), 120 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c
> >> b/drivers/gpu/drm/xe/xe_uc_fw.c index 174c42873ebb..653bc3584cc5
> >> 100644
> >> --- a/drivers/gpu/drm/xe/xe_uc_fw.c
> >> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c
> >> @@ -17,6 +17,137 @@
> >>  #include "xe_mmio.h"
> >>  #include "xe_uc_fw.h"
> >>
> >> +/*
> >> + * List of required GuC and HuC binaries per-platform. They must be
> >> +ordered
> >> + * based on platform, from newer to older.
> >> + *
> >> + * Versioning follows the guidelines from
> >> + * Documentation/driver-api/firmware/firmware-usage-guidelines.rst.
> >> +There is a
> >> + * distinction for platforms being officially supported by the driver or 
> >> not.
> >> + * Platforms not available publicly or not yet officially supported
> >> +by the
> >> + * driver (under force-probe), use the mmp_ver(): the firmware
> >> +autoselect logic
> >> + * will select the firmware from disk with filename that matches the
> >> +full
> >> + * "mpp version", i.e. major.minor.patch. mmp_ver() should only be
> >> +used for
> >> + * this case.
> >> + *
> >> + * For platforms officially supported by the driver, the filename
> >> +always only
> >> + * ever contains the major version (GuC) or no version at all (HuC).
> >> + *
> >> + * After loading the file, the driver parses the versions embedded in the 
> >> blob.
> >> + * The major version needs to match a major version supported by the
> >> +driver (if
> >> + * any). The minor version is also checked and a notice emitted to
> >> +the log if
> >> + * the version found is smaller than the version wanted. This is
> >> +done only for
> >> + * informational purposes so users may have a chance to 

Re: [PATCH v2 1/5] drm/tests: Test drm_rect_intersect()

2023-04-03 Thread Arthur Grillo Queiroz Cabral



On 03/04/23 12:33, Maíra Canal wrote:
> Hi Arthur,
> 
> On 3/27/23 10:38, Arthur Grillo wrote:
>> Insert test for the drm_rect_intersect() function, it also create a
>> helper for comparing drm_rects more easily.
>>
>> Signed-off-by: Arthur Grillo 
>> ---
>>   drivers/gpu/drm/tests/drm_rect_test.c | 139 ++
>>   1 file changed, 139 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/tests/drm_rect_test.c 
>> b/drivers/gpu/drm/tests/drm_rect_test.c
>> index e9809ea32696..3654c0be3d6b 100644
>> --- a/drivers/gpu/drm/tests/drm_rect_test.c
>> +++ b/drivers/gpu/drm/tests/drm_rect_test.c
>> @@ -9,6 +9,17 @@
>> #include 
>>   +#include 
> 
> Is this include really needed? I was able to compile without it.
> 
>> +
>> +static void drm_rect_compare(struct kunit *test, const struct drm_rect *r,
>> + const struct drm_rect *expected)
>> +{
>> +KUNIT_EXPECT_EQ(test, r->x1, expected->x1);
> 
> Maybe it would be nice to have a message here that shows the current x1
> and the expected x1. Same for the other dimensions.
> 

Doesn't KUnit already output this information when the values don't
match?

>> +KUNIT_EXPECT_EQ(test, r->y1, expected->y1);
>> +KUNIT_EXPECT_EQ(test, drm_rect_width(r), drm_rect_width(expected));
>> +KUNIT_EXPECT_EQ(test, drm_rect_height(r), drm_rect_height(expected));
>> +}
>> +
>>   static void drm_test_rect_clip_scaled_div_by_zero(struct kunit *test)
>>   {
>>   struct drm_rect src, dst, clip;
>> @@ -196,11 +207,139 @@ static void 
>> drm_test_rect_clip_scaled_signed_vs_unsigned(struct kunit *test)
>>   KUNIT_EXPECT_FALSE_MSG(test, drm_rect_visible(), "Source should 
>> not be visible\n");
>>   }
>>   +struct drm_rect_intersect_case {
>> +const char *description;
>> +struct drm_rect r1, r2;
>> +bool should_be_visible;
>> +struct drm_rect expected_intersection;
>> +};
>> +
>> +static const struct drm_rect_intersect_case drm_rect_intersect_cases[] = {
>> +{
>> +.description = "top-left X bottom-right",
>> +.r1 = DRM_RECT_INIT(1, 1, 2, 2),
>> +.r2 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 1, 1, 1),
>> +},
>> +{
>> +.description = "top-right X bottom-left",
>> +.r1 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.r2 = DRM_RECT_INIT(1, -1, 2, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1),
>> +},
>> +{
>> +.description = "bottom-left X top-right",
>> +.r1 = DRM_RECT_INIT(1, -1, 2, 2),
>> +.r2 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1),
>> +},
>> +{
>> +.description = "bottom-right X top-left",
>> +.r1 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.r2 = DRM_RECT_INIT(1, 1, 2, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 1, 1, 1),
>> +},
>> +{
>> +.description = "right X left",
>> +.r1 = DRM_RECT_INIT(0, 0, 2, 1),
>> +.r2 = DRM_RECT_INIT(1, 0, 3, 1),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1),
>> +},
>> +{
>> +.description = "left X right",
>> +.r1 = DRM_RECT_INIT(1, 0, 3, 1),
>> +.r2 = DRM_RECT_INIT(0, 0, 2, 1),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1),
>> +},
>> +{
>> +.description = "up X bottom",
>> +.r1 = DRM_RECT_INIT(0, 0, 1, 2),
>> +.r2 = DRM_RECT_INIT(0, -1, 1, 3),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(0, 0, 1, 2),
>> +},
>> +{
>> +.description = "bottom X up",
>> +.r1 = DRM_RECT_INIT(0, -1, 1, 3),
>> +.r2 = DRM_RECT_INIT(0, 0, 1, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(0, 0, 1, 2),
>> +},
>> +{
>> +.description = "touching corner",
>> +.r1 = DRM_RECT_INIT(0, 0, 1, 1),
>> +.r2 = DRM_RECT_INIT(1, 1, 2, 2),
>> +.should_be_visible = false,
>> +.expected_intersection = DRM_RECT_INIT(1, 1, 0, 0),
>> +},
>> +{
>> +.description = "touching side",
>> +.r1 = DRM_RECT_INIT(0, 0, 1, 1),
>> +.r2 = DRM_RECT_INIT(1, 0, 1, 1),
>> +.should_be_visible = false,
>> +.expected_intersection = DRM_RECT_INIT(1, 0, 0, 1),
>> +},
>> +{
>> +.description = "equal rects",
>> +.r1 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.r2 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.should_be_visible = true,
>> +.expected_intersection = DRM_RECT_INIT(0, 0, 2, 2),
>> +},
>> +{
>> +.description = "inside another",
>> +.r1 = DRM_RECT_INIT(0, 0, 2, 2),
>> +.r2 = DRM_RECT_INIT(1, 1, 1, 1),
>> +  

Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog

2023-04-03 Thread Abhinav Kumar




On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote:

This huge series attempts to restructure the DPU HW catalog into a
manageable and reviewable data set. In order to ease review and testing
I merged all the necessary fixes into this series. Also I cherry-picked
& slightly fixed Konrad's patch adding size to the SSPP and INTF macros.



I had to first dig up some history about why dpu catalog grew so much in 
the first place before starting this review. When the DPU driver first 
landed (which pre-dates my work in upstream), it looks like it followed 
mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets 
which use DPU kept growing, this is becoming a burden.


As everyone knows, downstream follows a devicetree model for the dpu 
hardware and that should have always been the case. Perhaps in the last 
2-3 years more time could have been spent on standardizing the bindings 
used for hw blocks in order to maintain a less hard-coded catalog file 
and more in the device tree.


Then the catalog would have just been a place to parse the device tree, 
set the feature capability based on chipset (refer 
_sde_hardware_pre_caps). That way offsets , number of blocks and the 
blocks themselves still come from the device tree but perhaps some 
specific features are at SOC level for which the catalog still stays.


That being said, I thought of different strategies even before the 
review but two issues prevented me from suggesting those ideas (one of 
which I am seeing even here , which I am going to suggest below and also 
suggest why it wont work).


1) For the same DPU major/minor version, some features might get dropped 
or even get added with different SOCs as overall the system capabilities 
might differ like number of SSPPs or memory footprint of the SOC etc.


So there is no good way right now to generalize any dpu catalog or to 
tie it with a DPU major/minor version. We will have to stick with a 
per-SOC model.


This is what led me to not pursue that route.

2) For the same DPU major/minor version, even if core-DPU is same (in 
terms of SSPP, DSPP etc), the number of interfaces can change. So again 
no room to generalize same DPU hw version.


3) For the same reason as (1) and (2), I think the de-duplication 
strategy used in this series is not correct. The idea of 
dpu_hw_version_num_layer_mixer is just not scalable as I dont know how 
many variants that will lead to. So it seems like just an attempt to 
de-duplicate which perhaps works today for existing dpu chipsets in 
upstream but by no means scalable. Lets go ahead with per-SOC catalog 
file but lets live with some amount of duplication between them if we 
really have to split it across header files.


I also thought of similar strategies to generalize like based on 
sub-blocks similar to what you have done but all of these were NAKed 
internally by folks who work on more chipsets / have more visibility 
into the spread of features across chipsets.



First 4 patches clean up the catalog a bit in order to make it more
suitable for refactoring.



These are okay. I will address your follow-up questions about patch (1) 
and let's land these.



Then the next batch of 13 + 5 patches split the hw catalog entries into
per-SoC files.



This part is also fine. But perhaps don't have the dpu hw version in the 
file. So just dpu_hw_sm8250.h or dpu_hw_sm8350.h etc.



Next 9 patches rework catalog entries, mostly targeting deduplication of
data used by several platforms. At this moment only three pairs (out of
13 devices supported by DPU) are merged. However this part lays out the
ground to ease adding support for new platforms, some of which use the
same configuration as the existing platforms



This is the part I suggest we drop.


Last batch of 7 patches renames existing macros to ease using them while
adding support for new devices.



I have to check this part but perhaps after re-basing based on my 
earlier comment.



This pile of patches is submitted in a single batch to allow one to
observe the final goal of the cleanup which otherwise might be hard to
assess.


Changes since v2:
- Fixed sc8280xp SSPP size to 0x2ac
- Rebased on top of msm-next-lumag, dropped merged patches

Changes since v1:
- Picked up Konrad's patch
- Picked up dependencies into the main series
- Moved qseed3lite vs qseed4 patches into the fixes part
- Fixed sm6115 in a similar manner.

Dmitry Baryshkov (37):
   drm/msm/dpu: constify DSC data structures
   drm/msm/dpu: mark remaining pp data as const
   drm/msm/dpu: move UBWC/memory configuration to separate struct
   drm/msm/dpu: split SM8550 catalog entry to the separate file
   drm/msm/dpu: split SM8450 catalog entry to the separate file
   drm/msm/dpu: split SC8280XP catalog entry to the separate file
   drm/msm/dpu: split SC7280 catalog entry to the separate file
   drm/msm/dpu: split SM8350 catalog entry to the separate file
   drm/msm/dpu: split SM6115 catalog entry to the separate file
   drm/msm/dpu: split QCM2290 

Re: [PATCH v2] drm/vblank: Fix for drivers that do not drm_vblank_init()

2023-04-03 Thread Rob Clark
On Mon, Apr 3, 2023 at 9:25 AM Nathan Chancellor  wrote:
>
> On Mon, Apr 03, 2023 at 09:03:14AM -0700, Rob Clark wrote:
> > From: Rob Clark 
> >
> > This should fix a crash that was reported on ast (and possibly other
> > drivers which do not initialize vblank).
> >
> >fbcon: Taking over console
> >Unable to handle kernel NULL pointer dereference at virtual address 
> > 0074
> >Mem abort info:
> >  ESR = 0x9604
> >  EC = 0x25: DABT (current EL), IL = 32 bits
> >  SET = 0, FnV = 0
> >  EA = 0, S1PTW = 0
> >  FSC = 0x04: level 0 translation fault
> >Data abort info:
> >  ISV = 0, ISS = 0x0004
> >  CM = 0, WnR = 0
> >user pgtable: 4k pages, 48-bit VAs, pgdp=080009d16000
> >[0074] pgd=, p4d=
> >Internal error: Oops: 9604 [#1] SMP
> >Modules linked in: ip6table_nat tun nft_fib_inet nft_fib_ipv4 
> > nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 
> > nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 
> > nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr sunrpc binfmt_misc 
> > vfat fat xfs snd_usb_audio snd_hwdep snd_usbmidi_lib snd_seq snd_pcm 
> > snd_rawmidi snd_timer snd_seq_device snd soundcore joydev mc ipmi_ssif 
> > ipmi_devintf ipmi_msghandler arm_spe_pmu arm_cmn arm_dsu_pmu arm_dmc620_pmu 
> > cppc_cpufreq loop zram crct10dif_ce polyval_ce nvme polyval_generic 
> > ghash_ce sbsa_gwdt igb nvme_core ast nvme_common i2c_algo_bit xgene_hwmon 
> > gpio_dwapb scsi_dh_rdac scsi_dh_emc scsi_dh_alua ip6_tables ip_tables 
> > dm_multipath fuse
> >CPU: 12 PID: 469 Comm: kworker/12:1 Not tainted 
> > 6.3.0-rc2-8-gd39e48ca80c0 #1
> >Hardware name: ADLINK AVA Developer Platform/AVA Developer Platform, 
> > BIOS TianoCore 2.04.100.07 (SYS: 2.06.20220308) 09/08/2022
> >Workqueue: events fbcon_register_existing_fbs
> >pstate: 2049 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> >pc : drm_crtc_next_vblank_start+0x2c/0x98
> >lr : drm_atomic_helper_wait_for_fences+0x90/0x240
> >sp : 8d583960
> >x29: 8d583960 x28: 07ff8fc187b0 x27: 
> >x26: 07ff99c08c00 x25: 0038 x24: 07ff99c0c000
> >x23: 0001 x22: 0038 x21: 
> >x20: 07ff9640a280 x19:  x18: 
> >x17:  x16: b24d2eece1c0 x15: 003038303178
> >x14: 303239310048 x13:  x12: 
> >x11:  x10:  x9 : b24d2eeeaca0
> >x8 : 8d583628 x7 : 080077783000 x6 : 
> >x5 : 8d584000 x4 : 07ff99c0c000 x3 : 0130
> >x2 :  x1 : 8d5839c0 x0 : 07ff99c0cc08
> >Call trace:
> > drm_crtc_next_vblank_start+0x2c/0x98
> > drm_atomic_helper_wait_for_fences+0x90/0x240
> > drm_atomic_helper_commit+0xb0/0x188
> > drm_atomic_commit+0xb0/0xf0
> > drm_client_modeset_commit_atomic+0x218/0x280
> > drm_client_modeset_commit_locked+0x64/0x1a0
> > drm_client_modeset_commit+0x38/0x68
> > __drm_fb_helper_restore_fbdev_mode_unlocked+0xb0/0xf8
> > drm_fb_helper_set_par+0x44/0x88
> > fbcon_init+0x1e0/0x4a8
> > visual_init+0xbc/0x118
> > do_bind_con_driver.isra.0+0x194/0x3a0
> > do_take_over_console+0x50/0x70
> > do_fbcon_takeover+0x74/0xf8
> > do_fb_registered+0x13c/0x158
> > fbcon_register_existing_fbs+0x78/0xc0
> > process_one_work+0x1ec/0x478
> > worker_thread+0x74/0x418
> > kthread+0xec/0x100
> > ret_from_fork+0x10/0x20
> >Code: f944 b9409013 f940a082 9ba30a73 (b9407662)
> >---[ end trace  ]---
> >
> > v2: Use drm_dev_has_vblank()
> >
> > Reported-by: Nathan Chancellor 
> > Fixes: d39e48ca80c0 ("drm/atomic-helper: Set fence deadline for vblank")
> > Signed-off-by: Rob Clark 
> > Reviewed-by: Thomas Zimmermann 
>
> Still appears to work for me:
>
> Tested-by: Nathan Chancellor 

Thanks for confirming

BR,
-R

>
> > ---
> >  drivers/gpu/drm/drm_vblank.c | 10 --
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
> > index 299fa2a19a90..877e2067534f 100644
> > --- a/drivers/gpu/drm/drm_vblank.c
> > +++ b/drivers/gpu/drm/drm_vblank.c
> > @@ -996,10 +996,16 @@ EXPORT_SYMBOL(drm_crtc_vblank_count_and_time);
> >  int drm_crtc_next_vblank_start(struct drm_crtc *crtc, ktime_t *vblanktime)
> >  {
> >   unsigned int pipe = drm_crtc_index(crtc);
> > - struct drm_vblank_crtc *vblank = >dev->vblank[pipe];
> > - struct drm_display_mode *mode = >hwmode;
> > + struct drm_vblank_crtc *vblank;
> > + struct drm_display_mode *mode;
> >   u64 vblank_start;
> >
> > + if (!drm_dev_has_vblank(crtc->dev))
> > + return -EINVAL;
> > +
> > + 

Re: [PATCH v3] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit

2023-04-03 Thread Rodrigo Vivi
On Fri, Mar 31, 2023 at 07:41:46PM -0700, Ashutosh Dixit wrote:
> On ATSM the PL1 limit is disabled at power up. The previous uapi assumed
> that the PL1 limit is always enabled and therefore did not have a notion of
> a disabled PL1 limit. This results in erroneous PL1 limit values when the
> PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit
> was previously shown as 0 which means a low PL1 limit whereas the limit
> being disabled actually implies a high effective PL1 limit value.
> 
> To get round this problem, the PL1 limit uapi is expanded to include a
> special value 0 to designate a disabled PL1 limit. A read value of 0 means
> that the PL1 power limit is disabled, writing 0 disables the limit.
> 
> The link between this patch and the bugs mentioned below is as follows:
> * Because on ATSM the PL1 power limit is disabled on power up and there
>   were no means to enable it, we previously implemented the means to
>   enable the limit when the PL1 hwmon entry (power1_max) was written to.
> * Now there is a IGT igt@i915_hwmon@hwmon_write which (a) reads orig value
>   from all hwmon sysfs  (b) does a bunch of random writes and finally (c)
>   restores the orig value read. On ATSM since the orig value is 0, when
>   the IGT restores the 0 value, the PL1 limit is now enabled with a value
>   of 0.
> * PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to
>   100 MHz. This causes GuC FW load and several IGT's to start timing out
>   and gives rise to these Intel CI bugs. After this patch, writing 0 would
>   disable the PL1 limit instead of enabling it, avoiding the freq drop
>   issue.
> 
> v2: Add explanation for bugs mentioned below (Rodrigo)
> v3: Eliminate race during PL1 disable and verify (Tvrtko)
> Change return to -ENODEV if verify fails (Tvrtko)
> 
> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062
> Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060
> Signed-off-by: Ashutosh Dixit 
> Reviewed-by: Rodrigo Vivi 

pushed to drm-intel-next

> ---
>  .../ABI/testing/sysfs-driver-intel-i915-hwmon |  4 ++-
>  drivers/gpu/drm/i915/i915_hwmon.c | 26 +++
>  2 files changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
> b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> index 2d6a472eef885..8d7d8f05f6cd0 100644
> --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> @@ -14,7 +14,9 @@ Description:RW. Card reactive sustained  (PL1/Tau) 
> power limit in microwatts.
>  
>   The power controller will throttle the operating frequency
>   if the power averaged over a window (typically seconds)
> - exceeds this limit.
> + exceeds this limit. A read value of 0 means that the PL1
> + power limit is disabled, writing 0 disables the
> + limit. Writing values > 0 will enable the power limit.
>  
>   Only supported for particular Intel i915 graphics platforms.
>  
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c 
> b/drivers/gpu/drm/i915/i915_hwmon.c
> index 596dd2c070106..8e7dccc8d3a0e 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 
> attr, int chan)
>   }
>  }
>  
> +#define PL1_DISABLE 0
> +
>  /*
>   * HW allows arbitrary PL1 limits to be set but silently clamps these values 
> to
>   * "typical but not guaranteed" min/max values in rg.pkg_power_sku. Follow 
> the
> @@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val)
>   intel_wakeref_t wakeref;
>   u64 r, min, max;
>  
> + /* Check if PL1 limit is disabled */
> + with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
> + if (!(r & PKG_PWR_LIM_1_EN)) {
> + *val = PL1_DISABLE;
> + return 0;
> + }
> +
>   *val = hwm_field_read_and_scale(ddat,
>   hwmon->rg.pkg_rapl_limit,
>   PKG_PWR_LIM_1,
> @@ -385,8 +395,24 @@ static int
>  hwm_power_max_write(struct hwm_drvdata *ddat, long val)
>  {
>   struct i915_hwmon *hwmon = ddat->hwmon;
> + intel_wakeref_t wakeref;
>   u32 nval;
>  
> + /* Disable PL1 limit and verify, because the limit cannot be disabled 
> on all platforms */
> + if (val == PL1_DISABLE) {
> + mutex_lock(>hwmon_lock);
> + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) {
> + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit,
> +  PKG_PWR_LIM_1_EN, 0);
> + nval = intel_uncore_read(ddat->uncore, 
> hwmon->rg.pkg_rapl_limit);
> + }
> + 

Re: [PATCH v5 0/8] QAIC accel driver

2023-04-03 Thread Jeffrey Hugo

On 3/27/2023 9:54 AM, Jeffrey Hugo wrote:

This series introduces a driver under the accel subsystem (QAIC -
Qualcomm AIC) for the Qualcomm Cloud AI 100 product (AIC100).  AIC100 is
a PCIe adapter card that hosts a dedicated machine learning inference
accelerator.

The previous version (v4) can be found at:
https://lore.kernel.org/all/1679325074-5494-1-git-send-email-quic_jh...@quicinc.com/


Looks like things have been silent on this revision and we have a number 
of review tags already.  Seems like this series is ready for merge.


I'd like to see this queued for 6.4 if possible.  Given that we are at 
6.3-rc5, it seems like this would need to be merged now(ish) to make 6.4.


Jacek, since you have commit permissions in drm-misc and are an active 
Accel maintainer, I wonder if it would be appropriate for you to merge 
this series to drm-misc.  Thoughts?


-Jeff


Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level

2023-04-03 Thread Ville Syrjälä
On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote:
> > Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
> >
> > On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote:
> >> From: Fei Yang 
> >> 
> >> Currently the KMD is using enum i915_cache_level to set caching policy for
> >> buffer objects. This is flaky because the PAT index which really controls
> >> the caching behavior in PTE has far more levels than what's defined in the
> >> enum.
> >
> > Then just add more enum values.
> 
> That would be really messy because PAT index is platform dependent, you would
> have to maintain many tables for the translation.
> 
> > 'pat_index' is absolutely meaningless to the reader, it's just an
> > arbitrary number. Whereas 'cache_level' conveys how the thing is
> > actually going to get used and thus how the caches should behave.
> 
> By design UMD's understand PAT index. Both UMD and KMD should stand on the
> same ground, the Bspec, to avoid any potential ambiguity.
> 
> >> In addition, the PAT index is platform dependent, having to translate
> >> between i915_cache_level and PAT index is not reliable,
> >
> >If it's not realiable then the code is clearly broken.
> 
> Perhaps the word "reliable" is a bit confusing here. What I really meant to
> say is 'difficult to maintain', or 'error-prone'.
> 
> >> and makes the code more complicated.
> >
> > You have to translate somewhere anyway. Looks like you're now adding
> > translations the other way (pat_index->cache_level). How is that better?
> 
> No, there is no pat_index->cache_level translation.

i915_gem_object_has_cache_level() is exactly that. And that one
does look actually fragile since it assumes only one PAT index
maps to each cache level. So if the user picks any other pat_index
anything using i915_gem_object_has_cache_level() is likely to
do the wrong thing.

If we do switch to pat_index then I think cache_level should
be made a purely uapi concept, and all the internal code should
instead be made to query various aspects of the caching behaviour
of the current pat_index (eg. is LLC caching enabled, and thus
do I need to clflush?).

-- 
Ville Syrjälä
Intel


[PATCH v10 2/2] drm: add kms driver for loongson display controller

2023-04-03 Thread Sui Jingfeng
Loongson display controller IP has been integrated in both Loongson north
bridge chipsets(ls7a1000/ls7a2000) and SoCs(ls2k1000/ls2k2000), it has been
included in Loongson self-made BMC products.

This display controller is a PCI device in all of the chips mentioned; it has
two display pipes which support primary planes and cursor plane. For the
DC in ls7a1000 and ls2k1000, each display pipe has a DVO output interface
which provides RGB888 signals, vertical & horizontal synchronisations and
the pixel clock. Each CRTC is able to support 1920x1080@60Hz, the maximum
resolution is 2048x2048 according to the hardware spec.

For the DC in LS7A2000, each display pipe is equipped with a built-in HDMI
encoder which is compliant with the HDMI 1.4 specification, thus it supports
3840x2160@30Hz. The first display pipe is also equipped with a transparent
vga encoder which is parallel with the HDMI encoder. The DC in LS7A2000 is
more complete compared with the one in the older chips; besides the above
features, it has two hardware cursors, two hardware vblank counters and two
scanout position recorder units. It also supports a tiled framebuffer format
which can be used to scan out the framebuffer rendered by the LoongGPU directly.

 v1 -> v2:
  1) Use hpd status reg when polling for ls7a2000
  2) Fix all warnings emerged when compile with W=1

 v2 -> v3:
  1) Add COMPILE_TEST in Kconfig and make the driver off by default
  2) Alphabetical sorting headers (Thomas)
  3) Untangle register access functions as much as possible (Thomas)
  4) Switch to TTM based memory manager and prefer cached mapping
 for Loongson SoC (Thomas)
  5) Add chip id detection method, now all models are distinguishable.
  6) Revise builtin HDMI phy driver, nearly all main stream mode
 below 4K@30Hz is tested, this driver supported these mode very
 well including clone display mode and extend display mode.

 v3 -> v4:
  1) Quickly fix a small mistake.

 v4 -> v5:
  1) Drop potential support for Loongson 2K series SoC temporary,
 this part should be resend with the DT binding patch in the future.
  2) Add per display pipe debugfs support to the builtin HDMI encoder.
  3) Rewrite atomic_update() for hardware cursors plane(Thomas)
  4) Rewrite encoder and connector initialization part, untangle it
 according to the chip(Thomas).

 v5 -> v6:
  1) Remove stray code which didn't get used, say lsdc_of_get_reserved_ram
  2) Fix all typos I could found, make sentences and code more readable
  3) Untangle lsdc_hdmi*_connector_detect() function according to the pipe
  4) After a serious consideration, we rename this driver as loongson.
 Because we also have drivers toward the LoongGPU IP in LS7A2000 and
 LS2K2000. Besides, there are also drivers about the external encoder,
 HDMI audio driver and vbios support etc. This patch only provide DC
 driver part, my teammate Li Yi believe that loongson will be more
 suitable for loongson graphics than lsdc in the long run.

 loongson.ko = LSDC + LoongGPU + encoders driver + vbios/DT ...

  v6 -> v7:
   1) Add prime support, self-sharing is works. sharing buffer with etnaviv
  is also tested, and it works with limitations.
   2) Implement buffer objects tracking with list_head.
   3) S3(sleep to RAM) is tested on ls3a5000+ls7a2000 evb and it works.
   4) Rewrite lsdc_bo_move, since ttm core stop allocating resources
  during BO creation. Patch V1 ~ V6 of this series no longer works
  on latest kernel. Thus, we send V7 to revival them.

  v7 -> v8:
   1) Fixed compile warnings on the 32-bit platform, compiled with W=1
   2) Revise lsdc_bo_gpu_offset() and minor cleanup
   3) Pageflip tested on the virtual terminal with following commands

  modetest -M loongson -s 32:1920x1080 -v
  modetest -M loongson -s 34:1920x1080 -v -F tiles

 It works like a charm; when running the pageflip test with a dual screen
 configuration, another two additional bo created by the modetest
 emerged, VRAM usage up to 40+MB, well we have at least 64MB, still
 enough.

 # cat bos

 bo[]: size: 8112kB VRAM
 bo[0001]: size:   16kB VRAM
 bo[0002]: size:   16kB VRAM
 bo[0003]: size:16208kB VRAM
 bo[0004]: size: 8112kB VRAM
 bo[0005]: size: 8112kB VRAM

  v8 -> v9:
   1) Select I2C and I2C_ALGOBIT in Kconfig and should depend on MMU.
   2) Using pci_get_domain_bus_and_slot to get the GPU device.
   3) Other minor improvements.

   Those patches are tested on ls3a5000 + ls7a1000 CRB, ls3a5000 + ls7a2000
   evb, and lemote a1901 board(ls3a4000 + ls7a1000). On loongson mips CPU,
   the write combine support should be enabled, to get a decent performance
   for writing framebuffer data to the VRAM.

  v9 -> v10:
  1) Revise lsdc_drm_freeze() to implement S3 completely and correctly.
 I suddenly realized that a pinned buffer cannot move and VRAM loses
 power when sleeping to RAM. Thus, the data in a buffer which is pinned
 

[PATCH v10 1/2] MAINTAINERS: add maintainers for DRM LOONGSON driver

2023-04-03 Thread Sui Jingfeng
 This patch adds myself as a maintainer for the DRM Loongson driver

Signed-off-by: Sui Jingfeng 
---
 MAINTAINERS | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9736e04d3bd3..d258c5b54407 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6919,6 +6919,13 @@ T:   git git://anongit.freedesktop.org/drm/drm-misc
 F: drivers/gpu/drm/lima/
 F: include/uapi/drm/lima_drm.h
 
+DRM DRIVERS FOR LOONGSON
+M: Sui Jingfeng 
+L: dri-devel@lists.freedesktop.org
+S: Supported
+T: git git://anongit.freedesktop.org/drm/drm-misc
+F: drivers/gpu/drm/loongson/
+
 DRM DRIVERS FOR MEDIATEK
 M: Chun-Kuang Hu 
 M: Philipp Zabel 
-- 
2.25.1



RE: [PATCH 5/7] drm/i915: use pat_index instead of cache_level

2023-04-03 Thread Yang, Fei
> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
>
> On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote:
>> From: Fei Yang 
>> 
>> Currently the KMD is using enum i915_cache_level to set caching policy for
>> buffer objects. This is flaky because the PAT index which really controls
>> the caching behavior in PTE has far more levels than what's defined in the
>> enum.
>
> Then just add more enum values.

That would be really messy because PAT index is platform dependent, you would
have to maintain many tables for the translation.

> 'pat_index' is absolutely meaningless to the reader, it's just an
> arbitrary number. Whereas 'cache_level' conveys how the thing is
> actually going to get used and thus how the caches should behave.

By design UMD's understand PAT index. Both UMD and KMD should stand on the
same ground, the Bspec, to avoid any potential ambiguity.

>> In addition, the PAT index is platform dependent, having to translate
>> between i915_cache_level and PAT index is not reliable,
>
>If it's not realiable then the code is clearly broken.

Perhaps the word "reliable" is a bit confusing here. What I really meant to
say is 'difficult to maintain', or 'error-prone'.

>> and makes the code more complicated.
>
> You have to translate somewhere anyway. Looks like you're now adding
> translations the other way (pat_index->cache_level). How is that better?

No, there is no pat_index->cache_level translation.
There is only a small table for cache_level->pat_index translation. That is
added for the convenience of KMD coding, no exposure to UMD.

-Fei

>> 
>> >From UMD's perspective there is also a necessity to set caching policy for
>> performance fine tuning. It's much easier for the UMD to directly use PAT
>> index because the behavior of each PAT index is clearly defined in Bspec.
>> Haivng the abstracted i915_cache_level sitting in between would only cause
>> more ambiguity.
>> 
>> For these reasons this patch replaces i915_cache_level with PAT index. Also
>> note, the cache_level is not completely removed yet, because the KMD still
>> has the need of creating buffer objects with simple cache settings such as
>> cached, uncached, or writethrough. For these simple cases, using cache_level
>> would help simplify the code.
>> 
>> Cc: Chris Wilson 
>> Cc: Matt Roper 
>> Signed-off-by: Fei Yang 
>> Reviewed-by: Andi Shyti 
>> ---
>>  drivers/gpu/drm/i915/display/intel_dpt.c  | 12 +--
>>  drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++
>>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++-
>>  drivers/gpu/drm/i915/gem/i915_gem_mman.c  |  3 +-
>>  drivers/gpu/drm/i915/gem/i915_gem_object.c| 39 -
>>  drivers/gpu/drm/i915/gem/i915_gem_object.h|  4 +
>>  .../gpu/drm/i915/gem/i915_gem_object_types.h  | 18 ++--
>>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  4 +-
>>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 16 ++--
>>  .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>>  .../drm/i915/gem/selftests/i915_gem_migrate.c |  2 +-
>>  .../drm/i915/gem/selftests/i915_gem_mman.c|  2 +-
>>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  | 10 ++-
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 76 -
>>  drivers/gpu/drm/i915/gt/gen8_ppgtt.h  |  3 +-
>>  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 82 +--
>>  drivers/gpu/drm/i915/gt/intel_gtt.h   | 20 ++---
>>  drivers/gpu/drm/i915/gt/intel_migrate.c   | 47 ++-
>>  drivers/gpu/drm/i915/gt/intel_migrate.h   | 13 ++-
>>  drivers/gpu/drm/i915/gt/intel_ppgtt.c |  6 +-
>>  drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++-
>>  drivers/gpu/drm/i915/gt/selftest_reset.c  |  8 +-
>>  drivers/gpu/drm/i915/gt/selftest_timeline.c   |  2 +-
>>  drivers/gpu/drm/i915/gt/selftest_tlb.c|  4 +-
>>  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  | 10 ++-
>>  drivers/gpu/drm/i915/i915_debugfs.c   | 55 ++---
>>  drivers/gpu/drm/i915/i915_gem.c   | 16 +++-
>>  drivers/gpu/drm/i915/i915_gpu_error.c |  8 +-
>>  drivers/gpu/drm/i915/i915_vma.c   | 16 ++--
>>  drivers/gpu/drm/i915/i915_vma.h   |  2 +-
>>  drivers/gpu/drm/i915/i915_vma_types.h |  2 -
>>  drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +-
>>  .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +-
>>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++--
>>  .../drm/i915/selftests/intel_memory_region.c  |  4 +-
>>  drivers/gpu/drm/i915/selftests/mock_gtt.c |  8 +-
>>  36 files changed, 361 insertions(+), 241 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c 
>> b/drivers/gpu/drm/i915/display/intel_dpt.c
>> index c5eacfdba1a5..7c5fddb203ba 100644
>> --- a/drivers/gpu/drm/i915/display/intel_dpt.c
>> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c
>> @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t 
>> pte)
>>  static 

Re: [PATCH] drm/mediatek: dp: change the aux retries times when receiving AUX_DEFER

2023-04-03 Thread Chun-Kuang Hu
Hi, Xinlei:

Xinlei Lee (李昕磊)  於 2023年4月3日 週一 下午5:18寫道:
>
> On Mon, 2023-04-03 at 11:49 +0800, Chun-Kuang Hu wrote:
> > External email : Please do not click links or open attachments until
> > you have verified the sender or the content.
> >
> >
> > Hi, Xinlei:
> >
> >  於 2023年3月29日 週三 下午2:43寫道:
> > >
> > > From: Xinlei Lee 
> > >
> > > DP 1.4a Section 2.8.7.1.5.6.1:
> > > A DP Source device shall retry at least seven times upon receiving
> > > AUX_DEFER before giving up the AUX transaction.
> > >
> > > The drm_dp_i2c_do_msg() function in the drm_dp_helper.c file will
> > > judge the status of the msg->reply parameter passed to aux_transfer
> > > ange-the-aux-retries-times-when-re.patchfor different processing.
> > >
> > > Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort
> > > driver")
> > > Signed-off-by: Xinlei Lee 
> > > ---
> > >  drivers/gpu/drm/mediatek/mtk_dp.c | 12 +---
> > >  1 file changed, 5 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c
> > > b/drivers/gpu/drm/mediatek/mtk_dp.c
> > > index 1f94fcc144d3..767b71da31a4 100644
> > > --- a/drivers/gpu/drm/mediatek/mtk_dp.c
> > > +++ b/drivers/gpu/drm/mediatek/mtk_dp.c
> > > @@ -806,10 +806,9 @@ static int
> > > mtk_dp_aux_wait_for_completion(struct mtk_dp *mtk_dp, bool is_read)
> > >  }
> > >
> > >  static int mtk_dp_aux_do_transfer(struct mtk_dp *mtk_dp, bool
> > > is_read, u8 cmd,
> > > - u32 addr, u8 *buf, size_t length)
> > > + u32 addr, u8 *buf, size_t length,
> > > u8 *reply_cmd)
> > >  {
> > > int ret;
> > > -   u32 reply_cmd;
> > >
> > > if (is_read && (length > DP_AUX_MAX_PAYLOAD_BYTES ||
> > > (cmd == DP_AUX_NATIVE_READ && !length)))
> > > @@ -841,10 +840,10 @@ static int mtk_dp_aux_do_transfer(struct
> > > mtk_dp *mtk_dp, bool is_read, u8 cmd,
> > > /* Wait for feedback from sink device. */
> > > ret = mtk_dp_aux_wait_for_completion(mtk_dp, is_read);
> > >
> > > -   reply_cmd = mtk_dp_read(mtk_dp, MTK_DP_AUX_P0_3624) &
> > > -   AUX_RX_REPLY_COMMAND_AUX_TX_P0_MASK;
> > > +   *reply_cmd = mtk_dp_read(mtk_dp, MTK_DP_AUX_P0_3624) &
> > > +AUX_RX_REPLY_COMMAND_AUX_TX_P0_MASK;
> > >
> > > -   if (ret || reply_cmd) {
> > > +   if (ret) {
> > > u32 phy_status = mtk_dp_read(mtk_dp,
> > > MTK_DP_AUX_P0_3628) &
> > >  AUX_RX_PHY_STATE_AUX_TX_P0_MASK;
> > > if (phy_status !=
> > > AUX_RX_PHY_STATE_AUX_TX_P0_RX_IDLE) {
> > > @@ -2070,7 +2069,7 @@ static ssize_t mtk_dp_aux_transfer(struct
> > > drm_dp_aux *mtk_aux,
> > > ret = mtk_dp_aux_do_transfer(mtk_dp, is_read,
> > > request,
> > >  msg->address +
> > > accessed_bytes,
> > >  msg->buffer +
> > > accessed_bytes,
> > > -to_access);
> > > +to_access, 
> > > >reply);
> > >
> > > if (ret) {
> > > drm_info(mtk_dp->drm_dev,
> > > @@ -2080,7 +2079,6 @@ static ssize_t mtk_dp_aux_transfer(struct
> > > drm_dp_aux *mtk_aux,
> > > accessed_bytes += to_access;
> > > } while (accessed_bytes < msg->size);
> > >
> > > -   msg->reply = DP_AUX_NATIVE_REPLY_ACK |
> > > DP_AUX_I2C_REPLY_ACK;
> >
> > In your description, you just mention the retry count is 7 times, but
> > you does not mention you should change the reply. Why you modify
> > this?
> > And where is the 7 times retry?
> >
> > Regards,
> > Chun-Kuang.
> >
> > > return msg->size;
> > >  err:
> > > msg->reply = DP_AUX_NATIVE_REPLY_NACK |
> > > DP_AUX_I2C_REPLY_NACK;
> > > --
> > > 2.18.0
> > >
>
> Hi CK:
>
> Thanks for your review!
>
> This patch is to fix some DP sinks that return AUX_DEFER, and the dp
> driver does not handle it according to the specification. DP_v1.4a
> spec 2.8.1.2 describes that if the sink returns AUX_DEFER, DPTX may
> retry later:
>
> The logic before the modification is that reply_cmd returns ETIMEDOUT
> if it is not AUX_ACK after the read operation, without considering the
> retry operation when returning AUX_DEFER;
>
> The modified logic is to add parameters to mtk_dp_aux_do_transfer() to
> store the return value of the sink. In the dmr_dp_helper.c file,
> drm_dp_i2c_do_msg calls aux->transfer and then performs retry
> operation according to msg->reply. The 7 times specified in the spec
> are also in this function defined in (max_retries).

Applied to mediatek-drm-next [1], thanks.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git/log/?h=mediatek-drm-next

Regards,
Chun-Kuang.

>
> Best Regards!
> xinlei


Re: [PATCH 7/7] drm/i915: Allow user to set cache at BO creation

2023-04-03 Thread Ville Syrjälä
On Mon, Apr 03, 2023 at 09:35:32AM -0700, Matt Roper wrote:
> On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote:
> > On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.y...@intel.com wrote:
> > > From: Fei Yang 
> > > 
> > > To comply with the design that buffer objects shall have immutable
> > > cache setting through out its life cycle, {set, get}_caching ioctl's
> > > are no longer supported from MTL onward. With that change caching
> > > policy can only be set at object creation time. The current code
> > > applies a default (platform dependent) cache setting for all objects.
> > > However this is not optimal for performance tuning. The patch extends
> > > the existing gem_create uAPI to let user set PAT index for the object
> > > at creation time.
> > 
> > This is missing the whole justification for the new uapi.
> > Why is MOCS not sufficient?
> 
> PAT and MOCS are somewhat related, but they're not the same thing.  The
> general direction of the hardware architecture recently has been to
> slowly dumb down MOCS and move more of the important memory/cache
> control over to the PAT instead.  On current platforms there is some
> overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win"
> for the specific fields that both can control), but MOCS doesn't have a
> way to express things like snoop/coherency mode (on MTL), or class of
> service (on PVC).  And if you check some of the future platforms, the
> hardware design starts packing even more stuff into the PAT (not just
> cache behavior) which will never be handled by MOCS.

Sigh. So the hardware designers screwed up MOCS yet again and
instead of getting that fixed we are adding a new uapi to work
around it?

The IMO sane approach (which IIRC was the situation for a few
platform generations at least) is that you just shove the PAT
index into MOCS (or tell it to go look it up from the PTE).
Why the heck did they not just stick with that?

> 
> Also keep in mind that MOCS generally applies at the GPU instruction
> level; although a lot of instructions have a field to provide a MOCS
> index, or can use a MOCS already associated with a surface state, there
> are still some that don't. PAT is the source of memory access
> characteristics for anything that can't provide a MOCS directly.

So what are the things that don't have MOCS and where we need
some custom cache behaviour, and we already know all that at
buffer creation time?

-- 
Ville Syrjälä
Intel


Re: [PATCH v4 0/5] docs & checkpatch: allow Closes tags with links

2023-04-03 Thread Joe Perches
On Mon, 2023-04-03 at 18:23 +0200, Matthieu Baerts wrote:
> Since v6.3, checkpatch.pl now complains about the use of "Closes:" tags
> followed by a link [1]. It also complains if a "Reported-by:" tag is
> followed by a "Closes:" one [2].

All these patches seems sensible, thanks.

Assuming Linus approves the use of "Closes:"

Acked-by: Joe Perches 

> As detailed in the first patch, this "Closes:" tag is used for a bit of
> time, mainly by DRM and MPTCP subsystems. It is used by some bug
> trackers to automate the closure of issues when a patch is accepted.
> It is even planned to use this tag with bugzilla.kernel.org [3].
> 
> The first patch updates the documentation to explain what is this
> "Closes:" tag and how/when to use it. The second patch modifies
> checkpatch.pl to stop complaining about it.
> 
> The DRM maintainers and their mailing list have been added in Cc as they
> are probably interested by these two patches as well.
> 
> [1] 
> https://lore.kernel.org/all/3b036087d80b8c0e07a46a1dbaaf4ad0d018f8d5.1674217480.git.li...@leemhuis.info/
> [2] 
> https://lore.kernel.org/all/bb5dfd55ea2026303ab2296f4a6df3da7dd64006.1674217480.git.li...@leemhuis.info/
> [3] 
> https://lore.kernel.org/linux-doc/20230315181205.f3av7h6owqzzw64p@meerkat.local/
> 
> Signed-off-by: Matthieu Baerts 
> ---
> Note: After having re-read the comments from the v1, it is still unclear
> to me if this "Closes:" can be accepted or not. But because it seems
> that the future Bugzilla bot for kernel.org and regzbot would like to
> use it as well, I'm sending here new versions. I'm sorry if I
> misunderstood the comments from v1. Please tell me if I did.
> 
> Changes in v4:
> - Patches 1/5, 3/5 and 4/5 have been added to ask using the "Closes" tag
>   instead of the "Link" one for any bug reports. (Thorsten)
> - The Fixes tags have been removed from patch 4/5. (Joe)
> - The "Reported-by being followed by a link tag" check is now only
>   looking for the tag, not the URL which is done elsewhere in patch 5/5.
>   (Thorsten)
> - A new patch has been added to fix a small issue in checkpatch.pl when
>   checking if "Reported-by:" tag is on the last line.
> - Link to v3: 
> https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v3-0-d1bdcf31c...@tessares.net
> 
> Changes in v3:
> - Patch 1/4 now allow using the "Closes" tag with any kind of bug
>   reports, as long as the link is public. (Thorsten)
> - The former patch 2/2 has been split in two: first to use a list for
>   the different "link" tags (Joe). Then to allow the 'Closes' tag.
> - A new patch has been added to let checkpatch.pl checking if "Closes"
>   and "Links" are used with a URL.
> - Link to v2: 
> https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v2-0-f4a417861...@tessares.net
> 
> Changes in v2:
> - The text on patch 1/2 has been reworked thanks to Jon, Bagas and
>   Thorsten. See the individual changelog on the patch for more details.
> - Private bug trackers and invalid URLs are clearly marked as forbidden
>   to avoid being misused. (Linus)
> - Rebased on top of Linus' repo.
> - Link to v1: 
> https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v1-0-1b83072e9...@tessares.net
> 
> ---
> Matthieu Baerts (5):
>   docs: process: allow Closes tags with links
>   checkpatch: don't print the next line if not defined
>   checkpatch: use a list of "link" tags
>   checkpatch: allow Closes tags with links
>   checkpatch: check for misuse of the link tags
> 
>  Documentation/process/5.Posting.rst  | 22 ++
>  Documentation/process/submitting-patches.rst | 26 +++--
>  scripts/checkpatch.pl| 43 
> ++--
>  3 files changed, 70 insertions(+), 21 deletions(-)
> ---
> base-commit: 7e364e56293bb98cae1b55fd835f5991c4e96e7d
> change-id: 20230314-doc-checkpatch-closes-tag-1731b57556b1
> 
> Best regards,



Re: [PATCH 2/2] phy: mtk-mipi-csi: add driver for CSI phy

2023-04-03 Thread Chun-Kuang Hu
Hi, Julien:

Julien Stephan  於 2023年4月3日 週一 下午3:20寫道:
>
> From: Phi-bang Nguyen 
>
> This is a new driver that supports the MIPI CSI CD-PHY for mediatek
> mt8365 soc
>
> Signed-off-by: Louis Kuo 
> Signed-off-by: Phi-bang Nguyen 
> [Julien Stephan: use regmap]
> [Julien Stephan: use GENMASK]
> Co-developed-by: Julien Stephan 
> Signed-off-by: Julien Stephan 
> ---
>  .../bindings/phy/mediatek,csi-phy.yaml|   9 +-
>  MAINTAINERS   |   1 +
>  drivers/phy/mediatek/Kconfig  |   8 +
>  drivers/phy/mediatek/Makefile |   2 +
>  .../phy/mediatek/phy-mtk-mipi-csi-rx-reg.h| 435 ++
>  drivers/phy/mediatek/phy-mtk-mipi-csi.c   | 392 
>  6 files changed, 845 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/phy/mediatek/phy-mtk-mipi-csi-rx-reg.h
>  create mode 100644 drivers/phy/mediatek/phy-mtk-mipi-csi.c
>

[snip]

> +
> +#define REGMAP_BIT(map, reg, field, val) \
> +   regmap_update_bits((map), reg, reg##_##field##_MASK, \
> +  (val) << reg##_##field##_SHIFT)
> +

Use FIELD_PREP() macro  so you can drop the definition of SHIFT symbol.

Regards,
Chun-Kuang.


Re: [PATCH 7/7] drm/i915: Allow user to set cache at BO creation

2023-04-03 Thread Matt Roper
On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote:
> On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.y...@intel.com wrote:
> > From: Fei Yang 
> > 
> > To comply with the design that buffer objects shall have immutable
> > cache setting throughout its life cycle, {set, get}_caching ioctl's
> > are no longer supported from MTL onward. With that change caching
> > policy can only be set at object creation time. The current code
> > applies a default (platform dependent) cache setting for all objects.
> > However this is not optimal for performance tuning. The patch extends
> > the existing gem_create uAPI to let user set PAT index for the object
> > at creation time.
> 
> This is missing the whole justification for the new uapi.
> Why is MOCS not sufficient?

PAT and MOCS are somewhat related, but they're not the same thing.  The
general direction of the hardware architecture recently has been to
slowly dumb down MOCS and move more of the important memory/cache
control over to the PAT instead.  On current platforms there is some
overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win"
for the specific fields that both can control), but MOCS doesn't have a
way to express things like snoop/coherency mode (on MTL), or class of
service (on PVC).  And if you check some of the future platforms, the
hardware design starts packing even more stuff into the PAT (not just
cache behavior) which will never be handled by MOCS.

Also keep in mind that MOCS generally applies at the GPU instruction
level; although a lot of instructions have a field to provide a MOCS
index, or can use a MOCS already associated with a surface state, there
are still some that don't.  PAT is the source of memory access
characteristics for anything that can't provide a MOCS directly.


Matt

> 
> > The new extension is platform independent, so UMD's can switch to using
> > this extension for older platforms as well, while {set, get}_caching are
> > still supported on these legacy platforms for compatibility reasons.
> > 
> > Cc: Chris Wilson 
> > Cc: Matt Roper 
> > Signed-off-by: Fei Yang 
> > Reviewed-by: Andi Shyti 
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 
> >  include/uapi/drm/i915_drm.h| 36 ++
> >  tools/include/uapi/drm/i915_drm.h  | 36 ++
> >  3 files changed, 105 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c 
> > b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > index e76c9703680e..1c6e2034d28e 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > @@ -244,6 +244,7 @@ struct create_ext {
> > unsigned int n_placements;
> > unsigned int placement_mask;
> > unsigned long flags;
> > +   unsigned int pat_index;
> >  };
> >  
> >  static void repr_placements(char *buf, size_t size,
> > @@ -393,11 +394,39 @@ static int ext_set_protected(struct 
> > i915_user_extension __user *base, void *data
> > return 0;
> >  }
> >  
> > +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> > +{
> > +   struct create_ext *ext_data = data;
> > +   struct drm_i915_private *i915 = ext_data->i915;
> > +   struct drm_i915_gem_create_ext_set_pat ext;
> > +   unsigned int max_pat_index;
> > +
> > +   BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> > +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
> > +
> > +   if (copy_from_user(&ext, base, sizeof(ext)))
> > +   return -EFAULT;
> > +
> > +   max_pat_index = INTEL_INFO(i915)->max_pat_index;
> > +
> > +   if (ext.pat_index > max_pat_index) {
> > +   drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> > +   ext.pat_index);
> > +   return -EINVAL;
> > +   }
> > +
> > +   ext_data->pat_index = ext.pat_index;
> > +
> > +   return 0;
> > +}
> > +
> >  static const i915_user_extension_fn create_extensions[] = {
> > [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
> > [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> > +   [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
> >  };
> >  
> > +#define PAT_INDEX_NOT_SET  0x
> >  /**
> >   * Creates a new mm object and returns a handle to it.
> >   * @dev: drm device pointer
> > @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
> > *data,
> > if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> > return -EINVAL;
> >  
> > +   ext_data.pat_index = PAT_INDEX_NOT_SET;
> > ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
> >create_extensions,
> >ARRAY_SIZE(create_extensions),
> > @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void 
> > *data,
> > if (IS_ERR(obj))
> > return PTR_ERR(obj);
> >  
> > +   if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> > 

  1   2   3   >