Re: [PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm
Hi Danilo, kernel test robot noticed the following build warnings: [auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 base: d36d68fd1925d33066d52468b7c7c6aca6521248 patch link: https://lore.kernel.org/r/20230404012741.116502-14-dakr%40redhat.com patch subject: [PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm config: arc-randconfig-r043-20230403 (https://download.01.org/0day-ci/archive/20230404/202304041311.bwxdwpx0-...@intel.com/config) compiler: arc-elf-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/ff73c969805aef784d47f6bedea6c15c8548d0bf git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 git checkout ff73c969805aef784d47f6bedea6c15c8548d0bf # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arc olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arc SHELL=/bin/bash drivers/gpu/drm/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202304041311.bwxdwpx0-...@intel.com/ All warnings (new ones prefixed by >>): In file included from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h:4, from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.h:5, from drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:22: drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c: In function 'nvkm_uvmm_mthd_raw_map': >> drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c:422:31: warning: cast to >> pointer 
from integer of different size [-Wint-to-pointer-cast] 422 | (void *)args->argv, args->argc); | ^ drivers/gpu/drm/nouveau/include/nvkm/core/memory.h:66:43: note: in definition of macro 'nvkm_memory_map' 66 | (p)->func->map((p),(o),(vm),(va),(av),(ac)) | ^~ vim +422 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c 388 389 static int 390 nvkm_uvmm_mthd_raw_map(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args) 391 { 392 struct nvkm_client *client = uvmm->object.client; 393 struct nvkm_vmm *vmm = uvmm->vmm; 394 struct nvkm_vma vma = { 395 .addr = args->addr, 396 .size = args->size, 397 .used = true, 398 .mapref = false, 399 .no_comp = true, 400 }; 401 struct nvkm_memory *memory; 402 u64 handle = args->memory; 403 u8 refd; 404 int ret; 405 406 if (!nvkm_vmm_in_managed_range(vmm, args->addr, args->size)) 407 return -EINVAL; 408 409 ret = nvkm_uvmm_page_index(uvmm, args->size, args->shift, ); 410 if (ret) 411 return ret; 412 413 vma.page = vma.refd = refd; 414 415 memory = nvkm_umem_search(client, args->memory); 416 if (IS_ERR(memory)) { 417 VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory)); 418 return PTR_ERR(memory); 419 } 420 421 ret = nvkm_memory_map(memory, args->offset, vmm, , > 422(void *)args->argv, args->argc); 423 424 nvkm_memory_unref(); 425 nvkm_memory_unref(); 426 return ret; 427 } 428 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests
Re: [PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA mappings
Hi Danilo, kernel test robot noticed the following build warnings: [auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 base: d36d68fd1925d33066d52468b7c7c6aca6521248 patch link: https://lore.kernel.org/r/20230404012741.116502-5-dakr%40redhat.com patch subject: [PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA mappings config: mips-randconfig-r024-20230403 (https://download.01.org/0day-ci/archive/20230404/202304041336.bd0g9u85-...@intel.com/config) compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project 67409911353323ca5edf2049ef0df54132fa1ca7) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install mips cross compiling tool for clang build # apt-get install binutils-mipsel-linux-gnu # https://github.com/intel-lab-lkp/linux/commit/c25139e5a168ae8a3a3e5ca0b650c201e5f41367 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 git checkout c25139e5a168ae8a3a3e5ca0b650c201e5f41367 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=mips SHELL=/bin/bash drivers/gpu/drm/ drivers/iio/light/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202304041336.bd0g9u85-...@intel.com/ All warnings (new ones prefixed by >>): >> drivers/gpu/drm/drm_gpuva_mgr.c:1031:25: warning: variable 'prev' set but >> not used [-Wunused-but-set-variable] struct drm_gpuva *va, *prev = NULL; ^ 1 warning generated. 
vim +/prev +1031 drivers/gpu/drm/drm_gpuva_mgr.c 1023 1024 static int 1025 __drm_gpuva_sm_map(struct drm_gpuva_manager *mgr, 1026 struct drm_gpuva_fn_ops *ops, void *priv, 1027 u64 req_addr, u64 req_range, 1028 struct drm_gem_object *req_obj, u64 req_offset) 1029 { 1030 DRM_GPUVA_ITER(it, mgr, req_addr); > 1031 struct drm_gpuva *va, *prev = NULL; 1032 u64 req_end = req_addr + req_range; 1033 int ret; 1034 1035 if (unlikely(!drm_gpuva_in_mm_range(mgr, req_addr, req_range))) 1036 return -EINVAL; 1037 1038 if (unlikely(drm_gpuva_in_kernel_node(mgr, req_addr, req_range))) 1039 return -EINVAL; 1040 1041 drm_gpuva_iter_for_each_range(va, it, req_end) { 1042 struct drm_gem_object *obj = va->gem.obj; 1043 u64 offset = va->gem.offset; 1044 u64 addr = va->va.addr; 1045 u64 range = va->va.range; 1046 u64 end = addr + range; 1047 bool merge = !!va->gem.obj; 1048 1049 if (addr == req_addr) { 1050 merge &= obj == req_obj && 1051 offset == req_offset; 1052 1053 if (end == req_end) { 1054 ret = op_unmap_cb(ops, , priv, va, merge); 1055 if (ret) 1056 return ret; 1057 break; 1058 } 1059 1060 if (end < req_end) { 1061 ret = op_unmap_cb(ops, , priv, va, merge); 1062 if (ret) 1063 return ret; 1064 goto next; 1065 } 1066 1067 if (end > req_end) { 1068 struct drm_gpuva_op_map n = { 1069 .va.addr = req_end, 1070 .va.range = range - req_range, 1071 .gem.obj = obj, 1072 .gem.offset = offset + req_range, 1073 }; 1074 struct drm_gpuva_op_unmap u = { 1075 .va = va, 1076 .keep = merge, 1077 }; 1078 1079
Re: [Regression] drm/scheduler: track GPU active time per entity
On 2023-03-28 04:54, Lucas Stach wrote: > Hi Danilo, > > Am Dienstag, dem 28.03.2023 um 02:57 +0200 schrieb Danilo Krummrich: >> Hi all, >> >> Commit df622729ddbf ("drm/scheduler: track GPU active time per entity") >> tries to track the accumulated time that a job was active on the GPU >> writing it to the entity through which the job was deployed to the >> scheduler originally. This is done within drm_sched_get_cleanup_job() >> which fetches a job from the schedulers pending_list. >> >> Doing this can result in a race condition where the entity is already >> freed, but the entity's newly added elapsed_ns field is still accessed >> once the job is fetched from the pending_list. >> >> After drm_sched_entity_destroy() being called it should be safe to free >> the structure that embeds the entity. However, a job originally handed >> over to the scheduler by this entity might still reside in the >> schedulers pending_list for cleanup after drm_sched_entity_destroy() >> already being called and the entity being freed. Hence, we can run into >> a UAF. >> > Sorry about that, I clearly didn't properly consider this case. > >> In my case it happened that a job, as explained above, was just picked >> from the schedulers pending_list after the entity was freed due to the >> client application exiting. Meanwhile this freed up memory was already >> allocated for a subsequent client applications job structure again. >> Hence, the new jobs memory got corrupted. Luckily, I was able to >> reproduce the same corruption over and over again by just using >> deqp-runner to run a specific set of VK test cases in parallel. >> >> Fixing this issue doesn't seem to be very straightforward though (unless >> I miss something), which is why I'm writing this mail instead of sending >> a fix directly. >> >> Spontaneously, I see three options to fix it: >> >> 1. Rather than embedding the entity into driver specific structures >> (e.g. 
tied to file_priv) we could allocate the entity separately and >> reference count it, such that it's only freed up once all jobs that were >> deployed through this entity are fetched from the schedulers pending list. >> > My vote is on this or something in similar vain for the long term. I > have some hope to be able to add a GPU scheduling algorithm with a bit > more fairness than the current one sometime in the future, which > requires execution time tracking on the entities. Danilo, Using kref is preferable, i.e. option 1 above. Lucas, can you shed some light on, 1. In what way the current FIFO scheduling is unfair, and 2. shed some details on this "scheduling algorithm with a bit more fairness than the current one"? Regards, Luben > >> 2. Somehow make sure drm_sched_entity_destroy() does block until all >> jobs deployed through this entity were fetched from the schedulers >> pending list. Though, I'm pretty sure that this is not really desirable. >> >> 3. Just revert the change and let drivers implement tracking of GPU >> active times themselves. >> > Given that we are already pretty late in the release cycle and etnaviv > being the only driver so far making use of the scheduler elapsed time > tracking I think the right short term solution is to either move the > tracking into etnaviv or just revert the change for now. I'll have a > look at this. > > Regards, > Lucas > >> In the case of just reverting the change I'd propose to also set a jobs >> entity pointer to NULL once the job was taken from the entity, such >> that in case of a future issue we fail where the actual issue resides >> and to make it more obvious that the field shouldn't be used anymore >> after the job was taken from the entity. >> >> I'm happy to implement the solution we agree on. However, it might also >> make sense to revert the change until we have a solution in place. I'm >> also happy to send a revert with a proper description of the problem. >> Please let me know what you think. 
>> >> - Danilo >> >
Re: [PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure to dump a DRM GPU VA space
Hi Danilo, kernel test robot noticed the following build warnings: [auto build test WARNING on d36d68fd1925d33066d52468b7c7c6aca6521248] url: https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 base: d36d68fd1925d33066d52468b7c7c6aca6521248 patch link: https://lore.kernel.org/r/20230404012741.116502-6-dakr%40redhat.com patch subject: [PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure to dump a DRM GPU VA space config: xtensa-allyesconfig (https://download.01.org/0day-ci/archive/20230404/202304041151.y2wmbgh6-...@intel.com/config) compiler: xtensa-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/97d8731cc359143f6f790b1c4755d1055a72adb9 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v3/20230404-093042 git checkout 97d8731cc359143f6f790b1c4755d1055a72adb9 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=xtensa olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=xtensa SHELL=/bin/bash drivers/gpu/drm/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202304041151.y2wmbgh6-...@intel.com/ All warnings (new ones prefixed by >>): drivers/gpu/drm/drm_debugfs.c: In function 'drm_debugfs_gpuva_info': >> drivers/gpu/drm/drm_debugfs.c:213:28: warning: cast from pointer to integer >> of different size [-Wpointer-to-int-cast] 213 |(u64)va->gem.obj, va->gem.offset); |^ vim +213 drivers/gpu/drm/drm_debugfs.c 178 179 /** 180 * drm_debugfs_gpuva_info - dump the given DRM GPU VA 
space 181 * @m: pointer to the _file to write 182 * @mgr: the _gpuva_manager representing the GPU VA space 183 * 184 * Dumps the GPU VA mappings of a given DRM GPU VA manager. 185 * 186 * For each DRM GPU VA space drivers should call this function from their 187 * _info_list's show callback. 188 * 189 * Returns: 0 on success, -ENODEV if the is not initialized 190 */ 191 int drm_debugfs_gpuva_info(struct seq_file *m, 192 struct drm_gpuva_manager *mgr) 193 { 194 DRM_GPUVA_ITER(it, mgr, 0); 195 struct drm_gpuva *va, *kva = >kernel_alloc_node; 196 197 if (!mgr->name) 198 return -ENODEV; 199 200 seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", 201 mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range); 202 seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", 203 kva->va.addr, kva->va.addr + kva->va.range); 204 seq_puts(m, "\n"); 205 seq_puts(m, " VAs | start | range | end| object | object offset\n"); 206 seq_puts(m, "-\n"); 207 drm_gpuva_iter_for_each(va, it) { 208 if (unlikely(va == >kernel_alloc_node)) 209 continue; 210 211 seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx\n", 212 va->va.addr, va->va.range, va->va.addr + va->va.range, > 213 (u64)va->gem.obj, va->gem.offset); 214 } 215 216 return 0; 217 } 218 EXPORT_SYMBOL(drm_debugfs_gpuva_info); 219 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests
[PATCH] video/aperture: fix typos
EFI FB, VESA FB or VGA FB etc are belong to firmware based framebuffer driver. Signed-off-by: Sui Jingfeng --- drivers/video/aperture.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c index 41e77de1ea82..b009468ffdff 100644 --- a/drivers/video/aperture.c +++ b/drivers/video/aperture.c @@ -20,7 +20,7 @@ * driver can be active at any given time. Many systems load a generic * graphics drivers, such as EFI-GOP or VESA, early during the boot process. * During later boot stages, they replace the generic driver with a dedicated, - * hardware-specific driver. To take over the device the dedicated driver + * hardware-specific driver. To take over the device, the dedicated driver * first has to remove the generic driver. Aperture functions manage * ownership of framebuffer memory and hand-over between drivers. * @@ -76,7 +76,7 @@ * generic EFI or VESA drivers, have to register themselves as owners of their * framebuffer apertures. Ownership of the framebuffer memory is achieved * by calling devm_aperture_acquire_for_platform_device(). If successful, the - * driveris the owner of the framebuffer range. The function fails if the + * driver is the owner of the framebuffer range. The function fails if the * framebuffer is already owned by another driver. See below for an example. * * .. code-block:: c @@ -126,7 +126,7 @@ * et al for the registered framebuffer range, the aperture helpers call * platform_device_unregister() and the generic driver unloads itself. The * generic driver also has to provide a remove function to make this work. - * Once hot unplugged fro mhardware, it may not access the device's + * Once hot unplugged from hardware, it may not access the device's * registers, framebuffer memory, ROM, etc afterwards. */ @@ -203,7 +203,7 @@ static void aperture_detach_platform_device(struct device *dev) /* * Remove the device from the device hierarchy. 
This is the right thing -* to do for firmware-based DRM drivers, such as EFI, VESA or VGA. After +* to do for firmware-based fb drivers, such as EFI, VESA or VGA. After * the new driver takes over the hardware, the firmware device's state * will be lost. * -- 2.25.1
Re: [PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas
Hi Andrew, kernel test robot noticed the following build warnings: [auto build test WARNING on char-misc/char-misc-testing] [also build test WARNING on char-misc/char-misc-next char-misc/char-misc-linus soc/for-next pza/reset/next linus/master v6.3-rc5 next-20230403] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Andrew-Davis/misc-sram-Add-DMA-BUF-Heap-exporting-of-SRAM-areas/20230404-032607 patch link:https://lore.kernel.org/r/20230403192433.26648-1-afd%40ti.com patch subject: [PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas config: loongarch-allyesconfig (https://download.01.org/0day-ci/archive/20230404/202304041144.t5jcogse-...@intel.com/config) compiler: loongarch64-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/6fcaa3c7cfbc144dd982f9abaa1c5af50dde24a8 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Andrew-Davis/misc-sram-Add-DMA-BUF-Heap-exporting-of-SRAM-areas/20230404-032607 git checkout 6fcaa3c7cfbc144dd982f9abaa1c5af50dde24a8 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch olddefconfig COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=loongarch SHELL=/bin/bash drivers/ If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot | Link: https://lore.kernel.org/oe-kbuild-all/202304041144.t5jcogse-...@intel.com/ All warnings (new ones prefixed by >>): >> drivers/misc/sram-dma-heap.c:161:17: warning: no previous prototype 
for >> 'sram_dma_heap_allocate' [-Wmissing-prototypes] 161 | struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap, | ^~ vim +/sram_dma_heap_allocate +161 drivers/misc/sram-dma-heap.c 160 > 161 struct dma_buf *sram_dma_heap_allocate(struct dma_heap *heap, 162 unsigned long len, 163 unsigned long fd_flags, 164 unsigned long heap_flags) 165 { 166 struct sram_dma_heap *sram_dma_heap = dma_heap_get_drvdata(heap); 167 struct sram_dma_heap_buffer *buffer; 168 169 DEFINE_DMA_BUF_EXPORT_INFO(exp_info); 170 struct dma_buf *dmabuf; 171 int ret; 172 173 buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); 174 if (!buffer) 175 return ERR_PTR(-ENOMEM); 176 buffer->pool = sram_dma_heap->pool; 177 INIT_LIST_HEAD(>attachments); 178 mutex_init(>attachments_lock); 179 buffer->len = len; 180 181 buffer->vaddr = (void *)gen_pool_alloc(buffer->pool, buffer->len); 182 if (!buffer->vaddr) { 183 ret = -ENOMEM; 184 goto free_buffer; 185 } 186 187 buffer->paddr = gen_pool_virt_to_phys(buffer->pool, (unsigned long)buffer->vaddr); 188 if (buffer->paddr == -1) { 189 ret = -ENOMEM; 190 goto free_pool; 191 } 192 193 /* create the dmabuf */ 194 exp_info.exp_name = dma_heap_get_name(heap); 195 exp_info.ops = _dma_heap_buf_ops; 196 exp_info.size = buffer->len; 197 exp_info.flags = fd_flags; 198 exp_info.priv = buffer; 199 dmabuf = dma_buf_export(_info); 200 if (IS_ERR(dmabuf)) { 201 ret = PTR_ERR(dmabuf); 202 goto free_pool; 203 } 204 205 return dmabuf; 206 207 free_pool: 208 gen_pool_free(buffer->pool, (unsigned long)buffer->vaddr, buffer->len); 209 free_buffer: 210 kfree(buffer); 211 212 return ERR_PTR(ret); 213 } 214 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests
Re: [PATCH] drm/bridge: ti-sn65dsi83: Do not generate HFP/HBP/HSA and EOT packet
Hi Marek, Thank you for the patch. On Mon, Apr 03, 2023 at 09:02:42PM +0200, Marek Vasut wrote: > Do not generate the HS front and back porch gaps, the HSA gap and > EOT packet, as per "SN65DSI83 datasheet SLLSEC1I - SEPTEMBER 2012 > - REVISED OCTOBER 2020", page 22, these packets are not required. > This makes the TI SN65DSI83 bridge work with Samsung DSIM on i.MX8MN. > > Signed-off-by: Marek Vasut I have successfully used this driver with a Raspberry Pi CM4. The VC4 DSI driver does not seem to support the newly added flags, so this patch shouldn't have any effect there. Reviewed-by: Laurent Pinchart > --- > Cc: Andrzej Hajda > Cc: Daniel Vetter > Cc: David Airlie > Cc: Jagan Teki > Cc: Jernej Skrabec > Cc: Jonas Karlman > Cc: Laurent Pinchart > Cc: Michael Walle > Cc: Neil Armstrong > Cc: Robert Foss > Cc: dri-devel@lists.freedesktop.org > --- > drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c > b/drivers/gpu/drm/bridge/ti-sn65dsi83.c > index 91ecfbe45bf90..b60ae1dc1191d 100644 > --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c > +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c > @@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx) > > dsi->lanes = dsi_lanes; > dsi->format = MIPI_DSI_FMT_RGB888; > - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST; > + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | > + MIPI_DSI_MODE_VIDEO_NO_HFP | > MIPI_DSI_MODE_VIDEO_NO_HBP | > + MIPI_DSI_MODE_VIDEO_NO_HSA | > MIPI_DSI_MODE_NO_EOT_PACKET; > > ret = devm_mipi_dsi_attach(dev, dsi); > if (ret < 0) { -- Regards, Laurent Pinchart
Re: [PATCH] drm/fbdev-generic: optimize out a redundant assignment clause
On 2023/3/29 17:04, Thomas Zimmermann wrote: (cc'ing Lucas) Hi Am 25.03.23 um 08:46 schrieb Sui Jingfeng: The assignment already done in drm_client_buffer_vmap(), just trival clean, no functional change. Signed-off-by: Sui Jingfeng <15330273...@189.cn> --- drivers/gpu/drm/drm_fbdev_generic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c index 4d6325e91565..1da48e71c7f1 100644 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ b/drivers/gpu/drm/drm_fbdev_generic.c @@ -282,7 +282,7 @@ static int drm_fbdev_damage_blit(struct drm_fb_helper *fb_helper, struct drm_clip_rect *clip) { struct drm_client_buffer *buffer = fb_helper->buffer; - struct iosys_map map, dst; + struct iosys_map map; int ret; /* @@ -302,8 +302,7 @@ static int drm_fbdev_damage_blit(struct drm_fb_helper *fb_helper, if (ret) goto out; - dst = map; - drm_fbdev_damage_blit_real(fb_helper, clip, ); + drm_fbdev_damage_blit_real(fb_helper, clip, ); I see what you're doing and it's probably correct in this case. But there's a larger issue with this iosys interfaces. Sometimes the address has to be modified (see calls of iosys_map_incr()). That can prevent incorrect uses of the mapping in other places, especially in unmap code. Yes, I just realized that. iosys_map_incr() change the internal state of a opaque structure, this is somewhat evil. if it is non-opaque, then this is abstract failure. You have to worry about that if it is changed by a accident call iosys_map_incr() from other place. The map should be const, I guess most programmer expect the map be a const. making it const please, copy on demand, modify the copy only, leave the original mapping untouched. Hope this could eliminate the embarrassing. Sorry for missing the point. I think it would make sense to consider a separate structure for the I/O location. The buffer as a whole would still be represented by struct iosys_map. 
And that new structure, let's call it struct iosys_ptr, would point to an actual location within the buffer's memory range. A few locations and helpers would need changes, but there are not so many callers that it's an issue. This would also allow for a few debugging tests that ensure that iosys_ptr always operates within the bounds of an iosys_map. I've long considered this idea, but there was no pressure to work on it. Maybe now. I have also gotten some ideas from your idea. Best regards Thomas drm_client_buffer_vunmap(buffer);
[PATCH v3] dt-bindings: bridge: Convert Samsung MIPI DSIM bridge to yaml
From: Jagan Teki Samsung MIPI DSIM bridge can be found on Exynos and NXP's i.MX8M Mini/Nano/Plus SoCs. Convert exynos_dsim.txt to yaml. Used the example node from exynos5433.dtsi instead of the one used in the legacy exynos_dsim.txt. Signed-off-by: Jagan Teki Signed-off-by: Fabio Estevam --- Changes since v2: - Took previous Rob Herring's feedback into account: https://lore.kernel.org/all/20210712151322.ga1931...@robh.at.kernel.org/ - Handled imx8mn and imx8mp. - Remove unnecessary #address-cells/size-cells. .../display/bridge/samsung,mipi-dsim.yaml | 255 ++ .../bindings/display/exynos/exynos_dsim.txt | 92 --- MAINTAINERS | 1 + 3 files changed, 256 insertions(+), 92 deletions(-) create mode 100644 Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml delete mode 100644 Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt diff --git a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml new file mode 100644 index ..55dbec178ea8 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml @@ -0,0 +1,255 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/samsung,mipi-dsim.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung MIPI DSIM bridge controller + +maintainers: + - Inki Dae + - Jagan Teki + - Marek Szyprowski + +description: | + Samsung MIPI DSIM bridge controller can be found it on Exynos + and i.MX8M Mini/Nano/Plus SoC's. 
+ +properties: + compatible: +oneOf: + - enum: + - samsung,exynos3250-mipi-dsi + - samsung,exynos4210-mipi-dsi + - samsung,exynos5410-mipi-dsi + - samsung,exynos5422-mipi-dsi + - samsung,exynos5433-mipi-dsi + - fsl,imx8mm-mipi-dsim + - fsl,imx8mp-mipi-dsim + - items: + - const: fsl,imx8mn-mipi-dsim + - const: fsl,imx8mm-mipi-dsim + + reg: +maxItems: 1 + + interrupts: +maxItems: 1 + + '#address-cells': +const: 1 + + '#size-cells': +const: 0 + + clocks: +minItems: 2 +maxItems: 5 + + clock-names: +minItems: 2 +maxItems: 5 + + samsung,phy-type: +$ref: /schemas/types.yaml#/definitions/uint32 +description: phandle to the samsung phy-type + + power-domains: +maxItems: 1 + + samsung,power-domain: +$ref: /schemas/types.yaml#/definitions/phandle +description: phandle to the associated samsung power domain + + vddcore-supply: +description: MIPI DSIM Core voltage supply (e.g. 1.1V) + + vddio-supply: +description: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V) + + samsung,burst-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM high speed burst mode frequency. + + samsung,esc-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM escape mode frequency. + + samsung,pll-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM oscillator clock frequency. + + phys: +maxItems: 1 + + phy-names: +const: dsim + + ports: +$ref: /schemas/graph.yaml#/properties/ports + +properties: + port@0: +$ref: /schemas/graph.yaml#/properties/port +description: + Input port node to receive pixel data from the + display controller. Exactly one endpoint must be + specified. + + port@1: +$ref: /schemas/graph.yaml#/properties/port +description: + DSI output port node to the panel or the next bridge + in the chain. 
+ +required: + - clock-names + - clocks + - compatible + - interrupts + - reg + - samsung,burst-clock-frequency + - samsung,esc-clock-frequency + - samsung,pll-clock-frequency + +allOf: + - $ref: ../dsi-controller.yaml# + - if: + properties: +compatible: + contains: +const: samsung,exynos5433-mipi-dsi + +then: + properties: +clocks: + minItems: 5 + +clock-names: + items: +- const: bus_clk +- const: phyclk_mipidphy0_bitclkdiv8 +- const: phyclk_mipidphy0_rxclkesc0 +- const: sclk_rgb_vclk_to_dsim0 +- const: sclk_mipi + +ports: + required: +- port@0 + + required: +- ports +- vddcore-supply +- vddio-supply + + - if: + properties: +compatible: + contains: +const: samsung,exynos5410-mipi-dsi + +then: + properties: +clocks: + minItems: 2 + +clock-names: + items: +- const: bus_clk +- const: pll_clk + + required: +- vddcore-supply +-
Re: [PATCH v2] drm/scdc-helper: Pimp SCDC debugs
Hi Ville, Thank you for the patch. On Tue, Apr 04, 2023 at 01:36:52AM +0300, Ville Syrjala wrote: > From: Ville Syrjälä > > Include the device and connector information in the SCDC > debugs. Makes it easier to figure out who did what. > > v2: Rely on connector->ddc (Maxime) > > Cc: Andrzej Hajda > Cc: Neil Armstrong > Cc: Robert Foss > Cc: Laurent Pinchart > Cc: Jonas Karlman > Cc: Jernej Skrabec > Cc: Thierry Reding > Cc: Emma Anholt > Cc: Maxime Ripard > Cc: intel-...@lists.freedesktop.org > Cc: linux-te...@vger.kernel.org > Signed-off-by: Ville Syrjälä Reviewed-by: Laurent Pinchart > --- > drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 8 ++-- > drivers/gpu/drm/display/drm_scdc_helper.c | 46 +++ > drivers/gpu/drm/i915/display/intel_ddi.c | 4 +- > drivers/gpu/drm/i915/display/intel_hdmi.c | 8 +--- > drivers/gpu/drm/tegra/sor.c | 15 +++- > drivers/gpu/drm/vc4/vc4_hdmi.c| 21 ++- > include/drm/display/drm_scdc_helper.h | 7 ++-- > 7 files changed, 59 insertions(+), 50 deletions(-) > > diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c > b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c > index aa51c61a78c7..603bb3c51027 100644 > --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c > +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c > @@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi > *hdmi, > /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ > if (dw_hdmi_support_scdc(hdmi, display)) { > if (mtmdsclock > HDMI14_MAX_TMDSCLK) > - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1); > + drm_scdc_set_high_tmds_clock_ratio(>connector, 1); > else > - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0); > + drm_scdc_set_high_tmds_clock_ratio(>connector, 0); > } > } > EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio); > @@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, > min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); > > /* Enabled Scrambling in the Sink */ > - drm_scdc_set_scrambling(hdmi->ddc, 1); > + 
drm_scdc_set_scrambling(>connector, 1); > > /* >* To activate the scrambler feature, you must ensure > @@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, > hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); > hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, > HDMI_MC_SWRSTZ); > - drm_scdc_set_scrambling(hdmi->ddc, 0); > + drm_scdc_set_scrambling(>connector, 0); > } > } > > diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c > b/drivers/gpu/drm/display/drm_scdc_helper.c > index c3ad4ab2b456..6d2f244e5830 100644 > --- a/drivers/gpu/drm/display/drm_scdc_helper.c > +++ b/drivers/gpu/drm/display/drm_scdc_helper.c > @@ -26,6 +26,8 @@ > #include > > #include > +#include > +#include > #include > > /** > @@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write); > > /** > * drm_scdc_get_scrambling_status - what is status of scrambling? > - * @adapter: I2C adapter for DDC channel > + * @connector: connector > * > * Reads the scrambler status over SCDC, and checks the > * scrambling status. > @@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write); > * Returns: > * True if the scrambling is enabled, false otherwise. 
> */ > -bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter) > +bool drm_scdc_get_scrambling_status(struct drm_connector *connector) > { > u8 status; > int ret; > > - ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, ); > + ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, ); > if (ret < 0) { > - DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret); > + drm_dbg_kms(connector->dev, > + "[CONNECTOR:%d:%s] Failed to read scrambling > status: %d\n", > + connector->base.id, connector->name, ret); > return false; > } > > @@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); > > /** > * drm_scdc_set_scrambling - enable scrambling > - * @adapter: I2C adapter for DDC channel > + * @connector: connector > * @enable: bool to indicate if scrambling is to be enabled/disabled > * > * Writes the TMDS config register over SCDC channel, and: > @@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); > * Returns: > * True if scrambling is set/reset successfully, false otherwise. > */ > -bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) > +bool drm_scdc_set_scrambling(struct drm_connector *connector, > + bool
Re: [PATCH v3 01/11] dmaengine: Add API function dmaengine_prep_slave_dma_array()
On 3 Apr 2023 17:47:50 +0200 Paul Cercueil > This function can be used to initiate a scatter-gather DMA transfer > where the DMA addresses and lengths are located inside arrays. > > The major difference with dmaengine_prep_slave_sg() is that it supports > specifying the lengths of each DMA transfer; as trying to override the > length of the transfer with dmaengine_prep_slave_sg() is a very tedious > process. The introduction of a new API function is also justified by the > fact that scatterlists are on their way out. Given sg's wayout and conceptually iovec and kvec (in include/linux/uio.h), what you add should have been dma_vec to ease people making use of it. struct dma_vec { dma_addr_t addr; size_t len; }; > > Signed-off-by: Paul Cercueil > > --- > v3: New patch > --- > include/linux/dmaengine.h | 16 > 1 file changed, 16 insertions(+) > > diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h > index c3656e590213..62efa28c009a 100644 > --- a/include/linux/dmaengine.h > +++ b/include/linux/dmaengine.h > @@ -912,6 +912,11 @@ struct dma_device { > struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( > struct dma_chan *chan, unsigned long flags); > > + struct dma_async_tx_descriptor *(*device_prep_slave_dma_array)( > + struct dma_chan *chan, dma_addr_t *addrs, > + size_t *lengths, size_t nb, > + enum dma_transfer_direction direction, > + unsigned long flags); Then the callback looks like struct dma_async_tx_descriptor *(*device_prep_slave_vec)( struct dma_chan *chan, struct dma_vec *vec, int nvec, enum dma_transfer_direction direction, unsigned long flags);
[PATCH v3 2/2] drm/bridge: fsl-ldb: Add i.MX6SX support
From: Fabio Estevam i.MX6SX has a single LVDS port and share a similar LDB_CTRL register layout with i.MX8MP and i.MX93. There is no LVDS CTRL register on the i.MX6SX, so only write to this register on the appropriate SoCs. Add support for the i.MX6SX LDB. Tested on a imx6sx-sdb board with a Hannstar HSD100PXN1 LVDS panel and also on a custom i.MX6SX-based board. Signed-off-by: Fabio Estevam Reviewed-by: Neil Armstrong Reviewed-by: Marek Vasut --- Changes since v2: - Rename it to 'single_ctrl_reg' to make it clearer that on i.MX6X, there is a single ctrl register. On the newer SoCs there are two ctrl registers. Changes since v1: - None drivers/gpu/drm/bridge/fsl-ldb.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 450b352914f4..f8e5d8ab98e3 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -56,6 +56,7 @@ #define LVDS_CTRL_VBG_ADJ_MASK GENMASK(19, 17) enum fsl_ldb_devtype { + IMX6SX_LDB, IMX8MP_LDB, IMX93_LDB, }; @@ -64,9 +65,14 @@ struct fsl_ldb_devdata { u32 ldb_ctrl; u32 lvds_ctrl; bool lvds_en_bit; + bool single_ctrl_reg; }; static const struct fsl_ldb_devdata fsl_ldb_devdata[] = { + [IMX6SX_LDB] = { + .ldb_ctrl = 0x18, + .single_ctrl_reg = true, + }, [IMX8MP_LDB] = { .ldb_ctrl = 0x5c, .lvds_ctrl = 0x128, @@ -202,6 +208,9 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge, regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, reg); + if (fsl_ldb->devdata->single_ctrl_reg) + return; + /* Program LVDS_CTRL */ reg = LVDS_CTRL_CC_ADJ(2) | LVDS_CTRL_PRE_EMPH_EN | LVDS_CTRL_PRE_EMPH_ADJ(3) | LVDS_CTRL_VBG_EN; @@ -228,7 +237,8 @@ static void fsl_ldb_atomic_disable(struct drm_bridge *bridge, regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, LVDS_CTRL_LVDS_EN); else - regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, 0); + if (!fsl_ldb->devdata->single_ctrl_reg) + regmap_write(fsl_ldb->regmap, 
fsl_ldb->devdata->lvds_ctrl, 0); regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, 0); clk_disable_unprepare(fsl_ldb->clk); @@ -355,6 +365,8 @@ static void fsl_ldb_remove(struct platform_device *pdev) } static const struct of_device_id fsl_ldb_match[] = { + { .compatible = "fsl,imx6sx-ldb", + .data = &fsl_ldb_devdata[IMX6SX_LDB], }, { .compatible = "fsl,imx8mp-ldb", .data = &fsl_ldb_devdata[IMX8MP_LDB], }, { .compatible = "fsl,imx93-ldb", -- 2.34.1
[PATCH v3 1/2] dt-bindings: display: bridge: ldb: Add an i.MX6SX entry
From: Fabio Estevam i.MX6SX has a single LVDS port and share a similar LDB_CTRL register layout with i.MX8MP and i.MX93. Signed-off-by: Fabio Estevam Reviewed-by: Krzysztof Kozlowski Reviewed-by: Marek Vasut --- Changes since v2: - Collected Reviewed-by tags. - Improved the Subject by not stating support. (Marek). Changes since v1: - Do not duplicate the entire if. (Krzysztof) .../devicetree/bindings/display/bridge/fsl,ldb.yaml | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml index 6e0e3ba9b49e..07388bf2b90d 100644 --- a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml +++ b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml @@ -17,6 +17,7 @@ description: | properties: compatible: enum: + - fsl,imx6sx-ldb - fsl,imx8mp-ldb - fsl,imx93-ldb @@ -64,7 +65,9 @@ allOf: properties: compatible: contains: -const: fsl,imx93-ldb +enum: + - fsl,imx6sx-ldb + - fsl,imx93-ldb then: properties: ports: -- 2.34.1
Re: [RFC PATCH 00/10] Xe DRM scheduler and long running workload plans
On Tue, Apr 04, 2023 at 10:07:48AM +0900, Asahi Lina wrote: > Hi, thanks for the Cc! > No problem. > On 04/04/2023 09.22, Matthew Brost wrote: > > Hello, > > > > As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we > > have been asked to merge our common DRM scheduler patches first as well > > as develop a common solution for long running workloads with the DRM > > scheduler. This RFC series is our first attempt at doing this. We > > welcome any and all feedback. > > > > This can we thought of as 4 parts detailed below. > > > > - DRM scheduler changes for 1 to 1 relationship between scheduler and > > entity (patches 1-3) > > > > In Xe all of the scheduling of jobs is done by a firmware scheduler (the > > GuC) which is a new paradigm WRT to the DRM scheduler and presents > > severals problems as the DRM was originally designed to schedule jobs on > > hardware queues. The main problem being that DRM scheduler expects the > > submission order of jobs to be the completion order of jobs even across > > multiple entities. This assumption falls apart with a firmware scheduler > > as a firmware scheduler has no concept of jobs and jobs can complete out > > of order. A novel solution for was originally thought of by Faith during > > the initial prototype of Xe, create a 1 to 1 relationship between scheduler > > and entity. I believe the AGX driver [3] is using this approach and > > Boris may use approach as well for the Mali driver [4]. > > > > To support a 1 to 1 relationship we move the main execution function > > from a kthread to a work queue and add a new scheduling mode which > > bypasses code in the DRM which isn't needed in a 1 to 1 relationship. > > The new scheduling mode should unify all drivers usage with a 1 to 1 > > relationship and can be thought of as using scheduler as a dependency / > > infligt job tracker rather than a true scheduler. > > Yup, we're in the exact same situation with drm/asahi, so this is very > welcome! 
We've been using the existing scheduler as-is, but this should help > remove some unneeded complexity in this use case. > That's the idea. > Do you want me to pull in this series into our tree and make sure this all > works out for us? > We tested this in Xe and it definitely works for us but the more testing the better. > I also have a couple bugfixes for drm/sched I need to send out, but I think > the rebase/merge with this series should be trivial. I'll send that out this > week. > > > - Generic messaging interface for DRM scheduler > > > > Idea is to be able to communicate to the submission backend with in band > > (relative to main execution function) messages. Messages are backend > > defined and flexable enough for any use case. In Xe we use these > > messages to clean up entites, set properties for entites, and suspend / > > resume execution of an entity [5]. I suspect other driver can leverage > > this messaging concept too as it a convenient way to avoid races in the > > backend. > > We haven't needed this so far (mostly by using fine-grained locking and > refcounting all over the place) but I can see it being useful to simplify > some of those constructs and maybe avoid potential deadlocks in some places. > I'm not sure yet whether we can fully get rid of the main queue > refcounting/locking (our completion/error signaling path doesn't map well to > DMA fences directly so we still need something there to get from the global > GPU completion signaling thread to individual queues) but it might be a step > in the right direction at least! > With this messaging interface we essentially have a lockless submission backend which is really nice compared to what we did in the i915. Matt > ~~ Lina >
[PATCH v3] dt-bindings: bridge: Convert Samsung MIPI DSIM bridge to yaml
From: Jagan Teki Samsung MIPI DSIM bridge can be found on Exynos and NXP's i.MX8M Mini/Nano/Plus SoCs. Convert exynos_dsim.txt to yaml. Used the example node from exynos5433.dtsi instead of the one used in the legacy exynos_dsim.txt. Signed-off-by: Jagan Teki Signed-off-by: Fabio Estevam --- Changes since v2: - Took previous Rob Herring's feedback into account: https://lore.kernel.org/all/20210712151322.ga1931...@robh.at.kernel.org/ - Handled imx8mn and imx8mp - Remove unnecessary #address-cells/size-cells. .../display/bridge/samsung,mipi-dsim.yaml | 255 ++ .../bindings/display/exynos/exynos_dsim.txt | 92 --- MAINTAINERS | 1 + 3 files changed, 256 insertions(+), 92 deletions(-) create mode 100644 Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml delete mode 100644 Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt diff --git a/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml new file mode 100644 index ..55dbec178ea8 --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/samsung,mipi-dsim.yaml @@ -0,0 +1,255 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/samsung,mipi-dsim.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung MIPI DSIM bridge controller + +maintainers: + - Inki Dae + - Jagan Teki + - Marek Szyprowski + +description: | + Samsung MIPI DSIM bridge controller can be found it on Exynos + and i.MX8M Mini/Nano/Plus SoC's. 
+ +properties: + compatible: +oneOf: + - enum: + - samsung,exynos3250-mipi-dsi + - samsung,exynos4210-mipi-dsi + - samsung,exynos5410-mipi-dsi + - samsung,exynos5422-mipi-dsi + - samsung,exynos5433-mipi-dsi + - fsl,imx8mm-mipi-dsim + - fsl,imx8mp-mipi-dsim + - items: + - const: fsl,imx8mn-mipi-dsim + - const: fsl,imx8mm-mipi-dsim + + reg: +maxItems: 1 + + interrupts: +maxItems: 1 + + '#address-cells': +const: 1 + + '#size-cells': +const: 0 + + clocks: +minItems: 2 +maxItems: 5 + + clock-names: +minItems: 2 +maxItems: 5 + + samsung,phy-type: +$ref: /schemas/types.yaml#/definitions/uint32 +description: phandle to the samsung phy-type + + power-domains: +maxItems: 1 + + samsung,power-domain: +$ref: /schemas/types.yaml#/definitions/phandle +description: phandle to the associated samsung power domain + + vddcore-supply: +description: MIPI DSIM Core voltage supply (e.g. 1.1V) + + vddio-supply: +description: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V) + + samsung,burst-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM high speed burst mode frequency. + + samsung,esc-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM escape mode frequency. + + samsung,pll-clock-frequency: +$ref: /schemas/types.yaml#/definitions/uint32 +description: + DSIM oscillator clock frequency. + + phys: +maxItems: 1 + + phy-names: +const: dsim + + ports: +$ref: /schemas/graph.yaml#/properties/ports + +properties: + port@0: +$ref: /schemas/graph.yaml#/properties/port +description: + Input port node to receive pixel data from the + display controller. Exactly one endpoint must be + specified. 
+ + port@1: +$ref: /schemas/graph.yaml#/properties/port +description: + DSI output port node to the panel or the next bridge + in the chain + +required: + - clock-names + - clocks + - compatible + - interrupts + - reg + - samsung,burst-clock-frequency + - samsung,esc-clock-frequency + - samsung,pll-clock-frequency + +allOf: + - $ref: ../dsi-controller.yaml# + - if: + properties: +compatible: + contains: +const: samsung,exynos5433-mipi-dsi + +then: + properties: +clocks: + minItems: 5 + +clock-names: + items: +- const: bus_clk +- const: phyclk_mipidphy0_bitclkdiv8 +- const: phyclk_mipidphy0_rxclkesc0 +- const: sclk_rgb_vclk_to_dsim0 +- const: sclk_mipi + +ports: + required: +- port@0 + + required: +- ports +- vddcore-supply +- vddio-supply + + - if: + properties: +compatible: + contains: +const: samsung,exynos5410-mipi-dsi + +then: + properties: +clocks: + minItems: 2 + +clock-names: + items: +- const: bus_clk +- const: pll_clk + + required: +- vddcore-supply +- vddio-supply
Re: [PATCH v2 1/2] dt-bindings: display: bridge: ldb: Add i.MX6SX support
On 3/30/23 12:42, Fabio Estevam wrote: From: Fabio Estevam i.MX6SX has a single LVDS port and shares a similar LDB_CTRL register layout with i.MX8MP and i.MX93. Signed-off-by: Fabio Estevam Nit: you are not adding 'support' for the IP here, you are documenting bindings in this patch. The support is added in 2/2. Reviewed-by: Marek Vasut
[PATCH drm-next v3 14/15] drm/nouveau: implement new VM_BIND uAPI
This commit provides the implementation for the new uapi motivated by the Vulkan API. It allows user mode drivers (UMDs) to: 1) Initialize a GPU virtual address (VA) space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA space managed by the kernel and userspace, respectively. 2) Allocate and free a VA space region as well as bind and unbind memory to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. UMDs can request the named operations to be processed either synchronously or asynchronously. It supports DRM syncobjs (incl. timelines) as synchronization mechanism. The management of the GPU VA mappings is implemented with the DRM GPU VA manager. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The execution happens asynchronously. It supports DRM syncobj (incl. timelines) as synchronization mechanism. DRM GEM object locking is handled with drm_exec. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC, use the DRM GPU scheduler for the asynchronous paths. 
Signed-off-by: Danilo Krummrich --- Documentation/gpu/driver-uapi.rst |3 + drivers/gpu/drm/nouveau/Kbuild |3 + drivers/gpu/drm/nouveau/Kconfig |2 + drivers/gpu/drm/nouveau/nouveau_abi16.c | 24 + drivers/gpu/drm/nouveau/nouveau_abi16.h |1 + drivers/gpu/drm/nouveau/nouveau_bo.c| 147 +- drivers/gpu/drm/nouveau/nouveau_bo.h|2 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 27 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 59 +- drivers/gpu/drm/nouveau/nouveau_exec.c | 363 + drivers/gpu/drm/nouveau/nouveau_exec.h | 42 + drivers/gpu/drm/nouveau/nouveau_gem.c | 25 +- drivers/gpu/drm/nouveau/nouveau_mem.h |5 + drivers/gpu/drm/nouveau/nouveau_prime.c |2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 494 ++ drivers/gpu/drm/nouveau/nouveau_sched.h | 116 ++ drivers/gpu/drm/nouveau/nouveau_uvmm.c | 1836 +++ drivers/gpu/drm/nouveau/nouveau_uvmm.h | 98 ++ 18 files changed, 3184 insertions(+), 65 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.h create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.h create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.c create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.h diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 9c7ca6e33a68..c08bcbb95fb3 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -13,4 +13,7 @@ drm/nouveau uAPI VM_BIND / EXEC uAPI --- +.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c +:doc: Overview + .. 
kernel-doc:: include/uapi/drm/nouveau_drm.h diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index 5e5617006da5..cf6b3a80c0c8 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o nouveau-y += nouveau_ttm.o nouveau-y += nouveau_vmm.o +nouveau-y += nouveau_exec.o +nouveau-y += nouveau_sched.o +nouveau-y += nouveau_uvmm.o # DRM - modesetting nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index a70bd65e1400..c52e8096cca4 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -10,6 +10,8 @@ config DRM_NOUVEAU select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC + select DRM_SCHED select I2C select I2C_ALGOBIT select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 82dab51d8aeb..a112f28681d3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -35,6 +35,7 @@ #include "nouveau_chan.h" #include "nouveau_abi16.h" #include "nouveau_vmm.h" +#include "nouveau_sched.h" static struct nouveau_abi16 * nouveau_abi16(struct drm_file *file_priv) @@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; + /* When a client exits without waiting for it's queued up jobs to +* finish it might happen that we fault the channel. This is due to +* drm_file_free() calling drm_gem_release() before the postclose() +* callback. Hence, we can't tear down this scheduler entity before +* uvmm mappings are unmapped. Currently, we can't detect this case. +* +* However, this should be rare and harmless, since the channel isn't +* needed anymore. +*/ + nouveau_sched_entity_fini(>sched_entity); + /* wait for all activity to
[PATCH drm-next v3 15/15] drm/nouveau: debugfs: implement DRM GPU VA debugfs
Provide the driver indirection iterating over all DRM GPU VA spaces to enable the common 'gpuvas' debugfs file for dumping DRM GPU VA spaces. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 39 +++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 2a36d1ca8fda..d5487e655b0c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -202,6 +202,44 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct file *file) return single_open(file, nouveau_debugfs_pstate_get, inode->i_private); } +static void +nouveau_debugfs_gpuva_regions(struct seq_file *m, struct nouveau_uvmm *uvmm) +{ + MA_STATE(mas, >region_mt, 0, 0); + struct nouveau_uvma_region *reg; + + seq_puts (m, " VA regions | start | range | end\n"); + seq_puts (m, "\n"); + mas_for_each(, reg, ULONG_MAX) + seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx\n", + reg->va.addr, reg->va.range, reg->va.addr + reg->va.range); +} + +static int +nouveau_debugfs_gpuva(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct nouveau_drm *drm = nouveau_drm(node->minor->dev); + struct nouveau_cli *cli; + + mutex_lock(>clients_lock); + list_for_each_entry(cli, >clients, head) { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); + + if (!uvmm) + continue; + + nouveau_uvmm_lock(uvmm); + drm_debugfs_gpuva_info(m, >umgr); + seq_puts(m, "\n"); + nouveau_debugfs_gpuva_regions(m, uvmm); + nouveau_uvmm_unlock(uvmm); + } + mutex_unlock(>clients_lock); + + return 0; +} + static const struct file_operations nouveau_pstate_fops = { .owner = THIS_MODULE, .open = nouveau_debugfs_pstate_open, @@ -213,6 +251,7 @@ static const struct file_operations nouveau_pstate_fops = { static struct drm_info_list nouveau_debugfs_list[] = { { "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL }, { "strap_peek", 
nouveau_debugfs_strap_peek, 0, NULL }, + DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL), }; #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list) -- 2.39.2
[PATCH drm-next v3 13/15] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm
The new VM_BIND UAPI uses the DRM GPU VA manager to manage the VA space. Hence, we a need a way to manipulate the MMUs page tables without going through the internal range allocator implemented by nvkm/vmm. This patch adds a raw interface for nvkm/vmm to pass the resposibility for managing the address space and the corresponding map/unmap/sparse operations to the upper layers. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/include/nvif/if000c.h | 26 ++- drivers/gpu/drm/nouveau/include/nvif/vmm.h| 19 +- .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 20 +- drivers/gpu/drm/nouveau/nouveau_svm.c | 2 +- drivers/gpu/drm/nouveau/nouveau_vmm.c | 4 +- drivers/gpu/drm/nouveau/nvif/vmm.c| 100 +++- .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c| 213 -- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 197 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 25 ++ .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c| 16 +- .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c| 16 +- .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c | 27 ++- 12 files changed, 566 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index 9c7ff56831c5..a5a182b3c28d 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -3,7 +3,10 @@ struct nvif_vmm_v0 { __u8 version; __u8 page_nr; - __u8 managed; +#define NVIF_VMM_V0_TYPE_UNMANAGED 0x00 +#define NVIF_VMM_V0_TYPE_MANAGED 0x01 +#define NVIF_VMM_V0_TYPE_RAW 0x02 + __u8 type; __u8 pad03[5]; __u64 addr; __u64 size; @@ -17,6 +20,7 @@ struct nvif_vmm_v0 { #define NVIF_VMM_V0_UNMAP 0x04 #define NVIF_VMM_V0_PFNMAP 0x05 #define NVIF_VMM_V0_PFNCLR 0x06 +#define NVIF_VMM_V0_RAW0x07 #define NVIF_VMM_V0_MTHD(i) ((i) + 0x80) struct nvif_vmm_page_v0 { @@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 { __u64 addr; }; +struct nvif_vmm_raw_v0 { + __u8 version; +#define NVIF_VMM_RAW_V0_GET0x0 +#define NVIF_VMM_RAW_V0_PUT0x1 +#define 
NVIF_VMM_RAW_V0_MAP0x2 +#define NVIF_VMM_RAW_V0_UNMAP 0x3 +#define NVIF_VMM_RAW_V0_SPARSE 0x4 + __u8 op; + __u8 sparse; + __u8 ref; + __u8 shift; + __u32 argc; + __u8 pad01[7]; + __u64 addr; + __u64 size; + __u64 offset; + __u64 memory; + __u64 argv; +}; + struct nvif_vmm_pfnmap_v0 { __u8 version; __u8 page; diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h index a2ee92201ace..0ecedd0ee0a5 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -4,6 +4,12 @@ struct nvif_mem; struct nvif_mmu; +enum nvif_vmm_type { + UNMANAGED, + MANAGED, + RAW, +}; + enum nvif_vmm_get { ADDR, PTES, @@ -30,8 +36,9 @@ struct nvif_vmm { int page_nr; }; -int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool managed, - u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *); +int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, + enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc, + struct nvif_vmm *); void nvif_vmm_dtor(struct nvif_vmm *); int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, u8 page, u8 align, u64 size, struct nvif_vma *); @@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *); int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc, struct nvif_mem *, u64 offset); int nvif_vmm_unmap(struct nvif_vmm *, u64); + +int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift); +int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift); +int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift, +void *argv, u32 argc, struct nvif_mem *mem, u64 offset); +int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size, + u8 shift, bool sparse); +int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 70e7887ef4b4..2fd2f2433fc7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -17,6 +17,7 @@
[PATCH] drm/nouveau/disp: set varaiable gv100_disp_core_mthd_base storage-class-specifier to static
smatch reports drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c:610:1: warning: symbol 'gv100_disp_core_mthd_base' was not declared. Should it be static? This variable is only used in one file so it should be static. Signed-off-by: Tom Rix --- drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c index 115d0997fd62..4ebc030e40d1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -606,7 +606,7 @@ gv100_disp_curs = { .user = 73, }; -const struct nvkm_disp_mthd_list +static const struct nvkm_disp_mthd_list gv100_disp_core_mthd_base = { .mthd = 0x, .addr = 0x00, -- 2.27.0
[PATCH drm-next v3 12/15] drm/nouveau: chan: provide nouveau_channel_kill()
The new VM_BIND UAPI implementation introduced in subsequent commits will allow asynchronous jobs processing push buffers and emitting fences. If a job times out, we need a way to recover from this situation. For now, simply kill the channel to unblock all hung up jobs and signal userspace that the device is dead on the next EXEC or VM_BIND ioctl. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_chan.c | 14 +++--- drivers/gpu/drm/nouveau/nouveau_chan.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index f47c0363683c..a975f8b0e0e5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400); +void +nouveau_channel_kill(struct nouveau_channel *chan) +{ + atomic_set(>killed, 1); + if (chan->fence) + nouveau_fence_context_kill(chan->fence, -ENODEV); +} + static int nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc) { @@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc) struct nouveau_cli *cli = (void *)chan->user.client; NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid); - atomic_set(>killed, 1); - if (chan->fence) - nouveau_fence_context_kill(chan->fence, -ENODEV); + + if (unlikely(!atomic_read(>killed))) + nouveau_channel_kill(chan); return NVIF_EVENT_DROP; } diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index e06a8ffed31a..e483f4a254da 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -65,6 +65,7 @@ int nouveau_channel_new(struct nouveau_drm *, struct nvif_device *, bool priv, u32 vram, u32 gart, struct nouveau_channel **); void nouveau_channel_del(struct nouveau_channel **); int 
nouveau_channel_idle(struct nouveau_channel *); +void nouveau_channel_kill(struct nouveau_channel *); extern int nouveau_vram_pushbuf; -- 2.39.2
[PATCH drm-next v3 11/15] drm/nouveau: fence: fail to emit when fence context is killed
The new VM_BIND UAPI implementation introduced in subsequent commits will allow asynchronous jobs processing push buffers and emitting fences. If a fence context is killed, e.g. due to a channel fault, jobs which are already queued for execution might still emit new fences. In such a case a job would hang forever. To fix that, fail to emit a new fence on a killed fence context with -ENODEV to unblock the job. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++ drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index e946408f945b..77c739a55b19 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) if (nouveau_fence_signal(fence)) nvif_event_block(>event); } + fctx->killed = 1; spin_unlock_irqrestore(>lock, flags); } @@ -229,6 +230,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan) dma_fence_get(>base); spin_lock_irq(>lock); + if (unlikely(fctx->killed)) { + spin_unlock_irq(>lock); + dma_fence_put(>base); + return -ENODEV; + } + if (nouveau_fence_update(chan, fctx)) nvif_event_block(>event); diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 7c73c7c9834a..2c72d96ef17d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -44,7 +44,7 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - int notify_ref, dead; + int notify_ref, dead, killed; }; struct nouveau_fence_priv { -- 2.39.2
[PATCH drm-next v3 10/15] drm/nouveau: fence: separate fence alloc and emit
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence, in order to emit fences within DMA fence signalling critical sections (e.g. as typically done in the DRM GPU schedulers run_job() callback) we need to separate fence allocation and fence emitting. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/dispnv04/crtc.c | 9 - drivers/gpu/drm/nouveau/nouveau_bo.c| 52 +++-- drivers/gpu/drm/nouveau/nouveau_chan.c | 6 ++- drivers/gpu/drm/nouveau/nouveau_dmem.c | 9 +++-- drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++- drivers/gpu/drm/nouveau/nouveau_fence.h | 3 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 5 ++- 7 files changed, 59 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index a6f2e681bde9..a34924523133 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan, PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x); PUSH_KICK(push); - ret = nouveau_fence_new(chan, false, pfence); + ret = nouveau_fence_new(pfence); if (ret) goto fail; + ret = nouveau_fence_emit(*pfence, chan); + if (ret) + goto fail_fence_unref; + return 0; + +fail_fence_unref: + nouveau_fence_unref(pfence); fail: spin_lock_irqsave(>event_lock, flags); list_del(>head); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 057bc995f19b..e9cbbf594e6f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, mutex_lock(>mutex); else mutex_lock_nested(>mutex, SINGLE_DEPTH_NESTING); + ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible); - if (ret == 0) { - ret = drm->ttm.move(chan, bo, bo->resource, new_reg); - if (ret == 0) { - ret = nouveau_fence_new(chan, false, ); - if (ret == 0) { - /* TODO: figure out a better solution 
here -* -* wait on the fence here explicitly as going through -* ttm_bo_move_accel_cleanup somehow doesn't seem to do it. -* -* Without this the operation can timeout and we'll fallback to a -* software copy, which might take several minutes to finish. -*/ - nouveau_fence_wait(fence, false, false); - ret = ttm_bo_move_accel_cleanup(bo, - >base, - evict, false, - new_reg); - nouveau_fence_unref(); - } - } + if (ret) + goto out_unlock; + + ret = drm->ttm.move(chan, bo, bo->resource, new_reg); + if (ret) + goto out_unlock; + + ret = nouveau_fence_new(); + if (ret) + goto out_unlock; + + ret = nouveau_fence_emit(fence, chan); + if (ret) { + nouveau_fence_unref(); + goto out_unlock; } + + /* TODO: figure out a better solution here +* +* wait on the fence here explicitly as going through +* ttm_bo_move_accel_cleanup somehow doesn't seem to do it. +* +* Without this the operation can timeout and we'll fallback to a +* software copy, which might take several minutes to finish. +*/ + nouveau_fence_wait(fence, false, false); + ret = ttm_bo_move_accel_cleanup(bo, >base, evict, false, + new_reg); + nouveau_fence_unref(); + +out_unlock: mutex_unlock(>mutex); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 1068abe41024..f47c0363683c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan) struct nouveau_fence *fence = NULL; int ret; - ret = nouveau_fence_new(chan, false, ); + ret = nouveau_fence_new(); if (!ret) { - ret = nouveau_fence_wait(fence, false, false); + ret = nouveau_fence_emit(fence, chan); + if (!ret) +
[PATCH drm-next v3 09/15] drm/nouveau: move usercopy helpers to nouveau_drv.h
Move the usercopy helpers to a common driver header file to make it usable for the new API added in subsequent commits. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_drv.h | 26 ++ drivers/gpu/drm/nouveau/nouveau_gem.c | 26 -- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 81350e685b50..20a7f31b9082 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -130,6 +130,32 @@ nouveau_cli(struct drm_file *fpriv) return fpriv ? fpriv->driver_priv : NULL; } +static inline void +u_free(void *addr) +{ + kvfree(addr); +} + +static inline void * +u_memcpya(uint64_t user, unsigned nmemb, unsigned size) +{ + void *mem; + void __user *userptr = (void __force __user *)(uintptr_t)user; + + size *= nmemb; + + mem = kvmalloc(size, GFP_KERNEL); + if (!mem) + return ERR_PTR(-ENOMEM); + + if (copy_from_user(mem, userptr, size)) { + u_free(mem); + return ERR_PTR(-EFAULT); + } + + return mem; +} + #include #include diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 08689ced4f6a..4369c8dc8b5b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -613,32 +613,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, return 0; } -static inline void -u_free(void *addr) -{ - kvfree(addr); -} - -static inline void * -u_memcpya(uint64_t user, unsigned nmemb, unsigned size) -{ - void *mem; - void __user *userptr = (void __force __user *)(uintptr_t)user; - - size *= nmemb; - - mem = kvmalloc(size, GFP_KERNEL); - if (!mem) - return ERR_PTR(-ENOMEM); - - if (copy_from_user(mem, userptr, size)) { - u_free(mem); - return ERR_PTR(-EFAULT); - } - - return mem; -} - static int nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, struct drm_nouveau_gem_pushbuf *req, -- 2.39.2
[PATCH drm-next v3 08/15] drm/nouveau: bo: initialize GEM GPU VA interface
Initialize the GEM's DRM GPU VA manager interface in preparation for the (u)vmm implementation, provided by subsequent commits, to make use of it. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 7724fe63067d..057bc995f19b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -215,11 +215,14 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL); if (!nvbo) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(>head); INIT_LIST_HEAD(>entry); INIT_LIST_HEAD(>vma_list); nvbo->bo.bdev = >ttm.bdev; + drm_gem_gpuva_init(>bo.base); + /* This is confusing, and doesn't actually mean we want an uncached * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated * into in nouveau_gem_new(). -- 2.39.2
[PATCH drm-next v3 07/15] drm/nouveau: get vmm via nouveau_cli_vmm()
Provide a getter function for the client's current vmm context. Since we'll add a new (u)vmm context for UMD bindings in subsequent commits, this will keep the code clean. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 +- drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +- drivers/gpu/drm/nouveau/nouveau_drv.h | 9 + drivers/gpu/drm/nouveau/nouveau_gem.c | 6 +++--- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c2ec91cc845d..7724fe63067d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -204,7 +204,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; struct nvif_mmu *mmu = >mmu; - struct nvif_vmm *vmm = cli->svm.cli ? >svm.vmm : >vmm.vmm; + struct nvif_vmm *vmm = _cli_vmm(cli)->vmm; int i, pi = -1; if (!*size) { diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index e648ecd0c1a0..1068abe41024 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -148,7 +148,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->device = device; chan->drm = drm; - chan->vmm = cli->svm.cli ? 
>svm : >vmm; + chan->vmm = nouveau_cli_vmm(cli); atomic_set(>killed, 0); /* allocate memory for dma push buffer */ diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index b5de312a523f..81350e685b50 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -112,6 +112,15 @@ struct nouveau_cli_work { struct dma_fence_cb cb; }; +static inline struct nouveau_vmm * +nouveau_cli_vmm(struct nouveau_cli *cli) +{ + if (cli->svm.cli) + return >svm; + + return >vmm; +} + void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *, struct nouveau_cli_work *); diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index f77e44958037..08689ced4f6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -103,7 +103,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nouveau_vmm *vmm = cli->svm.cli ? >svm : >vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; int ret; @@ -180,7 +180,7 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; - struct nouveau_vmm *vmm = cli->svm.cli ? >svm : & cli->vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; int ret; @@ -269,7 +269,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); - struct nouveau_vmm *vmm = cli->svm.cli ? 
>svm : >vmm; + struct nouveau_vmm *vmm = nouveau_cli_vmm(cli); struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) -- 2.39.2
[PATCH drm-next v3 05/15] drm: debugfs: provide infrastructure to dump a DRM GPU VA space
This commit adds a function to dump a DRM GPU VA space and a macro for drivers to register the struct drm_info_list 'gpuvas' entry. Most likely, most drivers might maintain one DRM GPU VA space per struct drm_file, but there might also be drivers not having a fixed relation between DRM GPU VA spaces and a DRM core infrastructure, hence we need the indirection via the driver iterating it's maintained DRM GPU VA spaces. Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_debugfs.c | 41 +++ include/drm/drm_debugfs.h | 25 + 2 files changed, 66 insertions(+) diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 4855230ba2c6..82180fb1c200 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "drm_crtc_internal.h" #include "drm_internal.h" @@ -175,6 +176,46 @@ static const struct file_operations drm_debugfs_fops = { .release = single_release, }; +/** + * drm_debugfs_gpuva_info - dump the given DRM GPU VA space + * @m: pointer to the _file to write + * @mgr: the _gpuva_manager representing the GPU VA space + * + * Dumps the GPU VA mappings of a given DRM GPU VA manager. + * + * For each DRM GPU VA space drivers should call this function from their + * _info_list's show callback. 
+ * + * Returns: 0 on success, -ENODEV if the is not initialized + */ +int drm_debugfs_gpuva_info(struct seq_file *m, + struct drm_gpuva_manager *mgr) +{ + DRM_GPUVA_ITER(it, mgr, 0); + struct drm_gpuva *va, *kva = >kernel_alloc_node; + + if (!mgr->name) + return -ENODEV; + + seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", + mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range); + seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", + kva->va.addr, kva->va.addr + kva->va.range); + seq_puts(m, "\n"); + seq_puts(m, " VAs | start | range | end | object | object offset\n"); + seq_puts(m, "-\n"); + drm_gpuva_iter_for_each(va, it) { + if (unlikely(va == >kernel_alloc_node)) + continue; + + seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx\n", + va->va.addr, va->va.range, va->va.addr + va->va.range, + (u64)va->gem.obj, va->gem.offset); + } + + return 0; +} +EXPORT_SYMBOL(drm_debugfs_gpuva_info); /** * drm_debugfs_create_files - Initialize a given set of debugfs files for DRM diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h index 7616f457ce70..cb2c1956a214 100644 --- a/include/drm/drm_debugfs.h +++ b/include/drm/drm_debugfs.h @@ -34,6 +34,22 @@ #include #include + +#include + +/** + * DRM_DEBUGFS_GPUVA_INFO - _info_list entry to dump a GPU VA space + * @show: the _info_list's show callback + * @data: driver private data + * + * Drivers should use this macro to define a _info_list entry to provide a + * debugfs file for dumping the GPU VA space regions and mappings. + * + * For each DRM GPU VA space drivers should call drm_debugfs_gpuva_info() from + * their @show callback. 
+ */ +#define DRM_DEBUGFS_GPUVA_INFO(show, data) {"gpuvas", show, DRIVER_GEM_GPUVA, data} + /** * struct drm_info_list - debugfs info list entry * @@ -134,6 +150,9 @@ void drm_debugfs_add_file(struct drm_device *dev, const char *name, void drm_debugfs_add_files(struct drm_device *dev, const struct drm_debugfs_info *files, int count); + +int drm_debugfs_gpuva_info(struct seq_file *m, + struct drm_gpuva_manager *mgr); #else static inline void drm_debugfs_create_files(const struct drm_info_list *files, int count, struct dentry *root, @@ -155,6 +174,12 @@ static inline void drm_debugfs_add_files(struct drm_device *dev, const struct drm_debugfs_info *files, int count) {} + +static inline int drm_debugfs_gpuva_info(struct seq_file *m, +struct drm_gpuva_manager *mgr) +{ + return 0; +} #endif #endif /* _DRM_DEBUGFS_H_ */ -- 2.39.2
[PATCH drm-next v3 06/15] drm/nouveau: new VM_BIND uapi interfaces
This commit provides the interfaces for the new UAPI motivated by the Vulkan API. It allows user mode drivers (UMDs) to: 1) Initialize a GPU virtual address (VA) space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved VA area. 2) Bind and unbind GPU VA space mappings via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. Both, DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support asynchronous processing with DRM syncobjs as synchronization mechanism. The default DRM_IOCTL_NOUVEAU_VM_BIND is synchronous processing, DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only. Co-authored-by: Dave Airlie Signed-off-by: Danilo Krummrich --- Documentation/gpu/driver-uapi.rst | 8 ++ include/uapi/drm/nouveau_drm.h| 209 ++ 2 files changed, 217 insertions(+) diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst index 4411e6919a3d..9c7ca6e33a68 100644 --- a/Documentation/gpu/driver-uapi.rst +++ b/Documentation/gpu/driver-uapi.rst @@ -6,3 +6,11 @@ drm/i915 uAPI = .. kernel-doc:: include/uapi/drm/i915_drm.h + +drm/nouveau uAPI + + +VM_BIND / EXEC uAPI +--- + +.. kernel-doc:: include/uapi/drm/nouveau_drm.h diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h index 853a327433d3..4d3a70529637 100644 --- a/include/uapi/drm/nouveau_drm.h +++ b/include/uapi/drm/nouveau_drm.h @@ -126,6 +126,209 @@ struct drm_nouveau_gem_cpu_fini { __u32 handle; }; +/** + * struct drm_nouveau_sync - sync object + * + * This structure serves as synchronization mechanism for (potentially) + * asynchronous operations such as EXEC or VM_BIND. + */ +struct drm_nouveau_sync { + /** +* @flags: the flags for a sync object +* +* The first 8 bits are used to determine the type of the sync object. 
+*/ + __u32 flags; +#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0 +#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1 +#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf + /** +* @handle: the handle of the sync object +*/ + __u32 handle; + /** +* @timeline_value: +* +* The timeline point of the sync object in case the syncobj is of +* type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ. +*/ + __u64 timeline_value; +}; + +/** + * struct drm_nouveau_vm_init - GPU VA space init structure + * + * Used to initialize the GPU's VA space for a user client, telling the kernel + * which portion of the VA space is managed by the UMD and kernel respectively. + */ +struct drm_nouveau_vm_init { + /** +* @unmanaged_addr: start address of the kernel managed VA space region +*/ + __u64 unmanaged_addr; + /** +* @unmanaged_size: size of the kernel managed VA space region in bytes +*/ + __u64 unmanaged_size; +}; + +/** + * struct drm_nouveau_vm_bind_op - VM_BIND operation + * + * This structure represents a single VM_BIND operation. UMDs should pass + * an array of this structure via struct drm_nouveau_vm_bind's _ptr field. + */ +struct drm_nouveau_vm_bind_op { + /** +* @op: the operation type +*/ + __u32 op; +/** + * @DRM_NOUVEAU_VM_BIND_OP_MAP: + * + * Map a GEM object to the GPU's VA space. Optionally, the + * _NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the kernel to + * create sparse mappings for the given range. + */ +#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0 +/** + * @DRM_NOUVEAU_VM_BIND_OP_UNMAP: + * + * Unmap an existing mapping in the GPU's VA space. If the region the mapping + * is located in is a sparse region, new sparse mappings are created where the + * unmapped (memory backed) mapping was mapped previously. To remove a sparse + * region the _NOUVEAU_VM_BIND_SPARSE must be set. + */ +#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1 + /** +* @flags: the flags for a _nouveau_vm_bind_op +*/ + __u32 flags; +/** + * @DRM_NOUVEAU_VM_BIND_SPARSE: + * + * Indicates that an allocated VA space region should be sparse. 
+ */ +#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8) + /** +* @handle: the handle of the DRM GEM object to map +*/ + __u32 handle; + /** +* @pad: 32 bit padding, should be 0 +*/ + __u32 pad; + /** +* @addr: +* +* the address the VA space region or (memory backed) mapping should be mapped to +*/ + __u64 addr; + /** +* @bo_offset: the offset within the BO backing the mapping +*/ + __u64 bo_offset; + /** +* @range: the size of the requested mapping in bytes +*/ + __u64 range; +}; + +/** + * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND + */ +struct drm_nouveau_vm_bind { + /** +* @op_count: the number of _nouveau_vm_bind_op +
[PATCH drm-next v3 03/15] maple_tree: split up MA_STATE() macro
Split up the MA_STATE() macro such that components using the maple tree can easily inherit from struct ma_state and build custom tree walk macros to hide their internals from users. Example: struct sample_iterator { struct ma_state mas; struct sample_mgr *mgr; }; \#define SAMPLE_ITERATOR(name, __mgr, start)\ struct sample_iterator name = { \ .mas = MA_STATE_INIT(&(__mgr)->mt, start, 0), \ .mgr = __mgr, \ } \#define sample_iter_for_each_range(it__, entry__, end__) \ mas_for_each(&(it__).mas, entry__, end__) -- struct sample *sample; SAMPLE_ITERATOR(si, min); sample_iter_for_each_range(, sample, max) { frob(mgr, sample); } Signed-off-by: Danilo Krummrich --- include/linux/maple_tree.h | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 1fadb5f5978b..87d55334f1c2 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -423,8 +423,8 @@ struct ma_wr_state { #define MA_ERROR(err) \ ((struct maple_enode *)(((unsigned long)err << 2) | 2UL)) -#define MA_STATE(name, mt, first, end) \ - struct ma_state name = {\ +#define MA_STATE_INIT(mt, first, end) \ + { \ .tree = mt, \ .index = first, \ .last = end,\ @@ -435,6 +435,9 @@ struct ma_wr_state { .mas_flags = 0, \ } +#define MA_STATE(name, mt, first, end) \ + struct ma_state name = MA_STATE_INIT(mt, first, end) + #define MA_WR_STATE(name, ma_state, wr_entry) \ struct ma_wr_state name = { \ .mas = ma_state,\ -- 2.39.2
Re: [PATCH v2 2/2] drm/bridge: fsl-ldb: Add i.MX6SX support
On 3/30/23 12:42, Fabio Estevam wrote: From: Fabio Estevam i.MX6SX has a single LVDS port and shares a similar LDB_CTRL register layout with i.MX8MP and i.MX93. There is no LVDS CTRL register on the i.MX6SX, so only write to this register on the appropriate SoCs. Add support for the i.MX6SX LDB. Tested on an imx6sx-sdb board with a Hannstar HSD100PXN1 LVDS panel and also on a custom i.MX6SX-based board. Signed-off-by: Fabio Estevam --- Changes since v1: - None drivers/gpu/drm/bridge/fsl-ldb.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c index 450b352914f4..f8e5d8ab98e3 100644 --- a/drivers/gpu/drm/bridge/fsl-ldb.c +++ b/drivers/gpu/drm/bridge/fsl-ldb.c @@ -56,6 +56,7 @@ #define LVDS_CTRL_VBG_ADJ_MASKGENMASK(19, 17) enum fsl_ldb_devtype { + IMX6SX_LDB, IMX8MP_LDB, IMX93_LDB, }; @@ -64,9 +65,14 @@ struct fsl_ldb_devdata { u32 ldb_ctrl; u32 lvds_ctrl; bool lvds_en_bit; + bool not_lvds_ctrl; You might want to rename this one to something like "composite_control_reg" since the MX6SX only has one LDB control register instead of two like the newer SoCs. But that's an optional change. Reviewed-by: Marek Vasut
[PATCH drm-next v3 04/15] drm: manager to keep track of GPUs VA mappings
Add infrastructure to keep track of GPU virtual address (VA) mappings with a decicated VA space manager implementation. New UAPIs, motivated by Vulkan sparse memory bindings graphics drivers start implementing, allow userspace applications to request multiple and arbitrary GPU VA mappings of buffer objects. The DRM GPU VA manager is intended to serve the following purposes in this context. 1) Provide infrastructure to track GPU VA allocations and mappings, making use of the maple_tree. 2) Generically connect GPU VA mappings to their backing buffers, in particular DRM GEM objects. 3) Provide a common implementation to perform more complex mapping operations on the GPU VA space. In particular splitting and merging of GPU VA mappings, e.g. for intersecting mapping requests or partial unmap requests. Suggested-by: Dave Airlie Signed-off-by: Danilo Krummrich --- Documentation/gpu/drm-mm.rst| 31 + drivers/gpu/drm/Makefile|1 + drivers/gpu/drm/drm_gem.c |3 + drivers/gpu/drm/drm_gpuva_mgr.c | 1686 +++ include/drm/drm_drv.h |6 + include/drm/drm_gem.h | 75 ++ include/drm/drm_gpuva_mgr.h | 681 + 7 files changed, 2483 insertions(+) create mode 100644 drivers/gpu/drm/drm_gpuva_mgr.c create mode 100644 include/drm/drm_gpuva_mgr.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index a52e6f4117d6..c9f120cfe730 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -466,6 +466,37 @@ DRM MM Range Allocator Function References .. kernel-doc:: drivers/gpu/drm/drm_mm.c :export: +DRM GPU VA Manager +== + +Overview + + +.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c + :doc: Overview + +Split and Merge +--- + +.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c + :doc: Split and Merge + +Locking +--- + +.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c + :doc: Locking + + +DRM GPU VA Manager Function References +-- + +.. kernel-doc:: include/drm/drm_gpuva_mgr.h + :internal: + +.. 
kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c + :export: + DRM Buddy Allocator === diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9c6446eb3c83..8eeed446a078 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -45,6 +45,7 @@ drm-y := \ drm_vblank.o \ drm_vblank_work.o \ drm_vma_manager.o \ + drm_gpuva_mgr.o \ drm_writeback.o drm-$(CONFIG_DRM_LEGACY) += \ drm_agpsupport.o \ diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ee3e11e7177d..dd50c46f21b7 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -164,6 +164,9 @@ void drm_gem_private_object_init(struct drm_device *dev, if (!obj->resv) obj->resv = >_resv; + if (drm_core_check_feature(dev, DRIVER_GEM_GPUVA)) + drm_gem_gpuva_init(obj); + drm_vma_node_reset(>vma_node); INIT_LIST_HEAD(>lru_node); } diff --git a/drivers/gpu/drm/drm_gpuva_mgr.c b/drivers/gpu/drm/drm_gpuva_mgr.c new file mode 100644 index ..bd7d27ee44bb --- /dev/null +++ b/drivers/gpu/drm/drm_gpuva_mgr.c @@ -0,0 +1,1686 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Danilo Krummrich + * + */ + +#include +#include + +/** + * DOC: Overview + * + * The DRM GPU VA Manager, represented by struct drm_gpuva_manager keeps track + * of a GPU's virtual address (VA) space and manages the corresponding virtual + * mappings represented by _gpuva objects. It also keeps track of the + * mapping's backing _gem_object buffers. + * + * _gem_object buffers maintain a list (and a corresponding list lock) of + * _gpuva objects representing
[PATCH drm-next v3 02/15] drm_exec: fix double dma_resv unlock
Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/drm_exec.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c index df546cc5a227..f645d22a0863 100644 --- a/drivers/gpu/drm/drm_exec.c +++ b/drivers/gpu/drm/drm_exec.c @@ -62,7 +62,6 @@ static void drm_exec_unlock_all(struct drm_exec *exec) } if (exec->prelocked) { - dma_resv_unlock(exec->prelocked->resv); drm_gem_object_put(exec->prelocked); exec->prelocked = NULL; } -- 2.39.2
[PATCH drm-next v3 01/15] drm: execution context for GEM buffers v3
From: Christian König This adds the infrastructure for an execution context for GEM buffers which is similar to the existinc TTMs execbuf util and intended to replace it in the long term. The basic functionality is that we abstracts the necessary loop to lock many different GEM buffers with automated deadlock and duplicate handling. v2: drop xarray and use dynamic resized array instead, the locking overhead is unecessary and measureable. v3: drop duplicate tracking, radeon is really the only one needing that. Signed-off-by: Christian König --- Documentation/gpu/drm-mm.rst | 12 ++ drivers/gpu/drm/Kconfig | 6 + drivers/gpu/drm/Makefile | 2 + drivers/gpu/drm/drm_exec.c | 249 +++ include/drm/drm_exec.h | 115 5 files changed, 384 insertions(+) create mode 100644 drivers/gpu/drm/drm_exec.c create mode 100644 include/drm/drm_exec.h diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index a79fd3549ff8..a52e6f4117d6 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -493,6 +493,18 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: +DRM Execution context += + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :doc: Overview + +.. kernel-doc:: include/drm/drm_exec.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_exec.c + :export: + GPU Scheduler = diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index ba3fb04bb691..2dc81eb062eb 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -201,6 +201,12 @@ config DRM_TTM GPU memory types. Will be enabled automatically if a device driver uses it. 
+config DRM_EXEC + tristate + depends on DRM + help + Execution context for command submissions + config DRM_BUDDY tristate depends on DRM diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index a33257d2bc7f..9c6446eb3c83 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o # # Memory-management helpers # +# +obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c new file mode 100644 index ..df546cc5a227 --- /dev/null +++ b/drivers/gpu/drm/drm_exec.c @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include +#include +#include + +/** + * DOC: Overview + * + * This component mainly abstracts the retry loop necessary for locking + * multiple GEM objects while preparing hardware operations (e.g. command + * submissions, page table updates etc..). + * + * If a contention is detected while locking a GEM object the cleanup procedure + * unlocks all previously locked GEM objects and locks the contended one first + * before locking any further objects. + * + * After an object is locked fences slots can optionally be reserved on the + * dma_resv object inside the GEM object. + * + * A typical usage pattern should look like this:: + * + * struct drm_gem_object *obj; + * struct drm_exec exec; + * unsigned long index; + * int ret; + * + * drm_exec_init(, true); + * drm_exec_while_not_all_locked() { + * ret = drm_exec_prepare_obj(, boA, 1); + * drm_exec_continue_on_contention(); + * if (ret) + * goto error; + * + * ret = drm_exec_lock(, boB, 1); + * drm_exec_continue_on_contention(); + * if (ret) + * goto error; + * } + * + * drm_exec_for_each_locked_object(, index, obj) { + * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ); + * ... + * } + * drm_exec_fini(); + * + * See struct dma_exec for more details. 
+ */ + +/* Dummy value used to initially enter the retry loop */ +#define DRM_EXEC_DUMMY (void*)~0 + +/* Unlock all objects and drop references */ +static void drm_exec_unlock_all(struct drm_exec *exec) +{ + struct drm_gem_object *obj; + unsigned long index; + + drm_exec_for_each_locked_object(exec, index, obj) { + dma_resv_unlock(obj->resv); + drm_gem_object_put(obj); + } + + if (exec->prelocked) { + dma_resv_unlock(exec->prelocked->resv); + drm_gem_object_put(exec->prelocked); + exec->prelocked = NULL; + } +} + +/** + * drm_exec_init - initialize a drm_exec object + * @exec: the drm_exec object to initialize + * @interruptible: if locks should be acquired interruptible + * + * Initialize the object and make sure that we can track locked and duplicate + * objects. + */ +void drm_exec_init(struct drm_exec *exec, bool interruptible) +{ + exec->interruptible = interruptible; + exec->objects =
[PATCH drm-next v3 00/15] [RFC] DRM GPUVA Manager & Nouveau VM_BIND UAPI
This patch series provides a new UAPI for the Nouveau driver in order to support Vulkan features, such as sparse bindings and sparse residency. Furthermore, with the DRM GPUVA manager it provides a new DRM core feature to keep track of GPU virtual address (VA) mappings in a more generic way. The DRM GPUVA manager is intended to help drivers implement userspace-manageable GPU VA spaces in reference to the Vulkan API. In order to achieve this goal it serves the following purposes in this context. 1) Provide infrastructure to track GPU VA allocations and mappings, making use of the maple_tree. 2) Generically connect GPU VA mappings to their backing buffers, in particular DRM GEM objects. 3) Provide a common implementation to perform more complex mapping operations on the GPU VA space. In particular splitting and merging of GPU VA mappings, e.g. for intersecting mapping requests or partial unmap requests. The new VM_BIND Nouveau UAPI builds on top of the DRM GPUVA manager, itself providing the following new interfaces. 1) Initialize a GPU VA space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA space managed by the kernel and userspace, respectively. 2) Allocate and free a VA space region as well as bind and unbind memory to the GPU's VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl. 3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC make use of the DRM scheduler to queue jobs and support asynchronous processing with DRM syncobjs as a synchronization mechanism. By default DRM_IOCTL_NOUVEAU_VM_BIND does synchronous processing, DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only. The new VM_BIND UAPI for Nouveau also makes use of drm_exec (execution context for GEM buffers) by Christian König.
Since the patch implementing drm_exec was not yet merged into drm-next it is part of this series, as well as a small fix for this patch, which was found while testing this series. This patch series is also available at [1]. There is a Mesa NVK merge request by Dave Airlie [2] implementing the corresponding userspace parts for this series. The Vulkan CTS test suite passes the sparse binding and sparse residency test cases for the new UAPI together with Dave's Mesa work. There are also some test cases in the igt-gpu-tools project [3] for the new UAPI and hence the DRM GPU VA manager. However, most of them are testing the DRM GPU VA manager's logic through Nouveau's new UAPI and should be considered just as helper for implementation. However, I absolutely intend to change those test cases to proper kunit test cases for the DRM GPUVA manager, once and if we agree on it's usefulness and design. [1] https://gitlab.freedesktop.org/nouvelles/kernel/-/tree/new-uapi-drm-next / https://gitlab.freedesktop.org/nouvelles/kernel/-/merge_requests/1 [2] https://gitlab.freedesktop.org/nouveau/mesa/-/merge_requests/150/ [3] https://gitlab.freedesktop.org/dakr/igt-gpu-tools/-/tree/wip_nouveau_vm_bind Changes in V2: == Nouveau: - Reworked the Nouveau VM_BIND UAPI to avoid memory allocations in fence signalling critical sections. Updates to the VA space are split up in three separate stages, where only the 2. stage executes in a fence signalling critical section: 1. update the VA space, allocate new structures and page tables 2. (un-)map the requested memory bindings 3. free structures and page tables - Separated generic job scheduler code from specific job implementations. - Separated the EXEC and VM_BIND implementation of the UAPI. - Reworked the locking parts of the nvkm/vmm RAW interface, such that (un-)map operations can be executed in fence signalling critical sections. 
GPUVA Manager: - made drm_gpuva_regions optional for users of the GPUVA manager - allow NULL GEMs for drm_gpuva entries - switched from drm_mm to maple_tree for tracking drm_gpuva / drm_gpuva_region entries - provide callbacks for users to allocate custom drm_gpuva_op structures to allow inheritance - added user bits to drm_gpuva_flags - added a prefetch operation type in order to support generating prefetch operations in the same way other operations are generated - hand the responsibility for mutual exclusion for a GEM's drm_gpuva list to the user; simplified corresponding (un-)link functions Maple Tree: - I added two maple tree patches to the series, one to support custom tree walk macros and one to hand the locking responsibility to the user of the GPUVA manager without pre-defined lockdep checks. Changes in V3: == Nouveau: - Reworked the Nouveau VM_BIND UAPI to do the job cleanup (including page table cleanup) within a workqueue rather than the job_free() callback of the
Re: linux-next: build failure after merge of the drm-misc tree
Hi Qiang, On Mon, 3 Apr 2023 16:51:27 +0800 Qiang Yu wrote: > > I think you can just revert the following three lima commits when merge: > * 4a66f3da99dc ("drm/lima: add show_fdinfo for drm usage stats") > * 87767de835ed ("drm/lima: allocate unique id per drm_file") > * bccafec957a5 ("drm/lima: add usage counting method to ctx_mgr") OK, I have done that from today. -- Cheers, Stephen Rothwell pgpmw4XMs2RWB.pgp Description: OpenPGP digital signature
Re: [RFC PATCH 00/10] Xe DRM scheduler and long running workload plans
Hi, thanks for the Cc! On 04/04/2023 09.22, Matthew Brost wrote: Hello, As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we have been asked to merge our common DRM scheduler patches first as well as develop a common solution for long running workloads with the DRM scheduler. This RFC series is our first attempt at doing this. We welcome any and all feedback. This can we thought of as 4 parts detailed below. - DRM scheduler changes for 1 to 1 relationship between scheduler and entity (patches 1-3) In Xe all of the scheduling of jobs is done by a firmware scheduler (the GuC) which is a new paradigm WRT to the DRM scheduler and presents severals problems as the DRM was originally designed to schedule jobs on hardware queues. The main problem being that DRM scheduler expects the submission order of jobs to be the completion order of jobs even across multiple entities. This assumption falls apart with a firmware scheduler as a firmware scheduler has no concept of jobs and jobs can complete out of order. A novel solution for was originally thought of by Faith during the initial prototype of Xe, create a 1 to 1 relationship between scheduler and entity. I believe the AGX driver [3] is using this approach and Boris may use approach as well for the Mali driver [4]. To support a 1 to 1 relationship we move the main execution function from a kthread to a work queue and add a new scheduling mode which bypasses code in the DRM which isn't needed in a 1 to 1 relationship. The new scheduling mode should unify all drivers usage with a 1 to 1 relationship and can be thought of as using scheduler as a dependency / infligt job tracker rather than a true scheduler. Yup, we're in the exact same situation with drm/asahi, so this is very welcome! We've been using the existing scheduler as-is, but this should help remove some unneeded complexity in this use case. Do you want me to pull in this series into our tree and make sure this all works out for us? 
I also have a couple bugfixes for drm/sched I need to send out, but I think the rebase/merge with this series should be trivial. I'll send that out this week. - Generic messaging interface for DRM scheduler Idea is to be able to communicate to the submission backend with in band (relative to main execution function) messages. Messages are backend defined and flexable enough for any use case. In Xe we use these messages to clean up entites, set properties for entites, and suspend / resume execution of an entity [5]. I suspect other driver can leverage this messaging concept too as it a convenient way to avoid races in the backend. We haven't needed this so far (mostly by using fine-grained locking and refcounting all over the place) but I can see it being useful to simplify some of those constructs and maybe avoid potential deadlocks in some places. I'm not sure yet whether we can fully get rid of the main queue refcounting/locking (our completion/error signaling path doesn't map well to DMA fences directly so we still need something there to get from the global GPU completion signaling thread to individual queues) but it might be a step in the right direction at least! ~~ Lina
Re: [PATCH] drm/i915/guc: Don't capture Gen8 regs on Gen12 devices
On Mon, Apr 03, 2023 at 02:33:34PM -0700, john.c.harri...@intel.com wrote: > From: John Harrison > > A pair of pre-Gen12 registers were being included in the Gen12 capture > list. GuC was rejecting those as being invalid and logging errors > about them. So, stop doing it. Looks like these registers existed from gen8-gen11. With this change, it looks like they also won't be included in the GuC error capture for gen11 (ICL and EHL/JSL) since those platforms return xe_lpd_lists [1] rather than default_lists; do we care about that? I assume not (since those platforms don't use GuC submission unless you force it with the enable_guc modparam and taint your kernel), but I figured I should point it out. Reviewed-by: Matt Roper [1] Why is the main list we use called xe_lpd (i.e., the name of ADL-P's display IP)? It doesn't seem like we're doing anything with display registers here so using display IP naming seems really confusing. Matt > > Signed-off-by: John Harrison > Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state > capture.") > Cc: Alan Previn > Cc: Umesh Nerlige Ramappa > Cc: Lucas De Marchi > Cc: John Harrison > Cc: Jani Nikula > Cc: Matt Roper > Cc: Balasubramani Vivekanandan > Cc: Daniele Ceraolo Spurio > --- > drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 7 +-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > index cf49188db6a6e..e0e793167d61b 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > @@ -31,12 +31,14 @@ > { FORCEWAKE_MT, 0, 0, "FORCEWAKE" } > > #define COMMON_GEN9BASE_GLOBAL \ > - { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ > - { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \ > { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \ > { DONE_REG, 0, 0, "DONE_REG" }, \ > { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" } > > +#define 
GEN9_GLOBAL \ > + { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ > + { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" } > + > #define COMMON_GEN12BASE_GLOBAL \ > { GEN12_FAULT_TLB_DATA0,0, 0, "GEN12_FAULT_TLB_DATA0" }, \ > { GEN12_FAULT_TLB_DATA1,0, 0, "GEN12_FAULT_TLB_DATA1" }, \ > @@ -142,6 +144,7 @@ static const struct __guc_mmio_reg_descr > xe_lpd_gsc_inst_regs[] = { > static const struct __guc_mmio_reg_descr default_global_regs[] = { > COMMON_BASE_GLOBAL, > COMMON_GEN9BASE_GLOBAL, > + GEN9_GLOBAL, > }; > > static const struct __guc_mmio_reg_descr default_rc_class_regs[] = { > -- > 2.39.1 > -- Matt Roper Graphics Software Engineer Linux GPU Platform Enablement Intel Corporation
Re: [PATCH RFC v2 2/6] drm/msm: Add MSM-specific DSC helper methods
On 04/04/2023 00:38, Jessica Zhang wrote: On 4/2/2023 4:21 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Introduce MSM-specific DSC helper methods, as some calculations are common between DP and DSC. Changes in v2: - Moved files up to msm/ directory - Dropped get_comp_ratio() helper - Used drm_int2fixp() to convert to integers to fp - Style changes to improve readability - Dropped unused bpp variable in msm_dsc_get_dce_bytes_per_line() - Changed msm_dsc_get_slice_per_intf() to a static inline method - Dropped last division step of msm_dsc_get_pclk_per_line() and changed method name accordingly - Changed DSC_BPP macro to drm_dsc_get_bpp_int() helper method - Fixed some math issues caused by passing in incorrect types to drm_fixed methods in get_bytes_per_soft_slice() Signed-off-by: Jessica Zhang --- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/msm_dsc_helper.c | 53 drivers/gpu/drm/msm/msm_dsc_helper.h | 42 3 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 7274c41228ed..b814fc80e2d5 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -94,6 +94,7 @@ msm-y += \ msm_atomic_tracepoints.o \ msm_debugfs.o \ msm_drv.o \ + msm_dsc_helper.o \ msm_fb.o \ msm_fence.o \ msm_gem.o \ diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.c b/drivers/gpu/drm/msm/msm_dsc_helper.c new file mode 100644 index ..60b73e17e6eb --- /dev/null +++ b/drivers/gpu/drm/msm/msm_dsc_helper.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved + */ + +#include +#include +#include + +#include "msm_drv.h" +#include "msm_dsc_helper.h" + +static s64 get_bytes_per_soft_slice(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) intf_width is unused Hi Dmitry, Acked. 
+{ + int bpp = msm_dsc_get_bpp_int(dsc); + s64 numerator_fp, denominator_fp; + s64 comp_ratio_fp = drm_fixp_from_fraction(src_bpp, bpp); + + numerator_fp = drm_int2fixp(dsc->slice_width * 3); You have lost dsc->bits_per_component here. This was moved to the denominator calculation, but I'll move it back to this line to avoid confusion. Maybe you occasionally mixed bpp and bpc, because there is no bits_per_component usage in denominator. Could you please recheck the calculations. + denominator_fp = drm_fixp_from_fraction(comp_ratio_fp * 8, drm_int2fixp(bpp)); denominator_fp = drm_fixp_from_fraction(src_bpp * 8, bpp); Acked. + + return drm_fixp_div(numerator_fp, denominator_fp); +} + +u32 msm_dsc_get_eol_byte_num(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) +{ + u32 bytes_per_soft_slice, extra_eol_bytes, bytes_per_intf; + s64 bytes_per_soft_slice_fp; + int slice_per_intf = msm_dsc_get_slice_per_intf(dsc, intf_width); + + bytes_per_soft_slice_fp = get_bytes_per_soft_slice(dsc, intf_width, src_bpp); + bytes_per_soft_slice = drm_fixp2int_ceil(bytes_per_soft_slice_fp); + + bytes_per_intf = bytes_per_soft_slice * slice_per_intf; + extra_eol_bytes = bytes_per_intf % 3; + if (extra_eol_bytes != 0) + extra_eol_bytes = 3 - extra_eol_bytes; I become confused here when I checked eol_bytes in the display techpack. I see that for DP the dp_panel_dsc_pclk_param_calc() calculates dsc->eol_bytes_num in this way, the size to pad dsc_byte_count * slice_per_intf to 3 bytes. However, for DSI this is a simple as total_bytes_per_intf % 3 , so it is not a padding, but a length of the last chunk. Could you please clarify? If the techpack code is correct, I'd prefer if we return last chunk size here and calculate the padding length in the DP driver. I've double checked the calculations between DP and DSI, and I think you're right. Will move the `if (extra_eol_bytes != 0)` block out to DP code. Ack. Could you please check with HW team that our understanding is correct? 
+ + return extra_eol_bytes; +} + +int msm_dsc_get_uncompressed_pclk_per_line(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) Basing on Abhinav's description ("pclk_per_line can be only per interface") would it better be named as msm_dsc_get_uncompressed_pclk_per_intf() ? or msm_dsc_get_uncompressed_pclk_for_intf() ? BTW: if get_bytes_per_soft_slice() doesn't use intf_width, we can probably drop it here too. +{ + s64 data_width; + + if (!dsc->slice_width || (intf_width < dsc->slice_width)) + return -EINVAL; Error code is not validated at dsi_timing_setup. I'd suggest moving error checks there and dropping the error handling here. If dsc->slice_width is not set, we should stop much earlier than drm_bridge's pre_enable() callback. Acked. Thanks, Jessica Zhang + + data_width = drm_fixp_mul(dsc->slice_count, +
[RFC PATCH 10/10] drm/syncobj: Warn on long running dma-fences
Long running dma-fences are not allowed to be exported; a drm_syncobj is designed to be exported to the user, so add a warn if drm_syncobj installs long running dma-fences as this is not allowed. Signed-off-by: Matthew Brost --- drivers/gpu/drm/drm_syncobj.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 0c2be8360525..7c304cd7d037 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -291,6 +291,7 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj, struct syncobj_wait_entry *cur, *tmp; struct dma_fence *prev; + WARN_ON_ONCE(dma_fence_is_lr(fence)); dma_fence_get(fence); spin_lock(&syncobj->lock); @@ -325,8 +326,10 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, struct dma_fence *old_fence; struct syncobj_wait_entry *cur, *tmp; - if (fence) + if (fence) { + WARN_ON_ONCE(dma_fence_is_lr(fence)); dma_fence_get(fence); + } spin_lock(&syncobj->lock); -- 2.34.1
[RFC PATCH 05/10] drm/sched: Start run wq before TDR in drm_sched_start
If the TDR is set to a very small value it can fire before the run wq is started in the function drm_sched_start. The run wq is expected to be running when the TDR fires, fix this ordering so this expectation is always met. Signed-off-by: Matthew Brost --- drivers/gpu/drm/scheduler/sched_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 9dc3378e9c5e..6ae710017024 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -611,13 +611,13 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) drm_sched_job_done(s_job); } + drm_sched_run_wq_start(sched); + if (full_recovery) { spin_lock(&sched->job_list_lock); drm_sched_start_timeout(sched); spin_unlock(&sched->job_list_lock); } - - drm_sched_run_wq_start(sched); } EXPORT_SYMBOL(drm_sched_start); -- 2.34.1
[RFC PATCH 09/10] drm/sched: Support long-running sched entities
From: Thomas Hellström Make the drm scheduler aware of long-running dma fences by * Enable marking a sched entity as producing long-running fences. * Disallowing long-running fences as dependencies for non-long-running sched entities, while long-running sched entities allow those. Signed-off-by: Matthew Brost Signed-off-by: Thomas Hellström --- drivers/gpu/drm/scheduler/sched_entity.c | 44 +++- drivers/gpu/drm/scheduler/sched_fence.c | 4 +++ drivers/gpu/drm/scheduler/sched_main.c | 9 ++--- include/drm/gpu_scheduler.h | 36 +++ include/linux/dma-fence.h| 5 +++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index ccea4d079d0f..0640fc9d4491 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -174,6 +174,32 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) job->sched->ops->free_job(job); } +/** + * drm_sched_entity_add_fence_cb() - Helper to add a fence callback + * @entity: The sched entity + * @f: The possbily long-running dma-fence on which to add a callback + * @cb: The struct dma_fence_cb to use for the callback + * @func: The callback function. + * + * This function calls the proper dma_fence add callback function + * depending on whether @entity is marked as long-running or not. If it + * is not, this will make sure we get a warning if trying to add a + * callback on a long-running dma-fence. + * + * Return: Zero on success, -ENOENT if already signaled and -EINVAL in case + * of error. + */ +int drm_sched_entity_add_fence_cb(struct drm_sched_entity *entity, + struct dma_fence *f, + struct dma_fence_cb *cb, + dma_fence_func_t func) +{ + if (drm_sched_entity_is_lr(entity)) + return dma_fence_lr_add_callback(f, cb, func); + + return dma_fence_add_callback(f, cb, func); +} + /* Signal the scheduler finished fence when the entity in question is killed. 
*/ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct dma_fence_cb *cb) @@ -187,8 +213,8 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, /* Wait for all dependencies to avoid data corruptions */ while (!xa_empty(>dependencies)) { f = xa_erase(>dependencies, job->last_dependency++); - r = dma_fence_add_callback(f, >finish_cb, - drm_sched_entity_kill_jobs_cb); + r = drm_sched_entity_add_fence_cb(job->entity, f, >finish_cb, + drm_sched_entity_kill_jobs_cb); if (!r) return; @@ -226,8 +252,9 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) dma_fence_set_error(_fence->finished, -ESRCH); dma_fence_get(_fence->finished); - if (!prev || dma_fence_add_callback(prev, >finish_cb, - drm_sched_entity_kill_jobs_cb)) + if (!prev || drm_sched_entity_add_fence_cb(job->entity, prev, + >finish_cb, + drm_sched_entity_kill_jobs_cb)) drm_sched_entity_kill_jobs_cb(NULL, >finish_cb); prev = _fence->finished; @@ -420,8 +447,8 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) fence = dma_fence_get(_fence->scheduled); dma_fence_put(entity->dependency); entity->dependency = fence; - if (!dma_fence_add_callback(fence, >cb, - drm_sched_entity_clear_dep)) + if (!drm_sched_entity_add_fence_cb(entity, fence, >cb, + drm_sched_entity_clear_dep)) return true; /* Ignore it when it is already scheduled */ @@ -429,8 +456,9 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) return false; } - if (!dma_fence_add_callback(entity->dependency, >cb, - drm_sched_entity_wakeup)) + if (!drm_sched_entity_add_fence_cb(entity, entity->dependency, + >cb, + drm_sched_entity_wakeup)) return true; dma_fence_put(entity->dependency); diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index d7cfc0441885..a566723ecc2c 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++
[RFC PATCH 02/10] drm/sched: Move schedule policy to scheduler / entity
Rather than a global modparam for scheduling policy, move the scheduling policy to scheduler / entity so user can control each scheduler / entity policy. Signed-off-by: Matthew Brost --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + drivers/gpu/drm/etnaviv/etnaviv_sched.c| 3 ++- drivers/gpu/drm/lima/lima_sched.c | 3 ++- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++- drivers/gpu/drm/panfrost/panfrost_job.c| 3 ++- drivers/gpu/drm/scheduler/sched_entity.c | 25 ++ drivers/gpu/drm/scheduler/sched_main.c | 21 +- drivers/gpu/drm/v3d/v3d_sched.c| 15 - include/drm/gpu_scheduler.h| 23 ++-- 9 files changed, 73 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 00c9c03c8f94..4df0fca5a74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2368,6 +2368,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->num_hw_submission, amdgpu_job_hang_limit, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, + DRM_SCHED_POLICY_DEFAULT, adev->dev); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 8486a2923f1b..61204a3f8b0b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -136,7 +136,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) ret = drm_sched_init(>sched, _sched_ops, NULL, etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, msecs_to_jiffies(500), NULL, NULL, -dev_name(gpu->dev), gpu->dev); +dev_name(gpu->dev), DRM_SCHED_POLICY_DEFAULT, +gpu->dev); if (ret) return ret; diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 54f53bece27c..33042ba6ae93 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -491,7 +491,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, 
const char *name) return drm_sched_init(>base, _sched_ops, NULL, 1, lima_job_hang_limit, msecs_to_jiffies(timeout), NULL, - NULL, name, pipe->ldev->dev); + NULL, name, DRM_SCHED_POLICY_DEFAULT, + pipe->ldev->dev); } void lima_sched_pipe_fini(struct lima_sched_pipe *pipe) diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 5879fc262047..f408a9097315 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -97,7 +97,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ret = drm_sched_init(>sched, _sched_ops, NULL, num_hw_submissions, 0, sched_timeout, - NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); + NULL, NULL, to_msm_bo(ring->bo)->name, + DRM_SCHED_POLICY_DEFAULT, gpu->dev->dev); if (ret) { goto fail; } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index f48b07056a16..effa48b33dce 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -819,7 +819,8 @@ int panfrost_job_init(struct panfrost_device *pfdev) nentries, 0, msecs_to_jiffies(JOB_TIMEOUT_MS), pfdev->reset.wq, -NULL, "pan_js", pfdev->dev); +NULL, "pan_js", DRM_SCHED_POLICY_DEFAULT, +pfdev->dev); if (ret) { dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); goto err_sched; diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 15d04a0ec623..f1299e51860b 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -33,6 +33,20 @@ #define to_drm_sched_job(sched_job)\ container_of((sched_job), struct drm_sched_job, queue_node) +static bool bad_policies(struct drm_gpu_scheduler **sched_list, +unsigned int num_sched_list) +{ + enum
[RFC PATCH 04/10] drm/sched: Add generic scheduler message interface
Add generic schedule message interface which sends messages to backend from the drm_gpu_scheduler main submission thread. The idea is some of these messages modify some state in drm_sched_entity which is also modified during submission. By scheduling these messages and submission in the same thread there is no race changing states in drm_sched_entity. This interface will be used in XE, new Intel GPU driver, to cleanup, suspend, resume, and change scheduling properties of a drm_sched_entity. The interface is designed to be generic and extendable with only the backend understanding the messages. Signed-off-by: Matthew Brost --- drivers/gpu/drm/scheduler/sched_main.c | 58 +- include/drm/gpu_scheduler.h| 29 - 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 2795021efe7b..9dc3378e9c5e 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1055,6 +1055,54 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, } EXPORT_SYMBOL(drm_sched_pick_best); +/** + * drm_sched_add_msg - add scheduler message + * + * @sched: scheduler instance + * @msg: message to be added + * + * Can and will pass any jobs waiting on dependencies or in a runnable queue. + * Messages processing will stop if schedule run wq is stopped and resume when + * run wq is started. 
+ */ +void drm_sched_add_msg(struct drm_gpu_scheduler *sched, + struct drm_sched_msg *msg) +{ + spin_lock(>job_list_lock); + list_add_tail(>link, >msgs); + spin_unlock(>job_list_lock); + + /* +* Same as above in drm_sched_run_wq_queue, try to kick worker if +* paused, harmless if this races +*/ + if (!sched->pause_run_wq) + queue_work(sched->run_wq, >work_run); +} +EXPORT_SYMBOL(drm_sched_add_msg); + +/** + * drm_sched_get_msg - get scheduler message + * + * @sched: scheduler instance + * + * Returns NULL or message + */ +static struct drm_sched_msg * +drm_sched_get_msg(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_msg *msg; + + spin_lock(>job_list_lock); + msg = list_first_entry_or_null(>msgs, + struct drm_sched_msg, link); + if (msg) + list_del(>link); + spin_unlock(>job_list_lock); + + return msg; +} + /** * drm_sched_main - main scheduler thread * @@ -1068,6 +1116,7 @@ static void drm_sched_main(struct work_struct *w) while (!READ_ONCE(sched->pause_run_wq)) { struct drm_sched_entity *entity; + struct drm_sched_msg *msg; struct drm_sched_fence *s_fence; struct drm_sched_job *sched_job; struct dma_fence *fence; @@ -1075,12 +1124,16 @@ static void drm_sched_main(struct work_struct *w) cleanup_job = drm_sched_get_cleanup_job(sched); entity = drm_sched_select_entity(sched); + msg = drm_sched_get_msg(sched); if (cleanup_job) sched->ops->free_job(cleanup_job); + if (msg) + sched->ops->process_msg(msg); + if (!entity) { - if (!cleanup_job) + if (!cleanup_job && !msg) break; continue; } @@ -1089,7 +1142,7 @@ static void drm_sched_main(struct work_struct *w) if (!sched_job) { complete_all(>entity_idle); - if (!cleanup_job) + if (!cleanup_job && !msg) break; continue; } @@ -1181,6 +1234,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, init_waitqueue_head(>job_scheduled); INIT_LIST_HEAD(>pending_list); + INIT_LIST_HEAD(>msgs); spin_lock_init(>job_list_lock); atomic_set(>hw_rq_count, 0); INIT_DELAYED_WORK(>work_tdr, drm_sched_job_timedout); diff --git 
a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 3e421f5a710c..18172ae63ab7 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -398,6 +398,23 @@ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_ENODEV, }; +/** + * struct drm_sched_msg - an in-band (relative to GPU scheduler run queue) + * message + * + * Generic enough for backend defined messages, backend can expand if needed. + */ +struct drm_sched_msg { + /** @link: list link into the gpu scheduler list of messages */ + struct list_headlink; + /** +* @private_data: opaque pointer to message private data (backend defined) +*/ + void*private_data; + /** @opcode: opcode of message (backend
[RFC PATCH 00/10] Xe DRM scheduler and long running workload plans
Hello, As a prerequisite to merging the new Intel Xe DRM driver [1] [2], we have been asked to merge our common DRM scheduler patches first as well as develop a common solution for long running workloads with the DRM scheduler. This RFC series is our first attempt at doing this. We welcome any and all feedback. This can be thought of as 4 parts detailed below. - DRM scheduler changes for 1 to 1 relationship between scheduler and entity (patches 1-3) In Xe all of the scheduling of jobs is done by a firmware scheduler (the GuC) which is a new paradigm WRT to the DRM scheduler and presents several problems as the DRM was originally designed to schedule jobs on hardware queues. The main problem being that DRM scheduler expects the submission order of jobs to be the completion order of jobs even across multiple entities. This assumption falls apart with a firmware scheduler as a firmware scheduler has no concept of jobs and jobs can complete out of order. A novel solution for this was originally thought of by Faith during the initial prototype of Xe, create a 1 to 1 relationship between scheduler and entity. I believe the AGX driver [3] is using this approach and Boris may use this approach as well for the Mali driver [4]. To support a 1 to 1 relationship we move the main execution function from a kthread to a work queue and add a new scheduling mode which bypasses code in the DRM which isn't needed in a 1 to 1 relationship. The new scheduling mode should unify all drivers usage with a 1 to 1 relationship and can be thought of as using scheduler as a dependency / inflight job tracker rather than a true scheduler. - Generic messaging interface for DRM scheduler Idea is to be able to communicate to the submission backend with in band (relative to main execution function) messages. Messages are backend defined and flexible enough for any use case. In Xe we use these messages to clean up entities, set properties for entities, and suspend / resume execution of an entity [5]. 
I suspect other drivers can leverage this messaging concept too as it is a convenient way to avoid races in the backend. - Support for using TDR for all error paths of a scheduler / entity Fix a few races / bugs, add function to dynamically set the TDR timeout. - Annotate dma-fences for long running workloads. The idea here is to use dma-fences only as sync points within the scheduler and never export them for long running workloads. By annotating these fences as long running we ensure that these dma-fences are never used in a way that breaks the dma-fence rules. A benefit of this approach is the scheduler can still safely flow control the execution ring buffer via the job limit without breaking the dma-fence rules. Again this is a first draft and looking forward to feedback. Enjoy - Matt [1] https://gitlab.freedesktop.org/drm/xe/kernel [2] https://patchwork.freedesktop.org/series/112188/ [3] https://patchwork.freedesktop.org/series/114772/ [4] https://patchwork.freedesktop.org/patch/515854/?series=112188=1 [5] https://gitlab.freedesktop.org/drm/xe/kernel/-/blob/drm-xe-next/drivers/gpu/drm/xe/xe_guc_submit.c#L1031 Matthew Brost (8): drm/sched: Convert drm scheduler to use a work queue rather than kthread drm/sched: Move schedule policy to scheduler / entity drm/sched: Add DRM_SCHED_POLICY_SINGLE_ENTITY scheduling policy drm/sched: Add generic scheduler message interface drm/sched: Start run wq before TDR in drm_sched_start drm/sched: Submit job before starting TDR drm/sched: Add helper to set TDR timeout drm/syncobj: Warn on long running dma-fences Thomas Hellström (2): dma-buf/dma-fence: Introduce long-running completion fences drm/sched: Support long-running sched entities drivers/dma-buf/dma-fence.c | 142 +++--- drivers/dma-buf/dma-resv.c | 5 + drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +- drivers/gpu/drm/drm_syncobj.c | 5 +- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 5 +- drivers/gpu/drm/lima/lima_sched.c | 5 +- 
drivers/gpu/drm/msm/adreno/adreno_device.c | 6 +- drivers/gpu/drm/msm/msm_ringbuffer.c| 5 +- drivers/gpu/drm/panfrost/panfrost_job.c | 5 +- drivers/gpu/drm/scheduler/sched_entity.c| 127 +++-- drivers/gpu/drm/scheduler/sched_fence.c | 6 +- drivers/gpu/drm/scheduler/sched_main.c | 278 +++- drivers/gpu/drm/v3d/v3d_sched.c | 25 +- include/drm/gpu_scheduler.h | 130 +++-- include/linux/dma-fence.h | 60 - 16 files changed, 649 insertions(+), 184 deletions(-) -- 2.34.1
[RFC PATCH 07/10] drm/sched: Add helper to set TDR timeout
Add helper to set TDR timeout and restart the TDR with new timeout value. This will be used in XE, new Intel GPU driver, to trigger the TDR to cleanup drm_sched_entity that encounter errors. Signed-off-by: Matthew Brost --- drivers/gpu/drm/scheduler/sched_main.c | 18 ++ include/drm/gpu_scheduler.h| 1 + 2 files changed, 19 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4eac02d212c1..d61880315d8d 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -370,6 +370,24 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) queue_delayed_work(sched->timeout_wq, >work_tdr, sched->timeout); } +/** + * drm_sched_set_timeout - set timeout for reset worker + * + * @sched: scheduler instance to set and (re)-start the worker for + * @timeout: timeout period + * + * Set and (re)-start the timeout for the given scheduler. + */ +void drm_sched_set_timeout(struct drm_gpu_scheduler *sched, long timeout) +{ + spin_lock(>job_list_lock); + sched->timeout = timeout; + cancel_delayed_work(>work_tdr); + drm_sched_start_timeout(sched); + spin_unlock(>job_list_lock); +} +EXPORT_SYMBOL(drm_sched_set_timeout); + /** * drm_sched_fault - immediately start timeout handler * diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 18172ae63ab7..6258e324bd7c 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -593,6 +593,7 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, struct drm_gpu_scheduler **sched_list, unsigned int num_sched_list); +void drm_sched_set_timeout(struct drm_gpu_scheduler *sched, long timeout); void drm_sched_job_cleanup(struct drm_sched_job *job); void drm_sched_wakeup(struct drm_gpu_scheduler *sched); void drm_sched_add_msg(struct drm_gpu_scheduler *sched, -- 2.34.1
[RFC PATCH 03/10] drm/sched: Add DRM_SCHED_POLICY_SINGLE_ENTITY scheduling policy
DRM_SCHED_POLICY_SINGLE_ENTITY creates a 1 to 1 relationship between scheduler and entity. No priorities or run queue used in this mode. Intended for devices with firmware schedulers. Signed-off-by: Matthew Brost --- drivers/gpu/drm/scheduler/sched_entity.c | 58 + drivers/gpu/drm/scheduler/sched_fence.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 64 +--- include/drm/gpu_scheduler.h | 29 +++ 4 files changed, 123 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index f1299e51860b..ccea4d079d0f 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -91,8 +91,15 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, RB_CLEAR_NODE(>rb_tree_node); if(num_sched_list) { - entity->rq = _list[0]->sched_rq[entity->priority]; entity->sched_policy = sched_list[0]->sched_policy; + if (entity->sched_policy != DRM_SCHED_POLICY_SINGLE_ENTITY) { + entity->rq = _list[0]->sched_rq[entity->priority]; + } else { + if (num_sched_list != 1 || sched_list[0]->single_entity) + return -EINVAL; + sched_list[0]->single_entity = entity; + entity->single_sched = sched_list[0]; + } } init_completion(>entity_idle); @@ -126,7 +133,8 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity, struct drm_gpu_scheduler **sched_list, unsigned int num_sched_list) { - WARN_ON(!num_sched_list || !sched_list); + WARN_ON(!num_sched_list || !sched_list || + entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY); entity->sched_list = sched_list; entity->num_sched_list = num_sched_list; @@ -196,13 +204,16 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) { struct drm_sched_job *job; struct dma_fence *prev; + bool single_entity = + entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY; - if (!entity->rq) + if (!entity->rq && !single_entity) return; spin_lock(>rq_lock); entity->stopped = true; - drm_sched_rq_remove_entity(entity->rq, entity); + 
if (!single_entity) + drm_sched_rq_remove_entity(entity->rq, entity); spin_unlock(>rq_lock); /* Make sure this entity is not used by the scheduler at the moment */ @@ -224,6 +235,21 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity) dma_fence_put(prev); } +/** + * drm_sched_entity_to_scheduler - Schedule entity to GPU scheduler + * @entity: scheduler entity + * + * Returns GPU scheduler for the entity + */ +struct drm_gpu_scheduler * +drm_sched_entity_to_scheduler(struct drm_sched_entity *entity) +{ + bool single_entity = + entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY; + + return single_entity ? entity->single_sched : entity->rq->sched; +} + /** * drm_sched_entity_flush - Flush a context entity * @@ -241,11 +267,13 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) struct drm_gpu_scheduler *sched; struct task_struct *last_user; long ret = timeout; + bool single_entity = + entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY; - if (!entity->rq) + if (!entity->rq && !single_entity) return 0; - sched = entity->rq->sched; + sched = drm_sched_entity_to_scheduler(entity); /** * The client will not queue more IBs during this fini, consume existing * queued IBs or discard them on SIGKILL @@ -338,7 +366,7 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, container_of(cb, struct drm_sched_entity, cb); drm_sched_entity_clear_dep(f, cb); - drm_sched_wakeup(entity->rq->sched); + drm_sched_wakeup(drm_sched_entity_to_scheduler(entity)); } /** @@ -352,6 +380,8 @@ static void drm_sched_entity_wakeup(struct dma_fence *f, void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority) { + WARN_ON(entity->sched_policy == DRM_SCHED_POLICY_SINGLE_ENTITY); + spin_lock(>rq_lock); entity->priority = priority; spin_unlock(>rq_lock); @@ -364,7 +394,7 @@ EXPORT_SYMBOL(drm_sched_entity_set_priority); */ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity) { - 
struct drm_gpu_scheduler *sched = entity->rq->sched; + struct drm_gpu_scheduler *sched = drm_sched_entity_to_scheduler(entity); struct dma_fence *fence =
[RFC PATCH 08/10] dma-buf/dma-fence: Introduce long-running completion fences
From: Thomas Hellström For long-running workloads, drivers either need to open-code completion waits, invent their own synchronization primitives or internally use dma-fences that do not obey the cross-driver dma-fence protocol, but without any lockdep annotation all these approaches are error prone. So since for example the drm scheduler uses dma-fences it is desirable for a driver to be able to use it for throttling and error handling also with internal dma-fences tha do not obey the cros-driver dma-fence protocol. Introduce long-running completion fences in form of dma-fences, and add lockdep annotation for them. In particular: * Do not allow waiting under any memory management locks. * Do not allow to attach them to a dma-resv object. * Introduce a new interface for adding callbacks making the helper adding a callback sign off on that it is aware that the dma-fence may not complete anytime soon. Typically this will be the scheduler chaining a new long-running fence on another one. Signed-off-by: Matthew Brost Signed-off-by: Thomas Hellström --- drivers/dma-buf/dma-fence.c | 142 ++-- drivers/dma-buf/dma-resv.c | 5 ++ include/linux/dma-fence.h | 55 +- 3 files changed, 160 insertions(+), 42 deletions(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index f177c56269bb..9726b2a3c67d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -111,6 +111,20 @@ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); * drivers/gpu should ever call dma_fence_wait() in such contexts. */ +/** + * DOC: Long-Running (lr) dma-fences. + * + * * Long-running dma-fences are NOT required to complete in reasonable time. + * Typically they signal completion of user-space controlled workloads and + * as such, need to never be part of a cross-driver contract, never waited + * for inside a kernel lock, nor attached to a dma-resv. There are helpers + * and warnings in place to help facilitate that that never happens. 
+ * + * * The motivation for their existense is that helpers that are intended to + * be used by drivers may use dma-fences that, given the workloads mentioned + * above, become long-running. + */ + static const char *dma_fence_stub_get_name(struct dma_fence *fence) { return "stub"; @@ -284,6 +298,34 @@ static struct lockdep_map dma_fence_lockdep_map = { .name = "dma_fence_map" }; +static struct lockdep_map dma_fence_lr_lockdep_map = { + .name = "dma_fence_lr_map" +}; + +static bool __dma_fence_begin_signalling(struct lockdep_map *map) +{ + /* explicitly nesting ... */ + if (lock_is_held_type(map, 1)) + return true; + + /* rely on might_sleep check for soft/hardirq locks */ + if (in_atomic()) + return true; + + /* ... and non-recursive readlock */ + lock_acquire(map, 0, 0, 1, 1, NULL, _RET_IP_); + + return false; +} + +static void __dma_fence_end_signalling(bool cookie, struct lockdep_map *map) +{ + if (cookie) + return; + + lock_release(map, _RET_IP_); +} + /** * dma_fence_begin_signalling - begin a critical DMA fence signalling section * @@ -300,18 +342,7 @@ static struct lockdep_map dma_fence_lockdep_map = { */ bool dma_fence_begin_signalling(void) { - /* explicitly nesting ... */ - if (lock_is_held_type(_fence_lockdep_map, 1)) - return true; - - /* rely on might_sleep check for soft/hardirq locks */ - if (in_atomic()) - return true; - - /* ... 
and non-recursive readlock */ - lock_acquire(_fence_lockdep_map, 0, 0, 1, 1, NULL, _RET_IP_); - - return false; + return __dma_fence_begin_signalling(_fence_lockdep_map); } EXPORT_SYMBOL(dma_fence_begin_signalling); @@ -323,25 +354,61 @@ EXPORT_SYMBOL(dma_fence_begin_signalling); */ void dma_fence_end_signalling(bool cookie) { - if (cookie) - return; - - lock_release(_fence_lockdep_map, _RET_IP_); + __dma_fence_end_signalling(cookie, _fence_lockdep_map); } EXPORT_SYMBOL(dma_fence_end_signalling); -void __dma_fence_might_wait(void) +/** + * dma_fence_lr begin_signalling - begin a critical long-running DMA fence + * signalling section + * + * Drivers should use this to annotate the beginning of any code section + * required to eventually complete _fence by calling dma_fence_signal(). + * + * The end of these critical sections are annotated with + * dma_fence_lr_end_signalling(). Ideally the section should encompass all + * locks that are ever required to signal a long-running dma-fence. + * + * Return: An opaque cookie needed by the implementation, which needs to be + * passed to dma_fence_lr end_signalling(). + */ +bool dma_fence_lr_begin_signalling(void) +{ + return __dma_fence_begin_signalling(_fence_lr_lockdep_map); +} +EXPORT_SYMBOL(dma_fence_lr_begin_signalling); + +/** + *
[RFC PATCH 06/10] drm/sched: Submit job before starting TDR
If the TDR is set to a value, it can fire before a job is submitted in drm_sched_main. The job should always be submitted before the TDR fires, fix this ordering. Signed-off-by: Matthew Brost --- drivers/gpu/drm/scheduler/sched_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 6ae710017024..4eac02d212c1 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1150,10 +1150,10 @@ static void drm_sched_main(struct work_struct *w) s_fence = sched_job->s_fence; atomic_inc(&sched->hw_rq_count); - drm_sched_job_begin(sched_job); trace_drm_run_job(sched_job, entity); fence = sched->ops->run_job(sched_job); + drm_sched_job_begin(sched_job); complete_all(&entity->entity_idle); drm_sched_fence_scheduled(s_fence); -- 2.34.1
[RFC PATCH 01/10] drm/sched: Convert drm scheduler to use a work queue rather than kthread
In XE, the new Intel GPU driver, a choice has been made to have a 1 to 1 mapping between a drm_gpu_scheduler and drm_sched_entity. At first this seems a bit odd but let us explain the reasoning below. 1. In XE the submission order from multiple drm_sched_entity is not guaranteed to be the same as the completion order even if targeting the same hardware engine. This is because in XE we have a firmware scheduler, the GuC, which is allowed to reorder, timeslice, and preempt submissions. If using a shared drm_gpu_scheduler across multiple drm_sched_entity, the TDR falls apart as the TDR expects submission order == completion order. Using a dedicated drm_gpu_scheduler per drm_sched_entity solves this problem. 2. In XE submissions are done via programming a ring buffer (circular buffer), a drm_gpu_scheduler provides a limit on number of jobs, if the limit of number of jobs is set to RING_SIZE / MAX_SIZE_PER_JOB we get flow control on the ring for free. A problem with this design is currently a drm_gpu_scheduler uses a kthread for submission / job cleanup. This doesn't scale if a large number of drm_gpu_scheduler are used. To work around the scaling issue, use a worker rather than kthread for submission / job cleanup. 
v2: - (Rob Clark) Fix msm build - Pass in run work queue Signed-off-by: Matthew Brost --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 14 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +-- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- drivers/gpu/drm/lima/lima_sched.c | 2 +- drivers/gpu/drm/msm/adreno/adreno_device.c | 6 +- drivers/gpu/drm/msm/msm_ringbuffer.c| 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 126 drivers/gpu/drm/v3d/v3d_sched.c | 10 +- include/drm/gpu_scheduler.h | 14 ++- 10 files changed, 110 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f60753f97ac5..9c2a10aeb0b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1489,9 +1489,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !ring->sched.ready) continue; - kthread_park(ring->sched.thread); + drm_sched_run_wq_stop(>sched); } seq_printf(m, "run ib test:\n"); @@ -1505,9 +1505,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!ring || !ring->sched.ready) continue; - kthread_unpark(ring->sched.thread); + drm_sched_run_wq_start(>sched); } up_write(>reset_domain->sem); @@ -1727,7 +1727,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) + if (!ring || !ring->funcs->preempt_ib || !ring->sched.ready) return -EINVAL; /* the last preemption failed */ @@ -1745,7 +1745,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) goto pro_end; /* stop the scheduler */ - 
kthread_park(ring->sched.thread); + drm_sched_run_wq_stop(>sched); /* preempt the IB */ r = amdgpu_ring_preempt_ib(ring); @@ -1779,7 +1779,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) failure: /* restart the scheduler */ - kthread_unpark(ring->sched.thread); + drm_sched_run_wq_start(>sched); up_read(>reset_domain->sem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fac9312b1695..00c9c03c8f94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2364,7 +2364,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(>sched, _sched_ops, + r = drm_sched_init(>sched, _sched_ops, NULL, ring->num_hw_submission, amdgpu_job_hang_limit, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, @@ -4627,7 +4627,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) +
Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode
On 4/3/23 23:15, Francesco Dolcini wrote: On Mon, Apr 03, 2023 at 04:06:22PM -0500, Rob Herring wrote: On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote: From: Francesco Dolcini SN65DSI8[34] device supports burst video mode and non-burst video mode with sync events or with sync pulses packet transmission as described in the DSI specification. Add property to select the expected mode, this allows for example to select a mode that is compatible with the DSI host interface. Why does this need to be in DT? The source and sink drivers should know what their capabilities are and pick the best common one. Is there a best mode? I think yes: Burst (is better than) Sync Events (is better than) Sync Pulses Burst is most energy efficient, Sync-Pulses is the simplest and least energy efficient and with most constraints. Isn't this a decision how do we want the 2 peers to communicate? I don't think so, I believe the Host and nearest bridge should be able to negotiate their capabilities (mode, link rate, etc.) within the DRM subsystem. For the MIPI-DSI Linux/DRM experts: am I missing something? Is there another way to have a DSI video sink to ask for a specific mode? I'm afraid this is not implemented yet, so ... plumbing needed. [...]
[PATCH v2] drm/scdc-helper: Pimp SCDC debugs
From: Ville Syrjälä Include the device and connector information in the SCDC debugs. Makes it easier to figure out who did what. v2: Rely on connector->ddc (Maxime) Cc: Andrzej Hajda Cc: Neil Armstrong Cc: Robert Foss Cc: Laurent Pinchart Cc: Jonas Karlman Cc: Jernej Skrabec Cc: Thierry Reding Cc: Emma Anholt Cc: Maxime Ripard Cc: intel-...@lists.freedesktop.org Cc: linux-te...@vger.kernel.org Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 8 ++-- drivers/gpu/drm/display/drm_scdc_helper.c | 46 +++ drivers/gpu/drm/i915/display/intel_ddi.c | 4 +- drivers/gpu/drm/i915/display/intel_hdmi.c | 8 +--- drivers/gpu/drm/tegra/sor.c | 15 +++- drivers/gpu/drm/vc4/vc4_hdmi.c| 21 ++- include/drm/display/drm_scdc_helper.h | 7 ++-- 7 files changed, 59 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index aa51c61a78c7..603bb3c51027 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi, /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */ if (dw_hdmi_support_scdc(hdmi, display)) { if (mtmdsclock > HDMI14_MAX_TMDSCLK) - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1); + drm_scdc_set_high_tmds_clock_ratio(>connector, 1); else - drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0); + drm_scdc_set_high_tmds_clock_ratio(>connector, 0); } } EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio); @@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION)); /* Enabled Scrambling in the Sink */ - drm_scdc_set_scrambling(hdmi->ddc, 1); + drm_scdc_set_scrambling(>connector, 1); /* * To activate the scrambler feature, you must ensure @@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi, hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL); hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ, 
HDMI_MC_SWRSTZ); - drm_scdc_set_scrambling(hdmi->ddc, 0); + drm_scdc_set_scrambling(>connector, 0); } } diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c b/drivers/gpu/drm/display/drm_scdc_helper.c index c3ad4ab2b456..6d2f244e5830 100644 --- a/drivers/gpu/drm/display/drm_scdc_helper.c +++ b/drivers/gpu/drm/display/drm_scdc_helper.c @@ -26,6 +26,8 @@ #include #include +#include +#include #include /** @@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write); /** * drm_scdc_get_scrambling_status - what is status of scrambling? - * @adapter: I2C adapter for DDC channel + * @connector: connector * * Reads the scrambler status over SCDC, and checks the * scrambling status. @@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write); * Returns: * True if the scrambling is enabled, false otherwise. */ -bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter) +bool drm_scdc_get_scrambling_status(struct drm_connector *connector) { u8 status; int ret; - ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, ); + ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, ); if (ret < 0) { - DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret); + drm_dbg_kms(connector->dev, + "[CONNECTOR:%d:%s] Failed to read scrambling status: %d\n", + connector->base.id, connector->name, ret); return false; } @@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); /** * drm_scdc_set_scrambling - enable scrambling - * @adapter: I2C adapter for DDC channel + * @connector: connector * @enable: bool to indicate if scrambling is to be enabled/disabled * * Writes the TMDS config register over SCDC channel, and: @@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status); * Returns: * True if scrambling is set/reset successfully, false otherwise. 
*/ -bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) +bool drm_scdc_set_scrambling(struct drm_connector *connector, +bool enable) { u8 config; int ret; - ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, ); + ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, ); if (ret < 0) { - DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); + drm_dbg_kms(connector->dev, +
Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog
On 03/04/2023 22:07, Abhinav Kumar wrote: On 4/3/2023 11:48 AM, Dmitry Baryshkov wrote: On 03/04/2023 21:06, Abhinav Kumar wrote: On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote: This huge series attempts to restructure the DPU HW catalog into a manageable and reviewable data set. In order to ease review and testing I merged all the necessary fixes into this series. Also I cherry-picked & slightly fixed Konrad's patch adding size to the SSPP and INTF macros. I had to first dig up some history about why dpu catalog grew so much in the first place before starting this review. When the DPU driver first landed (which pre-dates my work in upstream), it looks like it followed mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets which use DPU kept growing, this is becoming a burden. As everyone knows, downstream follows a devicetree model for the dpu hardware and that should have always been the case. Perhaps in the last 2-3 years more time could have been spent on standardizing the bindings used for hw blocks in order to maintain a less hard-coded catalog file and more in the device tree. Unfortunately, this is not how the upstream DT works. If something is a constant hardware property, it should not go into the DT. So pushing catalog to dt would have been immediately frowned upon by Rob Herring or Krzysztof. Yes certainly we cannot put hardware specific properties. But in general, modelling the hardware like the number of sspps, number of interfaces and number of dspps etc can be a bit abstracted? like blk-type and blk-offset? blk-type can be a custom string because each block is named differently for different vendors? No. The number of blk_offsets decides number of blocks. Its not constant right. We are seeing it varying with chipsets. Then the catalog would have just been a place to parse the device tree, set the feature capability based on chipset (refer _sde_hardware_pre_caps). 
That way offsets , number of blocks and the blocks themselves still come from the device tree but perhaps some specific features are at SOC level for which the catalog still stays. That being said, I thought of different strategies even before the review but two issues prevented me from suggesting those ideas (one of which I am seeing even here , which I am going to suggest below and also suggest why it wont work). 1) For the same DPU major/minor version, some features might get dropped or even get added with different SOCs as overall the system capabilities might differ like number of SSPPs or memory footprint of the SOC etc. So there is no good way right now to generalize any dpu catalog or to tie it with a DPU major/minor version. We will have to stick with a per-SOC model. Up to now, the SoC was equal to major+minor. Could you please be more specific here, if there are any actual differences within major+minor families? So lets say, the same DPU major/minor version is used but we have only one DSI on one chipset Vs two DSIs on the other, some of the features which come into play only for dual DSI cannot be used. Like broadcasting a DCS command across two DSIs etc. This is a very basic example, but there are many examples. I'm asking for the exact details, because up to now the driver was using major:minor to find the catalog entry. It was modelled this way in sdm845/sc7180, then it was natural for us to continue down this path. I will put reworking catalog to be bound to the binding data This is what led me to not pursue that route. 2) For the same DPU major/minor version, even if core-DPU is same (in terms of SSPP, DSPP etc), the number of interfaces can change. So again no room to generalize same DPU hw version. Again, I might be just scratching the surface, but I have not observed this. This typically happens based on what products that chipset is catered towards. Thats pretty much what I can share. 
But more number of interfaces for more number of displays / use-cases. Ack, I will note that we should be more careful about these items. 3) For the same reason as (1) and (2), I think the de-duplication strategy used in this series is not correct. The idea of dpu_hw_version_num_layer_mixer is just not scalable as I don't know how many variants that will lead to. So it seems like just an attempt to de-duplicate which perhaps works today for existing dpu chipsets in upstream but by no means scalable. Let's go ahead with per-SOC catalog file but let's live with some amount of duplication between them if we really have to split it across header files. Indeed, this leads to minor differences on top of major+lm. However, I think, the overall complexity is lowered. Nevertheless, let's land the major set of patches and leave generalization for a later time. I think, with the addition of the next several platforms we will see the drill. Yes, I would say let's handle generalization/de-duplication later when
Re: [Freedreno] [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation
On 4/3/2023 2:51 PM, Dmitry Baryshkov wrote: On 04/04/2023 00:45, Jessica Zhang wrote: On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Correct the math for slice_last_group_size so that it matches the calculations downstream. Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC") Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index b952f7d2b7f5..9312a8d7fbd9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, if (is_cmd_mode) initial_lines += 1; - slice_last_group_size = 3 - (dsc->slice_width % 3); + slice_last_group_size = dsc->slice_width % 3; + + if (slice_last_group_size == 0) + slice_last_group_size = 3; Hmm. As I went on checking this against techpack: mod = dsc->slice_width % 3 mod | techpack | old | your_patch 0 | 2 | 3 | 3 1 | 0 | 2 | 1 2 | 1 | 1 | 2 So, obviously neither old nor new code match the calculations of the techpack. If we assume that sde_dsc_helper code is correct (which I have no reasons to doubt), then the proper code should be: slice_last_group_size = (dsc->slice_width + 2) % 3; Could you please doublecheck and adjust. Hi Dmitry, The calculation should match the techpack calculation (I kept the `data |= ((slice_last_group_size - 1) << 18);` a few lines down). And the techpack doesn't have -1. I think the following code piece would be more convenient as it is simpler: slice_last_group_size = (dsc->slice_width + 2) % 3; [...] data |= slice_last_group_size << 18; If you agree, could you please switch to it? Sure. 
Thanks, Jessica Zhang Thanks, Jessica Zhang + data = (initial_lines << 20); data |= ((slice_last_group_size - 1) << 18); /* bpp is 6.4 format, 4 LSBs bits are for fractional part */ -- With best wishes Dmitry -- With best wishes Dmitry
[PATCH 2/2] drm/bridge: lt9611: Do not generate HFP/HBP/HSA and EOT packet
Do not generate the HS front and back porch gaps, the HSA gap and EOT packet, as these packets are not required. This makes the bridge work with Samsung DSIM on i.MX8MM and i.MX8MP. Signed-off-by: Marek Vasut --- Cc: Andrzej Hajda Cc: Daniel Vetter Cc: David Airlie Cc: Jagan Teki Cc: Jernej Skrabec Cc: Jonas Karlman Cc: Laurent Pinchart Cc: Michael Walle Cc: Neil Armstrong Cc: Robert Foss Cc: dri-devel@lists.freedesktop.org --- drivers/gpu/drm/bridge/lontium-lt9611.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index a25d21a7d5c19..151efe92711c4 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -774,7 +774,9 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611, dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_VIDEO_HSE; + MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA | + MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | + MIPI_DSI_MODE_NO_EOT_PACKET; ret = devm_mipi_dsi_attach(dev, dsi); if (ret < 0) { -- 2.39.2
[PATCH 1/2] drm/bridge: lt9211: Do not generate HFP/HBP/HSA and EOT packet
Do not generate the HS front and back porch gaps, the HSA gap and EOT packet, as these packets are not required. This makes the bridge work with Samsung DSIM on i.MX8MM and i.MX8MP. Signed-off-by: Marek Vasut --- Cc: Andrzej Hajda Cc: Daniel Vetter Cc: David Airlie Cc: Jagan Teki Cc: Jernej Skrabec Cc: Jonas Karlman Cc: Laurent Pinchart Cc: Michael Walle Cc: Neil Armstrong Cc: Robert Foss Cc: dri-devel@lists.freedesktop.org --- drivers/gpu/drm/bridge/lontium-lt9211.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index 3e19fff6547a2..00db681512385 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -709,7 +709,9 @@ static int lt9211_host_attach(struct lt9211 *ctx) dsi->lanes = dsi_lanes; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_VIDEO_HSE; + MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA | + MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | + MIPI_DSI_MODE_NO_EOT_PACKET; ret = devm_mipi_dsi_attach(dev, dsi); if (ret < 0) { -- 2.39.2
Re: [Freedreno] [PATCH v3 01/38] drm/msm/dpu: Allow variable SSPP/INTF_BLK size
On 4/1/2023 2:37 AM, Dmitry Baryshkov wrote: On 01/04/2023 03:57, Abhinav Kumar wrote: On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote: From: Konrad Dybcio These blocks are of variable length on different SoCs. Set the correct values where I was able to retrieve it from downstream DTs and leave the old defaults (0x1c8 for sspp and 0x280 for intf) otherwise. Signed-off-by: Konrad Dybcio [DB: fixed some of lengths] Signed-off-by: Dmitry Baryshkov Can you please split this to two changes one for SSPP and one for INTF block? Ack, of course. Thanks a lot for the comments to this patch. This is, I think, what we missed from the vendor dtsi files. --- .../gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 243 +- 1 file changed, 122 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 6840b22a4159..e44e7455a56e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -1172,11 +1172,11 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_1 = _DMA_SBLK("9", 2); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_2 = _DMA_SBLK("10", 3); static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK("11", 4); -#define SSPP_BLK(_name, _id, _base, _features, \ +#define SSPP_BLK(_name, _id, _base, _len, _features, \ _sblk, _xinid, _type, _clkctrl) \ { \ .name = _name, .id = _id, \ - .base = _base, .len = 0x1c8, \ + .base = _base, .len = _len, \ .features = _features, \ .sblk = &_sblk, \ .xin_id = _xinid, \ @@ -1185,40 +1185,40 @@ static const struct dpu_sspp_sub_blks sdm845_dma_sblk_3 = _DMA_SBLK("11", 4); } static const struct dpu_sspp_cfg msm8998_sspp[] = { - SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_MSM8998_MASK, + SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, 0x1ac, VIG_MSM8998_MASK, msm8998_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), - SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_MSM8998_MASK, + SSPP_BLK("sspp_1", SSPP_VIG1, 
0x6000, 0x1ac, VIG_MSM8998_MASK, msm8998_vig_sblk_1, 4, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1), - SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_MSM8998_MASK, + SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, 0x1ac, VIG_MSM8998_MASK, msm8998_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2), - SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_MSM8998_MASK, + SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, 0x1ac, VIG_MSM8998_MASK, msm8998_vig_sblk_3, 12, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3), - SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, DMA_MSM8998_MASK, + SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, 0x1ac, DMA_MSM8998_MASK, sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0), - SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, DMA_MSM8998_MASK, + SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, 0x1ac, DMA_MSM8998_MASK, sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1), - SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, DMA_CURSOR_MSM8998_MASK, + SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, 0x1ac, DMA_CURSOR_MSM8998_MASK, sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA2), - SSPP_BLK("sspp_11", SSPP_DMA3, 0x2a000, DMA_CURSOR_MSM8998_MASK, + SSPP_BLK("sspp_11", SSPP_DMA3, 0x2a000, 0x1ac, DMA_CURSOR_MSM8998_MASK, sdm845_dma_sblk_3, 13, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA3), }; static const struct dpu_sspp_cfg sdm845_sspp[] = { - SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SDM845_MASK_SDMA, + SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, 0x1c8, VIG_SDM845_MASK_SDMA, sdm845_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0), - SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SDM845_MASK_SDMA, + SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, 0x1c8, VIG_SDM845_MASK_SDMA, sdm845_vig_sblk_1, 4, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1), - SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_SDM845_MASK_SDMA, + SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, 0x1c8, VIG_SDM845_MASK_SDMA, sdm845_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2), - SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_SDM845_MASK_SDMA, + SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, 0x1c8, VIG_SDM845_MASK_SDMA, sdm845_vig_sblk_3, 12, 
SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3), - SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, DMA_SDM845_MASK_SDMA, + SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000, 0x1c8, DMA_SDM845_MASK_SDMA, sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0), - SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, DMA_SDM845_MASK_SDMA, + SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000, 0x1c8, DMA_SDM845_MASK_SDMA, sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1), - SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, DMA_CURSOR_SDM845_MASK_SDMA, + SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000, 0x1c8, DMA_CURSOR_SDM845_MASK_SDMA, sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA2), -
Re: [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation
On 04/04/2023 00:45, Jessica Zhang wrote: On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Correct the math for slice_last_group_size so that it matches the calculations downstream. Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC") Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index b952f7d2b7f5..9312a8d7fbd9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, if (is_cmd_mode) initial_lines += 1; - slice_last_group_size = 3 - (dsc->slice_width % 3); + slice_last_group_size = dsc->slice_width % 3; + + if (slice_last_group_size == 0) + slice_last_group_size = 3; Hmm. As I went on checking this against techpack: mod = dsc->slice_width % 3 mod | techpack | old | your_patch 0 | 2 | 3 | 3 1 | 0 | 2 | 1 2 | 1 | 1 | 2 So, obviously neither old nor new code match the calculations of the techpack. If we assume that sde_dsc_helper code is correct (which I have no reasons to doubt), then the proper code should be: slice_last_group_size = (dsc->slice_width + 2) % 3; Could you please doublecheck and adjust. Hi Dmitry, The calculation should match the techpack calculation (I kept the `data |= ((slice_last_group_size - 1) << 18);` a few lines down). And the techpack doesn't have -1. I think the following code piece would be more convenient as it is simpler: slice_last_group_size = (dsc->slice_width + 2) % 3; [...] data |= slice_last_group_size << 18; If you agree, could you please switch to it? 
Thanks, Jessica Zhang + data = (initial_lines << 20); data |= ((slice_last_group_size - 1) << 18); /* bpp is 6.4 format, 4 LSBs bits are for fractional part */ -- With best wishes Dmitry -- With best wishes Dmitry
Re: [PATCH RFC v2 5/6] drm/msm/dsi: Use MSM and DRM DSC helper methods
On 4/2/2023 4:29 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Use MSM and DRM DSC helper methods to configure DSC for DSI. Changes in V2: - *_calculate_initial_scale_value --> *_set_initial_scale_value - Split pkt_per_line and eol_byte_num changes to a separate patch - Moved pclk_per_line calculation to hdisplay adjustment in `if (dsc)` block of dsi_update_dsc_timing() Signed-off-by: Jessica Zhang --- drivers/gpu/drm/msm/dsi/dsi_host.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 74d38f90398a..b7ab81737473 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -28,6 +28,7 @@ #include "dsi.xml.h" #include "sfpb.xml.h" #include "dsi_cfg.h" +#include "msm_dsc_helper.h" #include "msm_kms.h" #include "msm_gem.h" #include "phy/dsi_phy.h" @@ -848,7 +849,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod /* first calculate dsc parameters and then program * compress mode registers */ - slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width); + slice_per_intf = msm_dsc_get_slice_per_intf(dsc, hdisplay); /* * If slice_count is greater than slice_per_intf @@ -951,7 +952,11 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi) * pulse width same */ h_total -= hdisplay; - hdisplay /= 3; + if (msm_host->mode_flags & MIPI_DSI_MODE_VIDEO) + hdisplay = msm_dsc_get_uncompressed_pclk_per_line(dsc, hdisplay, + dsi_get_bpp(msm_host->format)) / 3; + else + hdisplay /= 3; h_total += hdisplay; ha_end = ha_start + hdisplay; This chunk changes the calculated value (two other are mere updates to use new functions). Please move it to a separate patch, add proper description/justification and possibly a Fixes tag, if the original code was incorrect. Hi Dmitry, Acked. 
Thanks, Jessica Zhang } @@ -1759,7 +1764,7 @@ static int dsi_populate_dsc_params(struct msm_dsi_host *msm_host, struct drm_dsc return ret; } - dsc->initial_scale_value = 32; + drm_dsc_set_initial_scale_value(dsc); dsc->line_buf_depth = dsc->bits_per_component + 1; return drm_dsc_compute_rc_parameters(dsc); -- With best wishes Dmitry
Re: [PATCH RFC v2 4/6] drm/msm/dpu: Fix slice_last_group_size calculation
On 4/2/2023 4:27 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Correct the math for slice_last_group_size so that it matches the calculations downstream. Fixes: c110cfd1753e ("drm/msm/disp/dpu1: Add support for DSC") Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index b952f7d2b7f5..9312a8d7fbd9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -56,7 +56,11 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, if (is_cmd_mode) initial_lines += 1; - slice_last_group_size = 3 - (dsc->slice_width % 3); + slice_last_group_size = dsc->slice_width % 3; + + if (slice_last_group_size == 0) + slice_last_group_size = 3; Hmm. As I went on checking this against techpack: mod = dsc->slice_width % 3 mod | techpack | old | your_patch 0 | 2 | 3 | 3 1 | 0 | 2 | 1 2 | 1 | 1 | 2 So, obviously neither old nor new code match the calculations of the techpack. If we assume that sde_dsc_helper code is correct (which I have no reasons to doubt), then the proper code should be: slice_last_group_size = (dsc->slice_width + 2) % 3; Could you please doublecheck and adjust. Hi Dmitry, The calculation should match the techpack calculation (I kept the `data |= ((slice_last_group_size - 1) << 18);` a few lines down). Thanks, Jessica Zhang + data = (initial_lines << 20); data |= ((slice_last_group_size - 1) << 18); /* bpp is 6.4 format, 4 LSBs bits are for fractional part */ -- With best wishes Dmitry
Re: [PATCH RFC v2 2/6] drm/msm: Add MSM-specific DSC helper methods
On 4/2/2023 4:21 AM, Dmitry Baryshkov wrote: On 31/03/2023 21:49, Jessica Zhang wrote: Introduce MSM-specific DSC helper methods, as some calculations are common between DP and DSC. Changes in v2: - Moved files up to msm/ directory - Dropped get_comp_ratio() helper - Used drm_int2fixp() to convert to integers to fp - Style changes to improve readability - Dropped unused bpp variable in msm_dsc_get_dce_bytes_per_line() - Changed msm_dsc_get_slice_per_intf() to a static inline method - Dropped last division step of msm_dsc_get_pclk_per_line() and changed method name accordingly - Changed DSC_BPP macro to drm_dsc_get_bpp_int() helper method - Fixed some math issues caused by passing in incorrect types to drm_fixed methods in get_bytes_per_soft_slice() Signed-off-by: Jessica Zhang --- drivers/gpu/drm/msm/Makefile | 1 + drivers/gpu/drm/msm/msm_dsc_helper.c | 53 drivers/gpu/drm/msm/msm_dsc_helper.h | 42 3 files changed, 96 insertions(+) diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 7274c41228ed..b814fc80e2d5 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -94,6 +94,7 @@ msm-y += \ msm_atomic_tracepoints.o \ msm_debugfs.o \ msm_drv.o \ + msm_dsc_helper.o \ msm_fb.o \ msm_fence.o \ msm_gem.o \ diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.c b/drivers/gpu/drm/msm/msm_dsc_helper.c new file mode 100644 index ..60b73e17e6eb --- /dev/null +++ b/drivers/gpu/drm/msm/msm_dsc_helper.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved + */ + +#include +#include +#include + +#include "msm_drv.h" +#include "msm_dsc_helper.h" + +static s64 get_bytes_per_soft_slice(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) intf_width is unused Hi Dmitry, Acked. 
+{ + int bpp = msm_dsc_get_bpp_int(dsc); + s64 numerator_fp, denominator_fp; + s64 comp_ratio_fp = drm_fixp_from_fraction(src_bpp, bpp); + + numerator_fp = drm_int2fixp(dsc->slice_width * 3); You have lost dsc->bits_per_component here. This was moved to the denominator calculation, but I'll move it back to this line to avoid confusion. + denominator_fp = drm_fixp_from_fraction(comp_ratio_fp * 8, drm_int2fixp(bpp)); denominator_fp = drm_fixp_from_fraction(src_bpp * 8, bpp); Acked. + + return drm_fixp_div(numerator_fp, denominator_fp); +} + +u32 msm_dsc_get_eol_byte_num(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) +{ + u32 bytes_per_soft_slice, extra_eol_bytes, bytes_per_intf; + s64 bytes_per_soft_slice_fp; + int slice_per_intf = msm_dsc_get_slice_per_intf(dsc, intf_width); + + bytes_per_soft_slice_fp = get_bytes_per_soft_slice(dsc, intf_width, src_bpp); + bytes_per_soft_slice = drm_fixp2int_ceil(bytes_per_soft_slice_fp); + + bytes_per_intf = bytes_per_soft_slice * slice_per_intf; + extra_eol_bytes = bytes_per_intf % 3; + if (extra_eol_bytes != 0) + extra_eol_bytes = 3 - extra_eol_bytes; I become confused here when I checked eol_bytes in the display techpack. I see that for DP the dp_panel_dsc_pclk_param_calc() calculates dsc->eol_bytes_num in this way, the size to pad dsc_byte_count * slice_per_intf to 3 bytes. However, for DSI this is a simple as total_bytes_per_intf % 3 , so it is not a padding, but a length of the last chunk. Could you please clarify? If the techpack code is correct, I'd prefer if we return last chunk size here and calculate the padding length in the DP driver. I've double checked the calculations between DP and DSI, and I think you're right. Will move the `if (extra_eol_bytes != 0)` block out to DP code. 
+ + return extra_eol_bytes; +} + +int msm_dsc_get_uncompressed_pclk_per_line(struct drm_dsc_config *dsc, int intf_width, u32 src_bpp) Basing on Abhinav's description ("pclk_per_line can be only per interface") would it better be named as msm_dsc_get_uncompressed_pclk_per_intf() ? or msm_dsc_get_uncompressed_pclk_for_intf() ? BTW: if get_bytes_per_soft_slice() doesn't use intf_width, we can probably drop it here too. +{ + s64 data_width; + + if (!dsc->slice_width || (intf_width < dsc->slice_width)) + return -EINVAL; Error code is not validated at dsi_timing_setup. I'd suggest moving error checks there and dropping the error handling here. If dsc->slice_width is not set, we should stop much earlier than drm_bridge's pre_enable() callback. Acked. Thanks, Jessica Zhang + + data_width = drm_fixp_mul(dsc->slice_count, + get_bytes_per_soft_slice(dsc, intf_width, src_bpp)); + + return drm_fixp2int_ceil(data_width); +} diff --git a/drivers/gpu/drm/msm/msm_dsc_helper.h b/drivers/gpu/drm/msm/msm_dsc_helper.h new file mode 100644 index ..743cd324b7d9 --- /dev/null +++
[PATCH] drm/i915/guc: Don't capture Gen8 regs on Gen12 devices
From: John Harrison A pair of pre-Gen12 registers were being included in the Gen12 capture list. GuC was rejecting those as being invalid and logging errors about them. So, stop doing it. Signed-off-by: John Harrison Fixes: dce2bd542337 ("drm/i915/guc: Add Gen9 registers for GuC error state capture.") Cc: Alan Previn Cc: Umesh Nerlige Ramappa Cc: Lucas De Marchi Cc: John Harrison Cc: Jani Nikula Cc: Matt Roper Cc: Balasubramani Vivekanandan Cc: Daniele Ceraolo Spurio --- drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index cf49188db6a6e..e0e793167d61b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -31,12 +31,14 @@ { FORCEWAKE_MT, 0, 0, "FORCEWAKE" } #define COMMON_GEN9BASE_GLOBAL \ - { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ - { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" }, \ { ERROR_GEN6, 0, 0, "ERROR_GEN6" }, \ { DONE_REG, 0, 0, "DONE_REG" }, \ { HSW_GTT_CACHE_EN, 0, 0, "HSW_GTT_CACHE_EN" } +#define GEN9_GLOBAL \ + { GEN8_FAULT_TLB_DATA0, 0, 0, "GEN8_FAULT_TLB_DATA0" }, \ + { GEN8_FAULT_TLB_DATA1, 0, 0, "GEN8_FAULT_TLB_DATA1" } + #define COMMON_GEN12BASE_GLOBAL \ { GEN12_FAULT_TLB_DATA0,0, 0, "GEN12_FAULT_TLB_DATA0" }, \ { GEN12_FAULT_TLB_DATA1,0, 0, "GEN12_FAULT_TLB_DATA1" }, \ @@ -142,6 +144,7 @@ static const struct __guc_mmio_reg_descr xe_lpd_gsc_inst_regs[] = { static const struct __guc_mmio_reg_descr default_global_regs[] = { COMMON_BASE_GLOBAL, COMMON_GEN9BASE_GLOBAL, + GEN9_GLOBAL, }; static const struct __guc_mmio_reg_descr default_rc_class_regs[] = { -- 2.39.1
Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode
On Mon, Apr 03, 2023 at 04:01:17PM -0500, Rob Herring wrote: > On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote: > > On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote: > > > On 30/03/2023 11:59, Francesco Dolcini wrote: > > > > From: Francesco Dolcini > > > > > > > > Add new toshiba,input-rgb-mode property to describe the actual signal > > > > connection on the parallel RGB input interface. > > > > > > > > Signed-off-by: Francesco Dolcini > > > > --- > > > > .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++ > > > > 1 file changed, 15 insertions(+) > > > > > > > > diff --git > > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > > > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > index 8f22093b61ae..2638121a2223 100644 > > > > --- > > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > +++ > > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > @@ -42,6 +42,21 @@ properties: > > > >clock-names: > > > > const: refclk > > > > > > > > + toshiba,input-rgb-mode: > > > > +description: | > > > > + Parallel Input (RGB) Mode. > > > > + > > > > + RGB inputs (PD[23:0]) color arrangement as documented in the > > > > datasheet > > > > + and in the table below. > > > > + > > > > + 0 = R[7:0], G[7:0], B[7:0] > > > > > > RGB888? > > > > Or anything else - like a RGB666 - just connecting to GND the unused > > pins. > > If the bridge is configured for RGB666, then that's fine. If not, the > unused pins should be driven with either the MSB of each component. > Otherwise, you'd can't fully saturate the colors. maybe a detail and maybe not really relevant, but this specific bridge has no know-how on the actual RGB inputs width. 
While I understand what you are saying here, in the end this is about the actual hardware design, which can be done in any way, including tying pins to GND and having the issue you just described. Francesco
Re: [PATCH 1/3] drm/lima: add usage counting method to ctx_mgr
On Mon, Mar 13, 2023 at 12:30:50AM +0100, Erico Nunes wrote: > lima maintains a context manager per drm_file, similar to amdgpu. > In order to account for the complete usage per drm_file, all of the > associated contexts need to be considered. > Previously released contexts also need to be accounted for but their > drm_sched_entity info is gone once they get released, so account for it > in the ctx_mgr. > > Signed-off-by: Erico Nunes > --- > drivers/gpu/drm/lima/lima_ctx.c | 30 +- > drivers/gpu/drm/lima/lima_ctx.h | 3 +++ > 2 files changed, 32 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/lima/lima_ctx.c b/drivers/gpu/drm/lima/lima_ctx.c > index 891d5cd5019a..e008e586fad0 100644 > --- a/drivers/gpu/drm/lima/lima_ctx.c > +++ b/drivers/gpu/drm/lima/lima_ctx.c > @@ -15,6 +15,7 @@ int lima_ctx_create(struct lima_device *dev, struct > lima_ctx_mgr *mgr, u32 *id) > if (!ctx) > return -ENOMEM; > ctx->dev = dev; > + ctx->mgr = mgr; > kref_init(>refcnt); > > for (i = 0; i < lima_pipe_num; i++) { > @@ -42,10 +43,17 @@ int lima_ctx_create(struct lima_device *dev, struct > lima_ctx_mgr *mgr, u32 *id) > static void lima_ctx_do_release(struct kref *ref) > { > struct lima_ctx *ctx = container_of(ref, struct lima_ctx, refcnt); > + struct lima_ctx_mgr *mgr = ctx->mgr; > int i; > > - for (i = 0; i < lima_pipe_num; i++) > + for (i = 0; i < lima_pipe_num; i++) { > + struct lima_sched_context *context = >context[i]; > + struct drm_sched_entity *entity = >base; > + > + mgr->elapsed_ns[i] += entity->elapsed_ns; drm-tip build is now broken because of this vs. commit baad10973fdb ("Revert "drm/scheduler: track GPU active time per entity"") ../drivers/gpu/drm/lima/lima_ctx.c: In function ‘lima_ctx_do_release’: ../drivers/gpu/drm/lima/lima_ctx.c:53:45: error: ‘struct drm_sched_entity’ has no member named ‘elapsed_ns’ 53 | mgr->elapsed_ns[i] += entity->elapsed_ns; -- Ville Syrjälä Intel
Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode
On Mon, Apr 03, 2023 at 04:06:22PM -0500, Rob Herring wrote: > On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote: > > From: Francesco Dolcini > > > > SN65DSI8[34] device supports burst video mode and non-burst video mode > > with sync events or with sync pulses packet transmission as described in > > the DSI specification. > > > > Add property to select the expected mode, this allows for example to > > select a mode that is compatible with the DSI host interface. > Why does this need to be in DT? > The source and sink drivers should know what their capabilities are > and pick the best common one. Is there a best mode? Isn't this a decision about how we want the 2 peers to communicate? For the MIPI-DSI Linux/DRM experts: am I missing something? Is there another way to have a DSI video sink ask for a specific mode? (I copied this from an existing DSI panel binding). Francesco
Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode
On Mon, Apr 03, 2023 at 04:01:17PM -0500, Rob Herring wrote: > On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote: > > On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote: > > > On 30/03/2023 11:59, Francesco Dolcini wrote: > > > > From: Francesco Dolcini > > > > > > > > Add new toshiba,input-rgb-mode property to describe the actual signal > > > > connection on the parallel RGB input interface. > > > > > > > > Signed-off-by: Francesco Dolcini > > > > --- > > > > .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++ > > > > 1 file changed, 15 insertions(+) > > > > > > > > diff --git > > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > > > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > index 8f22093b61ae..2638121a2223 100644 > > > > --- > > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > +++ > > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > > @@ -42,6 +42,21 @@ properties: > > > >clock-names: > > > > const: refclk > > > > > > > > + toshiba,input-rgb-mode: > > > > +description: | > > > > + Parallel Input (RGB) Mode. > > > > + > > > > + RGB inputs (PD[23:0]) color arrangement as documented in the > > > > datasheet > > > > + and in the table below. > > > > + > > > > + 0 = R[7:0], G[7:0], B[7:0] > > > > > > RGB888? > > > > Or anything else - like a RGB666 - just connecting to GND the unused > > pins. > > If the bridge is configured for RGB666, then that's fine. If not, the > unused pins should be driven with either the MSB of each component. > Otherwise, you'd can't fully saturate the colors. > > > > + 1 = R[1:0], G[1:0], B[1:0], R[7:2], G[7:2], B[7:2] > > > > + 2 = 8’b0, R[4:0], G[5:0], B[4:0] > > > > > > Isn't this RGB565? > > > > > > Don't we have already properties like this? e.g. colorspace? > > > > It's not really the colorspace this property. 
> > > > tc358768 is a parallel RGB to DSI bridge, it has 24 bit parallel input > > line. > > > > The way this lines are connected is configurable with this parameter, if you > > look at mode 0 and 1 they all allow to have a RGB888 or a RGB666 or a > > RGB565 mapping. This just configure some internal mux, it's not strictly > > about the RGB mode. > > This is the same as other cases. There's a need for describing the > interface. It keeps coming up and I keep saying to go create something > common. I am not aware of other discussion on the topic, do you have any pointer I can look at? What I'd like to re-iterate here once more is that this configuration is about how the external 24-bit parallel RGB lines are mapped withing this bridge. It's not mapping the linux media bus format (e.g. not MEDIA_BUS_FMT_RBG888_1X24 or alike). This bridge allow for a limited set of combination (3) as described in this binding. Francesco
Re: [PATCH v1 1/2] dt-bindings: display: bridge: sn65dsi83: Add DSI video mode
On Thu, Mar 30, 2023 at 12:17:51PM +0200, Francesco Dolcini wrote: > From: Francesco Dolcini > > SN65DSI8[34] device supports burst video mode and non-burst video mode > with sync events or with sync pulses packet transmission as described in > the DSI specification. > > Add property to select the expected mode, this allows for example to > select a mode that is compatible with the DSI host interface. Why does this need to be in DT? The source and sink drivers should know what their capabilities are and pick the best common one. Rob
Re: [PATCH v1 3/6] dt-bindings: display: bridge: toshiba,tc358768: add parallel input mode
On Fri, Mar 31, 2023 at 11:40:01AM +0200, Francesco Dolcini wrote: > On Fri, Mar 31, 2023 at 10:48:15AM +0200, Krzysztof Kozlowski wrote: > > On 30/03/2023 11:59, Francesco Dolcini wrote: > > > From: Francesco Dolcini > > > > > > Add new toshiba,input-rgb-mode property to describe the actual signal > > > connection on the parallel RGB input interface. > > > > > > Signed-off-by: Francesco Dolcini > > > --- > > > .../bindings/display/bridge/toshiba,tc358768.yaml | 15 +++ > > > 1 file changed, 15 insertions(+) > > > > > > diff --git > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > index 8f22093b61ae..2638121a2223 100644 > > > --- > > > a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > +++ > > > b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358768.yaml > > > @@ -42,6 +42,21 @@ properties: > > >clock-names: > > > const: refclk > > > > > > + toshiba,input-rgb-mode: > > > +description: | > > > + Parallel Input (RGB) Mode. > > > + > > > + RGB inputs (PD[23:0]) color arrangement as documented in the > > > datasheet > > > + and in the table below. > > > + > > > + 0 = R[7:0], G[7:0], B[7:0] > > > > RGB888? > > Or anything else - like a RGB666 - just connecting to GND the unused > pins. If the bridge is configured for RGB666, then that's fine. If not, the unused pins should be driven with either the MSB of each component. Otherwise, you'd can't fully saturate the colors. > > > + 1 = R[1:0], G[1:0], B[1:0], R[7:2], G[7:2], B[7:2] > > > + 2 = 8’b0, R[4:0], G[5:0], B[4:0] > > > > Isn't this RGB565? > > > > Don't we have already properties like this? e.g. colorspace? > > It's not really the colorspace this property. > > tc358768 is a parallel RGB to DSI bridge, it has 24 bit parallel input > line. 
> > The way this lines are connected is configurable with this parameter, if you > look at mode 0 and 1 they all allow to have a RGB888 or a RGB666 or a > RGB565 mapping. This just configure some internal mux, it's not strictly > about the RGB mode. This is the same as other cases. There's a need for describing the interface. It keeps coming up and I keep saying to go create something common. Rob
Re: [RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO
On 4/3/23 20:54, Christian König wrote: Am 03.04.23 um 21:40 schrieb Joshua Ashton: Hello all! I would like to propose a new API for allowing processes to control the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO. The main reason for this is for compositors such as Gamescope and SteamVR vrcompositor to be able to create realtime async compute queues on AMD without the need of CAP_SYS_NICE. The current situation is bad for a few reasons, one being that in order to setcap the executable, typically one must run as root which involves a pretty high privelage escalation in order to achieve one small feat, a realtime async compute queue queue for VR or a compositor. The executable cannot be setcap'ed inside a container nor can the setcap'ed executable be run in a container with NO_NEW_PRIVS. I go into more detail in the description in `uapi: Add RLIMIT_GPUPRIO`. My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`, which seems to make most initial sense to me to solve the problem. I am definitely not set that this is the best formulation however or if this should be linked to DRM (in terms of it's scheduler priority enum/definitions) in any way and and would really like other people's opinions across the stack on this. Once initial concern is that potentially this RLIMIT could out-live the lifespan of DRM. It sounds crazy saying it right now, something that definitely popped into my mind when touching `resource.h`. :-) Anyway, please let me know what you think! Definitely open to any feedback and advice you may have. :D Well the basic problem is that higher priority queues can be used to starve low priority queues. This starvation in turn is very very bad for memory management since the dma_fence's the GPU scheduler deals with have very strong restrictions. Even exposing this under CAP_SYS_NICE is questionable, so we will most likely have to NAK this. 
This is already exposed with CAP_SYS_NICE and is relied on by SteamVR for async reprojection and Gamescope's composite path on Steam Deck. Having a high priority async compute queue is really really important and advantageous for these tasks. The majority of usecases for something like this is going to be a compositor which does some really tiny amount of work per-frame but is incredibly latency dependent (as it depends on latching onto buffers just before vblank to do it's work) Starving and surpassing work on other queues is kind of the entire point. Gamescope and SteamVR do it on ACE as well so GFX work can run alongside it. - Joshie ✨ Regards, Christian. Thanks! - Joshie Joshua Ashton (4): drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH drm/scheduler: Split out drm_sched_priority to own file uapi: Add RLIMIT_GPUPRIO drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++-- drivers/gpu/drm/msm/msm_gpu.h | 2 +- fs/proc/base.c | 1 + include/asm-generic/resource.h | 3 +- include/drm/drm_sched_priority.h | 41 + include/drm/gpu_scheduler.h | 14 + include/uapi/asm-generic/resource.h | 3 +- 7 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 include/drm/drm_sched_priority.h
Re: [PATCH v4 1/2] drm/virtio: Refactor job submission code path
On 3/30/23 18:32, Emil Velikov wrote: >> +static int virtio_gpu_dma_fence_wait(struct virtio_gpu_submit *submit, >> + struct dma_fence *fence) >> +{ >> +struct dma_fence_unwrap itr; >> +struct dma_fence *f; >> +int err; >> + >> +dma_fence_unwrap_for_each(f, &itr, fence) { > The dma_fence_unwrap_for_each() change should be a separate patch, > highlighting why we want it. Good point, it actually should be a potential optimization for the in-fence waiting. >> +ret = virtio_gpu_init_submit(&submit, exbuf, dev, file, >> + fence_ctx, ring_idx); >> +if (ret) >> +goto cleanup; >> + >> +ret = virtio_gpu_wait_in_fence(&submit); >> +if (ret) >> +goto cleanup; >> + > We have reshuffled the order around in_fence waiting, out_fence install, > handles, cmdbuf, drm events, etc. Can we get that split up a bit, with > some comments. > > If it were me, I would keep the wait_in_fence early and inline > virtio_gpu_init_submit (the nesting/abstraction seems a bit much). This > means one can omit the virtio_gpu_submit::exbuf all together. I tried to inline and this variant makes code much less readable to me. The point of having wait_in_fence after submit_init is that it makes submit code path shorter. If we have to wait for in-fence, then once fence signals, there is no need to init and instead move directly to a further submission step. Perhaps won't hurt to also factor out the wait_fence from parse_deps in the second patch and do all the waits right before locking the buflist. -- Best regards, Dmitry
Re: [RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO
Am 03.04.23 um 21:40 schrieb Joshua Ashton: Hello all! I would like to propose a new API for allowing processes to control the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO. The main reason for this is for compositors such as Gamescope and SteamVR vrcompositor to be able to create realtime async compute queues on AMD without the need of CAP_SYS_NICE. The current situation is bad for a few reasons, one being that in order to setcap the executable, typically one must run as root which involves a pretty high privelage escalation in order to achieve one small feat, a realtime async compute queue queue for VR or a compositor. The executable cannot be setcap'ed inside a container nor can the setcap'ed executable be run in a container with NO_NEW_PRIVS. I go into more detail in the description in `uapi: Add RLIMIT_GPUPRIO`. My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`, which seems to make most initial sense to me to solve the problem. I am definitely not set that this is the best formulation however or if this should be linked to DRM (in terms of it's scheduler priority enum/definitions) in any way and and would really like other people's opinions across the stack on this. Once initial concern is that potentially this RLIMIT could out-live the lifespan of DRM. It sounds crazy saying it right now, something that definitely popped into my mind when touching `resource.h`. :-) Anyway, please let me know what you think! Definitely open to any feedback and advice you may have. :D Well the basic problem is that higher priority queues can be used to starve low priority queues. This starvation in turn is very very bad for memory management since the dma_fence's the GPU scheduler deals with have very strong restrictions. Even exposing this under CAP_SYS_NICE is questionable, so we will most likely have to NAK this. Regards, Christian. Thanks! 
- Joshie Joshua Ashton (4): drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH drm/scheduler: Split out drm_sched_priority to own file uapi: Add RLIMIT_GPUPRIO drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++-- drivers/gpu/drm/msm/msm_gpu.h | 2 +- fs/proc/base.c | 1 + include/asm-generic/resource.h | 3 +- include/drm/drm_sched_priority.h| 41 + include/drm/gpu_scheduler.h | 14 + include/uapi/asm-generic/resource.h | 3 +- 7 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 include/drm/drm_sched_priority.h
Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
On Mon, Apr 03, 2023 at 07:39:37PM +, Yang, Fei wrote: > >Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level > > > >On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote: > >>> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of > >>> cache_level > >>> > >>> On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote: > From: Fei Yang > > Currently the KMD is using enum i915_cache_level to set caching > policy for buffer objects. This is flaky because the PAT index > which really controls the caching behavior in PTE has far more > levels than what's defined in the enum. > >>> > >>> Then just add more enum values. > >> > >> That would be really messy because PAT index is platform dependent, > >> you would have to maintain many tables for the the translation. > >> > >>> 'pat_index' is absolutely meaningless to the reader, it's just an > >>> arbitrary number. Whereas 'cache_level' conveys how the thing is > >>> actually going to get used and thus how the caches should behave. > >> > >> By design UMD's understand PAT index. Both UMD and KMD should stand on > >> the same ground, the Bspec, to avoid any potential ambiguity. > >> > In addition, the PAT index is platform dependent, having to > translate between i915_cache_level and PAT index is not reliable, > >>> > >>> If it's not realiable then the code is clearly broken. > >> > >> Perhaps the word "reliable" is a bit confusing here. What I really > >> meant to say is 'difficult to maintain', or 'error-prone'. > >> > and makes the code more complicated. > >>> > >>> You have to translate somewhere anyway. Looks like you're now adding > >>> translations the other way (pat_index->cache_level). How is that better? > >> > >> No, there is no pat_index->cache_level translation. > > > > i915_gem_object_has_cache_level() is exactly that. And that one does look > > actually fragile since it assumes only one PAT index maps to each cache > > level. 
So if the user picks any other pat_index anything using > > i915_gem_object_has_cache_level() is likely to do the wrong thing. > > That is still one way transaltion, from cache_level to pat_index. Not really. The actual input to the thing is obj->pat_index. And as stated, the whole thing is simply broken whenever obj->pat_index isn't one of the magic numbers that you get back from i915_gem_get_pat_index(). -- Ville Syrjälä Intel
Re: [RFC PATCH 1/4] drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH
Am 03.04.23 um 21:40 schrieb Joshua Ashton: This allows AMDGPU scheduler priority above normal to be expressed using the DRM_SCHED_PRIORITY enum. That was rejected before, I just don't remember why exactly. Need to dig that up again. Christian. Signed-off-by: Joshua Ashton --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 2 +- include/drm/gpu_scheduler.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2139ac12159..8ec255091c4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -79,7 +79,7 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) return DRM_SCHED_PRIORITY_HIGH; case AMDGPU_CTX_PRIORITY_VERY_HIGH: - return DRM_SCHED_PRIORITY_HIGH; + return DRM_SCHED_PRIORITY_VERY_HIGH; /* This should not happen as we sanitized userspace provided priority * already, WARN if this happens. diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fc1c0d8611a8..e3495712b236 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -336,7 +336,7 @@ struct msm_gpu_perfcntr { * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some * cases, so we don't use it (no need for kernel generated jobs). */ -#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_VERY_HIGH - DRM_SCHED_PRIORITY_MIN) /** * struct msm_file_private - per-drm_file context diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9935d1e2ff69..a62071660602 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -55,6 +55,7 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_MIN, DRM_SCHED_PRIORITY_NORMAL, DRM_SCHED_PRIORITY_HIGH, + DRM_SCHED_PRIORITY_VERY_HIGH, DRM_SCHED_PRIORITY_KERNEL, DRM_SCHED_PRIORITY_COUNT,
[RFC PATCH 4/4] drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions
Add support for the new RLIMIT_GPUPRIO when doing the priority checks creating an amdgpu_ctx. Signed-off-by: Joshua Ashton --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 8ec255091c4a..4ac645455bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -28,6 +28,8 @@ #include "amdgpu_sched.h" #include "amdgpu_ras.h" #include +#include +#include #define to_amdgpu_ctx_entity(e)\ container_of((e), struct amdgpu_ctx_entity, entity) @@ -94,11 +96,16 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) static int amdgpu_ctx_priority_permit(struct drm_file *filp, int32_t priority) { + enum drm_sched_priority in_drm_priority, rlim_drm_priority; + if (!amdgpu_ctx_priority_is_valid(priority)) return -EINVAL; - /* NORMAL and below are accessible by everyone */ - if (priority <= AMDGPU_CTX_PRIORITY_NORMAL) + /* Check priority against RLIMIT to see what is allowed. */ + in_drm_priority = amdgpu_ctx_to_drm_sched_prio(priority); + rlim_drm_priority = (enum drm_sched_priority)rlimit(RLIMIT_GPUPRIO); + + if (in_drm_priority <= rlim_drm_priority) return 0; if (capable(CAP_SYS_NICE)) -- 2.40.0
[RFC PATCH 2/4] drm/scheduler: Split out drm_sched_priority to own file
This allows it to be used by other parts of the codebase without fear of a circular include dependency being introduced. Signed-off-by: Joshua Ashton --- include/drm/drm_sched_priority.h | 41 include/drm/gpu_scheduler.h | 15 +--- 2 files changed, 42 insertions(+), 14 deletions(-) create mode 100644 include/drm/drm_sched_priority.h diff --git a/include/drm/drm_sched_priority.h b/include/drm/drm_sched_priority.h new file mode 100644 index ..85a7bb011e27 --- /dev/null +++ b/include/drm/drm_sched_priority.h @@ -0,0 +1,41 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _DRM_SCHED_PRIORITY_H_ +#define _DRM_SCHED_PRIORITY_H_ + +/* These are often used as an (initial) index + * to an array, and as such should start at 0. 
+ */ +enum drm_sched_priority { + DRM_SCHED_PRIORITY_MIN, + DRM_SCHED_PRIORITY_NORMAL, + DRM_SCHED_PRIORITY_HIGH, + DRM_SCHED_PRIORITY_VERY_HIGH, + DRM_SCHED_PRIORITY_KERNEL, + + DRM_SCHED_PRIORITY_COUNT, + DRM_SCHED_PRIORITY_UNSET = -2 +}; + +#endif diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index a62071660602..9228ff0d515e 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -29,6 +29,7 @@ #include #include #include +#include #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) @@ -48,20 +49,6 @@ struct drm_gem_object; struct drm_gpu_scheduler; struct drm_sched_rq; -/* These are often used as an (initial) index - * to an array, and as such should start at 0. - */ -enum drm_sched_priority { - DRM_SCHED_PRIORITY_MIN, - DRM_SCHED_PRIORITY_NORMAL, - DRM_SCHED_PRIORITY_HIGH, - DRM_SCHED_PRIORITY_VERY_HIGH, - DRM_SCHED_PRIORITY_KERNEL, - - DRM_SCHED_PRIORITY_COUNT, - DRM_SCHED_PRIORITY_UNSET = -2 -}; - /* Used to chose between FIFO and RR jobs scheduling */ extern int drm_sched_policy; -- 2.40.0
[RFC PATCH 3/4] uapi: Add RLIMIT_GPUPRIO
Introduce a new RLIMIT that allows the user to set a runtime limit on the GPU scheduler priority for tasks. This avoids the need for leased compositors such as SteamVR's vrcompositor to be launched via a setcap'ed executable with CAP_SYS_NICE. This is required for SteamVR as it doesn't run as a DRM master, but rather on a DRM lease using the HMD's connector. The current situation is bad for a few reasons, one being that in order to setcap the executable, typically one must run as root which involves a pretty high privilege escalation in order to achieve one small feat, a realtime async compute queue for VR or a compositor. The executable cannot be setcap'ed inside a container nor can the setcap'ed executable be run in a container with NO_NEW_PRIVS. Even in cases where one may think the DRM master check to be useful, such as Gamescope where it is the DRM master, the part of the compositor that runs as the DRM master is entirely separate to the Vulkan device with its own DRM device fd doing the GPU work that demands the realtime priority queue. Additionally, Gamescope can also run nested in a traditional compositor where there is no DRM master, but having a realtime queue is still advantageous. With adding RLIMIT_GPUPRIO, a process outside of a container or eg. rtkit could call `prlimit` on the process inside to allow it to make a realtime queue and solve these problems. 
Signed-off-by: Joshua Ashton --- fs/proc/base.c | 1 + include/asm-generic/resource.h | 3 ++- include/uapi/asm-generic/resource.h | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 5e0e0ccd47aa..a5c9a9f23f08 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -589,6 +589,7 @@ static const struct limit_names lnames[RLIM_NLIMITS] = { [RLIMIT_NICE] = {"Max nice priority", NULL}, [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, + [RLIMIT_GPUPRIO] = {"Max DRM GPU priority", NULL}, }; /* Display limits for a process */ diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 8874f681b056..cefee1a8d9db 100644 --- a/include/asm-generic/resource.h +++ b/include/asm-generic/resource.h @@ -3,7 +3,7 @@ #define _ASM_GENERIC_RESOURCE_H #include - +#include /* * boot-time rlimit defaults for the init task: @@ -26,6 +26,7 @@ [RLIMIT_NICE] = { 0, 0 }, \ [RLIMIT_RTPRIO] = { 0, 0 }, \ [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ + [RLIMIT_GPUPRIO]= { DRM_SCHED_PRIORITY_NORMAL, DRM_SCHED_PRIORITY_NORMAL }, \ } #endif diff --git a/include/uapi/asm-generic/resource.h b/include/uapi/asm-generic/resource.h index f12db7a0da64..85027b07a420 100644 --- a/include/uapi/asm-generic/resource.h +++ b/include/uapi/asm-generic/resource.h @@ -46,7 +46,8 @@ 0-39 for nice level 19 .. -20 */ #define RLIMIT_RTPRIO 14 /* maximum realtime priority */ #define RLIMIT_RTTIME 15 /* timeout for RT tasks in us */ -#define RLIM_NLIMITS 16 +#define RLIMIT_GPUPRIO 16 /* maximum GPU priority */ +#define RLIM_NLIMITS 17 /* * SuS says limits have to be unsigned. -- 2.40.0
[RFC PATCH 1/4] drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH
This allows AMDGPU scheduler priority above normal to be expressed using the DRM_SCHED_PRIORITY enum. Signed-off-by: Joshua Ashton --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 2 +- include/drm/gpu_scheduler.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d2139ac12159..8ec255091c4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -79,7 +79,7 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) return DRM_SCHED_PRIORITY_HIGH; case AMDGPU_CTX_PRIORITY_VERY_HIGH: - return DRM_SCHED_PRIORITY_HIGH; + return DRM_SCHED_PRIORITY_VERY_HIGH; /* This should not happen as we sanitized userspace provided priority * already, WARN if this happens. diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fc1c0d8611a8..e3495712b236 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -336,7 +336,7 @@ struct msm_gpu_perfcntr { * DRM_SCHED_PRIORITY_KERNEL priority level is treated specially in some * cases, so we don't use it (no need for kernel generated jobs). */ -#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_HIGH - DRM_SCHED_PRIORITY_MIN) +#define NR_SCHED_PRIORITIES (1 + DRM_SCHED_PRIORITY_VERY_HIGH - DRM_SCHED_PRIORITY_MIN) /** * struct msm_file_private - per-drm_file context diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9935d1e2ff69..a62071660602 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -55,6 +55,7 @@ enum drm_sched_priority { DRM_SCHED_PRIORITY_MIN, DRM_SCHED_PRIORITY_NORMAL, DRM_SCHED_PRIORITY_HIGH, + DRM_SCHED_PRIORITY_VERY_HIGH, DRM_SCHED_PRIORITY_KERNEL, DRM_SCHED_PRIORITY_COUNT, -- 2.40.0
[RFC PATCH 0/4] uapi, drm: Add and implement RLIMIT_GPUPRIO
Hello all! I would like to propose a new API for allowing processes to control the priority of GPU queues similar to RLIMIT_NICE/RLIMIT_RTPRIO. The main reason for this is for compositors such as Gamescope and SteamVR vrcompositor to be able to create realtime async compute queues on AMD without the need of CAP_SYS_NICE. The current situation is bad for a few reasons, one being that in order to setcap the executable, typically one must run as root which involves a pretty high privilege escalation in order to achieve one small feat, a realtime async compute queue for VR or a compositor. The executable cannot be setcap'ed inside a container nor can the setcap'ed executable be run in a container with NO_NEW_PRIVS. I go into more detail in the description in `uapi: Add RLIMIT_GPUPRIO`. My initial proposal here is to add a new RLIMIT, `RLIMIT_GPUPRIO`, which seems to make most initial sense to me to solve the problem. I am definitely not set that this is the best formulation however or if this should be linked to DRM (in terms of its scheduler priority enum/definitions) in any way and would really like other people's opinions across the stack on this. One initial concern is that potentially this RLIMIT could out-live the lifespan of DRM. It sounds crazy saying it right now, something that definitely popped into my mind when touching `resource.h`. :-) Anyway, please let me know what you think! Definitely open to any feedback and advice you may have. :D Thanks! 
- Joshie Joshua Ashton (4): drm/scheduler: Add DRM_SCHED_PRIORITY_VERY_HIGH drm/scheduler: Split out drm_sched_priority to own file uapi: Add RLIMIT_GPUPRIO drm/amd/amdgpu: Check RLIMIT_GPUPRIO in priority permissions drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 13 ++-- drivers/gpu/drm/msm/msm_gpu.h | 2 +- fs/proc/base.c | 1 + include/asm-generic/resource.h | 3 +- include/drm/drm_sched_priority.h| 41 + include/drm/gpu_scheduler.h | 14 + include/uapi/asm-generic/resource.h | 3 +- 7 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 include/drm/drm_sched_priority.h -- 2.40.0
RE: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
>Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level > >On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote: >>> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of >>> cache_level >>> >>> On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote: From: Fei Yang Currently the KMD is using enum i915_cache_level to set caching policy for buffer objects. This is flaky because the PAT index which really controls the caching behavior in PTE has far more levels than what's defined in the enum. >>> >>> Then just add more enum values. >> >> That would be really messy because PAT index is platform dependent, >> you would have to maintain many tables for the the translation. >> >>> 'pat_index' is absolutely meaningless to the reader, it's just an >>> arbitrary number. Whereas 'cache_level' conveys how the thing is >>> actually going to get used and thus how the caches should behave. >> >> By design UMD's understand PAT index. Both UMD and KMD should stand on >> the same ground, the Bspec, to avoid any potential ambiguity. >> In addition, the PAT index is platform dependent, having to translate between i915_cache_level and PAT index is not reliable, >>> >>> If it's not realiable then the code is clearly broken. >> >> Perhaps the word "reliable" is a bit confusing here. What I really >> meant to say is 'difficult to maintain', or 'error-prone'. >> and makes the code more complicated. >>> >>> You have to translate somewhere anyway. Looks like you're now adding >>> translations the other way (pat_index->cache_level). How is that better? >> >> No, there is no pat_index->cache_level translation. > > i915_gem_object_has_cache_level() is exactly that. And that one does look > actually fragile since it assumes only one PAT index maps to each cache > level. So if the user picks any other pat_index anything using > i915_gem_object_has_cache_level() is likely to do the wrong thing. That is still one way transaltion, from cache_level to pat_index. 
The cache_level is only a KMD concept now. And inside the KMD, we have one table to translate between cache_level and pat_index. Only KMD would be able to trigger a comparison on pat_index for a KMD allocated BO. User is not allowed to set pat_index dynamically any more. By design the cache setting for user space BO's should be immutable. That's why even the set caching ioctl has been killed (from MTL onward). > If we do switch to pat_index then I think cache_level should be made a > purely uapi concept, UMD's directly use pat_index because they are supposed to follow the b-spec. The abstracted cache_level is no longer exposed to user space. -Fei > and all the internal code should instead be made to > query various aspects of the caching behaviour of the current pat_index > (eg. is LLC caching enabled, and thus do I need to clflush?). > > -- > Ville Syrjälä > Intel
Re: [PATCH] misc: sram: Add dma-heap-export reserved SRAM area type
On 4/1/23 3:35 AM, Christian Gmeiner wrote: Hi Andrew Okay, will split for v2. Was there a follow-up v2 of this patchset? AFAICT this series did not make it into the mainline kernel. Do you have any plans to work on it? If not I would like to help out as we have a use case where we want to use a dma-buf sram exporter. Sure, I've been keeping it alive in our evil vendor tree, but if there is interest upstream now I'll post a v2 and CC you. Thanks, Andrew
[PATCH v2] misc: sram: Add DMA-BUF Heap exporting of SRAM areas
This new export type exposes to userspace the SRAM area as a DMA-BUF Heap, this allows for allocations of DMA-BUFs that can be consumed by various DMA-BUF supporting devices. Signed-off-by: Andrew Davis --- Changes from v1: - Use existing DT flags, if both pool(device usable) and export(userspace usable) properties are in the SRAM node then export as a DMA-BUF Heap - Rebase on 6.3-rc5 drivers/misc/Kconfig | 7 + drivers/misc/Makefile| 1 + drivers/misc/sram-dma-heap.c | 245 +++ drivers/misc/sram.c | 6 + drivers/misc/sram.h | 16 +++ 5 files changed, 275 insertions(+) create mode 100644 drivers/misc/sram-dma-heap.c diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 433aa41977852..8b4c111a6493b 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -448,6 +448,13 @@ config SRAM config SRAM_EXEC bool +config SRAM_DMA_HEAP + bool "Export on-chip SRAM pools using DMA-Heaps" + depends on DMABUF_HEAPS && SRAM + help + This driver allows the export of on-chip SRAM marked as both pool + and exportable to userspace using the DMA-Heaps interface. 
+ config DW_XDATA_PCIE depends on PCI tristate "Synopsys DesignWare xData PCIe driver" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 56de43943cd51..bbdc64aa8af1a 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_VMWARE_VMCI) += vmw_vmci/ obj-$(CONFIG_LATTICE_ECP3_CONFIG) += lattice-ecp3-config.o obj-$(CONFIG_SRAM) += sram.o obj-$(CONFIG_SRAM_EXEC)+= sram-exec.o +obj-$(CONFIG_SRAM_DMA_HEAP)+= sram-dma-heap.o obj-$(CONFIG_GENWQE) += genwqe/ obj-$(CONFIG_ECHO) += echo/ obj-$(CONFIG_CXL_BASE) += cxl/ diff --git a/drivers/misc/sram-dma-heap.c b/drivers/misc/sram-dma-heap.c new file mode 100644 index 0..c511f4ac1280e --- /dev/null +++ b/drivers/misc/sram-dma-heap.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SRAM DMA-Heap userspace exporter + * + * Copyright (C) 2019-2022 Texas Instruments Incorporated - https://www.ti.com/ + * Andrew Davis + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sram.h" + +struct sram_dma_heap { + struct dma_heap *heap; + struct gen_pool *pool; +}; + +struct sram_dma_heap_buffer { + struct gen_pool *pool; + struct list_head attachments; + struct mutex attachments_lock; + unsigned long len; + void *vaddr; + phys_addr_t paddr; +}; + +struct dma_heap_attachment { + struct device *dev; + struct sg_table *table; + struct list_head list; +}; + +static int dma_heap_attach(struct dma_buf *dmabuf, + struct dma_buf_attachment *attachment) +{ + struct sram_dma_heap_buffer *buffer = dmabuf->priv; + struct dma_heap_attachment *a; + struct sg_table *table; + + a = kzalloc(sizeof(*a), GFP_KERNEL); + if (!a) + return -ENOMEM; + + table = kmalloc(sizeof(*table), GFP_KERNEL); + if (!table) { + kfree(a); + return -ENOMEM; + } + if (sg_alloc_table(table, 1, GFP_KERNEL)) { + kfree(table); + kfree(a); + return -ENOMEM; + } + sg_set_page(table->sgl, pfn_to_page(PFN_DOWN(buffer->paddr)), buffer->len, 0); + + a->table = table; + 
a->dev = attachment->dev; + INIT_LIST_HEAD(>list); + + attachment->priv = a; + + mutex_lock(>attachments_lock); + list_add(>list, >attachments); + mutex_unlock(>attachments_lock); + + return 0; +} + +static void dma_heap_detatch(struct dma_buf *dmabuf, +struct dma_buf_attachment *attachment) +{ + struct sram_dma_heap_buffer *buffer = dmabuf->priv; + struct dma_heap_attachment *a = attachment->priv; + + mutex_lock(>attachments_lock); + list_del(>list); + mutex_unlock(>attachments_lock); + + sg_free_table(a->table); + kfree(a->table); + kfree(a); +} + +static struct sg_table *dma_heap_map_dma_buf(struct dma_buf_attachment *attachment, +enum dma_data_direction direction) +{ + struct dma_heap_attachment *a = attachment->priv; + struct sg_table *table = a->table; + + /* +* As this heap is backed by uncached SRAM memory we do not need to +* perform any sync operations on the buffer before allowing device +* domain access. For this reason we use SKIP_CPU_SYNC and also do +* not use or provide begin/end_cpu_access() dma-buf functions. +*/ + if (!dma_map_sg_attrs(attachment->dev, table->sgl, table->nents, + direction, DMA_ATTR_SKIP_CPU_SYNC)) +
Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog
On 4/3/2023 11:48 AM, Dmitry Baryshkov wrote: On 03/04/2023 21:06, Abhinav Kumar wrote: On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote: This huge series attempts to restructure the DPU HW catalog into a manageable and reviewable data set. In order to ease review and testing I merged all the necessary fixes into this series. Also I cherry-picked & slightly fixed Konrad's patch adding size to the SSPP and INTF macros. I had to first dig up some history about why dpu catalog grew so much in the first place before starting this review. When the DPU driver first landed (which pre-dates my work in upstream), it looks like it followed mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets which use DPU kept growing, this is becoming a burden. As everyone knows, downstream follows a devicetree model for the dpu hardware and that should have always been the case. Perhaps in the last 2-3 years more time could have been spent on standardizing the bindings used for hw blocks in order to maintain a less hard-coded catalog file and more in the device tree. Unfortunately, this is not how the upstream DT works. If something is a constant hardware property, it should not go into the DT. So pushing catalog to dt would have been immediately frowned upon by Rob Herring or Krzysztof. Yes certainly we cannot put hardware specific properties. But in general, modelling the hardware like the number of sspps, number of interfaces and number of dspps etc can be a bit abstracted? like blk-type and blk-offset? blk-type can be a custom string because each block is named differently for different vendors? The number of blk_offsets decides number of blocks. Its not constant right. We are seeing it varying with chipsets. Then the catalog would have just been a place to parse the device tree, set the feature capability based on chipset (refer _sde_hardware_pre_caps). 
That way offsets , number of blocks and the blocks themselves still come from the device tree but perhaps some specific features are at SOC level for which the catalog still stays. That being said, I thought of different strategies even before the review but two issues prevented me from suggesting those ideas (one of which I am seeing even here , which I am going to suggest below and also suggest why it wont work). 1) For the same DPU major/minor version, some features might get dropped or even get added with different SOCs as overall the system capabilities might differ like number of SSPPs or memory footprint of the SOC etc. So there is no good way right now to generalize any dpu catalog or to tie it with a DPU major/minor version. We will have to stick with a per-SOC model. Up to now, the SoC was equal to major+minor. Could you please be more specific here, if there are any actual differences within major+minor families? So lets say, the same DPU major/minor version is used but we have only one DSI on one chipset Vs two DSIs on the other, some of the features which come into play only for dual DSI cannot be used. Like broadcasting a DCS command across two DSIs etc. This is a very basic example, but there are many examples. This is what led me to not pursue that route. 2) For the same DPU major/minor version, even if core-DPU is same (in terms of SSPP, DSPP etc), the number of interfaces can change. So again no room to generalize same DPU hw version. Again, I might be just scratching the surface, but I have not observed this. This typically happens based on what products that chipset is catered towards. Thats pretty much what I can share. But more number of interfaces for more number of displays / use-cases. 3) For the same reason as (1) and (2), I think the de-duplication strategy used in this series is not correct. The idea of dpu_hw_version_num_layer_mixer is just not scalable as I dont know how many variants that will lead to. 
So it seems like just an attempt to de-duplicate which perhaps works today for existing dpu chipsets in upstream but by no means scalable. Lets go ahead with per-SOC catalog file but lets live with some amount of duplication between them if we really have to split it across header files. Indeed, this leads to minor differences on top of major+lm. However, I think, the overall complexity is lowered. Nevertheless, let's land the major set of patches and leave generalization for the later time. I think, with the addition of the next several platforms we will see the drill. Yes, I would say lets handle generalization/de-duplication later when we see more patterns. Lets land the main pieces first. Going with dpu version and number of lms is not the way to generalize it from what we think. I also thought of similar strategies to generalize like based on sub-blocks similar to what you have done but all of these were NAKed internally by folks who work on more chipsets / have more visibility into the spread of features across
[PATCH] drm/bridge: ti-sn65dsi83: Do not generate HFP/HBP/HSA and EOT packet
Do not generate the HS front and back porch gaps, the HSA gap and EOT packet, as per "SN65DSI83 datasheet SLLSEC1I - SEPTEMBER 2012 - REVISED OCTOBER 2020", page 22, these packets are not required. This makes the TI SN65DSI83 bridge work with Samsung DSIM on i.MX8MN. Signed-off-by: Marek Vasut --- Cc: Andrzej Hajda Cc: Daniel Vetter Cc: David Airlie Cc: Jagan Teki Cc: Jernej Skrabec Cc: Jonas Karlman Cc: Laurent Pinchart Cc: Michael Walle Cc: Neil Armstrong Cc: Robert Foss Cc: dri-devel@lists.freedesktop.org --- drivers/gpu/drm/bridge/ti-sn65dsi83.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 91ecfbe45bf90..b60ae1dc1191d 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx) dsi->lanes = dsi_lanes; dsi->format = MIPI_DSI_FMT_RGB888; - dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST; + dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | + MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP | + MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET; ret = devm_mipi_dsi_attach(dev, dsi); if (ret < 0) { -- 2.39.2
Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog
On 03/04/2023 21:06, Abhinav Kumar wrote: On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote: This huge series attempts to restructure the DPU HW catalog into a manageable and reviewable data set. In order to ease review and testing I merged all the necessary fixes into this series. Also I cherry-picked & slightly fixed Konrad's patch adding size to the SSPP and INTF macros. I had to first dig up some history about why dpu catalog grew so much in the first place before starting this review. When the DPU driver first landed (which pre-dates my work in upstream), it looks like it followed mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets which use DPU kept growing, this is becoming a burden. As everyone knows, downstream follows a devicetree model for the dpu hardware and that should have always been the case. Perhaps in the last 2-3 years more time could have been spent on standardizing the bindings used for hw blocks in order to maintain a less hard-coded catalog file and more in the device tree. Unfortunately, this is not how the upstream DT works. If something is a constant hardware property, it should not go into the DT. So pushing catalog to dt would have been immediately frowned upon by Rob Herring or Krzysztof. Then the catalog would have just been a place to parse the device tree, set the feature capability based on chipset (refer _sde_hardware_pre_caps). That way offsets , number of blocks and the blocks themselves still come from the device tree but perhaps some specific features are at SOC level for which the catalog still stays. That being said, I thought of different strategies even before the review but two issues prevented me from suggesting those ideas (one of which I am seeing even here , which I am going to suggest below and also suggest why it wont work). 
1) For the same DPU major/minor version, some features might get dropped or even get added with different SOCs as overall the system capabilities might differ like number of SSPPs or memory footprint of the SOC etc. So there is no good way right now to generalize any dpu catalog or to tie it with a DPU major/minor version. We will have to stick with a per-SOC model. Up to now, the SoC was equal to major+minor. Could you please be more specific here, if there are any actual differences within major+minor families? This is what led me to not pursue that route. 2) For the same DPU major/minor version, even if core-DPU is same (in terms of SSPP, DSPP etc), the number of interfaces can change. So again no room to generalize same DPU hw version. Again, I might be just scratching the surface, but I have not observed this. 3) For the same reason as (1) and (2), I think the de-duplication strategy used in this series is not correct. The idea of dpu_hw_version_num_layer_mixer is just not scalable as I dont know how many variants that will lead to. So it seems like just an attempt to de-duplicate which perhaps works today for existing dpu chipsets in upstream but by no means scalable. Lets go ahead with per-SOC catalog file but lets live with some amount of duplication between them if we really have to split it across header files. Indeed, this leads to minor differences on top of major+lm. However, I think, the overall complexity is lowered. Nevertheless, let's land the major set of patches and leave generalization for the later time. I think, with the addition of the next several platforms we will see the drill. I also thought of similar strategies to generalize like based on sub-blocks similar to what you have done but all of these were NAKed internally by folks who work on more chipsets / have more visibility into the spread of features across chipsets. First 4 patches clean up the catalog a bit in order to make it more suitable for refactoring. These are okay. 
I will address your follow-up questions about patch (1) and lets land these. Then the next batch of 13 + 5 patches split the hw catalog entries into per-SoC files. This part is also fine. But perhaps dont have dpu hw version in the file. So just dpu_hw_sm8250.h or dpu_hw_sm8350.h etc. Having a version makes it easier to compare chipsets (and also to verify that feature masks are correct), so I'd like to retain it. Next 9 patches rework catalog entries, mostly targeting deduplication of data used by several platforms. At this moment only three pairs (out of 13 devices supported by DPU) are merged. However this part lays out the ground to ease adding support for new platforms, some of which use the same configuration as the existing platforms This is the part I suggest we drop. Last batch of 7 patches renames existing macros to ease using them while adding support for new devices. I have to check this part but perhaps after re-basing based on my earlier comment. Ack, I'll see what I can drop and what is going to be there. Up to now there were some natural shares,
Re: [PATCH v3 11/11] Documentation: iio: Document high-speed DMABUF based API
Hi Jonathan, Le lundi 03 avril 2023 à 10:05 -0600, Jonathan Corbet a écrit : > Paul Cercueil writes: > > One nit: > > > Document the new DMABUF based API. > > > > Signed-off-by: Paul Cercueil > > Cc: Jonathan Corbet > > Cc: linux-...@vger.kernel.org > > > > --- > > v2: - Explicitly state that the new interface is optional and is > > not implemented by all drivers. > > - The IOCTLs can now only be called on the buffer FD returned > > by > > IIO_BUFFER_GET_FD_IOCTL. > > - Move the page up a bit in the index since it is core stuff > > and not > > driver-specific. > > v3: Update the documentation to reflect the new API. > > --- > > Documentation/iio/dmabuf_api.rst | 59 > > > > Documentation/iio/index.rst | 2 ++ > > 2 files changed, 61 insertions(+) > > create mode 100644 Documentation/iio/dmabuf_api.rst > > > > diff --git a/Documentation/iio/dmabuf_api.rst > > b/Documentation/iio/dmabuf_api.rst > > new file mode 100644 > > index ..4d70372c7ebd > > --- /dev/null > > +++ b/Documentation/iio/dmabuf_api.rst > > @@ -0,0 +1,59 @@ > > +.. SPDX-License-Identifier: GPL-2.0 > > + > > +=== > > +High-speed DMABUF interface for IIO > > +=== > > + > > +1. Overview > > +=== > > + > > +The Industrial I/O subsystem supports access to buffers through a > > +file-based interface, with read() and write() access calls through > > the > > +IIO device's dev node. > > + > > +It additionally supports a DMABUF based interface, where the > > userspace > > +can attach DMABUF objects (externally created) to a IIO buffer, > > and > > +subsequently use them for data transfers. > > + > > +A userspace application can then use this interface to share > > DMABUF > > +objects between several interfaces, allowing it to transfer data > > in a > > +zero-copy fashion, for instance between IIO and the USB stack. > > + > > +The userspace application can also memory-map the DMABUF objects, > > and > > +access the sample data directly. The advantage of doing this vs. 
> > the > > +read() interface is that it avoids an extra copy of the data > > between the > > +kernel and userspace. This is particularly useful for high-speed > > devices > > +which produce several megabytes or even gigabytes of data per > > second. > > +It does however increase the userspace-kernelspace synchronization > > +overhead, as the DMA_BUF_SYNC_START and DMA_BUF_SYNC_END IOCTLs > > have to > > +be used for data integrity. > > + > > +2. User API > > +=== > > + > > +As part of this interface, three new IOCTLs have been added. These > > three > > +IOCTLs have to be performed on the IIO buffer's file descriptor, > > +obtained using the IIO_BUFFER_GET_FD_IOCTL() ioctl. > > + > > +``IIO_BUFFER_DMABUF_ATTACH_IOCTL(int)`` > > + > > + > > +Attach the DMABUF object, identified by its file descriptor, to > > the IIO > > +buffer. Returns zero on success, and a negative errno value on > > error. > > Rather than abusing subsections, this would be better done as a > description list: > > IIO_BUFFER_DMABUF_ATTACH_IOCTL(int) > Attach the DMABUF object, identified by its file descriptor, to > the IIO buffer. Returns zero on success, and a negative errno > value on error. Noted, thanks. Cheers, -Paul
[PATCH] radeon: avoid double free in ci_dpm_init()
There are several calls to ci_dpm_fini() in ci_dpm_init() when there occur errors in functions like r600_parse_extended_power_table(). This is harmful as it can lead to double free situations: for instance, r600_parse_extended_power_table() will call for r600_free_extended_power_table() as will ci_dpm_fini(), both of which will try to free resources. Other drivers do not call *_dpm_fini functions from their respective *_dpm_init calls - neither should ci_dpm_init(). Fix this by removing extra calls to ci_dpm_fini(). Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)") Cc: sta...@vger.kernel.org Co-developed-by: Natalia Petrova Signed-off-by: Nikita Zhandarovich --- drivers/gpu/drm/radeon/ci_dpm.c | 20 +--- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 8ef25ab305ae..7b77d4c93f1d 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -5677,28 +5677,20 @@ int ci_dpm_init(struct radeon_device *rdev) pi->pcie_lane_powersaving.min = 16; ret = ci_get_vbios_boot_values(rdev, >vbios_boot_state); - if (ret) { - ci_dpm_fini(rdev); + if (ret) return ret; - } ret = r600_get_platform_caps(rdev); - if (ret) { - ci_dpm_fini(rdev); + if (ret) return ret; - } ret = r600_parse_extended_power_table(rdev); - if (ret) { - ci_dpm_fini(rdev); + if (ret) return ret; - } ret = ci_parse_power_table(rdev); - if (ret) { - ci_dpm_fini(rdev); + if (ret) return ret; - } pi->dll_default_on = false; pi->sram_end = SMC_RAM_END; @@ -5749,10 +5741,8 @@ int ci_dpm_init(struct radeon_device *rdev) kcalloc(4, sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL); - if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) { - ci_dpm_fini(rdev); + if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) return -ENOMEM; - } 
rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4; rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0; rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
RE: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic
> -Original Message- > From: De Marchi, Lucas > Sent: Wednesday, March 29, 2023 8:46 PM > To: Srivatsa, Anusha > Cc: intel...@lists.freedesktop.org; Harrison, John C > ; Ceraolo Spurio, Daniele > ; dri-devel@lists.freedesktop.org; Daniel > Vetter ; Dave Airlie > Subject: Re: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic > > On Tue, Mar 28, 2023 at 04:31:13PM -0700, Anusha Srivatsa wrote: > > > > > >> -Original Message- > >> From: De Marchi, Lucas > >> Sent: Thursday, March 23, 2023 10:18 PM > >> To: intel...@lists.freedesktop.org > >> Cc: Srivatsa, Anusha ; Harrison, John C > >> ; Ceraolo Spurio, Daniele > >> ; dri-devel@lists.freedesktop.org; > >> Daniel Vetter ; Dave Airlie > >> ; De Marchi, Lucas > >> Subject: [PATCH 3/3] drm/xe: Update GuC/HuC firmware autoselect logic > >> > >> Update the logic to autoselect GuC/HuC for the platforms with the > >> following > >> improvements: > >> > >> - Document what is the firmware file that is expected to be > >> loaded and what is checked from blob headers > >> - When the platform is under force-probe it's desired to enforce > >> the full-version requirement so the correct firmware is used > >> before widespread adoption and backward-compatibility > >> > >Extra line ^ > > > >> commitments > >> - Directory from which we expect firmware blobs to be available in > >> upstream linux-firmware repository depends on the platform: for > >> the ones supported by i915 it uses the i915/ directory, but the ones > >> expected to be supported by xe, it's on the xe/ directory. This > >> means that for platforms in the intersection, the firmware is > >> loaded from a different directory, but that is not much important > >> in the firmware repo and it avoids firmware duplication. > >> > >> - Make the table with the firmware definitions clearly state the > >> versions being expected. 
Now with macros to select the version it's > >> possible to choose between full-version/major-version for GuC and > >> full-version/no-version for HuC. These are similar to the macros used > >> in i915, but implemented in a slightly different way to avoid > >> duplicating the macros for each firmware/type and functionality, > >> besides adding the support for different directories. > >> > >> - There is no check added regarding force-probe since xe should > >> reuse the same firmware files published for i915 for past > >> platforms. This can be improved later with additional > >> kunit checking against a hardcoded list of platforms that > >Extra line here. > > > >> falls in this category. > >> - As mentioned in the TODO, the major version fallback was not > >> implemented before as currently each platform only supports one > >> major. That can be easily added later. > >> > >> - GuC version for MTL and PVC were updated to 70.6.4, using the exact > >> full version, while the > >> > >> After this the GuC firmware used by PVC changes to pvc_guc_70.5.2.bin > >> since it's using a file not published yet. > >> > >> Signed-off-by: Lucas De Marchi > >> --- > >> drivers/gpu/drm/xe/xe_uc_fw.c | 315 +--- > >> drivers/gpu/drm/xe/xe_uc_fw.h | 2 +- > >> drivers/gpu/drm/xe/xe_uc_fw_types.h | 7 + > >> 3 files changed, 204 insertions(+), 120 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c > >> b/drivers/gpu/drm/xe/xe_uc_fw.c index 174c42873ebb..653bc3584cc5 > >> 100644 > >> --- a/drivers/gpu/drm/xe/xe_uc_fw.c > >> +++ b/drivers/gpu/drm/xe/xe_uc_fw.c > >> @@ -17,6 +17,137 @@ > >> #include "xe_mmio.h" > >> #include "xe_uc_fw.h" > >> > >> +/* > >> + * List of required GuC and HuC binaries per-platform. They must be > >> +ordered > >> + * based on platform, from newer to older. > >> + * > >> + * Versioning follows the guidelines from > >> + * Documentation/driver-api/firmware/firmware-usage-guidelines.rst. 
> >> +There is a > >> + * distinction for platforms being officially supported by the driver or > >> not. > >> + * Platforms not available publicly or not yet officially supported > >> +by the > >> + * driver (under force-probe), use the mmp_ver(): the firmware > >> +autoselect logic > >> + * will select the firmware from disk with filename that matches the > >> +full > >> + * "mpp version", i.e. major.minor.patch. mmp_ver() should only be > >> +used for > >> + * this case. > >> + * > >> + * For platforms officially supported by the driver, the filename > >> +always only > >> + * ever contains the major version (GuC) or no version at all (HuC). > >> + * > >> + * After loading the file, the driver parses the versions embedded in the > >> blob. > >> + * The major version needs to match a major version supported by the > >> +driver (if > >> + * any). The minor version is also checked and a notice emitted to > >> +the log if > >> + * the version found is smaller than the version wanted. This is > >> +done only for > >> + * informational purposes so users may have a chance to
Re: [PATCH v2 1/5] drm/tests: Test drm_rect_intersect()
On 03/04/23 12:33, Maíra Canal wrote: > Hi Arthur, > > On 3/27/23 10:38, Arthur Grillo wrote: >> Insert test for the drm_rect_intersect() function, it also create a >> helper for comparing drm_rects more easily. >> >> Signed-off-by: Arthur Grillo >> --- >> drivers/gpu/drm/tests/drm_rect_test.c | 139 ++ >> 1 file changed, 139 insertions(+) >> >> diff --git a/drivers/gpu/drm/tests/drm_rect_test.c >> b/drivers/gpu/drm/tests/drm_rect_test.c >> index e9809ea32696..3654c0be3d6b 100644 >> --- a/drivers/gpu/drm/tests/drm_rect_test.c >> +++ b/drivers/gpu/drm/tests/drm_rect_test.c >> @@ -9,6 +9,17 @@ >> #include >> +#include > > Is this include really needed? I was able to compile without it. > >> + >> +static void drm_rect_compare(struct kunit *test, const struct drm_rect *r, >> + const struct drm_rect *expected) >> +{ >> +KUNIT_EXPECT_EQ(test, r->x1, expected->x1); > > Maybe it would be nice to have a message here that shows the current x1 > and the expected x1. Same for the other dimensions. > Doesn't KUnit already output this information when the values don't match? 
>> +KUNIT_EXPECT_EQ(test, r->y1, expected->y1); >> +KUNIT_EXPECT_EQ(test, drm_rect_width(r), drm_rect_width(expected)); >> +KUNIT_EXPECT_EQ(test, drm_rect_height(r), drm_rect_height(expected)); >> +} >> + >> static void drm_test_rect_clip_scaled_div_by_zero(struct kunit *test) >> { >> struct drm_rect src, dst, clip; >> @@ -196,11 +207,139 @@ static void >> drm_test_rect_clip_scaled_signed_vs_unsigned(struct kunit *test) >> KUNIT_EXPECT_FALSE_MSG(test, drm_rect_visible(), "Source should >> not be visible\n"); >> } >> +struct drm_rect_intersect_case { >> +const char *description; >> +struct drm_rect r1, r2; >> +bool should_be_visible; >> +struct drm_rect expected_intersection; >> +}; >> + >> +static const struct drm_rect_intersect_case drm_rect_intersect_cases[] = { >> +{ >> +.description = "top-left X bottom-right", >> +.r1 = DRM_RECT_INIT(1, 1, 2, 2), >> +.r2 = DRM_RECT_INIT(0, 0, 2, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 1, 1, 1), >> +}, >> +{ >> +.description = "top-right X bottom-left", >> +.r1 = DRM_RECT_INIT(0, 0, 2, 2), >> +.r2 = DRM_RECT_INIT(1, -1, 2, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1), >> +}, >> +{ >> +.description = "bottom-left X top-right", >> +.r1 = DRM_RECT_INIT(1, -1, 2, 2), >> +.r2 = DRM_RECT_INIT(0, 0, 2, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1), >> +}, >> +{ >> +.description = "bottom-right X top-left", >> +.r1 = DRM_RECT_INIT(0, 0, 2, 2), >> +.r2 = DRM_RECT_INIT(1, 1, 2, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 1, 1, 1), >> +}, >> +{ >> +.description = "right X left", >> +.r1 = DRM_RECT_INIT(0, 0, 2, 1), >> +.r2 = DRM_RECT_INIT(1, 0, 3, 1), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1), >> +}, >> +{ >> +.description = "left X right", >> +.r1 = DRM_RECT_INIT(1, 0, 3, 1), >> +.r2 = DRM_RECT_INIT(0, 0, 2, 1), >> 
+.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(1, 0, 1, 1), >> +}, >> +{ >> +.description = "up X bottom", >> +.r1 = DRM_RECT_INIT(0, 0, 1, 2), >> +.r2 = DRM_RECT_INIT(0, -1, 1, 3), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(0, 0, 1, 2), >> +}, >> +{ >> +.description = "bottom X up", >> +.r1 = DRM_RECT_INIT(0, -1, 1, 3), >> +.r2 = DRM_RECT_INIT(0, 0, 1, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(0, 0, 1, 2), >> +}, >> +{ >> +.description = "touching corner", >> +.r1 = DRM_RECT_INIT(0, 0, 1, 1), >> +.r2 = DRM_RECT_INIT(1, 1, 2, 2), >> +.should_be_visible = false, >> +.expected_intersection = DRM_RECT_INIT(1, 1, 0, 0), >> +}, >> +{ >> +.description = "touching side", >> +.r1 = DRM_RECT_INIT(0, 0, 1, 1), >> +.r2 = DRM_RECT_INIT(1, 0, 1, 1), >> +.should_be_visible = false, >> +.expected_intersection = DRM_RECT_INIT(1, 0, 0, 1), >> +}, >> +{ >> +.description = "equal rects", >> +.r1 = DRM_RECT_INIT(0, 0, 2, 2), >> +.r2 = DRM_RECT_INIT(0, 0, 2, 2), >> +.should_be_visible = true, >> +.expected_intersection = DRM_RECT_INIT(0, 0, 2, 2), >> +}, >> +{ >> +.description = "inside another", >> +.r1 = DRM_RECT_INIT(0, 0, 2, 2), >> +.r2 = DRM_RECT_INIT(1, 1, 1, 1), >> +
Re: [PATCH v3 00/38] drm/msm/dpu: rework HW catalog
On 3/30/2023 2:52 PM, Dmitry Baryshkov wrote: This huge series attempts to restructure the DPU HW catalog into a manageable and reviewable data set. In order to ease review and testing I merged all the necessary fixes into this series. Also I cherry-picked & slightly fixed Konrad's patch adding size to the SSPP and INTF macros. I had to first dig up some history about why dpu catalog grew so much in the first place before starting this review. When the DPU driver first landed (which pre-dates my work in upstream), it looks like it followed mdp5 model from mdp5_cfg.c. But looks like as the number of chipsets which use DPU kept growing, this is becoming a burden. As everyone knows, downstream follows a devicetree model for the dpu hardware and that should have always been the case. Perhaps in the last 2-3 years more time could have been spent on standardizing the bindings used for hw blocks in order to maintain a less hard-coded catalog file and more in the device tree. Then the catalog would have just been a place to parse the device tree, set the feature capability based on chipset (refer _sde_hardware_pre_caps). That way offsets , number of blocks and the blocks themselves still come from the device tree but perhaps some specific features are at SOC level for which the catalog still stays. That being said, I thought of different strategies even before the review but two issues prevented me from suggesting those ideas (one of which I am seeing even here , which I am going to suggest below and also suggest why it wont work). 1) For the same DPU major/minor version, some features might get dropped or even get added with different SOCs as overall the system capabilities might differ like number of SSPPs or memory footprint of the SOC etc. So there is no good way right now to generalize any dpu catalog or to tie it with a DPU major/minor version. We will have to stick with a per-SOC model. This is what led me to not pursue that route. 
2) For the same DPU major/minor version, even if core-DPU is same (in terms of SSPP, DSPP etc), the number of interfaces can change. So again no room to generalize same DPU hw version. 3) For the same reason as (1) and (2), I think the de-duplication strategy used in this series is not correct. The idea of dpu_hw_version_num_layer_mixer is just not scalable as I dont know how many variants that will lead to. So it seems like just an attempt to de-duplicate which perhaps works today for existing dpu chipsets in upstream but by no means scalable. Lets go ahead with per-SOC catalog file but lets live with some amount of duplication between them if we really have to split it across header files. I also thought of similar strategies to generalize like based on sub-blocks similar to what you have done but all of these were NAKed internally by folks who work on more chipsets / have more visibility into the spread of features across chipsets. First 4 patches clean up the catalog a bit in order to make it more suitable for refactoring. These are okay. I will address your follow-up questions about patch (1) and lets land these. Then the next batch of 13 + 5 patches split the hw catalog entries into per-SoC files. This part is also fine. But perhaps dont have dpu hw version in the file. So just dpu_hw_sm8250.h or dpu_hw_sm8350.h etc. Next 9 patches rework catalog entries, mostly targeting deduplication of data used by several platforms. At this moment only three pairs (out of 13 devices supported by DPU) are merged. However this part lays out the ground to ease adding support for new platforms, some of which use the same configuration as the existing platforms This is the part I suggest we drop. Last batch of 7 patches renames existing macros to ease using them while adding support for new devices. I have to check this part but perhaps after re-basing based on my earlier comment. 
This pile of patches is submitted in a single batch to allow one to observe the final goal of the cleanup which otherwise might be hard to assess. Changes since v2: - Fixed sc8280xp SSPP size to 0x2ac - Rebased on top of msm-next-lumag, dropped merged patches Changes since v1: - Picked up Konrad's patch - Picked up dependencies into the main series - Moved qseed3lite vs qseed4 patches into the fixes part - Fixed sm6115 in a similar manner. Dmitry Baryshkov (37): drm/msm/dpu: constify DSC data structures drm/msm/dpu: mark remaining pp data as const drm/msm/dpu: move UBWC/memory configuration to separate struct drm/msm/dpu: split SM8550 catalog entry to the separate file drm/msm/dpu: split SM8450 catalog entry to the separate file drm/msm/dpu: split SC8280XP catalog entry to the separate file drm/msm/dpu: split SC7280 catalog entry to the separate file drm/msm/dpu: split SM8350 catalog entry to the separate file drm/msm/dpu: split SM6115 catalog entry to the separate file drm/msm/dpu: split QCM2290
Re: [PATCH v2] drm/vblank: Fix for drivers that do not drm_vblank_init()
On Mon, Apr 3, 2023 at 9:25 AM Nathan Chancellor wrote: > > On Mon, Apr 03, 2023 at 09:03:14AM -0700, Rob Clark wrote: > > From: Rob Clark > > > > This should fix a crash that was reported on ast (and possibly other > > drivers which do not initialize vblank). > > > >fbcon: Taking over console > >Unable to handle kernel NULL pointer dereference at virtual address > > 0074 > >Mem abort info: > > ESR = 0x9604 > > EC = 0x25: DABT (current EL), IL = 32 bits > > SET = 0, FnV = 0 > > EA = 0, S1PTW = 0 > > FSC = 0x04: level 0 translation fault > >Data abort info: > > ISV = 0, ISS = 0x0004 > > CM = 0, WnR = 0 > >user pgtable: 4k pages, 48-bit VAs, pgdp=080009d16000 > >[0074] pgd=, p4d= > >Internal error: Oops: 9604 [#1] SMP > >Modules linked in: ip6table_nat tun nft_fib_inet nft_fib_ipv4 > > nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 > > nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 > > nf_defrag_ipv4 rfkill ip_set nf_tables nfnetlink qrtr sunrpc binfmt_misc > > vfat fat xfs snd_usb_audio snd_hwdep snd_usbmidi_lib snd_seq snd_pcm > > snd_rawmidi snd_timer snd_seq_device snd soundcore joydev mc ipmi_ssif > > ipmi_devintf ipmi_msghandler arm_spe_pmu arm_cmn arm_dsu_pmu arm_dmc620_pmu > > cppc_cpufreq loop zram crct10dif_ce polyval_ce nvme polyval_generic > > ghash_ce sbsa_gwdt igb nvme_core ast nvme_common i2c_algo_bit xgene_hwmon > > gpio_dwapb scsi_dh_rdac scsi_dh_emc scsi_dh_alua ip6_tables ip_tables > > dm_multipath fuse > >CPU: 12 PID: 469 Comm: kworker/12:1 Not tainted > > 6.3.0-rc2-8-gd39e48ca80c0 #1 > >Hardware name: ADLINK AVA Developer Platform/AVA Developer Platform, > > BIOS TianoCore 2.04.100.07 (SYS: 2.06.20220308) 09/08/2022 > >Workqueue: events fbcon_register_existing_fbs > >pstate: 2049 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > >pc : drm_crtc_next_vblank_start+0x2c/0x98 > >lr : drm_atomic_helper_wait_for_fences+0x90/0x240 > >sp : 8d583960 > >x29: 8d583960 x28: 07ff8fc187b0 x27: > >x26: 07ff99c08c00 x25: 
0038 x24: 07ff99c0c000 > >x23: 0001 x22: 0038 x21: > >x20: 07ff9640a280 x19: x18: > >x17: x16: b24d2eece1c0 x15: 003038303178 > >x14: 303239310048 x13: x12: > >x11: x10: x9 : b24d2eeeaca0 > >x8 : 8d583628 x7 : 080077783000 x6 : > >x5 : 8d584000 x4 : 07ff99c0c000 x3 : 0130 > >x2 : x1 : 8d5839c0 x0 : 07ff99c0cc08 > >Call trace: > > drm_crtc_next_vblank_start+0x2c/0x98 > > drm_atomic_helper_wait_for_fences+0x90/0x240 > > drm_atomic_helper_commit+0xb0/0x188 > > drm_atomic_commit+0xb0/0xf0 > > drm_client_modeset_commit_atomic+0x218/0x280 > > drm_client_modeset_commit_locked+0x64/0x1a0 > > drm_client_modeset_commit+0x38/0x68 > > __drm_fb_helper_restore_fbdev_mode_unlocked+0xb0/0xf8 > > drm_fb_helper_set_par+0x44/0x88 > > fbcon_init+0x1e0/0x4a8 > > visual_init+0xbc/0x118 > > do_bind_con_driver.isra.0+0x194/0x3a0 > > do_take_over_console+0x50/0x70 > > do_fbcon_takeover+0x74/0xf8 > > do_fb_registered+0x13c/0x158 > > fbcon_register_existing_fbs+0x78/0xc0 > > process_one_work+0x1ec/0x478 > > worker_thread+0x74/0x418 > > kthread+0xec/0x100 > > ret_from_fork+0x10/0x20 > >Code: f944 b9409013 f940a082 9ba30a73 (b9407662) > >---[ end trace ]--- > > > > v2: Use drm_dev_has_vblank() > > > > Reported-by: Nathan Chancellor > > Fixes: d39e48ca80c0 ("drm/atomic-helper: Set fence deadline for vblank") > > Signed-off-by: Rob Clark > > Reviewed-by: Thomas Zimmermann > > Still appears to work for me: > > Tested-by: Nathan Chancellor Thanks for confirming BR, -R > > > --- > > drivers/gpu/drm/drm_vblank.c | 10 -- > > 1 file changed, 8 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c > > index 299fa2a19a90..877e2067534f 100644 > > --- a/drivers/gpu/drm/drm_vblank.c > > +++ b/drivers/gpu/drm/drm_vblank.c > > @@ -996,10 +996,16 @@ EXPORT_SYMBOL(drm_crtc_vblank_count_and_time); > > int drm_crtc_next_vblank_start(struct drm_crtc *crtc, ktime_t *vblanktime) > > { > > unsigned int pipe = drm_crtc_index(crtc); > > - struct drm_vblank_crtc 
*vblank = >dev->vblank[pipe]; > > - struct drm_display_mode *mode = >hwmode; > > + struct drm_vblank_crtc *vblank; > > + struct drm_display_mode *mode; > > u64 vblank_start; > > > > + if (!drm_dev_has_vblank(crtc->dev)) > > + return -EINVAL; > > + > > +
Re: [PATCH v3] drm/i915/hwmon: Use 0 to designate disabled PL1 power limit
On Fri, Mar 31, 2023 at 07:41:46PM -0700, Ashutosh Dixit wrote: > On ATSM the PL1 limit is disabled at power up. The previous uapi assumed > that the PL1 limit is always enabled and therefore did not have a notion of > a disabled PL1 limit. This results in erroneous PL1 limit values when the > PL1 limit is disabled. For example at power up, the disabled ATSM PL1 limit > was previously shown as 0 which means a low PL1 limit whereas the limit > being disabled actually implies a high effective PL1 limit value. > > To get round this problem, the PL1 limit uapi is expanded to include a > special value 0 to designate a disabled PL1 limit. A read value of 0 means > that the PL1 power limit is disabled, writing 0 disables the limit. > > The link between this patch and the bugs mentioned below is as follows: > * Because on ATSM the PL1 power limit is disabled on power up and there > were no means to enable it, we previously implemented the means to > enable the limit when the PL1 hwmon entry (power1_max) was written to. > * Now there is a IGT igt@i915_hwmon@hwmon_write which (a) reads orig value > from all hwmon sysfs (b) does a bunch of random writes and finally (c) > restores the orig value read. On ATSM since the orig value is 0, when > the IGT restores the 0 value, the PL1 limit is now enabled with a value > of 0. > * PL1 limit of 0 implies a low PL1 limit which causes GPU freq to fall to > 100 MHz. This causes GuC FW load and several IGT's to start timing out > and gives rise to these Intel CI bugs. After this patch, writing 0 would > disable the PL1 limit instead of enabling it, avoiding the freq drop > issue. 
> > v2: Add explanation for bugs mentioned below (Rodrigo) > v3: Eliminate race during PL1 disable and verify (Tvrtko) > Change return to -ENODEV if verify fails (Tvrtko) > > Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8062 > Link: https://gitlab.freedesktop.org/drm/intel/-/issues/8060 > Signed-off-by: Ashutosh Dixit > Reviewed-by: Rodrigo Vivi pushed to drm-intel-next > --- > .../ABI/testing/sysfs-driver-intel-i915-hwmon | 4 ++- > drivers/gpu/drm/i915/i915_hwmon.c | 26 +++ > 2 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon > b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon > index 2d6a472eef885..8d7d8f05f6cd0 100644 > --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon > +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon > @@ -14,7 +14,9 @@ Description:RW. Card reactive sustained (PL1/Tau) > power limit in microwatts. > > The power controller will throttle the operating frequency > if the power averaged over a window (typically seconds) > - exceeds this limit. > + exceeds this limit. A read value of 0 means that the PL1 > + power limit is disabled, writing 0 disables the > + limit. Writing values > 0 will enable the power limit. > > Only supported for particular Intel i915 graphics platforms. > > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c > b/drivers/gpu/drm/i915/i915_hwmon.c > index 596dd2c070106..8e7dccc8d3a0e 100644 > --- a/drivers/gpu/drm/i915/i915_hwmon.c > +++ b/drivers/gpu/drm/i915/i915_hwmon.c > @@ -349,6 +349,8 @@ hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 > attr, int chan) > } > } > > +#define PL1_DISABLE 0 > + > /* > * HW allows arbitrary PL1 limits to be set but silently clamps these values > to > * "typical but not guaranteed" min/max values in rg.pkg_power_sku. 
Follow > the > @@ -362,6 +364,14 @@ hwm_power_max_read(struct hwm_drvdata *ddat, long *val) > intel_wakeref_t wakeref; > u64 r, min, max; > > + /* Check if PL1 limit is disabled */ > + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) > + r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit); > + if (!(r & PKG_PWR_LIM_1_EN)) { > + *val = PL1_DISABLE; > + return 0; > + } > + > *val = hwm_field_read_and_scale(ddat, > hwmon->rg.pkg_rapl_limit, > PKG_PWR_LIM_1, > @@ -385,8 +395,24 @@ static int > hwm_power_max_write(struct hwm_drvdata *ddat, long val) > { > struct i915_hwmon *hwmon = ddat->hwmon; > + intel_wakeref_t wakeref; > u32 nval; > > + /* Disable PL1 limit and verify, because the limit cannot be disabled > on all platforms */ > + if (val == PL1_DISABLE) { > + mutex_lock(>hwmon_lock); > + with_intel_runtime_pm(ddat->uncore->rpm, wakeref) { > + intel_uncore_rmw(ddat->uncore, hwmon->rg.pkg_rapl_limit, > + PKG_PWR_LIM_1_EN, 0); > + nval = intel_uncore_read(ddat->uncore, > hwmon->rg.pkg_rapl_limit); > + } > +
Re: [PATCH v5 0/8] QAIC accel driver
On 3/27/2023 9:54 AM, Jeffrey Hugo wrote: This series introduces a driver under the accel subsystem (QAIC - Qualcomm AIC) for the Qualcomm Cloud AI 100 product (AIC100). AIC100 is a PCIe adapter card that hosts a dedicated machine learning inference accelerator. The previous version (v4) can be found at: https://lore.kernel.org/all/1679325074-5494-1-git-send-email-quic_jh...@quicinc.com/ Looks like things have been silent on this revision and we have a number of review tags already. Seems like this series is ready for merge. I'd like to see this queued for 6.4 if possible. Given that we are at 6.3-rc5, it seems like this would need to be merged now(ish) to make 6.4. Jacek, since you have commit permissions in drm-misc and are an active Accel maintainer, I wonder if it would be appropriate for you to merge this series to drm-misc. Thoughts? -Jeff
Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
On Mon, Apr 03, 2023 at 04:57:21PM +, Yang, Fei wrote: > > Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level > > > > On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote: > >> From: Fei Yang > >> > >> Currently the KMD is using enum i915_cache_level to set caching policy for > >> buffer objects. This is flaky because the PAT index which really controls > >> the caching behavior in PTE has far more levels than what's defined in the > >> enum. > > > > Then just add more enum values. > > That would be really messy because PAT index is platform dependent, you would > have to maintain many tables for the the translation. > > > 'pat_index' is absolutely meaningless to the reader, it's just an > > arbitrary number. Whereas 'cache_level' conveys how the thing is > > actually going to get used and thus how the caches should behave. > > By design UMD's understand PAT index. Both UMD and KMD should stand on the > same ground, the Bspec, to avoid any potential ambiguity. > > >> In addition, the PAT index is platform dependent, having to translate > >> between i915_cache_level and PAT index is not reliable, > > > >If it's not realiable then the code is clearly broken. > > Perhaps the word "reliable" is a bit confusing here. What I really meant to > say is 'difficult to maintain', or 'error-prone'. > > >> and makes the code more complicated. > > > > You have to translate somewhere anyway. Looks like you're now adding > > translations the other way (pat_index->cache_level). How is that better? > > No, there is no pat_index->cache_level translation. i915_gem_object_has_cache_level() is exactly that. And that one does look actually fragile since it assumes only one PAT index maps to each cache level. So if the user picks any other pat_index anything using i915_gem_object_has_cache_level() is likely to do the wrong thing. 
If we do switch to pat_index then I think cache_level should be made a purely uapi concept, and all the internal code should instead be made to query various aspects of the caching behaviour of the current pat_index (eg. is LLC caching enabled, and thus do I need to clflush?). -- Ville Syrjälä Intel
[PATCH v10 2/2] drm: add kms driver for loongson display controller
Loongson display controller IP has been integrated in both Loongson north bridge chipsets(ls7a1000/ls7a2000) and SoCs(ls2k1000/ls2k2000), it has been included in Loongson self-made BMC products. This display controller is a PCI device in all of the chips mentioned, it has two display pipes which support primary planes and cursor plane. For the DC in ls7a1000 and ls2k1000, each display pipe has a DVO output interface which provides RGB888 signals, vertical & horizontal synchronisations and the pixel clock. Each CRTC is able to support 1920x1080@60Hz, the maximum resolution is 2048x2048 according to the hardware spec. For the DC in LS7A2000, each display pipe is equipped with a built-in HDMI encoder which is compliant with the HDMI 1.4 specification, thus it supports 3840x2160@30Hz. The first display pipe is also equipped with a transparent vga encoder which is parallel with the HDMI encoder. The DC in LS7A2000 is more complete compared with the one in old chips, besides the above features, it has two hardware cursors, two hardware vblank counters and two scanout position recorder units. It also supports a tiled framebuffer format which can be used to scan out the framebuffer rendered by the LoongGPU directly. v1 -> v2: 1) Use hpd status reg when polling for ls7a2000 2) Fix all warnings that emerged when compiling with W=1 v2 -> v3: 1) Add COMPILE_TEST in Kconfig and make the driver off by default 2) Alphabetical sorting headers (Thomas) 3) Untangle register access functions as much as possible (Thomas) 4) Switch to TTM based memory manager and prefer cached mapping for Loongson SoC (Thomas) 5) Add chip id detection method, now all models are distinguishable. 6) Revise builtin HDMI phy driver, nearly all mainstream modes below 4K@30Hz are tested, this driver supports these modes very well including clone display mode and extend display mode. v3 -> v4: 1) Quickly fix a small mistake. 
v4 -> v5: 1) Drop potential support for Loongson 2K series SoC temporary, this part should be resend with the DT binding patch in the future. 2) Add per display pipe debugfs support to the builtin HDMI encoder. 3) Rewrite atomic_update() for hardware cursors plane(Thomas) 4) Rewrite encoder and connector initialization part, untangle it according to the chip(Thomas). v5 -> v6: 1) Remove stray code which didn't get used, say lsdc_of_get_reserved_ram 2) Fix all typos I could find, make sentences and code more readable 3) Untangle lsdc_hdmi*_connector_detect() function according to the pipe 4) After a serious consideration, we rename this driver as loongson. Because we also have drivers toward the LoongGPU IP in LS7A2000 and LS2K2000. Besides, there are also drivers about the external encoder, HDMI audio driver and vbios support etc. This patch only provide DC driver part, my teammate Li Yi believe that loongson will be more suitable for loongson graphics than lsdc in the long run. loongson.ko = LSDC + LoongGPU + encoders driver + vbios/DT ... v6 -> v7: 1) Add prime support, self-sharing works. sharing buffer with etnaviv is also tested, and it works with limitations. 2) Implement buffer objects tracking with list_head. 3) S3(sleep to RAM) is tested on ls3a5000+ls7a2000 evb and it works. 4) Rewrite lsdc_bo_move, since ttm core stop allocating resources during BO creation. Patch V1 ~ V6 of this series no longer works on latest kernel. Thus, we send V7 to revive them. v7 -> v8: 1) Zero a compile warnings on 32-bit platform, compile with W=1 2) Revise lsdc_bo_gpu_offset() and minor cleanup 3) Pageflip tested on the virtual terminal with following commands modetest -M loongson -s 32:1920x1080 -v modetest -M loongson -s 34:1920x1080 -v -F tiles It works like a charm, when running pageflip test with dual screen configuration, another two additional bo created by the modetest emerged, VRAM usage up to 40+MB, well we have at least 64MB, still enough. 
# cat bos bo[]: size: 8112kB VRAM bo[0001]: size: 16kB VRAM bo[0002]: size: 16kB VRAM bo[0003]: size:16208kB VRAM bo[0004]: size: 8112kB VRAM bo[0005]: size: 8112kB VRAM v8 -> v9: 1) Select I2C and I2C_ALGOBIT in Kconfig and should depend on MMU. 2) Using pci_get_domain_bus_and_slot to get the GPU device. 3) Other minor improvements. Those patches are tested on ls3a5000 + ls7a1000 CRB, ls3a5000 + ls7a2000 evb, and lemote a1901 board(ls3a4000 + ls7a1000). On loongson mips CPU, the write combine support should be enabled, to get a decent performance for writing framebuffer data to the VRAM. v9 -> v10: 1) Revise lsdc_drm_freeze() to implement S3 completely and correctly. I suddenly realized that pinned buffer can not move and VRAM lost power when sleep to RAM. Thus, the data in the buffer that is pinned
[PATCH v10 1/2] MAINTAINERS: add maintainers for DRM LOONGSON driver
This patch adds myself as maintainer to drm loongson driver Signed-off-by: Sui Jingfeng --- MAINTAINERS | 7 +++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9736e04d3bd3..d258c5b54407 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6919,6 +6919,13 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/lima/ F: include/uapi/drm/lima_drm.h +DRM DRIVERS FOR LOONGSON +M: Sui Jingfeng +L: dri-devel@lists.freedesktop.org +S: Supported +T: git git://anongit.freedesktop.org/drm/drm-misc +F: drivers/gpu/drm/loongson/ + DRM DRIVERS FOR MEDIATEK M: Chun-Kuang Hu M: Philipp Zabel -- 2.25.1
RE: [PATCH 5/7] drm/i915: use pat_index instead of cache_level
> Subject: Re: [PATCH 5/7] drm/i915: use pat_index instead of cache_level > > On Fri, Mar 31, 2023 at 11:38:28PM -0700, fei.y...@intel.com wrote: >> From: Fei Yang >> >> Currently the KMD is using enum i915_cache_level to set caching policy for >> buffer objects. This is flaky because the PAT index which really controls >> the caching behavior in PTE has far more levels than what's defined in the >> enum. > > Then just add more enum values. That would be really messy because PAT index is platform dependent, you would have to maintain many tables for the translation. > 'pat_index' is absolutely meaningless to the reader, it's just an > arbitrary number. Whereas 'cache_level' conveys how the thing is > actually going to get used and thus how the caches should behave. By design UMD's understand PAT index. Both UMD and KMD should stand on the same ground, the Bspec, to avoid any potential ambiguity. >> In addition, the PAT index is platform dependent, having to translate >> between i915_cache_level and PAT index is not reliable, > >If it's not reliable then the code is clearly broken. Perhaps the word "reliable" is a bit confusing here. What I really meant to say is 'difficult to maintain', or 'error-prone'. >> and makes the code more complicated. > > You have to translate somewhere anyway. Looks like you're now adding > translations the other way (pat_index->cache_level). How is that better? No, there is no pat_index->cache_level translation. There is only a small table for cache_level->pat_index translation. That is added for the convenience of KMD coding, no exposure to UMD. -Fei >> >> >From UMD's perspective there is also a necessity to set caching policy for >> performance fine tuning. It's much easier for the UMD to directly use PAT >> index because the behavior of each PAT index is clearly defined in Bspec. >> Having the abstracted i915_cache_level sitting in between would only cause >> more ambiguity. 
>> >> For these reasons this patch replaces i915_cache_level with PAT index. Also >> note, the cache_level is not completely removed yet, because the KMD still >> has the need of creating buffer objects with simple cache settings such as >> cached, uncached, or writethrough. For these simple cases, using cache_level >> would help simplify the code. >> >> Cc: Chris Wilson >> Cc: Matt Roper >> Signed-off-by: Fei Yang >> Reviewed-by: Andi Shyti >> --- >> drivers/gpu/drm/i915/display/intel_dpt.c | 12 +-- >> drivers/gpu/drm/i915/gem/i915_gem_domain.c| 27 ++ >> .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 10 ++- >> drivers/gpu/drm/i915/gem/i915_gem_mman.c | 3 +- >> drivers/gpu/drm/i915/gem/i915_gem_object.c| 39 - >> drivers/gpu/drm/i915/gem/i915_gem_object.h| 4 + >> .../gpu/drm/i915/gem/i915_gem_object_types.h | 18 ++-- >> drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- >> drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 16 ++-- >> .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- >> .../drm/i915/gem/selftests/i915_gem_migrate.c | 2 +- >> .../drm/i915/gem/selftests/i915_gem_mman.c| 2 +- >> drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 10 ++- >> drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 76 - >> drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 3 +- >> drivers/gpu/drm/i915/gt/intel_ggtt.c | 82 +-- >> drivers/gpu/drm/i915/gt/intel_gtt.h | 20 ++--- >> drivers/gpu/drm/i915/gt/intel_migrate.c | 47 ++- >> drivers/gpu/drm/i915/gt/intel_migrate.h | 13 ++- >> drivers/gpu/drm/i915/gt/intel_ppgtt.c | 6 +- >> drivers/gpu/drm/i915/gt/selftest_migrate.c| 47 ++- >> drivers/gpu/drm/i915/gt/selftest_reset.c | 8 +- >> drivers/gpu/drm/i915/gt/selftest_timeline.c | 2 +- >> drivers/gpu/drm/i915/gt/selftest_tlb.c| 4 +- >> drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++- >> drivers/gpu/drm/i915/i915_debugfs.c | 55 ++--- >> drivers/gpu/drm/i915/i915_gem.c | 16 +++- >> drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- >> drivers/gpu/drm/i915/i915_vma.c | 16 ++-- >> drivers/gpu/drm/i915/i915_vma.h | 2 +- >> 
drivers/gpu/drm/i915/i915_vma_types.h | 2 - >> drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +- >> .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- >> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 15 ++-- >> .../drm/i915/selftests/intel_memory_region.c | 4 +- >> drivers/gpu/drm/i915/selftests/mock_gtt.c | 8 +- >> 36 files changed, 361 insertions(+), 241 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c >> b/drivers/gpu/drm/i915/display/intel_dpt.c >> index c5eacfdba1a5..7c5fddb203ba 100644 >> --- a/drivers/gpu/drm/i915/display/intel_dpt.c >> +++ b/drivers/gpu/drm/i915/display/intel_dpt.c >> @@ -43,24 +43,24 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t >> pte) >> static
Re: [PATCH] drm/mediatek: dp: change the aux retries times when receiving AUX_DEFER
Hi, Xinlei: Xinlei Lee (李昕磊) 於 2023年4月3日 週一 下午5:18寫道: > > On Mon, 2023-04-03 at 11:49 +0800, Chun-Kuang Hu wrote: > > External email : Please do not click links or open attachments until > > you have verified the sender or the content. > > > > > > Hi, Xinlei: > > > > 於 2023年3月29日 週三 下午2:43寫道: > > > > > > From: Xinlei Lee > > > > > > DP 1.4a Section 2.8.7.1.5.6.1: > > > A DP Source device shall retry at least seven times upon receiving > > > AUX_DEFER before giving up the AUX transaction. > > > > > > The drm_dp_i2c_do_msg() function in the drm_dp_helper.c file will > > > judge the status of the msg->reply parameter passed to aux_transfer > > > for different processing. > > > > > > Fixes: f70ac097a2cf ("drm/mediatek: Add MT8195 Embedded DisplayPort > > > driver") > > > Signed-off-by: Xinlei Lee > > > --- > > > drivers/gpu/drm/mediatek/mtk_dp.c | 12 +--- > > > 1 file changed, 5 insertions(+), 7 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c > > > b/drivers/gpu/drm/mediatek/mtk_dp.c > > > index 1f94fcc144d3..767b71da31a4 100644 > > > --- a/drivers/gpu/drm/mediatek/mtk_dp.c > > > +++ b/drivers/gpu/drm/mediatek/mtk_dp.c > > > @@ -806,10 +806,9 @@ static int > > > mtk_dp_aux_wait_for_completion(struct mtk_dp *mtk_dp, bool is_read) > > > } > > > > > > static int mtk_dp_aux_do_transfer(struct mtk_dp *mtk_dp, bool > > > is_read, u8 cmd, > > > - u32 addr, u8 *buf, size_t length) > > > + u32 addr, u8 *buf, size_t length, > > > u8 *reply_cmd) > > > { > > > int ret; > > > - u32 reply_cmd; > > > > > > if (is_read && (length > DP_AUX_MAX_PAYLOAD_BYTES || > > > (cmd == DP_AUX_NATIVE_READ && !length))) > > > @@ -841,10 +840,10 @@ static int mtk_dp_aux_do_transfer(struct > > > mtk_dp *mtk_dp, bool is_read, u8 cmd, > > > /* Wait for feedback from sink device. 
*/ > > > ret = mtk_dp_aux_wait_for_completion(mtk_dp, is_read); > > > > > > - reply_cmd = mtk_dp_read(mtk_dp, MTK_DP_AUX_P0_3624) & > > > - AUX_RX_REPLY_COMMAND_AUX_TX_P0_MASK; > > > + *reply_cmd = mtk_dp_read(mtk_dp, MTK_DP_AUX_P0_3624) & > > > +AUX_RX_REPLY_COMMAND_AUX_TX_P0_MASK; > > > > > > - if (ret || reply_cmd) { > > > + if (ret) { > > > u32 phy_status = mtk_dp_read(mtk_dp, > > > MTK_DP_AUX_P0_3628) & > > > AUX_RX_PHY_STATE_AUX_TX_P0_MASK; > > > if (phy_status != > > > AUX_RX_PHY_STATE_AUX_TX_P0_RX_IDLE) { > > > @@ -2070,7 +2069,7 @@ static ssize_t mtk_dp_aux_transfer(struct > > > drm_dp_aux *mtk_aux, > > > ret = mtk_dp_aux_do_transfer(mtk_dp, is_read, > > > request, > > > msg->address + > > > accessed_bytes, > > > msg->buffer + > > > accessed_bytes, > > > -to_access); > > > +to_access, > > > >reply); > > > > > > if (ret) { > > > drm_info(mtk_dp->drm_dev, > > > @@ -2080,7 +2079,6 @@ static ssize_t mtk_dp_aux_transfer(struct > > > drm_dp_aux *mtk_aux, > > > accessed_bytes += to_access; > > > } while (accessed_bytes < msg->size); > > > > > > - msg->reply = DP_AUX_NATIVE_REPLY_ACK | > > > DP_AUX_I2C_REPLY_ACK; > > > > In your description, you just mention the retry count is 7 times, but > > you does not mention you should change the reply. Why you modify > > this? > > And where is the 7 times retry? > > > > Regards, > > Chun-Kuang. > > > > > return msg->size; > > > err: > > > msg->reply = DP_AUX_NATIVE_REPLY_NACK | > > > DP_AUX_I2C_REPLY_NACK; > > > -- > > > 2.18.0 > > > > > Hi CK: > > Thanks for your review! > > This patch is to fix some DP sinks that return AUX_DEFER, and the dp > driver does not handle it according to the specification. 
DP_v1.4a > spec 2.8.1.2 describes that if the sink returns AUX_DEFER, DPTX may > retry later: > > The logic before the modification is that reply_cmd returns ETIMEDOUT > if it is not AUX_ACK after the read operation, without considering the > retry operation when returning AUX_DEFER; > > The modified logic is to add parameters to mtk_dp_aux_do_transfer() to > store the return value of the sink. In the drm_dp_helper.c file, > drm_dp_i2c_do_msg calls aux->transfer and then performs retry > operation according to msg->reply. The 7 times specified in the spec > are also in this function defined in (max_retries). Applied to mediatek-drm-next [1], thanks. [1] https://git.kernel.org/pub/scm/linux/kernel/git/chunkuang.hu/linux.git/log/?h=mediatek-drm-next Regards, Chun-Kuang. > > Best Regards! > xinlei
Re: [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
On Mon, Apr 03, 2023 at 09:35:32AM -0700, Matt Roper wrote: > On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote: > > On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.y...@intel.com wrote: > > > From: Fei Yang > > > > > > To comply with the design that buffer objects shall have immutable > > > cache setting through out its life cycle, {set, get}_caching ioctl's > > > are no longer supported from MTL onward. With that change caching > > > policy can only be set at object creation time. The current code > > > applies a default (platform dependent) cache setting for all objects. > > > However this is not optimal for performance tuning. The patch extends > > > the existing gem_create uAPI to let user set PAT index for the object > > > at creation time. > > > > This is missing the whole justification for the new uapi. > > Why is MOCS not sufficient? > > PAT and MOCS are somewhat related, but they're not the same thing. The > general direction of the hardware architecture recently has been to > slowly dumb down MOCS and move more of the important memory/cache > control over to the PAT instead. On current platforms there is some > overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win" > for the specific fields that both can control), but MOCS doesn't have a > way to express things like snoop/coherency mode (on MTL), or class of > service (on PVC). And if you check some of the future platforms, the > hardware design starts packing even more stuff into the PAT (not just > cache behavior) which will never be handled by MOCS. Sigh. So the hardware designers screwed up MOCS yet again and instead of getting that fixed we are adding a new uapi to work around it? The IMO sane approach (which IIRC was the situation for a few platform generations at least) is that you just shove the PAT index into MOCS (or tell it to go look it up from the PTE). Why the heck did they not just stick with that? 
> > Also keep in mind that MOCS generally applies at the GPU instruction > level; although a lot of instructions have a field to provide a MOCS > index, or can use a MOCS already associated with a surface state, there > are still some that don't. PAT is the source of memory access > characteristics for anything that can't provide a MOCS directly. So what are the things that don't have MOCS and where we need some custom cache behaviour, and we already know all that at buffer creation time? -- Ville Syrjälä Intel
Re: [PATCH v4 0/5] docs & checkpatch: allow Closes tags with links
On Mon, 2023-04-03 at 18:23 +0200, Matthieu Baerts wrote: > Since v6.3, checkpatch.pl now complains about the use of "Closes:" tags > followed by a link [1]. It also complains if a "Reported-by:" tag is > followed by a "Closes:" one [2]. All these patches seems sensible, thanks. Assuming Linus approves the use of "Closes:" Acked-by: Joe Perches > As detailed in the first patch, this "Closes:" tag is used for a bit of > time, mainly by DRM and MPTCP subsystems. It is used by some bug > trackers to automate the closure of issues when a patch is accepted. > It is even planned to use this tag with bugzilla.kernel.org [3]. > > The first patch updates the documentation to explain what is this > "Closes:" tag and how/when to use it. The second patch modifies > checkpatch.pl to stop complaining about it. > > The DRM maintainers and their mailing list have been added in Cc as they > are probably interested by these two patches as well. > > [1] > https://lore.kernel.org/all/3b036087d80b8c0e07a46a1dbaaf4ad0d018f8d5.1674217480.git.li...@leemhuis.info/ > [2] > https://lore.kernel.org/all/bb5dfd55ea2026303ab2296f4a6df3da7dd64006.1674217480.git.li...@leemhuis.info/ > [3] > https://lore.kernel.org/linux-doc/20230315181205.f3av7h6owqzzw64p@meerkat.local/ > > Signed-off-by: Matthieu Baerts > --- > Note: After having re-read the comments from the v1, it is still unclear > to me if this "Closes:" can be accepted or not. But because it seems > that the future Bugzilla bot for kernel.org and regzbot would like to > use it as well, I'm sending here new versions. I'm sorry if I > misunderstood the comments from v1. Please tell me if I did. > > Changes in v4: > - Patches 1/5, 3/5 and 4/5 have been added to ask using the "Closes" tag > instead of the "Link" one for any bug reports. (Thorsten) > - The Fixes tags have been removed from patch 4/5. (Joe) > - The "Reported-by being followed by a link tag" check is now only > looking for the tag, not the URL which is done elsewhere in patch 5/5. 
> (Thorsten) > - A new patch has been added to fix a small issues in checkpatch.pl when > checking if "Reported-by:" tag is on the last line. > - Link to v3: > https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v3-0-d1bdcf31c...@tessares.net > > Changes in v3: > - Patch 1/4 now allow using the "Closes" tag with any kind of bug > reports, as long as the link is public. (Thorsten) > - The former patch 2/2 has been split in two: first to use a list for > the different "link" tags (Joe). Then to allow the 'Closes' tag. > - A new patch has been added to let checkpatch.pl checking if "Closes" > and "Links" are used with a URL. > - Link to v2: > https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v2-0-f4a417861...@tessares.net > > Changes in v2: > - The text on patch 1/2 has been reworked thanks to Jon, Bagas and > Thorsten. See the individual changelog on the patch for more details. > - Private bug trackers and invalid URLs are clearly marked as forbidden > to avoid being misused. (Linus) > - Rebased on top of Linus' repo. > - Link to v1: > https://lore.kernel.org/r/20230314-doc-checkpatch-closes-tag-v1-0-1b83072e9...@tessares.net > > --- > Matthieu Baerts (5): > docs: process: allow Closes tags with links > checkpatch: don't print the next line if not defined > checkpatch: use a list of "link" tags > checkpatch: allow Closes tags with links > checkpatch: check for misuse of the link tags > > Documentation/process/5.Posting.rst | 22 ++ > Documentation/process/submitting-patches.rst | 26 +++-- > scripts/checkpatch.pl| 43 > ++-- > 3 files changed, 70 insertions(+), 21 deletions(-) > --- > base-commit: 7e364e56293bb98cae1b55fd835f5991c4e96e7d > change-id: 20230314-doc-checkpatch-closes-tag-1731b57556b1 > > Best regards,
Re: [PATCH 2/2] phy: mtk-mipi-csi: add driver for CSI phy
Hi, Julien: Julien Stephan 於 2023年4月3日 週一 下午3:20寫道: > > From: Phi-bang Nguyen > > This is a new driver that supports the MIPI CSI CD-PHY for mediatek > mt8365 soc > > Signed-off-by: Louis Kuo > Signed-off-by: Phi-bang Nguyen > [Julien Stephan: use regmap] > [Julien Stephan: use GENMASK] > Co-developed-by: Julien Stephan > Signed-off-by: Julien Stephan > --- > .../bindings/phy/mediatek,csi-phy.yaml| 9 +- > MAINTAINERS | 1 + > drivers/phy/mediatek/Kconfig | 8 + > drivers/phy/mediatek/Makefile | 2 + > .../phy/mediatek/phy-mtk-mipi-csi-rx-reg.h| 435 ++ > drivers/phy/mediatek/phy-mtk-mipi-csi.c | 392 > 6 files changed, 845 insertions(+), 2 deletions(-) > create mode 100644 drivers/phy/mediatek/phy-mtk-mipi-csi-rx-reg.h > create mode 100644 drivers/phy/mediatek/phy-mtk-mipi-csi.c > [snip] > + > +#define REGMAP_BIT(map, reg, field, val) \ > + regmap_update_bits((map), reg, reg##_##field##_MASK, \ > + (val) << reg##_##field##_SHIFT) > + Use FIELD_PREP() macro so you can drop the definition of SHIFT symbol. Regards, Chun-Kuang.
Re: [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote: > On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.y...@intel.com wrote: > > From: Fei Yang > > > > To comply with the design that buffer objects shall have immutable > > cache setting through out its life cycle, {set, get}_caching ioctl's > > are no longer supported from MTL onward. With that change caching > > policy can only be set at object creation time. The current code > > applies a default (platform dependent) cache setting for all objects. > > However this is not optimal for performance tuning. The patch extends > > the existing gem_create uAPI to let user set PAT index for the object > > at creation time. > > This is missing the whole justification for the new uapi. > Why is MOCS not sufficient? PAT and MOCS are somewhat related, but they're not the same thing. The general direction of the hardware architecture recently has been to slowly dumb down MOCS and move more of the important memory/cache control over to the PAT instead. On current platforms there is some overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win" for the specific fields that both can control), but MOCS doesn't have a way to express things like snoop/coherency mode (on MTL), or class of service (on PVC). And if you check some of the future platforms, the hardware design starts packing even more stuff into the PAT (not just cache behavior) which will never be handled by MOCS. Also keep in mind that MOCS generally applies at the GPU instruction level; although a lot of instructions have a field to provide a MOCS index, or can use a MOCS already associated with a surface state, there are still some that don't. PAT is the source of memory access characteristics for anything that can't provide a MOCS directly. 
Matt > > > The new extension is platform independent, so UMD's can switch to using > > this extension for older platforms as well, while {set, get}_caching are > > still supported on these legacy paltforms for compatibility reason. > > > > Cc: Chris Wilson > > Cc: Matt Roper > > Signed-off-by: Fei Yang > > Reviewed-by: Andi Shyti > > --- > > drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 > > include/uapi/drm/i915_drm.h| 36 ++ > > tools/include/uapi/drm/i915_drm.h | 36 ++ > > 3 files changed, 105 insertions(+) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c > > b/drivers/gpu/drm/i915/gem/i915_gem_create.c > > index e76c9703680e..1c6e2034d28e 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c > > @@ -244,6 +244,7 @@ struct create_ext { > > unsigned int n_placements; > > unsigned int placement_mask; > > unsigned long flags; > > + unsigned int pat_index; > > }; > > > > static void repr_placements(char *buf, size_t size, > > @@ -393,11 +394,39 @@ static int ext_set_protected(struct > > i915_user_extension __user *base, void *data > > return 0; > > } > > > > +static int ext_set_pat(struct i915_user_extension __user *base, void *data) > > +{ > > + struct create_ext *ext_data = data; > > + struct drm_i915_private *i915 = ext_data->i915; > > + struct drm_i915_gem_create_ext_set_pat ext; > > + unsigned int max_pat_index; > > + > > + BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != > > +offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); > > + > > + if (copy_from_user(, base, sizeof(ext))) > > + return -EFAULT; > > + > > + max_pat_index = INTEL_INFO(i915)->max_pat_index; > > + > > + if (ext.pat_index > max_pat_index) { > > + drm_dbg(>drm, "PAT index is invalid: %u\n", > > + ext.pat_index); > > + return -EINVAL; > > + } > > + > > + ext_data->pat_index = ext.pat_index; > > + > > + return 0; > > +} > > + > > static const i915_user_extension_fn create_extensions[] = { > > 
[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements, > > [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected, > > + [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat, > > }; > > > > +#define PAT_INDEX_NOT_SET 0x > > /** > > * Creates a new mm object and returns a handle to it. > > * @dev: drm device pointer > > @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void > > *data, > > if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) > > return -EINVAL; > > > > + ext_data.pat_index = PAT_INDEX_NOT_SET; > > ret = i915_user_extensions(u64_to_user_ptr(args->extensions), > >create_extensions, > >ARRAY_SIZE(create_extensions), > > @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void > > *data, > > if (IS_ERR(obj)) > > return PTR_ERR(obj); > > > > + if (ext_data.pat_index != PAT_INDEX_NOT_SET) > >