Re: [Nouveau] [PATCH drm-next v4 03/14] drm: manager to keep track of GPUs VA mappings

2023-06-06 Thread kernel test robot
Hi Danilo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 33a86170888b7e4aa0cea94ebb9c67180139cea9]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v4/20230607-063442
base:   33a86170888b7e4aa0cea94ebb9c67180139cea9
patch link:https://lore.kernel.org/r/20230606223130.6132-4-dakr%40redhat.com
patch subject: [PATCH drm-next v4 03/14] drm: manager to keep track of GPUs VA 
mappings
config: alpha-allyesconfig 
(https://download.01.org/0day-ci/archive/20230607/202306071203.gn8jrmlz-...@intel.com/config)
compiler: alpha-linux-gcc (GCC) 12.3.0
reproduce (this is a W=1 build):
mkdir -p ~/bin
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 33a86170888b7e4aa0cea94ebb9c67180139cea9
b4 shazam 
https://lore.kernel.org/r/20230606223130.6132-4-d...@redhat.com
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross 
W=1 O=build_dir ARCH=alpha olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross 
W=1 O=build_dir ARCH=alpha SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202306071203.gn8jrmlz-...@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/drm_gpuva_mgr.c: In function '__drm_gpuva_sm_map':
>> drivers/gpu/drm/drm_gpuva_mgr.c:1032:32: warning: variable 'prev' set but 
>> not used [-Wunused-but-set-variable]
1032 | struct drm_gpuva *va, *prev = NULL;
 |^~~~


vim +/prev +1032 drivers/gpu/drm/drm_gpuva_mgr.c

  1024  
  1025  static int
  1026  __drm_gpuva_sm_map(struct drm_gpuva_manager *mgr,
  1027 const struct drm_gpuva_fn_ops *ops, void *priv,
  1028 u64 req_addr, u64 req_range,
  1029 struct drm_gem_object *req_obj, u64 req_offset)
  1030  {
  1031  DRM_GPUVA_ITER(it, mgr, req_addr);
> 1032  struct drm_gpuva *va, *prev = NULL;
  1033  u64 req_end = req_addr + req_range;
  1034  int ret;
  1035  
  1036  if (unlikely(!drm_gpuva_in_mm_range(mgr, req_addr, req_range)))
  1037  return -EINVAL;
  1038  
  1039  if (unlikely(drm_gpuva_in_kernel_node(mgr, req_addr, 
req_range)))
  1040  return -EINVAL;
  1041  
  1042  drm_gpuva_iter_for_each_range(va, it, req_end) {
  1043  struct drm_gem_object *obj = va->gem.obj;
  1044  u64 offset = va->gem.offset;
  1045  u64 addr = va->va.addr;
  1046  u64 range = va->va.range;
  1047  u64 end = addr + range;
  1048  bool merge = !!va->gem.obj;
  1049  
  1050  if (addr == req_addr) {
  1051  merge &= obj == req_obj &&
  1052   offset == req_offset;
  1053  
  1054  if (end == req_end) {
  1055  ret = op_unmap_cb(ops, , priv, va, 
merge);
  1056  if (ret)
  1057  return ret;
  1058  break;
  1059  }
  1060  
  1061  if (end < req_end) {
  1062  ret = op_unmap_cb(ops, , priv, va, 
merge);
  1063  if (ret)
  1064  return ret;
  1065  goto next;
  1066  }
  1067  
  1068  if (end > req_end) {
  1069  struct drm_gpuva_op_map n = {
  1070  .va.addr = req_end,
  1071  .va.range = range - req_range,
  1072  .gem.obj = obj,
  1073  .gem.offset = offset + 
req_range,
  1074  };
  1075  struct drm_gpuva_op_unmap u = {
  1076  .va = va,
  1077  .keep = merge,
  1078  };
  1079  
  1080  ret = op_remap_cb(ops, , priv, NULL, 
, );
  1081  if (ret)
  1082  return ret;
  1083  break;
  1084  }
  1085  } else if (addr < req_addr) {
  1086  u64 ls_range = req_addr - 

Re: [Nouveau] [PATCH drm-next v4 04/14] drm: debugfs: provide infrastructure to dump a DRM GPU VA space

2023-06-06 Thread kernel test robot
Hi Danilo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 33a86170888b7e4aa0cea94ebb9c67180139cea9]

url:
https://github.com/intel-lab-lkp/linux/commits/Danilo-Krummrich/drm-execution-context-for-GEM-buffers-v4/20230607-063442
base:   33a86170888b7e4aa0cea94ebb9c67180139cea9
patch link:https://lore.kernel.org/r/20230606223130.6132-5-dakr%40redhat.com
patch subject: [PATCH drm-next v4 04/14] drm: debugfs: provide infrastructure 
to dump a DRM GPU VA space
config: m68k-allyesconfig 
(https://download.01.org/0day-ci/archive/20230607/202306070751.26wx3ive-...@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.3.0
reproduce (this is a W=1 build):
mkdir -p ~/bin
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
git checkout 33a86170888b7e4aa0cea94ebb9c67180139cea9
b4 shazam 
https://lore.kernel.org/r/20230606223130.6132-5-d...@redhat.com
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross 
W=1 O=build_dir ARCH=m68k olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.3.0 ~/bin/make.cross 
W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/gpu/drm/

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot 
| Closes: 
https://lore.kernel.org/oe-kbuild-all/202306070751.26wx3ive-...@intel.com/

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/drm_debugfs.c: In function 'drm_debugfs_gpuva_info':
>> drivers/gpu/drm/drm_debugfs.c:213:28: warning: cast from pointer to integer 
>> of different size [-Wpointer-to-int-cast]
 213 |(u64)va->gem.obj, va->gem.offset);
 |^


vim +213 drivers/gpu/drm/drm_debugfs.c

   178  
   179  /**
   180   * drm_debugfs_gpuva_info - dump the given DRM GPU VA space
   181   * @m: pointer to the &seq_file to write
   182   * @mgr: the &drm_gpuva_manager representing the GPU VA space
   183   *
   184   * Dumps the GPU VA mappings of a given DRM GPU VA manager.
   185   *
   186   * For each DRM GPU VA space drivers should call this function from 
their
   187   * &drm_info_list's show callback.
   188   *
   189   * Returns: 0 on success, -ENODEV if the &mgr is not initialized
   190   */
   191  int drm_debugfs_gpuva_info(struct seq_file *m,
   192 struct drm_gpuva_manager *mgr)
   193  {
   194  DRM_GPUVA_ITER(it, mgr, 0);
   195  struct drm_gpuva *va, *kva = >kernel_alloc_node;
   196  
   197  if (!mgr->name)
   198  return -ENODEV;
   199  
   200  seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n",
   201 mgr->name, mgr->mm_start, mgr->mm_start + 
mgr->mm_range);
   202  seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n",
   203 kva->va.addr, kva->va.addr + kva->va.range);
   204  seq_puts(m, "\n");
   205  seq_puts(m, " VAs | start  | range  | 
end| object | object offset\n");
   206  seq_puts(m, 
"-\n");
   207  drm_gpuva_iter_for_each(va, it) {
   208  if (unlikely(va == >kernel_alloc_node))
   209  continue;
   210  
   211  seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx 
| 0x%016llx | 0x%016llx\n",
   212 va->va.addr, va->va.range, va->va.addr + 
va->va.range,
 > 213 (u64)va->gem.obj, va->gem.offset);
   214  }
   215  
   216  return 0;
   217  }
   218  EXPORT_SYMBOL(drm_debugfs_gpuva_info);
   219  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


[Nouveau] [PATCH drm-next v4 14/14] drm/nouveau: debugfs: implement DRM GPU VA debugfs

2023-06-06 Thread Danilo Krummrich
Provide the driver indirection iterating over all DRM GPU VA spaces to
enable the common 'gpuvas' debugfs file for dumping DRM GPU VA spaces.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_debugfs.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c 
b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 99d022a91afc..053f703f2f68 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -203,6 +203,44 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct 
file *file)
return single_open(file, nouveau_debugfs_pstate_get, inode->i_private);
 }
 
+static void
+nouveau_debugfs_gpuva_regions(struct seq_file *m, struct nouveau_uvmm *uvmm)
+{
+   MA_STATE(mas, >region_mt, 0, 0);
+   struct nouveau_uvma_region *reg;
+
+   seq_puts  (m, " VA regions  | start  | range  | 
end\n");
+   seq_puts  (m, 
"\n");
+   mas_for_each(, reg, ULONG_MAX)
+   seq_printf(m, " | 0x%016llx | 0x%016llx | 
0x%016llx\n",
+  reg->va.addr, reg->va.range, reg->va.addr + 
reg->va.range);
+}
+
+static int
+nouveau_debugfs_gpuva(struct seq_file *m, void *data)
+{
+   struct drm_info_node *node = (struct drm_info_node *) m->private;
+   struct nouveau_drm *drm = nouveau_drm(node->minor->dev);
+   struct nouveau_cli *cli;
+
+   mutex_lock(>clients_lock);
+   list_for_each_entry(cli, >clients, head) {
+   struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+
+   if (!uvmm)
+   continue;
+
+   nouveau_uvmm_lock(uvmm);
+   drm_debugfs_gpuva_info(m, >umgr);
+   seq_puts(m, "\n");
+   nouveau_debugfs_gpuva_regions(m, uvmm);
+   nouveau_uvmm_unlock(uvmm);
+   }
+   mutex_unlock(>clients_lock);
+
+   return 0;
+}
+
 static const struct file_operations nouveau_pstate_fops = {
.owner = THIS_MODULE,
.open = nouveau_debugfs_pstate_open,
@@ -214,6 +252,7 @@ static const struct file_operations nouveau_pstate_fops = {
 static struct drm_info_list nouveau_debugfs_list[] = {
{ "vbios.rom",  nouveau_debugfs_vbios_image, 0, NULL },
{ "strap_peek", nouveau_debugfs_strap_peek, 0, NULL },
+   DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL),
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 13/14] drm/nouveau: implement new VM_BIND uAPI

2023-06-06 Thread Danilo Krummrich
This commit provides the implementation for the new uapi motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl for UMDs to specify the portion of VA
   space managed by the kernel and userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPU's VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
   UMDs can request the named operations to be processed either
   synchronously or asynchronously. It supports DRM syncobjs
   (incl. timelines) as synchronization mechanism. The management of the
   GPU VA mappings is implemented with the DRM GPU VA manager.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The
   execution happens asynchronously. It supports DRM syncobj (incl.
   timelines) as synchronization mechanism. DRM GEM object locking is
   handled with drm_exec.

Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC use the DRM
GPU scheduler for the asynchronous paths.

Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/driver-uapi.rst   |3 +
 drivers/gpu/drm/nouveau/Kbuild  |3 +
 drivers/gpu/drm/nouveau/Kconfig |2 +
 drivers/gpu/drm/nouveau/nouveau_abi16.c |   24 +
 drivers/gpu/drm/nouveau/nouveau_abi16.h |1 +
 drivers/gpu/drm/nouveau/nouveau_bo.c|  147 +-
 drivers/gpu/drm/nouveau/nouveau_bo.h|2 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c   |   27 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h   |   59 +-
 drivers/gpu/drm/nouveau/nouveau_exec.c  |  418 +
 drivers/gpu/drm/nouveau/nouveau_exec.h  |   54 +
 drivers/gpu/drm/nouveau/nouveau_gem.c   |   25 +-
 drivers/gpu/drm/nouveau/nouveau_mem.h   |5 +
 drivers/gpu/drm/nouveau/nouveau_prime.c |2 +-
 drivers/gpu/drm/nouveau/nouveau_sched.c |  461 ++
 drivers/gpu/drm/nouveau/nouveau_sched.h |  123 ++
 drivers/gpu/drm/nouveau/nouveau_uvmm.c  | 1898 +++
 drivers/gpu/drm/nouveau/nouveau_uvmm.h  |  107 ++
 18 files changed, 3296 insertions(+), 65 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_exec.h
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_sched.h
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_uvmm.h

diff --git a/Documentation/gpu/driver-uapi.rst 
b/Documentation/gpu/driver-uapi.rst
index 9c7ca6e33a68..c08bcbb95fb3 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -13,4 +13,7 @@ drm/nouveau uAPI
 VM_BIND / EXEC uAPI
 ---
 
+.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c
+:doc: Overview
+
 .. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 5e5617006da5..cf6b3a80c0c8 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_exec.o
+nouveau-y += nouveau_sched.o
+nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index a70bd65e1400..c52e8096cca4 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
select DRM_KMS_HELPER
select DRM_TTM
select DRM_TTM_HELPER
+   select DRM_EXEC
+   select DRM_SCHED
select I2C
select I2C_ALGOBIT
select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c 
b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 82dab51d8aeb..a112f28681d3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -35,6 +35,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_sched.h"
 
 static struct nouveau_abi16 *
 nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
struct nouveau_abi16_ntfy *ntfy, *temp;
 
+   /* When a client exits without waiting for its queued up jobs to
+* finish it might happen that we fault the channel. This is due to
+* drm_file_free() calling drm_gem_release() before the postclose()
+* callback. Hence, we can't tear down this scheduler entity before
+* uvmm mappings are unmapped. Currently, we can't detect this case.
+*
+* However, this should be rare and harmless, since the channel isn't
+* needed anymore.
+*/
+   nouveau_sched_entity_fini(>sched_entity);
+
/* wait for all activity to 

[Nouveau] [PATCH drm-next v4 12/14] drm/nouveau: nvkm/vmm: implement raw ops to manage uvmm

2023-06-06 Thread Danilo Krummrich
The new VM_BIND UAPI uses the DRM GPU VA manager to manage the VA space.
Hence, we need a way to manipulate the MMU's page tables without going
through the internal range allocator implemented by nvkm/vmm.

This patch adds a raw interface for nvkm/vmm to pass the responsibility
for managing the address space and the corresponding map/unmap/sparse
operations to the upper layers.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/include/nvif/if000c.h |  26 ++-
 drivers/gpu/drm/nouveau/include/nvif/vmm.h|  19 +-
 .../gpu/drm/nouveau/include/nvkm/subdev/mmu.h |  20 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_vmm.c |   4 +-
 drivers/gpu/drm/nouveau/nvif/vmm.c| 100 +++-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c| 213 --
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 197 
 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h |  25 ++
 .../drm/nouveau/nvkm/subdev/mmu/vmmgf100.c|  16 +-
 .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c|  16 +-
 .../gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c |  27 ++-
 12 files changed, 566 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h 
b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
index 9c7ff56831c5..a5a182b3c28d 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
@@ -3,7 +3,10 @@
 struct nvif_vmm_v0 {
__u8  version;
__u8  page_nr;
-   __u8  managed;
+#define NVIF_VMM_V0_TYPE_UNMANAGED 0x00
+#define NVIF_VMM_V0_TYPE_MANAGED   0x01
+#define NVIF_VMM_V0_TYPE_RAW   0x02
+   __u8  type;
__u8  pad03[5];
__u64 addr;
__u64 size;
@@ -17,6 +20,7 @@ struct nvif_vmm_v0 {
 #define NVIF_VMM_V0_UNMAP  0x04
 #define NVIF_VMM_V0_PFNMAP 0x05
 #define NVIF_VMM_V0_PFNCLR 0x06
+#define NVIF_VMM_V0_RAW0x07
 #define NVIF_VMM_V0_MTHD(i) ((i) + 
0x80)
 
 struct nvif_vmm_page_v0 {
@@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 {
__u64 addr;
 };
 
+struct nvif_vmm_raw_v0 {
+   __u8 version;
+#define NVIF_VMM_RAW_V0_GET0x0
+#define NVIF_VMM_RAW_V0_PUT0x1
+#define NVIF_VMM_RAW_V0_MAP0x2
+#define NVIF_VMM_RAW_V0_UNMAP  0x3
+#define NVIF_VMM_RAW_V0_SPARSE 0x4
+   __u8  op;
+   __u8  sparse;
+   __u8  ref;
+   __u8  shift;
+   __u32 argc;
+   __u8  pad01[7];
+   __u64 addr;
+   __u64 size;
+   __u64 offset;
+   __u64 memory;
+   __u64 argv;
+};
+
 struct nvif_vmm_pfnmap_v0 {
__u8  version;
__u8  page;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h 
b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
index a2ee92201ace..0ecedd0ee0a5 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
@@ -4,6 +4,12 @@
 struct nvif_mem;
 struct nvif_mmu;
 
+enum nvif_vmm_type {
+   UNMANAGED,
+   MANAGED,
+   RAW,
+};
+
 enum nvif_vmm_get {
ADDR,
PTES,
@@ -30,8 +36,9 @@ struct nvif_vmm {
int page_nr;
 };
 
-int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool 
managed,
- u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *);
+int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass,
+ enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc,
+ struct nvif_vmm *);
 void nvif_vmm_dtor(struct nvif_vmm *);
 int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse,
 u8 page, u8 align, u64 size, struct nvif_vma *);
@@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *);
 int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc,
 struct nvif_mem *, u64 offset);
 int nvif_vmm_unmap(struct nvif_vmm *, u64);
+
+int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift,
+void *argv, u32 argc, struct nvif_mem *mem, u64 offset);
+int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size,
+  u8 shift, bool sparse);
+int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 70e7887ef4b4..2fd2f2433fc7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -17,6 +17,7 @@ 

[Nouveau] [PATCH drm-next v4 10/14] drm/nouveau: fence: fail to emit when fence context is killed

2023-06-06 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting
fences.

If a fence context is killed, e.g. due to a channel fault, jobs which
are already queued for execution might still emit new fences. In such a
case a job would hang forever.

To fix that, fail to emit a new fence on a killed fence context with
-ENODEV to unblock the job.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_fence.c | 7 +++
 drivers/gpu/drm/nouveau/nouveau_fence.h | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c 
b/drivers/gpu/drm/nouveau/nouveau_fence.c
index e946408f945b..77c739a55b19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, 
int error)
if (nouveau_fence_signal(fence))
nvif_event_block(>event);
}
+   fctx->killed = 1;
spin_unlock_irqrestore(>lock, flags);
 }
 
@@ -229,6 +230,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct 
nouveau_channel *chan)
dma_fence_get(>base);
spin_lock_irq(>lock);
 
+   if (unlikely(fctx->killed)) {
+   spin_unlock_irq(>lock);
+   dma_fence_put(>base);
+   return -ENODEV;
+   }
+
if (nouveau_fence_update(chan, fctx))
nvif_event_block(>event);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h 
b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 7c73c7c9834a..2c72d96ef17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -44,7 +44,7 @@ struct nouveau_fence_chan {
char name[32];
 
struct nvif_event event;
-   int notify_ref, dead;
+   int notify_ref, dead, killed;
 };
 
 struct nouveau_fence_priv {
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 11/14] drm/nouveau: chan: provide nouveau_channel_kill()

2023-06-06 Thread Danilo Krummrich
The new VM_BIND UAPI implementation introduced in subsequent commits
will allow asynchronous jobs processing push buffers and emitting fences.

If a job times out, we need a way to recover from this situation. For
now, simply kill the channel to unblock all hung up jobs and signal
userspace that the device is dead on the next EXEC or VM_BIND ioctl.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_chan.c | 14 +++---
 drivers/gpu/drm/nouveau/nouveau_chan.h |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index f47c0363683c..a975f8b0e0e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in 
VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+void
+nouveau_channel_kill(struct nouveau_channel *chan)
+{
+   atomic_set(>killed, 1);
+   if (chan->fence)
+   nouveau_fence_context_kill(chan->fence, -ENODEV);
+}
+
 static int
 nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 {
@@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, 
u32 repc)
struct nouveau_cli *cli = (void *)chan->user.client;
 
NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
-   atomic_set(>killed, 1);
-   if (chan->fence)
-   nouveau_fence_context_kill(chan->fence, -ENODEV);
+
+   if (unlikely(!atomic_read(>killed)))
+   nouveau_channel_kill(chan);
 
return NVIF_EVENT_DROP;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h 
b/drivers/gpu/drm/nouveau/nouveau_chan.h
index e06a8ffed31a..e483f4a254da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -65,6 +65,7 @@ int  nouveau_channel_new(struct nouveau_drm *, struct 
nvif_device *, bool priv,
 u32 vram, u32 gart, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
+void nouveau_channel_kill(struct nouveau_channel *);
 
 extern int nouveau_vram_pushbuf;
 
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 09/14] drm/nouveau: fence: separate fence alloc and emit

2023-06-06 Thread Danilo Krummrich
The new (VM_BIND) UAPI exports DMA fences through DRM syncobjs. Hence,
in order to emit fences within DMA fence signalling critical sections
(e.g. as typically done in the DRM GPU scheduler's run_job() callback) we
need to separate fence allocation and fence emitting.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/dispnv04/crtc.c |  9 -
 drivers/gpu/drm/nouveau/nouveau_bo.c| 52 +++--
 drivers/gpu/drm/nouveau/nouveau_chan.c  |  6 ++-
 drivers/gpu/drm/nouveau/nouveau_dmem.c  |  9 +++--
 drivers/gpu/drm/nouveau/nouveau_fence.c | 16 +++-
 drivers/gpu/drm/nouveau/nouveau_fence.h |  3 +-
 drivers/gpu/drm/nouveau/nouveau_gem.c   |  5 ++-
 7 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c 
b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x);
PUSH_KICK(push);
 
-   ret = nouveau_fence_new(chan, false, pfence);
+   ret = nouveau_fence_new(pfence);
if (ret)
goto fail;
 
+   ret = nouveau_fence_emit(*pfence, chan);
+   if (ret)
+   goto fail_fence_unref;
+
return 0;
+
+fail_fence_unref:
+   nouveau_fence_unref(pfence);
 fail:
spin_lock_irqsave(>event_lock, flags);
list_del(>head);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 057bc995f19b..e9cbbf594e6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,29 +820,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int 
evict,
mutex_lock(>mutex);
else
mutex_lock_nested(>mutex, SINGLE_DEPTH_NESTING);
+
ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, 
ctx->interruptible);
-   if (ret == 0) {
-   ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-   if (ret == 0) {
-   ret = nouveau_fence_new(chan, false, );
-   if (ret == 0) {
-   /* TODO: figure out a better solution here
-*
-* wait on the fence here explicitly as going 
through
-* ttm_bo_move_accel_cleanup somehow doesn't 
seem to do it.
-*
-* Without this the operation can timeout and 
we'll fallback to a
-* software copy, which might take several 
minutes to finish.
-*/
-   nouveau_fence_wait(fence, false, false);
-   ret = ttm_bo_move_accel_cleanup(bo,
-   >base,
-   evict, false,
-   new_reg);
-   nouveau_fence_unref();
-   }
-   }
+   if (ret)
+   goto out_unlock;
+
+   ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+   if (ret)
+   goto out_unlock;
+
+   ret = nouveau_fence_new();
+   if (ret)
+   goto out_unlock;
+
+   ret = nouveau_fence_emit(fence, chan);
+   if (ret) {
+   nouveau_fence_unref();
+   goto out_unlock;
}
+
+   /* TODO: figure out a better solution here
+*
+* wait on the fence here explicitly as going through
+* ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+*
+* Without this the operation can timeout and we'll fallback to a
+* software copy, which might take several minutes to finish.
+*/
+   nouveau_fence_wait(fence, false, false);
+   ret = ttm_bo_move_accel_cleanup(bo, >base, evict, false,
+   new_reg);
+   nouveau_fence_unref();
+
+out_unlock:
mutex_unlock(>mutex);
return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 1068abe41024..f47c0363683c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -62,9 +62,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
struct nouveau_fence *fence = NULL;
int ret;
 
-   ret = nouveau_fence_new(chan, false, );
+   ret = nouveau_fence_new();
if (!ret) {
-   ret = nouveau_fence_wait(fence, false, false);
+   ret = nouveau_fence_emit(fence, chan);
+   if (!ret)
+ 

[Nouveau] [PATCH drm-next v4 08/14] drm/nouveau: move usercopy helpers to nouveau_drv.h

2023-06-06 Thread Danilo Krummrich
Move the usercopy helpers to a common driver header file to make them
usable for the new API added in subsequent commits.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_drv.h | 26 ++
 drivers/gpu/drm/nouveau/nouveau_gem.c | 26 --
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 81350e685b50..20a7f31b9082 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -130,6 +130,32 @@ nouveau_cli(struct drm_file *fpriv)
return fpriv ? fpriv->driver_priv : NULL;
 }
 
+static inline void
+u_free(void *addr)
+{
+   kvfree(addr);
+}
+
+static inline void *
+u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
+{
+   void *mem;
+   void __user *userptr = (void __force __user *)(uintptr_t)user;
+
+   size *= nmemb;
+
+   mem = kvmalloc(size, GFP_KERNEL);
+   if (!mem)
+   return ERR_PTR(-ENOMEM);
+
+   if (copy_from_user(mem, userptr, size)) {
+   u_free(mem);
+   return ERR_PTR(-EFAULT);
+   }
+
+   return mem;
+}
+
 #include 
 #include 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 45ca4eb98f54..a48f42aaeab9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -613,32 +613,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
return 0;
 }
 
-static inline void
-u_free(void *addr)
-{
-   kvfree(addr);
-}
-
-static inline void *
-u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
-{
-   void *mem;
-   void __user *userptr = (void __force __user *)(uintptr_t)user;
-
-   size *= nmemb;
-
-   mem = kvmalloc(size, GFP_KERNEL);
-   if (!mem)
-   return ERR_PTR(-ENOMEM);
-
-   if (copy_from_user(mem, userptr, size)) {
-   u_free(mem);
-   return ERR_PTR(-EFAULT);
-   }
-
-   return mem;
-}
-
 static int
 nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
struct drm_nouveau_gem_pushbuf *req,
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 06/14] drm/nouveau: get vmm via nouveau_cli_vmm()

2023-06-06 Thread Danilo Krummrich
Provide a getter function for the client's current vmm context. Since
we'll add a new (u)vmm context for UMD bindings in subsequent commits,
this will keep the code clean.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c   | 2 +-
 drivers/gpu/drm/nouveau/nouveau_chan.c | 2 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h  | 9 +
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 6 +++---
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c2ec91cc845d..7724fe63067d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -204,7 +204,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
struct nvif_mmu *mmu = >mmu;
-   struct nvif_vmm *vmm = cli->svm.cli ? >svm.vmm : >vmm.vmm;
+   struct nvif_vmm *vmm = _cli_vmm(cli)->vmm;
int i, pi = -1;
 
if (!*size) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
b/drivers/gpu/drm/nouveau/nouveau_chan.c
index e648ecd0c1a0..1068abe41024 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -148,7 +148,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct 
nvif_device *device,
 
chan->device = device;
chan->drm = drm;
-   chan->vmm = cli->svm.cli ? >svm : >vmm;
+   chan->vmm = nouveau_cli_vmm(cli);
atomic_set(>killed, 0);
 
/* allocate memory for dma push buffer */
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b5de312a523f..81350e685b50 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -112,6 +112,15 @@ struct nouveau_cli_work {
struct dma_fence_cb cb;
 };
 
+static inline struct nouveau_vmm *
+nouveau_cli_vmm(struct nouveau_cli *cli)
+{
+   if (cli->svm.cli)
+   return >svm;
+
+   return >vmm;
+}
+
 void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *,
struct nouveau_cli_work *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c 
b/drivers/gpu/drm/nouveau/nouveau_gem.c
index ab9062e50977..45ca4eb98f54 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -103,7 +103,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-   struct nouveau_vmm *vmm = cli->svm.cli ? >svm : >vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -180,7 +180,7 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct 
drm_file *file_priv)
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
struct device *dev = drm->dev->dev;
-   struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
int ret;
 
@@ -269,7 +269,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct 
drm_gem_object *gem,
 {
struct nouveau_cli *cli = nouveau_cli(file_priv);
struct nouveau_bo *nvbo = nouveau_gem_object(gem);
-   struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+   struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
struct nouveau_vma *vma;
 
if (is_power_of_2(nvbo->valid_domains))
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 07/14] drm/nouveau: bo: initialize GEM GPU VA interface

2023-06-06 Thread Danilo Krummrich
Initialize the GEM's DRM GPU VA manager interface in preparation for the
(u)vmm implementation, provided by subsequent commits, to make use of it.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7724fe63067d..057bc995f19b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -215,11 +215,14 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int 
*align, u32 domain,
nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
if (!nvbo)
return ERR_PTR(-ENOMEM);
+
INIT_LIST_HEAD(&nvbo->head);
INIT_LIST_HEAD(&nvbo->entry);
INIT_LIST_HEAD(&nvbo->vma_list);
nvbo->bo.bdev = &drm->ttm.bdev;
 
+   drm_gem_gpuva_init(&nvbo->bo.base);
+
/* This is confusing, and doesn't actually mean we want an uncached
 * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated
 * into in nouveau_gem_new().
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 05/14] drm/nouveau: new VM_BIND uapi interfaces

2023-06-06 Thread Danilo Krummrich
This commit provides the interfaces for the new UAPI motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl. UMDs can provide a kernel reserved
   VA area.

2) Bind and unbind GPU VA space mappings via the new
   DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC support
asynchronous processing with DRM syncobjs as the synchronization mechanism.

By default, DRM_IOCTL_NOUVEAU_VM_BIND is processed synchronously;
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

Co-authored-by: Dave Airlie 
Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/driver-uapi.rst |   8 ++
 include/uapi/drm/nouveau_drm.h| 209 ++
 2 files changed, 217 insertions(+)

diff --git a/Documentation/gpu/driver-uapi.rst 
b/Documentation/gpu/driver-uapi.rst
index 4411e6919a3d..9c7ca6e33a68 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -6,3 +6,11 @@ drm/i915 uAPI
 =
 
 .. kernel-doc:: include/uapi/drm/i915_drm.h
+
+drm/nouveau uAPI
+
+
+VM_BIND / EXEC uAPI
+---
+
+.. kernel-doc:: include/uapi/drm/nouveau_drm.h
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 853a327433d3..4d3a70529637 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -126,6 +126,209 @@ struct drm_nouveau_gem_cpu_fini {
__u32 handle;
 };
 
+/**
+ * struct drm_nouveau_sync - sync object
+ *
+ * This structure serves as synchronization mechanism for (potentially)
+ * asynchronous operations such as EXEC or VM_BIND.
+ */
+struct drm_nouveau_sync {
+   /**
+* @flags: the flags for a sync object
+*
+* The first 8 bits are used to determine the type of the sync object.
+*/
+   __u32 flags;
+#define DRM_NOUVEAU_SYNC_SYNCOBJ 0x0
+#define DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ 0x1
+#define DRM_NOUVEAU_SYNC_TYPE_MASK 0xf
+   /**
+* @handle: the handle of the sync object
+*/
+   __u32 handle;
+   /**
+* @timeline_value:
+*
+* The timeline point of the sync object in case the syncobj is of
+* type DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ.
+*/
+   __u64 timeline_value;
+};
+
+/**
+ * struct drm_nouveau_vm_init - GPU VA space init structure
+ *
+ * Used to initialize the GPU's VA space for a user client, telling the kernel
+ * which portion of the VA space is managed by the UMD and kernel respectively.
+ */
+struct drm_nouveau_vm_init {
+   /**
+* @unmanaged_addr: start address of the kernel managed VA space region
+*/
+   __u64 unmanaged_addr;
+   /**
+* @unmanaged_size: size of the kernel managed VA space region in bytes
+*/
+   __u64 unmanaged_size;
+};
+
+/**
+ * struct drm_nouveau_vm_bind_op - VM_BIND operation
+ *
+ * This structure represents a single VM_BIND operation. UMDs should pass
+ * an array of this structure via struct drm_nouveau_vm_bind's &op_ptr field.
+ */
+struct drm_nouveau_vm_bind_op {
+   /**
+* @op: the operation type
+*/
+   __u32 op;
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_MAP:
+ *
+ * Map a GEM object to the GPU's VA space. Optionally, the
+ * &DRM_NOUVEAU_VM_BIND_SPARSE flag can be passed to instruct the kernel to
+ * create sparse mappings for the given range.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_MAP 0x0
+/**
+ * @DRM_NOUVEAU_VM_BIND_OP_UNMAP:
+ *
+ * Unmap an existing mapping in the GPU's VA space. If the region the mapping
+ * is located in is a sparse region, new sparse mappings are created where the
+ * unmapped (memory backed) mapping was mapped previously. To remove a sparse
+ * region the &DRM_NOUVEAU_VM_BIND_SPARSE must be set.
+ */
+#define DRM_NOUVEAU_VM_BIND_OP_UNMAP 0x1
+   /**
+* @flags: the flags for a &drm_nouveau_vm_bind_op
+*/
+   __u32 flags;
+/**
+ * @DRM_NOUVEAU_VM_BIND_SPARSE:
+ *
+ * Indicates that an allocated VA space region should be sparse.
+ */
+#define DRM_NOUVEAU_VM_BIND_SPARSE (1 << 8)
+   /**
+* @handle: the handle of the DRM GEM object to map
+*/
+   __u32 handle;
+   /**
+* @pad: 32 bit padding, should be 0
+*/
+   __u32 pad;
+   /**
+* @addr:
+*
+* the address the VA space region or (memory backed) mapping should be 
mapped to
+*/
+   __u64 addr;
+   /**
+* @bo_offset: the offset within the BO backing the mapping
+*/
+   __u64 bo_offset;
+   /**
+* @range: the size of the requested mapping in bytes
+*/
+   __u64 range;
+};
+
+/**
+ * struct drm_nouveau_vm_bind - structure for DRM_IOCTL_NOUVEAU_VM_BIND
+ */
+struct drm_nouveau_vm_bind {
+   /**
+* @op_count: the number of _nouveau_vm_bind_op
+  

[Nouveau] [PATCH drm-next v4 04/14] drm: debugfs: provide infrastructure to dump a DRM GPU VA space

2023-06-06 Thread Danilo Krummrich
This commit adds a function to dump a DRM GPU VA space and a macro for
drivers to register the struct drm_info_list 'gpuvas' entry.

Most likely, most drivers might maintain one DRM GPU VA space per struct
drm_file, but there might also be drivers not having a fixed relation
between DRM GPU VA spaces and a DRM core infrastructure, hence we need the
indirection via the driver iterating its maintained DRM GPU VA spaces.

Signed-off-by: Danilo Krummrich 
---
 drivers/gpu/drm/drm_debugfs.c | 41 +++
 include/drm/drm_debugfs.h | 25 +
 2 files changed, 66 insertions(+)

diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 4855230ba2c6..82180fb1c200 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
@@ -175,6 +176,46 @@ static const struct file_operations drm_debugfs_fops = {
.release = single_release,
 };
 
+/**
+ * drm_debugfs_gpuva_info - dump the given DRM GPU VA space
+ * @m: pointer to the &seq_file to write
+ * @mgr: the &drm_gpuva_manager representing the GPU VA space
+ *
+ * Dumps the GPU VA mappings of a given DRM GPU VA manager.
+ *
+ * For each DRM GPU VA space drivers should call this function from their
+ * &drm_info_list's show callback.
+ *
+ * Returns: 0 on success, -ENODEV if the &mgr is not initialized
+ */
+int drm_debugfs_gpuva_info(struct seq_file *m,
+  struct drm_gpuva_manager *mgr)
+{
+   DRM_GPUVA_ITER(it, mgr, 0);
+   struct drm_gpuva *va, *kva = &mgr->kernel_alloc_node;
+
+   if (!mgr->name)
+   return -ENODEV;
+
+   seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n",
+  mgr->name, mgr->mm_start, mgr->mm_start + mgr->mm_range);
+   seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n",
+  kva->va.addr, kva->va.addr + kva->va.range);
+   seq_puts(m, "\n");
+   seq_puts(m, " VAs | start  | range  | end   
 | object | object offset\n");
+   seq_puts(m, 
"-\n");
+   drm_gpuva_iter_for_each(va, it) {
+   if (unlikely(va == &mgr->kernel_alloc_node))
+   continue;
+
+   seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 
0x%016llx | 0x%016llx\n",
+  va->va.addr, va->va.range, va->va.addr + 
va->va.range,
+  (u64)va->gem.obj, va->gem.offset);
+   }
+
+   return 0;
+}
+EXPORT_SYMBOL(drm_debugfs_gpuva_info);
 
 /**
  * drm_debugfs_create_files - Initialize a given set of debugfs files for DRM
diff --git a/include/drm/drm_debugfs.h b/include/drm/drm_debugfs.h
index 7616f457ce70..cb2c1956a214 100644
--- a/include/drm/drm_debugfs.h
+++ b/include/drm/drm_debugfs.h
@@ -34,6 +34,22 @@
 
 #include 
 #include 
+
+#include 
+
+/**
+ * DRM_DEBUGFS_GPUVA_INFO - &drm_info_list entry to dump a GPU VA space
+ * @show: the &drm_info_list's show callback
+ * @data: driver private data
+ *
+ * Drivers should use this macro to define a &drm_info_list entry to provide a
+ * debugfs file for dumping the GPU VA space regions and mappings.
+ *
+ * For each DRM GPU VA space drivers should call drm_debugfs_gpuva_info() from
+ * their @show callback.
+ */
+#define DRM_DEBUGFS_GPUVA_INFO(show, data) {"gpuvas", show, DRIVER_GEM_GPUVA, 
data}
+
 /**
  * struct drm_info_list - debugfs info list entry
  *
@@ -134,6 +150,9 @@ void drm_debugfs_add_file(struct drm_device *dev, const 
char *name,
 
 void drm_debugfs_add_files(struct drm_device *dev,
   const struct drm_debugfs_info *files, int count);
+
+int drm_debugfs_gpuva_info(struct seq_file *m,
+  struct drm_gpuva_manager *mgr);
 #else
 static inline void drm_debugfs_create_files(const struct drm_info_list *files,
int count, struct dentry *root,
@@ -155,6 +174,12 @@ static inline void drm_debugfs_add_files(struct drm_device 
*dev,
 const struct drm_debugfs_info *files,
 int count)
 {}
+
+static inline int drm_debugfs_gpuva_info(struct seq_file *m,
+struct drm_gpuva_manager *mgr)
+{
+   return 0;
+}
 #endif
 
 #endif /* _DRM_DEBUGFS_H_ */
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 03/14] drm: manager to keep track of GPUs VA mappings

2023-06-06 Thread Danilo Krummrich
Add infrastructure to keep track of GPU virtual address (VA) mappings
with a dedicated VA space manager implementation.

New UAPIs, motivated by Vulkan sparse memory bindings graphics drivers
start implementing, allow userspace applications to request multiple and
arbitrary GPU VA mappings of buffer objects. The DRM GPU VA manager is
intended to serve the following purposes in this context.

1) Provide infrastructure to track GPU VA allocations and mappings,
   making use of the maple_tree.

2) Generically connect GPU VA mappings to their backing buffers, in
   particular DRM GEM objects.

3) Provide a common implementation to perform more complex mapping
   operations on the GPU VA space. In particular splitting and merging
   of GPU VA mappings, e.g. for intersecting mapping requests or partial
   unmap requests.

Suggested-by: Dave Airlie 
Signed-off-by: Danilo Krummrich 
---
 Documentation/gpu/drm-mm.rst|   31 +
 drivers/gpu/drm/Makefile|1 +
 drivers/gpu/drm/drm_gem.c   |3 +
 drivers/gpu/drm/drm_gpuva_mgr.c | 1687 +++
 include/drm/drm_drv.h   |6 +
 include/drm/drm_gem.h   |   75 ++
 include/drm/drm_gpuva_mgr.h |  681 +
 7 files changed, 2484 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_gpuva_mgr.c
 create mode 100644 include/drm/drm_gpuva_mgr.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a52e6f4117d6..c9f120cfe730 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -466,6 +466,37 @@ DRM MM Range Allocator Function References
 .. kernel-doc:: drivers/gpu/drm/drm_mm.c
:export:
 
+DRM GPU VA Manager
+==
+
+Overview
+
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Overview
+
+Split and Merge
+---
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Split and Merge
+
+Locking
+---
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :doc: Locking
+
+
+DRM GPU VA Manager Function References
+--
+
+.. kernel-doc:: include/drm/drm_gpuva_mgr.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpuva_mgr.c
+   :export:
+
 DRM Buddy Allocator
 ===
 
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 9c6446eb3c83..8eeed446a078 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -45,6 +45,7 @@ drm-y := \
drm_vblank.o \
drm_vblank_work.o \
drm_vma_manager.o \
+   drm_gpuva_mgr.o \
drm_writeback.o
 drm-$(CONFIG_DRM_LEGACY) += \
drm_agpsupport.o \
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 1a5a2cd0d4ec..cd878ebddbd0 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -164,6 +164,9 @@ void drm_gem_private_object_init(struct drm_device *dev,
if (!obj->resv)
obj->resv = &obj->_resv;
 
+   if (drm_core_check_feature(dev, DRIVER_GEM_GPUVA))
+   drm_gem_gpuva_init(obj);
+
drm_vma_node_reset(&obj->vma_node);
INIT_LIST_HEAD(&obj->lru_node);
 }
diff --git a/drivers/gpu/drm/drm_gpuva_mgr.c b/drivers/gpu/drm/drm_gpuva_mgr.c
new file mode 100644
index ..dd8dd7fef14b
--- /dev/null
+++ b/drivers/gpu/drm/drm_gpuva_mgr.c
@@ -0,0 +1,1687 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Danilo Krummrich 
+ *
+ */
+
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * The DRM GPU VA Manager, represented by struct drm_gpuva_manager keeps track
+ * of a GPU's virtual address (VA) space and manages the corresponding virtual
+ * mappings represented by &drm_gpuva objects. It also keeps track of the
+ * mapping's backing &drm_gem_object buffers.
+ *
+ * _gem_object buffers maintain a list (and a corresponding list lock) of
+ * _gpuva objects representing 

[Nouveau] [PATCH drm-next v4 02/14] maple_tree: split up MA_STATE() macro

2023-06-06 Thread Danilo Krummrich
Split up the MA_STATE() macro such that components using the maple tree
can easily inherit from struct ma_state and build custom tree walk
macros to hide their internals from users.

Example:

struct sample_iterator {
struct ma_state mas;
struct sample_mgr *mgr;
};

\#define SAMPLE_ITERATOR(name, __mgr, start)\
struct sample_iterator name = { \
.mas = MA_STATE_INIT(&(__mgr)->mt, start, 0),   \
.mgr = __mgr,   \
}

\#define sample_iter_for_each_range(it__, entry__, end__) \
mas_for_each(&(it__).mas, entry__, end__)

--

struct sample *sample;
SAMPLE_ITERATOR(si, min);

sample_iter_for_each_range(&si, sample, max) {
frob(mgr, sample);
}

Signed-off-by: Danilo Krummrich 
---
 include/linux/maple_tree.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 1fadb5f5978b..87d55334f1c2 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -423,8 +423,8 @@ struct ma_wr_state {
 #define MA_ERROR(err) \
((struct maple_enode *)(((unsigned long)err << 2) | 2UL))
 
-#define MA_STATE(name, mt, first, end) \
-   struct ma_state name = {\
+#define MA_STATE_INIT(mt, first, end)  \
+   {   \
.tree = mt, \
.index = first, \
.last = end,\
@@ -435,6 +435,9 @@ struct ma_wr_state {
.mas_flags = 0, \
}
 
+#define MA_STATE(name, mt, first, end) \
+   struct ma_state name = MA_STATE_INIT(mt, first, end)
+
 #define MA_WR_STATE(name, ma_state, wr_entry)  \
struct ma_wr_state name = { \
.mas = ma_state,\
-- 
2.40.1



[Nouveau] [PATCH drm-next v4 01/14] drm: execution context for GEM buffers v4

2023-06-06 Thread Danilo Krummrich
From: Christian König 

This adds the infrastructure for an execution context for GEM buffers
which is similar to the existing TTMs execbuf util and intended to replace
it in the long term.

The basic functionality is that we abstract the necessary loop to lock
many different GEM buffers with automated deadlock and duplicate handling.

v2: drop xarray and use dynamic resized array instead, the locking
overhead is unnecessary and measurable.
v3: drop duplicate tracking, radeon is really the only one needing that.
v4: fixes issues pointed out by Danilo, some typos in comments and a
helper for lock arrays of GEM objects.

Signed-off-by: Christian König 
---
 Documentation/gpu/drm-mm.rst |  12 ++
 drivers/gpu/drm/Kconfig  |   6 +
 drivers/gpu/drm/Makefile |   2 +
 drivers/gpu/drm/drm_exec.c   | 278 +++
 include/drm/drm_exec.h   | 119 +++
 5 files changed, 417 insertions(+)
 create mode 100644 drivers/gpu/drm/drm_exec.c
 create mode 100644 include/drm/drm_exec.h

diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst
index a79fd3549ff8..a52e6f4117d6 100644
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@@ -493,6 +493,18 @@ DRM Sync Objects
 .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
 
+DRM Execution context
+=
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :doc: Overview
+
+.. kernel-doc:: include/drm/drm_exec.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_exec.c
+   :export:
+
 GPU Scheduler
 =
 
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index ba3fb04bb691..2dc81eb062eb 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -201,6 +201,12 @@ config DRM_TTM
  GPU memory types. Will be enabled automatically if a device driver
  uses it.
 
+config DRM_EXEC
+   tristate
+   depends on DRM
+   help
+ Execution context for command submissions
+
 config DRM_BUDDY
tristate
depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a33257d2bc7f..9c6446eb3c83 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += 
drm_panel_orientation_quirks.o
 #
 # Memory-management helpers
 #
+#
+obj-$(CONFIG_DRM_EXEC) += drm_exec.o
 
 obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o
 
diff --git a/drivers/gpu/drm/drm_exec.c b/drivers/gpu/drm/drm_exec.c
new file mode 100644
index ..18071bff20f4
--- /dev/null
+++ b/drivers/gpu/drm/drm_exec.c
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+
+#include 
+#include 
+#include 
+
+/**
+ * DOC: Overview
+ *
+ * This component mainly abstracts the retry loop necessary for locking
+ * multiple GEM objects while preparing hardware operations (e.g. command
+ * submissions, page table updates etc..).
+ *
+ * If a contention is detected while locking a GEM object the cleanup procedure
+ * unlocks all previously locked GEM objects and locks the contended one first
+ * before locking any further objects.
+ *
+ * After an object is locked fence slots can optionally be reserved on the
+ * dma_resv object inside the GEM object.
+ *
+ * A typical usage pattern should look like this::
+ *
+ * struct drm_gem_object *obj;
+ * struct drm_exec exec;
+ * unsigned long index;
+ * int ret;
+ *
+ * drm_exec_init(&exec, true);
+ * drm_exec_while_not_all_locked(&exec) {
+ * ret = drm_exec_prepare_obj(&exec, boA, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ *
+ * ret = drm_exec_prepare_obj(&exec, boB, 1);
+ * drm_exec_continue_on_contention(&exec);
+ * if (ret)
+ * goto error;
+ * }
+ *
+ * drm_exec_for_each_locked_object(&exec, index, obj) {
+ * dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_READ);
+ * ...
+ * }
+ * drm_exec_fini(&exec);
+ *
+ * See struct drm_exec for more details.
+ */
+
+/* Dummy value used to initially enter the retry loop */
+#define DRM_EXEC_DUMMY (void*)~0
+
+/* Unlock all objects and drop references */
+static void drm_exec_unlock_all(struct drm_exec *exec)
+{
+   struct drm_gem_object *obj;
+   unsigned long index;
+
+   drm_exec_for_each_locked_object(exec, index, obj) {
+   dma_resv_unlock(obj->resv);
+   drm_gem_object_put(obj);
+   }
+
+   drm_gem_object_put(exec->prelocked);
+   exec->prelocked = NULL;
+}
+
+/**
+ * drm_exec_init - initialize a drm_exec object
+ * @exec: the drm_exec object to initialize
+ * @interruptible: if locks should be acquired interruptible
+ *
+ * Initialize the object and make sure that we can track locked objects.
+ */
+void drm_exec_init(struct drm_exec *exec, bool interruptible)
+{
+   exec->interruptible = interruptible;
+   exec->objects = 

[Nouveau] [PATCH drm-next v4 00/14] [RFC] DRM GPUVA Manager & Nouveau VM_BIND UAPI

2023-06-06 Thread Danilo Krummrich
Furthermore, with the DRM GPUVA manager it provides a new DRM core feature to
keep track of GPU virtual address (VA) mappings in a more generic way.

The DRM GPUVA manager is intended to help drivers implement userspace-manageable
GPU VA spaces in reference to the Vulkan API. In order to achieve this goal it
serves the following purposes in this context.

1) Provide infrastructure to track GPU VA allocations and mappings,
   making use of the maple_tree.

2) Generically connect GPU VA mappings to their backing buffers, in
   particular DRM GEM objects.

3) Provide a common implementation to perform more complex mapping
   operations on the GPU VA space. In particular splitting and merging
   of GPU VA mappings, e.g. for intersecting mapping requests or partial
   unmap requests.

The new VM_BIND Nouveau UAPI builds on top of the DRM GPUVA manager, itself
providing the following new interfaces.

1) Initialize a GPU VA space via the new DRM_IOCTL_NOUVEAU_VM_INIT ioctl
   for UMDs to specify the portion of VA space managed by the kernel and
   userspace, respectively.

2) Allocate and free a VA space region as well as bind and unbind memory
   to the GPUs VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.

3) Execute push buffers with the new DRM_IOCTL_NOUVEAU_EXEC ioctl.

Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC make use of the DRM
scheduler to queue jobs and support asynchronous processing with DRM syncobjs
as the synchronization mechanism.

By default DRM_IOCTL_NOUVEAU_VM_BIND does synchronous processing,
DRM_IOCTL_NOUVEAU_EXEC supports asynchronous processing only.

The new VM_BIND UAPI for Nouveau also makes use of drm_exec (execution context
for GEM buffers) by Christian König. Since the patch implementing drm_exec was
not yet merged into drm-next it is part of this series, as well as a small fix
for this patch, which was found while testing this series.

This patch series is also available at [1].

There is a Mesa NVK merge request by Dave Airlie [2] implementing the
corresponding userspace parts for this series.

The Vulkan CTS test suite passes the sparse binding and sparse residency test
cases for the new UAPI together with Dave's Mesa work.

There are also some test cases in the igt-gpu-tools project [3] for the new UAPI
and hence the DRM GPU VA manager. However, most of them are testing the DRM GPU
VA manager's logic through Nouveau's new UAPI and should be considered just as
helper for implementation.

However, I absolutely intend to change those test cases to proper kunit test
cases for the DRM GPUVA manager, once and if we agree on its usefulness and
design.

[1] https://gitlab.freedesktop.org/nouvelles/kernel/-/tree/new-uapi-drm-next /
https://gitlab.freedesktop.org/nouvelles/kernel/-/merge_requests/1
[2] https://gitlab.freedesktop.org/nouveau/mesa/-/merge_requests/150/
[3] https://gitlab.freedesktop.org/dakr/igt-gpu-tools/-/tree/wip_nouveau_vm_bind

Changes in V2:
==
  Nouveau:
- Reworked the Nouveau VM_BIND UAPI to avoid memory allocations in fence
  signalling critical sections. Updates to the VA space are split up in 
three
  separate stages, where only the 2. stage executes in a fence signalling
  critical section:

1. update the VA space, allocate new structures and page tables
2. (un-)map the requested memory bindings
3. free structures and page tables

- Separated generic job scheduler code from specific job implementations.
- Separated the EXEC and VM_BIND implementation of the UAPI.
- Reworked the locking parts of the nvkm/vmm RAW interface, such that
  (un-)map operations can be executed in fence signalling critical sections.

  GPUVA Manager:
- made drm_gpuva_regions optional for users of the GPUVA manager
- allow NULL GEMs for drm_gpuva entries
- switched from drm_mm to maple_tree to track drm_gpuva / drm_gpuva_region
  entries
- provide callbacks for users to allocate custom drm_gpuva_op structures to
  allow inheritance
- added user bits to drm_gpuva_flags
- added a prefetch operation type in order to support generating prefetch
  operations in the same way other operations are generated
- hand the responsibility for mutual exclusion for a GEM's
  drm_gpuva list to the user; simplified corresponding (un-)link functions

  Maple Tree:
- I added two maple tree patches to the series, one to support custom tree
  walk macros and one to hand the locking responsibility to the user of the
  GPUVA manager without pre-defined lockdep checks.

Changes in V3:
==
  Nouveau:
- Reworked the Nouveau VM_BIND UAPI to do the job cleanup (including page
  table cleanup) within a workqueue rather than the job_free() callback of
  the scheduler itself. A job_free() callback can stall the execution (run()
  callback) of the next job in the queue. Since the page table cleanup
  

Re: [Nouveau] [Intel-gfx] [PATCH v2 1/2] vgaarb: various coding style and comments fix

2023-06-06 Thread Bjorn Helgaas
Match the subject line style:

  $ git log --oneline drivers/pci/vgaarb.c
  f321c35feaee PCI/VGA: Replace full MIT license text with SPDX identifier
  d5109fe4d1ec PCI/VGA: Use unsigned format string to print lock counts
  4e6c91847a7f PCI/VGA: Log bridge control messages when adding devices
  dc593fd48abb PCI/VGA: Remove empty vga_arb_device_card_gone()
  ...

Subject line should be a summary of the commit log, not just "various
style fixes".  This one needs to say something about
vga_str_to_iostate().

On Mon, Jun 05, 2023 at 04:58:30AM +0800, Sui Jingfeng wrote:
> From: Sui Jingfeng 
> 
> To keep consistent with vga_iostate_to_str() function, the third argument
> of vga_str_to_iostate() function should be 'unsigned int *'.
> 
> Signed-off-by: Sui Jingfeng 
> ---
>  drivers/pci/vgaarb.c   | 29 +++--
>  include/linux/vgaarb.h |  8 +++-
>  2 files changed, 18 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index 5a696078b382..e40e6e5e5f03 100644
> --- a/drivers/pci/vgaarb.c
> +++ b/drivers/pci/vgaarb.c
> @@ -61,7 +61,6 @@ static bool vga_arbiter_used;
>  static DEFINE_SPINLOCK(vga_lock);
>  static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue);
>  
> -
>  static const char *vga_iostate_to_str(unsigned int iostate)
>  {
>   /* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */
> @@ -77,10 +76,12 @@ static const char *vga_iostate_to_str(unsigned int 
> iostate)
>   return "none";
>  }
>  
> -static int vga_str_to_iostate(char *buf, int str_size, int *io_state)
> +static int vga_str_to_iostate(char *buf, int str_size, unsigned int 
> *io_state)
>  {
> - /* we could in theory hand out locks on IO and mem
> -  * separately to userspace but it can cause deadlocks */
> + /*
> +  * we could in theory hand out locks on IO and mem
> +  * separately to userspace but it can cause deadlocks
> +  */

Omit all the comment formatting changes.  They are distractions from the
vga_str_to_iostate() parameter change.

I think this patch should be the single line change to the
vga_str_to_iostate() prototype so it matches the callers.

If you want to do the other comment formatting changes, they're fine,
but they should be all together in a separate patch that clearly
doesn't change the generated code.

Bjorn


Re: [Nouveau] [PATCH v2] drm/nouveau: bring back blit subchannel for pre nv50 GPUs

2023-06-06 Thread Karol Herbst
On Fri, May 26, 2023 at 5:21 PM Ilia Mirkin  wrote:
>
> On Fri, May 26, 2023 at 5:11 AM Karol Herbst  wrote:
> >
> > 1ba6113a90a0 removed a lot of the kernel GPU channel, but method 0x128
> > was important as otherwise the GPU spams us with `CACHE_ERROR` messages.
> >
> > We use the blit subchannel inside our vblank handling, so we should keep
> > at least this part.
> >
> > v2: Only do it for NV11+ GPUs
> >
> > Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/201
> > Fixes: 4a16dd9d18a0 ("drm/nouveau/kms: switch to drm fbdev helpers")
> > Signed-off-by: Karol Herbst 
> > ---
> >  drivers/gpu/drm/nouveau/nouveau_chan.c |  1 +
> >  drivers/gpu/drm/nouveau/nouveau_chan.h |  1 +
> >  drivers/gpu/drm/nouveau/nouveau_drm.c  | 20 +---
> >  3 files changed, 19 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c 
> > b/drivers/gpu/drm/nouveau/nouveau_chan.c
> > index e648ecd0c1a0..3dfbc374478e 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_chan.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
> > @@ -90,6 +90,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
> > if (cli)
> > nouveau_svmm_part(chan->vmm->svmm, chan->inst);
> >
> > +   nvif_object_dtor(>blit);
> > nvif_object_dtor(>nvsw);
> > nvif_object_dtor(>gart);
> > nvif_object_dtor(>vram);
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h 
> > b/drivers/gpu/drm/nouveau/nouveau_chan.h
> > index e06a8ffed31a..bad7466bd0d5 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_chan.h
> > +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
> > @@ -53,6 +53,7 @@ struct nouveau_channel {
> > u32 user_put;
> >
> > struct nvif_object user;
> > +   struct nvif_object blit;
> >
> > struct nvif_event kill;
> > atomic_t killed;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c 
> > b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > index cc7c5b4a05fd..9512f1c2f871 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> > @@ -369,15 +369,29 @@ nouveau_accel_gr_init(struct nouveau_drm *drm)
> > ret = nvif_object_ctor(>channel->user, "drmNvsw",
> >NVDRM_NVSW, 
> > nouveau_abi16_swclass(drm),
> >NULL, 0, >channel->nvsw);
> > +
> > +   if (ret == 0 && device->info.chipset >= 0x11) {
>
> Can you double-check that this is needed on NV15? IIRC there's some
> non-linearity of chipsets here which is why we had (some long time
> ago, not sure if it's still there), a chip class which would simplify
> such checks.
>

yeah, it's fine. The old code before 4a16dd9d18a0 had a
"device->info.chipset >= 0x11 ? 0x009f : 0x005f" check when creating
the blit object.

> Cheers,
>
>   -ilia
>